add web
This commit is contained in:
172
scanner/diff_parser.py
Normal file
172
scanner/diff_parser.py
Normal file
@@ -0,0 +1,172 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Diff 解析器 - 将扫描问题与代码片段关联
|
||||
"""
|
||||
import re
|
||||
import logging
|
||||
from typing import Dict, List, Any, Optional
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class CodeChunk:
    """Old and new content of one file as reconstructed from a unified diff.

    Only lines that appear in the diff (context, added and removed lines)
    are available; the parts of the file outside the hunks are unknown.
    """

    # Path of the file, taken from the ``b/`` side of the ``diff --git`` header.
    file_path: str
    # Context + removed lines of every hunk, each terminated by "\n".
    old_content: str = ""
    # Context + added lines of every hunk, each terminated by "\n".
    new_content: str = ""
    # 1-based line number in the old file where the first hunk starts.
    old_start: int = 0
    # 1-based line number in the new file where the first hunk starts.
    new_start: int = 0
    # One dict per hunk with the keys ``old_start``, ``new_start``,
    # ``old_lines`` and ``new_lines``; ``new_lines[i]`` is line
    # ``new_start + i`` of the new file (likewise for the old side).
    hunks: List[Dict] = field(default_factory=list)


class DiffParser:
    """Parse ``git diff`` output into one :class:`CodeChunk` per file.

    The parsed files are available through ``files`` (keyed by the path on
    the ``b/`` side of the ``diff --git`` header), so that line numbers
    reported for the new version of a file can be mapped back to the
    changed code.
    """

    _FILE_HEADER_RE = re.compile(r'diff --git a/(.+) b/(.+)')
    # Captures start and optional length for both sides; an omitted length
    # means 1 in the unified diff format.
    _HUNK_HEADER_RE = re.compile(r'@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@')
    # First characters a line inside a hunk may have ('' = blank context
    # line whose single space was stripped, '\\' = "\ No newline ..." note).
    _HUNK_LINE_MARKERS = frozenset(('', ' ', '+', '-', '\\'))

    def __init__(self, diff_text: str):
        """Parse ``diff_text`` immediately; an empty diff yields no files."""
        self.diff_text = diff_text
        self.files: Dict[str, CodeChunk] = {}
        self._parse()

    def _parse(self) -> None:
        """Split the diff into files and hunks and fill ``self.files``.

        The line counts from each hunk header decide which lines belong to
        the hunk. This keeps added/removed lines whose own text starts with
        ``++`` or ``--`` from being mistaken for ``+++``/``---`` file
        headers, which only occur outside hunks.
        """
        if not self.diff_text:
            return

        chunk: Optional[CodeChunk] = None
        hunk: Optional[Dict[str, Any]] = None
        old_left = new_left = 0  # lines still expected by the open hunk
        old_buf: List[str] = []
        new_buf: List[str] = []
        sections = []  # (chunk, old_buf, new_buf) for every file header seen

        for line in self.diff_text.split('\n'):
            marker = line[:1]
            in_hunk = (
                hunk is not None
                and (old_left > 0 or new_left > 0)
                and marker in self._HUNK_LINE_MARKERS
            )

            if not in_hunk:
                # Either the hunk is complete or it was cut short.
                old_left = new_left = 0

                file_match = self._FILE_HEADER_RE.match(line)
                if file_match:
                    # Key by the new path: new_content belongs to it, and it
                    # differs from the old path only for renames/copies.
                    chunk = CodeChunk(file_path=file_match.group(2))
                    self.files[chunk.file_path] = chunk
                    old_buf, new_buf = [], []
                    sections.append((chunk, old_buf, new_buf))
                    hunk = None
                    continue

                hunk_match = self._HUNK_HEADER_RE.match(line)
                if hunk_match and chunk is not None:
                    old_start = int(hunk_match.group(1))
                    new_start = int(hunk_match.group(3))
                    old_left = int(hunk_match.group(2) or 1)
                    new_left = int(hunk_match.group(4) or 1)
                    if not chunk.hunks:
                        chunk.old_start = old_start
                        chunk.new_start = new_start
                    hunk = {
                        'old_start': old_start,
                        'new_start': new_start,
                        'old_lines': [],
                        'new_lines': [],
                    }
                    chunk.hunks.append(hunk)
                    continue

                # Outside a hunk only header/metadata lines are expected.
                # Content-looking lines are still collected (best effort)
                # when no hunk header has been recognised for this file.
                if (
                    chunk is None
                    or hunk is not None
                    or line.startswith(('+++', '---'))
                ):
                    continue

            text = line[1:]
            if marker == '+':
                in_old, in_new = False, True
            elif marker == '-':
                in_old, in_new = True, False
            elif marker == ' ' or (marker == '' and old_left > 0 and new_left > 0):
                in_old, in_new = True, True
            else:
                # "\ No newline at end of file", metadata or stray text.
                continue

            if in_old:
                old_buf.append(text)
                old_left -= 1
                if hunk is not None:
                    hunk['old_lines'].append(text)
            if in_new:
                new_buf.append(text)
                new_left -= 1
                if hunk is not None:
                    hunk['new_lines'].append(text)

        for parsed, old_lines, new_lines in sections:
            parsed.old_content = ''.join(text + '\n' for text in old_lines)
            parsed.new_content = ''.join(text + '\n' for text in new_lines)

    def get_file_content(self, file_path: str) -> Optional[CodeChunk]:
        """Return the parsed chunk for ``file_path``, or ``None`` if absent."""
        return self.files.get(file_path)

    def get_line_context(self, file_path: str, line_number: int, context_lines: int = 3) -> Optional[Dict[str, Any]]:
        """Return the diff lines surrounding a line of the new file.

        Args:
            file_path: Key of the file in ``files``.
            line_number: 1-based line number in the new version of the file.
            context_lines: Number of lines to include before and after the
                requested line; the window never leaves the enclosing hunk.

        Returns:
            ``{'file', 'line', 'context'}`` where ``context`` is a list of
            ``{'line_number', 'code', 'is_issue_line'}`` dicts, or ``None``
            when the file is unknown or the line is not covered by any hunk.
        """
        chunk = self.files.get(file_path)
        if not chunk:
            return None

        radius = max(context_lines, 0)
        for hunk in chunk.hunks:
            lines = hunk['new_lines']
            offset = line_number - hunk['new_start']
            if not 0 <= offset < len(lines):
                continue

            first = max(0, offset - radius)
            last = min(len(lines), offset + radius + 1)
            context = [
                {
                    'line_number': hunk['new_start'] + idx,
                    'code': lines[idx],
                    'is_issue_line': idx == offset
                }
                for idx in range(first, last)
            ]
            return {
                'file': file_path,
                'line': line_number,
                'context': context
            }

        return None
|
||||
|
||||
|
||||
def merge_issues_with_code(scan_results: Dict[str, Any], diff: str) -> Dict[str, Any]:
    """Attach code snippets from ``diff`` to every issue in ``scan_results``.

    With an empty diff the input is returned untouched. Otherwise a new
    dict is built with a ``scanners`` list (one entry per dict-valued
    scanner in the input, its issues enriched via
    ``enrich_issue_with_code``), plus the ``summary``, ``total_issues``
    and, when present, ``ai`` entries copied from the input.
    """
    if not diff:
        return scan_results

    diff_parser = DiffParser(diff)
    # Top-level keys that are carried over as-is instead of being scanners.
    passthrough_keys = ['summary', 'total_issues', 'ai']

    merged: Dict[str, Any] = {
        'scanners': [],
        'summary': scan_results.get('summary', {}),
        'total_issues': scan_results.get('total_issues', 0),
    }

    for name, data in scan_results.items():
        if name in passthrough_keys or not isinstance(data, dict):
            continue

        merged['scanners'].append({
            'name': name,
            'issues': [
                enrich_issue_with_code(item, diff_parser)
                for item in data.get('issues', [])
            ],
            'file_count': data.get('file_count', 0),
            'total_issues': data.get('total_issues', 0),
        })

    if 'ai' in scan_results:
        merged['ai'] = scan_results['ai']

    return merged
|
||||
|
||||
|
||||
def _coerce_line_number(value: Any) -> int:
    """Return ``value`` as a positive line number, or 0 if it is not one."""
    try:
        number = int(value)
    except (TypeError, ValueError):
        return 0
    return number if number > 0 else 0


def _match_diff_path(file_path: str, diff_paths) -> Optional[str]:
    """Pick the diff path that best corresponds to ``file_path``.

    Preference order: exact match, then a suffix match on a ``/`` boundary
    in either direction (absolute vs. repository-relative paths), and only
    then the loose suffix/substring match, so that ``a.py`` is not paired
    with ``src/data.py`` when ``src/a.py`` is part of the diff.
    """
    paths = list(diff_paths)
    if file_path in paths:
        return file_path

    for path in paths:
        if file_path.endswith('/' + path) or path.endswith('/' + file_path):
            return path

    for path in paths:
        if file_path.endswith(path) or path.endswith(file_path) or file_path in path:
            return path

    return None


def enrich_issue_with_code(issue: Dict[str, Any], parser: "DiffParser") -> Dict[str, Any]:
    """Return a copy of ``issue`` with a ``code_context`` entry when possible.

    Args:
        issue: Scanner issue; ``file`` and ``line`` are read, and
            ``description``/``message`` are searched for a "line N" hint
            when no usable line number is given.
        parser: Parsed diff providing ``files``, ``get_line_context`` and
            ``get_file_content``.

    Returns:
        A shallow copy of ``issue``. If the file is found in the diff,
        ``file`` is replaced by the diff's path and ``code_context`` holds
        either the context around the line or, failing that, a preview of
        the first 10 lines of the file's new content.
    """
    enriched = issue.copy()

    file_path = issue.get('file', '')
    if not file_path:
        return enriched

    # Scanners may report the line as a string, or not at all.
    line_number = _coerce_line_number(issue.get('line', 0))
    if not line_number:
        desc = issue.get('description') or issue.get('message') or ''
        line_match = re.search(r'line[:#]?\s*(\d+)', str(desc), re.IGNORECASE)
        if line_match:
            line_number = int(line_match.group(1))

    matched_path = _match_diff_path(file_path, parser.files)
    if not matched_path:
        return enriched

    enriched['file'] = matched_path

    if line_number:
        context = parser.get_line_context(matched_path, line_number)
        if context:
            enriched['code_context'] = context

    # Fall back to a short preview when no line-level context is available
    # (an existing 'code_context' on the incoming issue is left untouched).
    if 'code_context' not in enriched:
        chunk = parser.get_file_content(matched_path)
        if chunk and chunk.new_content:
            lines = chunk.new_content.split('\n')
            enriched['code_context'] = {
                'file': matched_path,
                'line': line_number or 1,
                'preview': '\n'.join(lines[:10]),
                'has_more': len(lines) > 10
            }

    return enriched
|
||||
Reference in New Issue
Block a user