add web

2026-03-11 21:16:47 +08:00
parent 459a8cb295
commit 14680f053e
8 changed files with 1557 additions and 39 deletions
--- a/scanner/diff_parser.py
+++ b/scanner/diff_parser.py
@@ -0,0 +1,172 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Diff 解析器 - 将扫描问题与代码片段关联
+"""
+import re
+import logging
+from typing import Dict, List, Any, Optional
+from dataclasses import dataclass, field
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class CodeChunk:
+    """Changed content of one file, reconstructed from a unified diff.
+
+    ``old_content`` and ``new_content`` are the pre- and post-change sides of
+    all hunks of the file, concatenated in diff order with every line
+    newline-terminated. They contain hunk fragments, not the whole file.
+    """
+    file_path: str
+    old_content: str = ""
+    new_content: str = ""
+    # Start lines taken from the file's first hunk header (0 until one is seen).
+    old_start: int = 0
+    new_start: int = 0
+    # One dict per hunk: 'old_start', 'new_start', 'old_lines', 'new_lines'.
+    hunks: List[Dict] = field(default_factory=list)
+
+
+class DiffParser:
+    """Parse ``git diff`` text into one :class:`CodeChunk` per changed file.
+
+    Files are keyed by the ``a/`` path of their ``diff --git`` header and are
+    available through ``self.files`` once the constructor returns.
+    """
+
+    # Compiled once; ``_parse`` applies both patterns to every diff line.
+    _FILE_HEADER_RE = re.compile(r'diff --git a/(.+) b/(.+)')
+    _HUNK_HEADER_RE = re.compile(r'@@ -(\d+),?\d* \+(\d+),?\d* @@')
+
+    def __init__(self, diff_text: str):
+        """Store *diff_text* and parse it immediately."""
+        self.diff_text = diff_text
+        self.files: Dict[str, CodeChunk] = {}
+        self._parse()
+
+    def _parse(self):
+        """Split the diff into files and hunks and fill ``self.files``."""
+        if not self.diff_text:
+            return
+
+        current_chunk = None
+        current_hunk = None
+        for line in self.diff_text.split('\n'):
+            file_match = self._FILE_HEADER_RE.match(line)
+            if file_match:
+                current_chunk = CodeChunk(file_path=file_match.group(1))
+                self.files[current_chunk.file_path] = current_chunk
+                # The '---' / '+++' lines that follow are file headers, not
+                # content, so leave hunk state until the next '@@' line.
+                current_hunk = None
+                continue
+
+            hunk_match = self._HUNK_HEADER_RE.match(line)
+            if hunk_match and current_chunk is not None:
+                current_hunk = {
+                    'old_start': int(hunk_match.group(1)),
+                    'new_start': int(hunk_match.group(2)),
+                    'old_lines': [],
+                    'new_lines': [],
+                }
+                if not current_chunk.hunks:
+                    # The concatenated content begins at the first hunk.
+                    current_chunk.old_start = current_hunk['old_start']
+                    current_chunk.new_start = current_hunk['new_start']
+                current_chunk.hunks.append(current_hunk)
+                continue
+
+            # Completely empty lines are skipped, as are any lines outside
+            # a hunk (index/mode/'---'/'+++' headers).
+            if current_hunk is None or not line:
+                continue
+
+            # Inside a hunk the first character alone decides the side, so
+            # content such as '++i' or '-- comment' is no longer mistaken
+            # for a '+++' / '---' file header.
+            marker, text = line[0], line[1:]
+            if marker == '+':
+                current_hunk['new_lines'].append(text)
+            elif marker == '-':
+                current_hunk['old_lines'].append(text)
+            elif marker == ' ':
+                current_hunk['old_lines'].append(text)
+                current_hunk['new_lines'].append(text)
+            # Anything else (e.g. '\ No newline at end of file') is ignored.
+
+        for parsed in self.files.values():
+            parsed.old_content = ''.join(
+                text + '\n' for hunk in parsed.hunks for text in hunk['old_lines'])
+            parsed.new_content = ''.join(
+                text + '\n' for hunk in parsed.hunks for text in hunk['new_lines'])
+
+    def get_file_content(self, file_path: str) -> Optional[CodeChunk]:
+        """Return the parsed chunk for *file_path*, or ``None`` if absent."""
+        return self.files.get(file_path)
+
+    def get_line_context(self, file_path: str, line_number: int, context_lines: int = 3) -> Optional[Dict[str, Any]]:
+        """Return the diff lines surrounding a line of the new file.
+
+        Args:
+            file_path: Key of the file in ``self.files``.
+            line_number: 1-based line number in the post-change file.
+            context_lines: Lines to include on each side of the target line.
+
+        Returns:
+            ``{'file', 'line', 'context'}`` where ``context`` is a list of
+            ``{'line_number', 'code', 'is_issue_line'}`` dicts numbered with
+            real new-file line numbers, or ``None`` when the file is unknown
+            or the line is not covered by any hunk. Context never crosses a
+            hunk boundary because hunks are not contiguous in the file.
+        """
+        chunk = self.files.get(file_path)
+        if chunk is None:
+            return None
+
+        for hunk in chunk.hunks:
+            hunk_lines = hunk['new_lines']
+            offset = line_number - hunk['new_start']
+            if not 0 <= offset < len(hunk_lines):
+                continue
+
+            first = max(0, offset - context_lines)
+            last = min(len(hunk_lines), offset + context_lines + 1)
+            context = [
+                {
+                    'line_number': hunk['new_start'] + index,
+                    'code': hunk_lines[index],
+                    'is_issue_line': index == offset
+                }
+                for index in range(first, last)
+            ]
+            return {
+                'file': file_path,
+                'line': line_number,
+                'context': context
+            }
+
+        return None
+
+
+def merge_issues_with_code(scan_results: Dict[str, Any], diff: str) -> Dict[str, Any]:
+    """Attach diff code snippets to every issue in the scan results.
+
+    With an empty *diff* the input mapping is returned untouched. Otherwise
+    a new mapping is built with a ``scanners`` list (one entry per
+    dict-valued scanner section), the original ``summary`` and
+    ``total_issues`` values, and the ``ai`` section when present.
+    """
+    if not diff:
+        return scan_results
+
+    parser = DiffParser(diff)
+    # Top-level keys that are report metadata rather than scanner sections.
+    reserved_keys = ('summary', 'total_issues', 'ai')
+
+    scanners = []
+    for scanner_name, scanner_data in scan_results.items():
+        # Skip metadata keys and any section that is not a dict.
+        if scanner_name in reserved_keys or not isinstance(scanner_data, dict):
+            continue
+
+        scanners.append({
+            'name': scanner_name,
+            'issues': [
+                enrich_issue_with_code(issue, parser)
+                for issue in scanner_data.get('issues', [])
+            ],
+            'file_count': scanner_data.get('file_count', 0),
+            'total_issues': scanner_data.get('total_issues', 0)
+        })
+
+    enriched_results = {
+        'scanners': scanners,
+        'summary': scan_results.get('summary', {}),
+        'total_issues': scan_results.get('total_issues', 0)
+    }
+
+    # The AI section is passed through as-is.
+    if 'ai' in scan_results:
+        enriched_results['ai'] = scan_results['ai']
+
+    return enriched_results
+
+
+def enrich_issue_with_code(issue: Dict[str, Any], parser: "DiffParser") -> Dict[str, Any]:
+    """Return a copy of *issue* with a ``code_context`` entry when possible.
+
+    Args:
+        issue: Scanner issue; reads ``file``, ``line``, ``description`` and
+            ``message``.
+        parser: Parsed diff used to look up the file and its lines.
+
+    Returns:
+        A shallow copy of *issue*. When the issue's file is found in the
+        diff, ``file`` is rewritten to the diff's path and ``code_context``
+        is set either to the line context from ``parser.get_line_context``
+        or, failing that, to a preview of the first 10 lines of the file's
+        new content. An existing ``code_context`` is never overwritten by
+        the preview.
+    """
+    enriched = issue.copy()
+
+    file_path = issue.get('file', '')
+    if not file_path:
+        return enriched
+
+    # Scanners may report the line as a string; anything unusable counts
+    # as "no line" so the description fallback below still gets a chance.
+    raw_line = issue.get('line', 0)
+    try:
+        line_number = int(raw_line) if raw_line else 0
+    except (TypeError, ValueError):
+        line_number = 0
+
+    if not line_number:
+        # Either field may be present but None.
+        desc = issue.get('description') or issue.get('message') or ''
+        line_match = re.search(r'line[:#]?\s*(\d+)', str(desc), re.IGNORECASE)
+        if line_match:
+            line_number = int(line_match.group(1))
+
+    # Normalise separators and a leading './' before comparing paths.
+    wanted = str(file_path).replace('\\', '/')
+    while wanted.startswith('./'):
+        wanted = wanted[2:]
+
+    matched_path = None
+    if wanted in parser.files:
+        matched_path = wanted
+    else:
+        # Accept a suffix match only on a path-component boundary, so that
+        # 'a.py' is never attributed to 'scanner/data.py'.
+        for path in parser.files:
+            if wanted.endswith('/' + path) or path.endswith('/' + wanted):
+                matched_path = path
+                break
+
+    if matched_path:
+        enriched['file'] = matched_path
+    if matched_path and line_number:
+        context = parser.get_line_context(matched_path, line_number)
+        if context:
+            enriched['code_context'] = context
+
+    if 'code_context' not in enriched and matched_path:
+        # No line-level context: fall back to the head of the changed code.
+        chunk = parser.get_file_content(matched_path)
+        if chunk and chunk.new_content:
+            lines = chunk.new_content.split('\n')
+            enriched['code_context'] = {
+                'file': matched_path,
+                'line': line_number or 1,
+                'preview': '\n'.join(lines[:10]),
+                'has_more': len(lines) > 10
+            }
+
+    return enriched