#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Diff parser - associates scan issues with code snippets taken from a diff.
"""

import re
import logging
from typing import Dict, Iterable, List, Any, Optional, Tuple
from dataclasses import dataclass, field

logger = logging.getLogger(__name__)

# "diff --git a/<old path> b/<new path>" file header.
_DIFF_HEADER_RE = re.compile(r'diff --git a/(.+) b/(.+)')
# "@@ -old_start[,old_count] +new_start[,new_count] @@" hunk header.
_HUNK_HEADER_RE = re.compile(r'@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@')
# Line-number hint embedded in an issue description, e.g. "line 12" / "Line: 12".
_LINE_HINT_RE = re.compile(r'line[:#]?\s*(\d+)', re.IGNORECASE)
# Number of leading lines shown in a file-level preview.
_PREVIEW_LINES = 10
# Top-level keys of the scan results that are not per-scanner entries.
_NON_SCANNER_KEYS = ('summary', 'total_issues', 'ai')


@dataclass
class CodeChunk:
    """The parts of one file that are visible in a diff.

    ``old_content`` / ``new_content`` hold the removed+context and
    added+context lines of *all* hunks concatenated, one line per ``\\n``.
    ``old_start`` / ``new_start`` are the start lines of the first hunk.
    """

    file_path: str
    old_content: str = ""
    new_content: str = ""
    old_start: int = 0
    new_start: int = 0
    # One dict per hunk: old_start, old_count, new_start, new_count.
    hunks: List[Dict] = field(default_factory=list)
    # Real new-file line number of every line stored in ``new_content``.
    new_line_numbers: List[int] = field(default_factory=list)


class DiffParser:
    """Parse a ``git diff`` text into per-file :class:`CodeChunk` objects."""

    def __init__(self, diff_text: str):
        self.diff_text = diff_text
        self.files: Dict[str, CodeChunk] = {}
        self._parse()

    def _parse(self) -> None:
        """Parse ``self.diff_text`` and populate ``self.files``.

        Files are keyed by their new (``b/``) path, so renamed files are found
        under the name the scanners report.
        """
        if not self.diff_text:
            return

        chunk: Optional[CodeChunk] = None
        old_lines: List[str] = []
        new_lines: List[str] = []
        in_hunk = False
        next_new_number = 0

        for line in self.diff_text.split('\n'):
            file_match = _DIFF_HEADER_RE.match(line)
            if file_match:
                self._store_content(chunk, old_lines, new_lines)
                chunk = CodeChunk(file_path=file_match.group(2))
                self.files[chunk.file_path] = chunk
                old_lines, new_lines = [], []
                in_hunk = False
                continue

            if chunk is None:
                continue

            hunk_match = _HUNK_HEADER_RE.match(line)
            if hunk_match:
                old_start = int(hunk_match.group(1))
                new_start = int(hunk_match.group(3))
                # A missing count means 1 in the unified diff format.
                old_count = int(hunk_match.group(2)) if hunk_match.group(2) else 1
                new_count = int(hunk_match.group(4)) if hunk_match.group(4) else 1
                if not chunk.hunks:
                    chunk.old_start = old_start
                    chunk.new_start = new_start
                chunk.hunks.append({
                    'old_start': old_start,
                    'old_count': old_count,
                    'new_start': new_start,
                    'new_count': new_count,
                })
                next_new_number = new_start
                in_hunk = True
                continue

            # "---"/"+++" file headers only appear before the first hunk, so
            # gating on in_hunk lets real content such as "+++i;" or
            # "--- sql comment" through instead of mistaking it for a header.
            if not in_hunk or not line:
                continue

            marker, text = line[0], line[1:]
            if marker == '+':
                new_lines.append(text)
                chunk.new_line_numbers.append(next_new_number)
                next_new_number += 1
            elif marker == '-':
                old_lines.append(text)
            elif marker == ' ':
                old_lines.append(text)
                new_lines.append(text)
                chunk.new_line_numbers.append(next_new_number)
                next_new_number += 1
            # Anything else (e.g. "\ No newline at end of file") is ignored.

        self._store_content(chunk, old_lines, new_lines)

    @staticmethod
    def _store_content(chunk: Optional[CodeChunk], old_lines: List[str],
                       new_lines: List[str]) -> None:
        """Join the collected lines into the chunk's content strings."""
        if chunk is None:
            return
        chunk.old_content = ''.join(text + '\n' for text in old_lines)
        chunk.new_content = ''.join(text + '\n' for text in new_lines)

    @staticmethod
    def _numbered_new_lines(chunk: CodeChunk) -> List[Tuple[int, str]]:
        """Return ``(new-file line number, text)`` for each new-side line."""
        lines = chunk.new_content.split('\n')
        # Every stored line ends with '\n', so the last element is an artifact.
        if lines and lines[-1] == '':
            lines.pop()
        numbers: Iterable[int] = chunk.new_line_numbers
        if len(chunk.new_line_numbers) != len(lines):
            # Chunk was not built by _parse (or was modified afterwards):
            # fall back to consecutive numbering from the recorded start.
            first = chunk.new_start or 1
            numbers = range(first, first + len(lines))
        return list(zip(numbers, lines))

    def get_file_content(self, file_path: str) -> Optional[CodeChunk]:
        """Return the chunk stored for ``file_path``, or ``None``."""
        return self.files.get(file_path)

    def get_line_context(self, file_path: str, line_number: int,
                         context_lines: int = 3) -> Optional[Dict[str, Any]]:
        """Return the diff lines surrounding ``line_number`` of the new file.

        Args:
            file_path: Key of the file in ``self.files``.
            line_number: 1-based line number in the new version of the file.
            context_lines: How many lines before and after to include.

        Returns:
            ``{'file', 'line', 'context'}`` where ``context`` is a list of
            ``{'line_number', 'code', 'is_issue_line'}`` dicts, or ``None``
            when the file is unknown or the line is not part of the diff.
            Only lines within ``context_lines`` of the target are included,
            so the context never leaks into an unrelated hunk.
        """
        chunk = self.files.get(file_path)
        if not chunk:
            return None

        numbered = self._numbered_new_lines(chunk)
        if not any(number == line_number for number, _ in numbered):
            return None

        low = line_number - context_lines
        high = line_number + context_lines
        context = [
            {
                'line_number': number,
                'code': text,
                'is_issue_line': number == line_number,
            }
            for number, text in numbered
            if low <= number <= high
        ]
        return {
            'file': file_path,
            'line': line_number,
            'context': context,
        }


def _match_diff_path(file_path: Any, paths: Iterable[str]) -> Optional[str]:
    """Find the diff path that best corresponds to ``file_path``.

    Preference order: exact match, suffix match on a ``/`` boundary (either
    direction), then the loose suffix/substring match used historically.
    """
    if not isinstance(file_path, str) or not file_path:
        return None
    candidates = list(paths)
    if file_path in candidates:
        return file_path
    for path in candidates:
        if file_path.endswith('/' + path) or path.endswith('/' + file_path):
            return path
    for path in candidates:
        if file_path.endswith(path) or path.endswith(file_path) or file_path in path:
            return path
    return None


def _to_line_number(value: Any) -> int:
    """Coerce a reported line (int, numeric string, ...) to a positive int, else 0."""
    try:
        number = int(value)
    except (TypeError, ValueError, OverflowError):
        return 0
    return number if number > 0 else 0


def _build_preview(parser: DiffParser, matched_path: str,
                   line: Any) -> Optional[Dict[str, Any]]:
    """Build a file-level preview (first lines of the new content) for a file."""
    chunk = parser.get_file_content(matched_path)
    if not chunk or not chunk.new_content:
        return None
    raw_lines = chunk.new_content.split('\n')
    # Do not count the empty element produced by the trailing newline.
    total = len(raw_lines) - 1 if raw_lines[-1] == '' else len(raw_lines)
    return {
        'file': matched_path,
        'line': line,
        'preview': '\n'.join(raw_lines[:_PREVIEW_LINES]),
        'has_more': total > _PREVIEW_LINES,
    }


def merge_issues_with_code(scan_results: Dict[str, Any], diff: str) -> Dict[str, Any]:
    """Associate scan issues with code snippets from ``diff``.

    Args:
        scan_results: Mapping of scanner name to scanner data, plus the
            optional ``summary``, ``total_issues`` and ``ai`` keys.
        diff: Diff text; when empty, issues are passed through unchanged.

    Returns:
        A dict with ``scanners`` (list of enriched scanner entries),
        ``summary``, ``total_issues`` and, when ``scan_results['ai']`` is a
        dict, an ``ai`` entry in the unified issue format.
    """
    parser = DiffParser(diff) if diff else None

    enriched_results: Dict[str, Any] = {
        'scanners': [],
        'summary': scan_results.get('summary', {}),
        'total_issues': scan_results.get('total_issues', 0),
    }

    for scanner_name, scanner_data in scan_results.items():
        if scanner_name in _NON_SCANNER_KEYS or not isinstance(scanner_data, dict):
            continue
        issues = scanner_data.get('issues') or []
        enriched_results['scanners'].append({
            'name': scanner_name,
            'issues': [
                enrich_issue_with_code(issue, parser) if parser else issue
                for issue in issues
            ],
            'file_count': scanner_data.get('file_count', 0),
            'total_issues': scanner_data.get('total_issues', 0),
        })

    # Convert the AI review result into the unified issue format.
    ai_result = scan_results.get('ai')
    if isinstance(ai_result, dict):
        enriched_results['ai'] = {
            'name': 'ai',
            'issues': convert_ai_reviews_to_issues(ai_result, parser),
            'summary': ai_result.get('summary', ''),
            'files_reviewed': ai_result.get('files_reviewed', 0),
        }

    return enriched_results


def convert_ai_reviews_to_issues(ai_result: Dict[str, Any],
                                 parser: Optional[DiffParser] = None) -> List[Dict[str, Any]]:
    """Convert an AI review result (``issues`` format) to unified issues.

    Entries without a ``file`` are skipped. When a parser is given and the
    file is part of the diff, ``code_context`` is a file-level preview of the
    new content; otherwise it is ``None``.
    """
    issues: List[Dict[str, Any]] = []

    for issue in ai_result.get('issues') or []:
        if not isinstance(issue, dict):
            continue
        file_path = issue.get('file', '')
        if not file_path:
            continue

        code_context = None
        if parser is not None:
            matched_path = _match_diff_path(file_path, parser.files)
            if matched_path:
                code_context = _build_preview(parser, matched_path, issue.get('line', 1))

        sev = issue.get('severity', 'warning')
        sev = sev.lower() if isinstance(sev, str) else 'warning'

        issues.append({
            'file': file_path,
            'line': issue.get('line', 1),
            'severity': sev,
            'message': issue.get('message', ''),
            'category': 'ai',
            'code_context': code_context,
            'defect_reason': issue.get('defect_reason', ''),
        })

    return issues


def enrich_issue_with_code(issue: Dict[str, Any], parser: DiffParser) -> Dict[str, Any]:
    """Return a copy of ``issue`` with a code snippet attached when possible.

    The line number comes from ``issue['line']`` or, failing that, from a
    "line N" hint in the description/message. If the file is part of the diff
    the copy's ``file`` is replaced by the diff path and ``code_context`` is
    set to the surrounding lines, falling back to a file-level preview when
    the line is not covered by the diff. An existing ``code_context`` is only
    overwritten by a line-level context.
    """
    enriched = issue.copy()
    file_path = issue.get('file', '')
    if not file_path:
        return enriched

    line_number = _to_line_number(issue.get('line', 0))
    if not line_number:
        desc = issue.get('description', '') or issue.get('message', '')
        line_match = _LINE_HINT_RE.search(desc) if isinstance(desc, str) else None
        if line_match:
            line_number = int(line_match.group(1))

    matched_path = _match_diff_path(file_path, parser.files)
    if not matched_path:
        return enriched
    enriched['file'] = matched_path

    context = parser.get_line_context(matched_path, line_number) if line_number else None
    if context:
        enriched['code_context'] = context
    elif 'code_context' not in enriched:
        preview = _build_preview(parser, matched_path, line_number or 1)
        if preview:
            enriched['code_context'] = preview

    return enriched