This commit is contained in:
Dang Zerong
2026-03-12 14:42:23 +08:00
parent 9ae55407fc
commit 027cf50759
8 changed files with 225 additions and 52 deletions

View File

@@ -41,7 +41,7 @@ class AIReviewer(BaseScanner):
logger.info(f'AI 审查器初始化: {self.provider}/{self.model}')
def scan(self, repo_url: str, commit_id: Optional[str], branch: str) -> Dict[str, Any]:
def scan(self, repo_url: str, commit_id: Optional[str], branch: str, changed_files: Optional[List[str]] = None) -> Dict[str, Any]:
"""
执行代码扫描(实现抽象方法)
@@ -49,16 +49,18 @@ class AIReviewer(BaseScanner):
repo_url: 仓库 URL
commit_id: 提交 ID
branch: 分支名
changed_files: 可选的变更文件列表(来自 PR
Returns:
审查结果
"""
# 调用实际的审查逻辑
return self._do_review(repo_url=repo_url, commit_id=commit_id, branch=branch)
return self._do_review(repo_url=repo_url, commit_id=commit_id, branch=branch, changed_files=changed_files)
def _do_review(self, clone_dir: str = None, repo_url: str = None,
commit_id: str = None, branch: str = None,
language: str = 'python') -> Dict[str, Any]:
language: str = 'python',
changed_files: Optional[List[str]] = None) -> Dict[str, Any]:
"""
执行 AI 代码审查
@@ -68,6 +70,7 @@ class AIReviewer(BaseScanner):
commit_id: 提交 ID
branch: 分支名
language: 编程语言
changed_files: 可选的变更文件列表(来自 PR
Returns:
审查结果
@@ -94,7 +97,7 @@ class AIReviewer(BaseScanner):
}
# 获取要审查的代码文件
files = self._get_code_files(clone_dir, language)
files = self._get_code_files(clone_dir, language, changed_files)
if not files:
return {
@@ -107,7 +110,7 @@ class AIReviewer(BaseScanner):
# 对每个文件进行 AI 审查
all_reviews = []
for file_path in files[:5]: # 限制最多审查 5 个文件
review = self._review_file(file_path, language)
review = self._review_file(file_path, language, clone_dir)
if review:
all_reviews.append(review)
@@ -133,7 +136,7 @@ class AIReviewer(BaseScanner):
'summary': f'AI 审查出错: {str(e)}'
}
def _get_code_files(self, clone_dir: str, language: str) -> List[str]:
def _get_code_files(self, clone_dir: str, language: str, changed_files: Optional[List[str]] = None) -> List[str]:
"""获取代码文件列表"""
import glob
@@ -144,6 +147,18 @@ class AIReviewer(BaseScanner):
}
exts = extensions.get(language, ['.py'])
# 如果提供了变更文件列表,只返回这些文件
if changed_files:
files = []
for changed_file in changed_files:
if any(changed_file.endswith(ext) for ext in exts):
full_path = os.path.join(clone_dir, changed_file)
if os.path.exists(full_path):
files.append(full_path)
return files[:10]
# 否则扫描整个仓库
files = []
for ext in exts:
@@ -157,7 +172,7 @@ class AIReviewer(BaseScanner):
return files[:10] # 最多 10 个文件
def _review_file(self, file_path: str, language: str) -> Optional[Dict[str, Any]]:
def _review_file(self, file_path: str, language: str, clone_dir: str = None) -> Optional[Dict[str, Any]]:
"""审查单个文件"""
try:
with open(file_path, 'r', encoding='utf-8') as f:
@@ -181,9 +196,9 @@ class AIReviewer(BaseScanner):
return None
# 解析响应
filename = os.path.basename(file_path)
rel_path = os.path.relpath(file_path, clone_dir) if (clone_dir and file_path) else file_path
return {
'file': filename,
'file': rel_path,
'path': file_path,
'truncated': truncated,
'review': response
@@ -236,6 +251,7 @@ class AIReviewer(BaseScanner):
logger.warning(f'未知的 AI provider: {self.provider}')
return None
except Exception as e:
print("异常追踪信息:", e.__traceback__)
logger.error(f'AI 调用失败: {str(e)}')
return None

View File

@@ -152,15 +152,28 @@ class BaseScanner(ABC):
'stdout': '',
'stderr': str(e)
}
def get_changed_files(self, clone_dir: str, extensions: List[str]) -> List[str]:
def get_changed_files(self, clone_dir: str, extensions: List[str], changed_files: Optional[List[str]] = None) -> List[str]:
"""
获取指定扩展名的文件列表
Args:
clone_dir: 仓库目录
extensions: 文件扩展名列表
changed_files: 可选的变更文件列表(来自 PR如果提供则只返回这些文件
Returns:
文件路径列表
"""
# 如果提供了变更文件列表,只扫描这些文件
if changed_files:
files = []
for changed_file in changed_files:
# 检查文件扩展名是否匹配
if any(changed_file.endswith(ext) for ext in extensions):
full_path = os.path.join(clone_dir, changed_file)
if os.path.exists(full_path):
files.append(full_path)
return files
# 否则扫描整个仓库
files = []
for root, dirs, filenames in os.walk(clone_dir):
# 跳过隐藏目录和特殊目录

View File

@@ -94,10 +94,8 @@ class DiffParser:
def merge_issues_with_code(scan_results: Dict[str, Any], diff: str) -> Dict[str, Any]:
"""将扫描问题与代码片段关联"""
if not diff:
return scan_results
parser = DiffParser(diff) if diff else None
parser = DiffParser(diff)
enriched_results = {
'scanners': [],
'summary': scan_results.get('summary', {}),
@@ -118,17 +116,93 @@ def merge_issues_with_code(scan_results: Dict[str, Any], diff: str) -> Dict[str,
issues = scanner_data.get('issues', [])
for issue in issues:
enriched_issue = enrich_issue_with_code(issue, parser)
enriched_issue = enrich_issue_with_code(issue, parser) if parser else issue
enriched_scanner['issues'].append(enriched_issue)
enriched_results['scanners'].append(enriched_scanner)
# 处理 AI 审查结果,转换为问题格式
if 'ai' in scan_results:
enriched_results['ai'] = scan_results['ai']
ai_issues = convert_ai_reviews_to_issues(scan_results['ai'], parser)
enriched_results['ai'] = {
'name': 'ai',
'issues': ai_issues,
'summary': scan_results['ai'].get('summary', ''),
'files_reviewed': scan_results['ai'].get('files_reviewed', 0)
}
return enriched_results
def convert_ai_reviews_to_issues(ai_result: Dict[str, Any], parser: Optional['DiffParser'] = None) -> List[Dict[str, Any]]:
    """Flatten AI review results into the issue format used by the scanners.

    Each entry of ``ai_result['reviews']`` is expected to be a dict with a
    ``'file'`` path and a ``'review'`` dict.  Items under the review keys
    ``'问题'`` (problems) and ``'优化'`` (optimizations) become one issue each;
    ``'优点'`` (strengths) is intentionally not reported as an issue.

    Args:
        ai_result: AI reviewer output; only the ``'reviews'`` key is read.
        parser: Optional diff parser.  When given, the first lines of the
            matching file's new content are attached as ``code_context``.

    Returns:
        A list of issue dicts.  Per review, problems (severity ``'warning'``)
        come first, followed by optimizations (severity ``'info'``).  The AI
        review carries no line numbers, so ``'line'`` is always 1.
    """
    # (review key, issue severity, message label)
    sections = (
        ('问题', 'warning', 'AI 建议'),
        ('优化', 'info', 'AI 优化'),
    )

    issues: List[Dict[str, Any]] = []
    for review in ai_result.get('reviews') or []:
        if not isinstance(review, dict):
            continue

        file_path = review.get('file') or ''
        review_data = review.get('review', {})
        # The review may be empty or an unparsed (non-dict) response;
        # neither can be mapped to structured issues, so skip it instead of
        # failing the whole conversion.
        if not review_data or not isinstance(review_data, dict):
            continue

        code_context = _build_ai_code_context(file_path, parser) if parser else None

        for key, severity, label in sections:
            for item in _normalize_ai_items(review_data.get(key)):
                issues.append({
                    'file': file_path,
                    'line': 1,  # the AI review does not return line numbers
                    'severity': severity,
                    'message': f'[{label}] {item}',
                    'category': 'ai',
                    'code_context': code_context,
                    'review_data': {
                        'type': key,
                        'content': item
                    }
                })

    return issues


def _normalize_ai_items(value: Any) -> List[Any]:
    """Return a review section as a list, wrapping a lone scalar item."""
    if not value:
        return []
    if isinstance(value, (list, tuple)):
        return list(value)
    # A bare string must not be iterated character by character.
    return [value]


def _match_diff_path(file_path: str, diff_paths: List[str]) -> Optional[str]:
    """Find the diff path that refers to the same file as ``file_path``.

    An exact match wins; otherwise one path must be a suffix of the other on
    a ``/`` boundary, so ``a.py`` does not match ``data.py``.
    """
    if not file_path:
        # An empty path would otherwise "match" every diff file.
        return None

    wanted = file_path.replace('\\', '/')
    normalized = [(path, path.replace('\\', '/')) for path in diff_paths if path]

    for original, candidate in normalized:
        if candidate == wanted:
            return original
    for original, candidate in normalized:
        if wanted.endswith('/' + candidate) or candidate.endswith('/' + wanted):
            return original
    return None


def _build_ai_code_context(file_path: str, parser: Any) -> Optional[Dict[str, Any]]:
    """Build a preview of the first 10 new-content lines of the matching diff file."""
    matched_path = _match_diff_path(file_path, list(parser.files.keys()))
    if not matched_path:
        return None

    chunk = parser.get_file_content(matched_path)
    if not chunk or not chunk.new_content:
        return None

    lines = chunk.new_content.split('\n')
    return {
        'file': matched_path,
        'line': 1,
        'preview': '\n'.join(lines[:10]),
        'has_more': len(lines) > 10
    }
def enrich_issue_with_code(issue: Dict[str, Any], parser: DiffParser) -> Dict[str, Any]:
"""为单个问题添加代码片段"""
enriched = issue.copy()

View File

@@ -20,7 +20,7 @@ class JavaScriptScanner(BaseScanner):
super().__init__(config)
self.extensions = ['.js', '.jsx', '.ts', '.tsx', '.vue', '.svelte']
def scan(self, repo_url: str, commit_id: Optional[str], branch: str) -> Dict[str, Any]:
def scan(self, repo_url: str, commit_id: Optional[str], branch: str, changed_files: Optional[List[str]] = None) -> Dict[str, Any]:
"""
执行 JavaScript/TypeScript 代码扫描
@@ -28,6 +28,7 @@ class JavaScriptScanner(BaseScanner):
repo_url: 仓库 URL
commit_id: 提交 ID
branch: 分支名
changed_files: 可选的变更文件列表(来自 PR
Returns:
扫描结果
@@ -51,8 +52,8 @@ class JavaScriptScanner(BaseScanner):
# 克隆仓库
clone_dir = self.clone_repo(repo_url, commit_id, branch)
# 获取 JavaScript/TypeScript 文件
js_files = self.get_changed_files(clone_dir, self.extensions)
# 获取 JavaScript/TypeScript 文件(只扫描变更的文件)
js_files = self.get_changed_files(clone_dir, self.extensions, changed_files)
result['files_scanned'] = len(js_files)
if not js_files:
@@ -75,7 +76,7 @@ class JavaScriptScanner(BaseScanner):
return result
def _run_eslint(self, cwd: str, files: List[str]) -> Dict[str, Any]:
def _run_eslint(self, clone_dir: str, files: List[str]) -> Dict[str, Any]:
"""运行 ESLint 扫描"""
result = {
'tool': 'eslint',
@@ -88,7 +89,7 @@ class JavaScriptScanner(BaseScanner):
cmd = ['npx', 'eslint', '--format=json', '--no-eslintrc'] + files
# 如果没有 eslint 配置,先创建默认配置
eslintrc_path = os.path.join(cwd, '.eslintrc.json')
eslintrc_path = os.path.join(clone_dir, '.eslintrc.json')
if not os.path.exists(eslintrc_path):
# 创建简单的 ESLint 配置
eslint_config = {
@@ -106,7 +107,7 @@ class JavaScriptScanner(BaseScanner):
with open(eslintrc_path, 'w') as f:
json.dump(eslint_config, f)
output = self.run_command(cmd, cwd, timeout=120)
output = self.run_command(cmd, clone_dir, timeout=120)
result['raw_output'] = output.get('stdout', '') + output.get('stderr', '')
# 解析 JSON 输出
@@ -115,6 +116,8 @@ class JavaScriptScanner(BaseScanner):
eslint_results = json.loads(output['stdout'])
for file_result in eslint_results:
file_path = file_result.get('filePath', '')
# 使用相对于 clone_dir 的路径
rel_path = os.path.relpath(file_path, clone_dir) if file_path else ''
messages = file_result.get('messages', [])
for msg in messages:
@@ -124,7 +127,7 @@ class JavaScriptScanner(BaseScanner):
'type': severity,
'severity': 'Error' if msg.get('severity', 0) == 2 else 'Warning',
'message': msg.get('message', ''),
'file': os.path.basename(file_path),
'file': rel_path,
'line': msg.get('line', 0),
'column': msg.get('column', 0),
'symbol': msg.get('ruleId', 'unknown')

View File

@@ -20,7 +20,7 @@ class PythonScanner(BaseScanner):
super().__init__(config)
self.extensions = ['.py']
def scan(self, repo_url: str, commit_id: Optional[str], branch: str) -> Dict[str, Any]:
def scan(self, repo_url: str, commit_id: Optional[str], branch: str, changed_files: Optional[List[str]] = None) -> Dict[str, Any]:
"""
执行 Python 代码扫描
@@ -28,6 +28,7 @@ class PythonScanner(BaseScanner):
repo_url: 仓库 URL
commit_id: 提交 ID
branch: 分支名
changed_files: 可选的变更文件列表(来自 PR
Returns:
扫描结果
@@ -51,8 +52,8 @@ class PythonScanner(BaseScanner):
# 克隆仓库
clone_dir = self.clone_repo(repo_url, commit_id, branch)
# 获取 Python 文件
py_files = self.get_changed_files(clone_dir, self.extensions)
# 获取 Python 文件(只扫描变更的文件)
py_files = self.get_changed_files(clone_dir, self.extensions, changed_files)
result['files_scanned'] = len(py_files)
if not py_files:
@@ -84,7 +85,7 @@ class PythonScanner(BaseScanner):
return result
def _run_pylint(self, cwd: str, files: List[str]) -> Dict[str, Any]:
def _run_pylint(self, clone_dir: str, files: List[str]) -> Dict[str, Any]:
"""运行 Pylint 扫描"""
result = {
'tool': 'pylint',
@@ -95,7 +96,7 @@ class PythonScanner(BaseScanner):
# 只扫描变更的文件
try:
cmd = ['python', '-m', 'pylint', '--output-format=json'] + files
output = self.run_command(cmd, cwd, timeout=120)
output = self.run_command(cmd, clone_dir, timeout=120)
result['raw_output'] = output.get('stdout', '')
@@ -104,12 +105,15 @@ class PythonScanner(BaseScanner):
try:
issues = json.loads(output['stdout'])
for issue in issues:
# 使用相对于 clone_dir 的路径
full_path = issue.get('path', '')
rel_path = os.path.relpath(full_path, clone_dir) if full_path else ''
result['issues'].append({
'tool': 'pylint',
'type': issue.get('type', 'info'),
'severity': issue.get('severity', 'Info'),
'message': issue.get('message', ''),
'file': os.path.basename(issue.get('path', '')),
'file': rel_path,
'line': issue.get('line', 0),
'column': issue.get('column', 0),
'symbol': issue.get('symbol', '')
@@ -122,7 +126,7 @@ class PythonScanner(BaseScanner):
return result
def _run_flake8(self, cwd: str, files: List[str]) -> Dict[str, Any]:
def _run_flake8(self, clone_dir: str, files: List[str]) -> Dict[str, Any]:
"""运行 Flake8 扫描"""
result = {
'tool': 'flake8',
@@ -132,7 +136,7 @@ class PythonScanner(BaseScanner):
try:
cmd = ['python', '-m', 'flake8', '--format=json'] + files
output = self.run_command(cmd, cwd, timeout=120)
output = self.run_command(cmd, clone_dir, timeout=120)
result['raw_output'] = output.get('stdout', '')
@@ -141,12 +145,15 @@ class PythonScanner(BaseScanner):
try:
issues = json.loads(output['stdout'])
for issue in issues:
# 使用相对于 clone_dir 的路径
full_path = issue.get('filename', '')
rel_path = os.path.relpath(full_path, clone_dir) if full_path else ''
result['issues'].append({
'tool': 'flake8',
'type': self._map_flake8_code(issue.get('code', '')),
'severity': 'Warning',
'message': issue.get('text', ''),
'file': os.path.basename(issue.get('filename', '')),
'file': rel_path,
'line': issue.get('line_number', 0),
'column': issue.get('column_number', 0),
'symbol': issue.get('code', '')

View File

@@ -21,7 +21,7 @@ class SecurityScanner(BaseScanner):
# 扫描所有代码文件以发现安全问题
self.extensions = ['.py', '.js', '.ts', '.jsx', '.tsx', '.java', '.go', '.rb', '.php']
def scan(self, repo_url: str, commit_id: Optional[str], branch: str) -> Dict[str, Any]:
def scan(self, repo_url: str, commit_id: Optional[str], branch: str, changed_files: Optional[List[str]] = None) -> Dict[str, Any]:
"""
执行安全扫描
@@ -29,6 +29,7 @@ class SecurityScanner(BaseScanner):
repo_url: 仓库 URL
commit_id: 提交 ID
branch: 分支名
changed_files: 可选的变更文件列表(来自 PR
Returns:
扫描结果
@@ -53,8 +54,8 @@ class SecurityScanner(BaseScanner):
# 克隆仓库
clone_dir = self.clone_repo(repo_url, commit_id, branch)
# 获取所有支持的文件
all_files = self.get_changed_files(clone_dir, self.extensions)
# 获取所有支持的文件(只扫描变更的文件)
all_files = self.get_changed_files(clone_dir, self.extensions, changed_files)
result['files_scanned'] = len(all_files)
if not all_files:
@@ -88,7 +89,7 @@ class SecurityScanner(BaseScanner):
return result
def _run_bandit(self, cwd: str, files: List[str]) -> Dict[str, Any]:
def _run_bandit(self, clone_dir: str, files: List[str]) -> Dict[str, Any]:
"""运行 Bandit 安全扫描"""
result = {
'tool': 'bandit',
@@ -98,7 +99,7 @@ class SecurityScanner(BaseScanner):
try:
# 运行 bandit
cmd = ['python', '-m', 'bandit', '-f', 'json'] + files
output = self.run_command(cmd, cwd, timeout=120)
output = self.run_command(cmd, clone_dir, timeout=120)
# 解析 JSON 输出
if output.get('stdout'):
@@ -107,6 +108,9 @@ class SecurityScanner(BaseScanner):
results = data.get('results', [])
for issue in results:
# 使用相对于 clone_dir 的路径
full_path = issue.get('filename', '')
rel_path = os.path.relpath(full_path, clone_dir) if full_path else ''
# 映射严重级别
severity = issue.get('issue_severity', 'LOW')
result['issues'].append({
@@ -115,7 +119,7 @@ class SecurityScanner(BaseScanner):
'severity': severity,
'confidence': issue.get('issue_confidence', 'LOW'),
'message': issue.get('issue_text', ''),
'file': os.path.basename(issue.get('filename', '')),
'file': rel_path,
'line': issue.get('line_number', 0),
'code': issue.get('code', '')
})
@@ -127,7 +131,7 @@ class SecurityScanner(BaseScanner):
return result
def _scan_js_security(self, cwd: str, files: List[str]) -> Dict[str, Any]:
def _scan_js_security(self, clone_dir: str, files: List[str]) -> Dict[str, Any]:
"""简单的 JavaScript 安全扫描(基于模式匹配)"""
result = {
'tool': 'js-security',
@@ -172,6 +176,8 @@ class SecurityScanner(BaseScanner):
for file_path in files:
try:
# 使用相对于 clone_dir 的路径
rel_path = os.path.relpath(file_path, clone_dir) if file_path else ''
with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
content = f.read()
lines = content.split('\n')
@@ -185,7 +191,7 @@ class SecurityScanner(BaseScanner):
'severity': pattern_info['severity'],
'confidence': 'MEDIUM',
'message': pattern_info['message'],
'file': os.path.basename(file_path),
'file': rel_path,
'line': line_num,
'code': line.strip()[:80]
})