This commit is contained in:
Dang Zerong
2026-03-09 09:24:08 +08:00
parent 378feffe74
commit d2f53ee233
13 changed files with 1400 additions and 0 deletions

142
scanner/base.py Normal file
View File

@@ -0,0 +1,142 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
代码扫描器基类
定义扫描器接口和通用功能
"""
import os
import logging
import tempfile
import shutil
from abc import ABC, abstractmethod
from typing import Dict, Any, List, Optional
from git import Repo
logger = logging.getLogger(__name__)
class BaseScanner(ABC):
    """Abstract base class for code scanners.

    Bundles the plumbing every concrete scanner needs: cloning a repository
    into a scratch directory, running external commands, collecting source
    files by extension and removing the clone afterwards. Subclasses must
    implement :meth:`scan`.
    """

    def __init__(self, config: Dict[str, Any]):
        """Initialise the scanner from a configuration mapping.

        Args:
            config: Scanner configuration. The keys read here are
                ``temp_clone_dir`` (default ``/tmp/code_scanner_clones``),
                ``max_issues`` (default ``10``) and ``detailed``
                (default ``True``). The last two are only stored on the
                instance; this base class does not use them itself.
        """
        self.config = config
        self.temp_dir = config.get('temp_clone_dir', '/tmp/code_scanner_clones')
        self.max_issues = config.get('max_issues', 10)
        self.detailed = config.get('detailed', True)
        # Make sure the directory that will hold the clones exists.
        os.makedirs(self.temp_dir, exist_ok=True)

    @abstractmethod
    def scan(self, repo_url: str, commit_id: Optional[str], branch: str) -> Dict[str, Any]:
        """Run the scan; must be implemented by subclasses.

        Args:
            repo_url: Repository URL.
            commit_id: Commit to scan, or ``None``.
            branch: Branch name.

        Returns:
            The scan result as a dictionary (shape defined by the subclass).
        """

    @staticmethod
    def _clone_dir_name(repo_url: str, ref: Optional[str]) -> str:
        """Build a single, filesystem-safe directory name for a clone.

        Only a trailing ``.git`` is stripped from the repository name, a
        trailing ``/`` on the URL is ignored, and every character other than
        alphanumerics, ``.``, ``_`` and ``-`` is replaced by ``_`` so that a
        ref such as ``feature/x`` cannot introduce path separators.
        """
        def sanitize(text: str) -> str:
            return ''.join(
                ch if ch.isalnum() or ch in '._-' else '_' for ch in text
            )

        repo_name = repo_url.rstrip('/').split('/')[-1]
        if repo_name.endswith('.git'):
            repo_name = repo_name[:-len('.git')]
        repo_name = sanitize(repo_name) or 'repo'
        return f"{repo_name}_{sanitize(str(ref))}"

    @staticmethod
    def _checkout_commit(repo: Any, commit_id: str) -> None:
        """Check out ``commit_id`` in a shallow clone, fetching it if needed.

        A ``depth=1`` clone only contains the tip of the cloned branch, so a
        plain checkout fails for any other commit. In that case the commit is
        fetched on its own; if the server refuses that, the clone is
        un-shallowed instead, and the checkout is retried.

        Raises:
            git.exc.GitCommandError: If the commit still cannot be checked out.
        """
        # Local import: the file only imports ``Repo`` from ``git`` at the top.
        from git.exc import GitCommandError

        try:
            repo.git.checkout(commit_id)
        except GitCommandError:
            try:
                repo.git.fetch('origin', commit_id, depth=1)
            except GitCommandError:
                # Fetching by SHA was refused (or the id is abbreviated);
                # fall back to downloading the full history of the branch.
                repo.git.fetch('origin', unshallow=True)
            repo.git.checkout(commit_id)

    def clone_repo(self, repo_url: str, commit_id: Optional[str], branch: str) -> str:
        """Clone a repository into a directory under ``self.temp_dir``.

        Args:
            repo_url: Repository URL.
            commit_id: Commit to check out; when ``None`` the tip of
                ``branch`` is used.
            branch: Branch to clone.

        Returns:
            Path of the directory containing the clone.

        Raises:
            Exception: Whatever the clone or checkout raised; it is logged
                and re-raised after the partial clone has been removed.
        """
        clone_dir = os.path.join(
            self.temp_dir, self._clone_dir_name(repo_url, commit_id or branch)
        )
        # Remove any leftover directory so the clone starts from a clean target.
        if os.path.exists(clone_dir):
            shutil.rmtree(clone_dir)

        try:
            logger.info('克隆仓库: %s', repo_url)
            # Shallow clone: only the latest commit of the branch is fetched.
            repo = Repo.clone_from(
                repo_url,
                clone_dir,
                depth=1,
                branch=branch,
            )
            if commit_id:
                self._checkout_commit(repo, commit_id)
            logger.info('仓库克隆成功: %s', clone_dir)
            return clone_dir
        except Exception as e:
            logger.error('克隆仓库失败: %s', e)
            # Do not leave a half-written clone behind for the next run.
            shutil.rmtree(clone_dir, ignore_errors=True)
            raise

    def cleanup(self, clone_dir: str) -> None:
        """Delete a clone directory, logging instead of raising on failure.

        Args:
            clone_dir: Path of the clone directory to remove.
        """
        try:
            if os.path.exists(clone_dir):
                shutil.rmtree(clone_dir)
                logger.info('清理临时目录: %s', clone_dir)
        except Exception as e:
            # Best effort: a failed cleanup must not abort the scan.
            logger.warning('清理临时目录失败: %s', e)

    def run_command(self, cmd: List[str], cwd: str, timeout: int = 300) -> Dict[str, Any]:
        """Run an external command and capture its outcome.

        Never raises: a timeout or a failure to start the process is reported
        through the returned dictionary.

        Args:
            cmd: Command and arguments as a list (run without a shell).
            cwd: Working directory for the command.
            timeout: Timeout in seconds.

        Returns:
            A dictionary with the keys ``success`` (``True`` when the exit
            code is 0), ``returncode``, ``stdout`` and ``stderr``. On timeout
            or any other error ``returncode`` is ``-1``, ``stdout`` is empty
            and ``stderr`` holds the error description.
        """
        import subprocess

        try:
            result = subprocess.run(
                cmd,
                cwd=cwd,
                capture_output=True,
                text=True,
                # Undecodable bytes in tool output must not discard the result.
                errors='replace',
                timeout=timeout,
            )
        except subprocess.TimeoutExpired:
            return {
                'success': False,
                'returncode': -1,
                'stdout': '',
                'stderr': 'Command timeout',
            }
        except Exception as e:
            return {
                'success': False,
                'returncode': -1,
                'stdout': '',
                'stderr': str(e),
            }
        return {
            'success': result.returncode == 0,
            'returncode': result.returncode,
            'stdout': result.stdout,
            'stderr': result.stderr,
        }

    def get_changed_files(self, clone_dir: str, extensions: List[str]) -> List[str]:
        """List files under ``clone_dir`` whose names end with an extension.

        Despite the name, this walks the whole tree and returns every
        matching file, not only files changed by a commit. Directories whose
        name starts with ``.`` and ``node_modules``, ``__pycache__`` and
        ``venv`` are not descended into.

        Args:
            clone_dir: Root directory to walk.
            extensions: File name suffixes to match, e.g. ``['.py']``.

        Returns:
            Matching file paths, in a deterministic (sorted) walk order.
        """
        suffixes = tuple(extensions)
        skipped_dirs = {'node_modules', '__pycache__', 'venv'}
        matches: List[str] = []
        for root, dirs, filenames in os.walk(clone_dir):
            # Prune in place so os.walk does not descend into skipped dirs;
            # sorting makes the result independent of filesystem order.
            dirs[:] = sorted(
                d for d in dirs
                if not d.startswith('.') and d not in skipped_dirs
            )
            matches.extend(
                os.path.join(root, name)
                for name in sorted(filenames)
                if name.endswith(suffixes)
            )
        return matches