2025-11-26 21:11:12 +08:00
|
|
|
|
"""
|
2025-12-16 16:36:08 +08:00
|
|
|
|
仓库扫描服务 - 支持GitHub, GitLab 和 Gitea 仓库扫描
|
2025-11-26 21:11:12 +08:00
|
|
|
|
"""
|
|
|
|
|
|
|
2026-01-09 16:41:40 +08:00
|
|
|
|
import os
|
2025-11-26 21:11:12 +08:00
|
|
|
|
import asyncio
|
|
|
|
|
|
import httpx
|
|
|
|
|
|
from typing import List, Dict, Any, Optional
|
2025-12-09 17:47:34 +08:00
|
|
|
|
from datetime import datetime, timezone
|
2025-11-26 21:11:12 +08:00
|
|
|
|
from urllib.parse import urlparse, quote
|
|
|
|
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
|
|
|
|
|
2025-12-17 11:02:42 +08:00
|
|
|
|
from app.utils.repo_utils import parse_repository_url
|
2025-11-26 21:11:12 +08:00
|
|
|
|
from app.models.audit import AuditTask, AuditIssue
|
|
|
|
|
|
from app.models.project import Project
|
|
|
|
|
|
from app.services.llm.service import LLMService
|
|
|
|
|
|
from app.core.config import settings
|
2026-01-09 16:41:40 +08:00
|
|
|
|
from app.core.file_filter import is_text_file as core_is_text_file, should_exclude as core_should_exclude, TEXT_EXTENSIONS as CORE_TEXT_EXTENSIONS
|
2025-11-26 21:11:12 +08:00
|
|
|
|
|
|
|
|
|
|
|
2025-12-19 16:08:26 +08:00
|
|
|
|
def get_analysis_config(user_config: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
|
|
|
|
|
|
"""
|
|
|
|
|
|
获取分析配置参数(优先使用用户配置,然后使用系统配置)
|
|
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
|
包含以下字段的字典:
|
|
|
|
|
|
- max_analyze_files: 最大分析文件数
|
|
|
|
|
|
- llm_concurrency: LLM 并发数
|
|
|
|
|
|
- llm_gap_ms: LLM 请求间隔(毫秒)
|
|
|
|
|
|
"""
|
|
|
|
|
|
other_config = (user_config or {}).get('otherConfig', {})
|
|
|
|
|
|
|
|
|
|
|
|
return {
|
|
|
|
|
|
'max_analyze_files': other_config.get('maxAnalyzeFiles') or settings.MAX_ANALYZE_FILES,
|
|
|
|
|
|
'llm_concurrency': other_config.get('llmConcurrency') or settings.LLM_CONCURRENCY,
|
|
|
|
|
|
'llm_gap_ms': other_config.get('llmGapMs') or settings.LLM_GAP_MS,
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-01-09 16:41:40 +08:00
|
|
|
|
# Supported text-file extensions; sourced from the shared definition in
# app.core.file_filter so all services agree on what counts as "text".
TEXT_EXTENSIONS = list(CORE_TEXT_EXTENSIONS)
|
2025-11-26 21:11:12 +08:00
|
|
|
|
|
|
|
|
|
|
def is_text_file(path: str) -> bool:
    """Return whether *path* is treated as a text file.

    Thin wrapper that delegates to the shared filter in app.core.file_filter.
    """
    result = core_is_text_file(path)
    return result
|
2025-11-26 21:11:12 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def should_exclude(path: str, exclude_patterns: Optional[List[str]] = None) -> bool:
    """Return whether *path* should be excluded from scanning.

    Args:
        path: Repository-relative file path.
        exclude_patterns: Optional additional exclusion patterns; matching
            semantics are defined by ``app.core.file_filter.should_exclude``.
    """
    # The core filter matches against both the full path and the bare
    # filename, so extract the basename here.
    filename = os.path.basename(path)
    return core_should_exclude(path, filename, exclude_patterns)
|
2025-11-26 21:11:12 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Maps a lowercase file extension to the language identifier used for LLM
# analysis. Hoisted to module level so the dict is not rebuilt on every call.
_LANGUAGE_MAP = {
    'py': 'python',
    'js': 'javascript', 'jsx': 'javascript',
    'ts': 'typescript', 'tsx': 'typescript',
    'java': 'java', 'go': 'go', 'rs': 'rust',
    'cpp': 'cpp', 'c': 'c', 'cc': 'cpp', 'h': 'c', 'hh': 'cpp',
    'hpp': 'cpp', 'hxx': 'cpp',
    'cs': 'csharp', 'php': 'php', 'rb': 'ruby',
    'kt': 'kotlin', 'ktm': 'kotlin', 'kts': 'kotlin',
    'swift': 'swift', 'dart': 'dart',
    'scala': 'scala', 'sc': 'scala',
    'groovy': 'groovy', 'gsh': 'groovy', 'gvy': 'groovy', 'gy': 'groovy',
    'sql': 'sql', 'sh': 'bash', 'bash': 'bash', 'zsh': 'bash',
    'pl': 'perl', 'pm': 'perl', 't': 'perl',
    'lua': 'lua', 'hs': 'haskell', 'lhs': 'haskell',
    'clj': 'clojure', 'cljs': 'clojure', 'cljc': 'clojure', 'edn': 'clojure',
    'ex': 'elixir', 'exs': 'elixir', 'erl': 'erlang', 'hrl': 'erlang',
    'm': 'objective-c', 'mm': 'objective-c',
    'r': 'r', 'rmd': 'r',
    'vb': 'visual-basic', 'fs': 'fsharp', 'fsi': 'fsharp', 'fsx': 'fsharp',
    'tf': 'hcl', 'hcl': 'hcl', 'dockerfile': 'dockerfile'
}


def get_language_from_path(path: str) -> str:
    """Return the language identifier for a file path, or 'text' if unknown.

    The extension is taken from the basename (not the full path), so a dot
    in a directory name (e.g. "src.v2/main") no longer yields a bogus
    extension — previously the split ran over the whole path.
    """
    basename = os.path.basename(path)
    ext = basename.rsplit('.', 1)[-1].lower() if '.' in basename else ''
    return _LANGUAGE_MAP.get(ext, 'text')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TaskControlManager:
    """Tracks cancellation flags so running scan tasks can be aborted.

    Workers poll :meth:`is_cancelled` between work units and stop when the
    flag appears; there is no forced interruption.
    """

    def __init__(self):
        # IDs of tasks that have been flagged for cancellation.
        self._cancelled: set = set()

    def cancel_task(self, task_id: str):
        """Flag *task_id* as cancelled."""
        self._cancelled.add(task_id)
        print(f"🛑 任务 {task_id} 已标记为取消")

    def is_cancelled(self, task_id: str) -> bool:
        """Return True if *task_id* has been flagged for cancellation."""
        return task_id in self._cancelled

    def cleanup_task(self, task_id: str):
        """Drop the cancellation flag for a finished task (no-op if absent)."""
        self._cancelled.discard(task_id)


# Global task controller shared by the whole module.
task_control = TaskControlManager()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def github_api(url: str, token: Optional[str] = None) -> Any:
    """Call the GitHub REST API and return the decoded JSON payload.

    Tries an authenticated request first (explicit ``token`` argument or
    ``settings.GITHUB_TOKEN``); on a 401 it retries once without credentials
    so public repositories still work with a stale/invalid token.

    Raises:
        Exception: on any non-200 response that the unauthenticated retry
            cannot recover from.
    """
    headers = {"Accept": "application/vnd.github+json"}
    t = token or settings.GITHUB_TOKEN

    async with httpx.AsyncClient(timeout=30) as client:
        # First try with token if available
        if t:
            headers["Authorization"] = f"Bearer {t}"
            try:
                response = await client.get(url, headers=headers)
                if response.status_code == 200:
                    return response.json()
                if response.status_code != 401:
                    if response.status_code == 403:
                        # 403 usually means rate limiting or missing repo permission.
                        raise Exception("GitHub API 403:请配置 GITHUB_TOKEN 或确认仓库权限/频率限制")
                    raise Exception(f"GitHub API {response.status_code}: {url}")
                # If 401, fall through to retry without token
                print(f"[API] GitHub API 401 (Unauthorized) with token, retrying without token for: {url}")
            except Exception as e:
                # Swallow only 401-related errors so the anonymous retry runs;
                # everything else propagates immediately.
                # NOTE(review): the substring test would also swallow a non-401
                # error whose URL happens to contain "401" — confirm acceptable.
                if "GitHub API 401" not in str(e) and "401" not in str(e):
                    raise

        # Try without token
        if "Authorization" in headers:
            del headers["Authorization"]

        try:
            response = await client.get(url, headers=headers)
            if response.status_code == 200:
                return response.json()
            if response.status_code == 403:
                raise Exception("GitHub API 403:请配置 GITHUB_TOKEN 或确认仓库权限/频率限制")
            if response.status_code == 401:
                raise Exception("GitHub API 401:请配置 GITHUB_TOKEN 或确认仓库权限")
            raise Exception(f"GitHub API {response.status_code}: {url}")
        except Exception as e:
            print(f"[API] GitHub API 调用失败: {url}, 错误: {e}")
            raise
|
2025-11-26 21:11:12 +08:00
|
|
|
|
|
|
|
|
|
|
|
2025-12-16 16:36:08 +08:00
|
|
|
|
|
|
|
|
|
|
async def gitea_api(url: str, token: Optional[str] = None) -> Any:
    """Call the Gitea REST API and return the decoded JSON payload.

    Tries an authenticated request first (explicit ``token`` argument or
    ``settings.GITEA_TOKEN``); on a 401 it retries once without credentials
    so public repositories still work with a stale/invalid token.

    Raises:
        Exception: on any non-200 response that the unauthenticated retry
            cannot recover from.
    """
    headers = {"Content-Type": "application/json"}
    t = token or settings.GITEA_TOKEN

    async with httpx.AsyncClient(timeout=30) as client:
        # First try with token if available
        if t:
            # Gitea expects the "token <value>" authorization scheme.
            headers["Authorization"] = f"token {t}"
            try:
                response = await client.get(url, headers=headers)
                if response.status_code == 200:
                    return response.json()
                if response.status_code != 401:
                    if response.status_code == 403:
                        raise Exception("Gitea API 403:请确认仓库权限/频率限制")
                    raise Exception(f"Gitea API {response.status_code}: {url}")
                # If 401, fall through to retry without token
                print(f"[API] Gitea API 401 (Unauthorized) with token, retrying without token for: {url}")
            except Exception as e:
                # Swallow only 401-related errors so the anonymous retry runs;
                # everything else propagates immediately.
                if "Gitea API 401" not in str(e) and "401" not in str(e):
                    raise

        # Try without token
        if "Authorization" in headers:
            del headers["Authorization"]

        try:
            response = await client.get(url, headers=headers)
            if response.status_code == 200:
                return response.json()
            if response.status_code == 401:
                raise Exception("Gitea API 401:请配置 GITEA_TOKEN 或确认仓库权限")
            if response.status_code == 403:
                raise Exception("Gitea API 403:请确认仓库权限/频率限制")
            raise Exception(f"Gitea API {response.status_code}: {url}")
        except Exception as e:
            print(f"[API] Gitea API 调用失败: {url}, 错误: {e}")
            raise
|
2025-12-16 16:36:08 +08:00
|
|
|
|
|
|
|
|
|
|
|
2025-11-26 21:11:12 +08:00
|
|
|
|
async def gitlab_api(url: str, token: Optional[str] = None) -> Any:
    """Call the GitLab REST API and return the decoded JSON payload.

    Tries an authenticated request first (explicit ``token`` argument or
    ``settings.GITLAB_TOKEN``); on a 401 it retries once without credentials
    so public repositories still work with a stale/invalid token.

    Raises:
        Exception: on any non-200 response that the unauthenticated retry
            cannot recover from.
    """
    headers = {"Content-Type": "application/json"}
    t = token or settings.GITLAB_TOKEN

    async with httpx.AsyncClient(timeout=30) as client:
        # First try with token if available
        if t:
            # GitLab authenticates via the PRIVATE-TOKEN header.
            headers["PRIVATE-TOKEN"] = t
            try:
                response = await client.get(url, headers=headers)
                if response.status_code == 200:
                    return response.json()
                if response.status_code != 401:
                    if response.status_code == 403:
                        raise Exception("GitLab API 403:请确认仓库权限/频率限制")
                    raise Exception(f"GitLab API {response.status_code}: {url}")
                # If 401, fall through to retry without token
                print(f"[API] GitLab API 401 (Unauthorized) with token, retrying without token for: {url}")
            except Exception as e:
                # Swallow only 401-related errors so the anonymous retry runs;
                # everything else propagates immediately.
                if "GitLab API 401" not in str(e) and "401" not in str(e):
                    raise

        # Try without token
        if "PRIVATE-TOKEN" in headers:
            del headers["PRIVATE-TOKEN"]

        try:
            response = await client.get(url, headers=headers)
            if response.status_code == 200:
                return response.json()
            if response.status_code == 401:
                raise Exception("GitLab API 401:请配置 GITLAB_TOKEN 或确认仓库权限")
            if response.status_code == 403:
                raise Exception("GitLab API 403:请确认仓库权限/频率限制")
            raise Exception(f"GitLab API {response.status_code}: {url}")
        except Exception as e:
            print(f"[API] GitLab API 调用失败: {url}, 错误: {e}")
            raise
|
2025-11-26 21:11:12 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def fetch_file_content(url: str, headers: Optional[Dict[str, str]] = None) -> Optional[str]:
    """Fetch a raw file over HTTP and return its text, or None on failure.

    If an authenticated request fails with 401/403, retries once without
    headers (public repositories are often readable anonymously). All errors
    are logged and collapsed into a ``None`` return — callers treat ``None``
    as "skip this file".
    """
    async with httpx.AsyncClient(timeout=30) as client:
        try:
            response = await client.get(url, headers=headers or {})
            if response.status_code == 200:
                return response.text

            # If the authenticated request failed (401/403), retry without
            # the token — covers public repos with a bad/stale token.
            if response.status_code in (401, 403) and headers:
                print(f"[API] 获取文件内容返回 {response.status_code},尝试不带 Token 重试: {url}")
                response = await client.get(url)
                if response.status_code == 200:
                    return response.text

        except Exception as e:
            print(f"获取文件内容失败: {url}, 错误: {e}")
    # Any non-200 outcome (after the optional retry) yields None.
    return None
|
|
|
|
|
|
|
|
|
|
|
|
|
2025-12-06 20:47:28 +08:00
|
|
|
|
async def get_github_branches(repo_url: str, token: str = None) -> List[str]:
    """Return the branch names of a GitHub repository (first 100)."""
    repo_info = parse_repository_url(repo_url, "github")
    owner, repo = repo_info['owner'], repo_info['repo']

    branches_data = await github_api(
        f"https://api.github.com/repos/{owner}/{repo}/branches?per_page=100", token
    )

    # Defensive: a non-list payload means the API returned an error object.
    if not isinstance(branches_data, list):
        print(f"[Branch] 警告: 获取 GitHub 分支列表返回非列表数据: {branches_data}")
        return []

    names: List[str] = []
    for entry in branches_data:
        if isinstance(entry, dict) and "name" in entry:
            names.append(entry["name"])
    return names
|
2025-12-06 20:47:28 +08:00
|
|
|
|
|
|
|
|
|
|
|
2025-12-16 16:36:08 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def get_gitea_branches(repo_url: str, token: str = None) -> List[str]:
    """Return the branch names of a Gitea repository."""
    repo_info = parse_repository_url(repo_url, "gitea")
    api_base = repo_info['base_url']  # already shaped like {base}/api/v1
    owner, repo = repo_info['owner'], repo_info['repo']

    branches_data = await gitea_api(f"{api_base}/repos/{owner}/{repo}/branches", token)

    # Defensive: a non-list payload means the API returned an error object.
    if not isinstance(branches_data, list):
        print(f"[Branch] 警告: 获取 Gitea 分支列表返回非列表数据: {branches_data}")
        return []

    names: List[str] = []
    for entry in branches_data:
        if isinstance(entry, dict) and "name" in entry:
            names.append(entry["name"])
    return names
|
2025-12-16 16:36:08 +08:00
|
|
|
|
|
|
|
|
|
|
|
2025-12-06 20:47:28 +08:00
|
|
|
|
async def get_gitlab_branches(repo_url: str, token: str = None) -> List[str]:
    """Return the branch names of a GitLab repository (first 100)."""
    parsed = urlparse(repo_url)

    # Credentials may be embedded in the clone URL itself:
    #   https://oauth2:<token>@host/...  -> token sits in the password field
    #   https://<token>@host/...         -> token sits in the username field
    extracted_token = token
    if parsed.username == 'oauth2' and parsed.password:
        extracted_token = parsed.password
    elif parsed.username and not parsed.password:
        extracted_token = parsed.username

    repo_info = parse_repository_url(repo_url, "gitlab")
    base_url = repo_info['base_url']
    project_path = quote(repo_info['project_path'], safe='')

    branches_data = await gitlab_api(
        f"{base_url}/projects/{project_path}/repository/branches?per_page=100",
        extracted_token,
    )

    # Defensive: a non-list payload means the API returned an error object.
    if not isinstance(branches_data, list):
        print(f"[Branch] 警告: 获取 GitLab 分支列表返回非列表数据: {branches_data}")
        return []

    names: List[str] = []
    for entry in branches_data:
        if isinstance(entry, dict) and "name" in entry:
            names.append(entry["name"])
    return names
|
2025-12-06 20:47:28 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def get_github_files(repo_url: str, branch: str, token: str = None, exclude_patterns: List[str] = None) -> List[Dict[str, str]]:
    """List analyzable text files on a GitHub branch as {path, url} dicts."""
    repo_info = parse_repository_url(repo_url, "github")
    owner, repo = repo_info['owner'], repo_info['repo']

    # One recursive git-tree call returns every blob on the branch.
    tree_url = f"https://api.github.com/repos/{owner}/{repo}/git/trees/{quote(branch)}?recursive=1"
    tree_data = await github_api(tree_url, token)

    files = []
    for item in tree_data.get("tree", []):
        if item.get("type") != "blob":
            continue
        path = item["path"]
        if not is_text_file(path) or should_exclude(path, exclude_patterns):
            continue
        # Oversized files are dropped here so they are never even fetched.
        if item.get("size", 0) > settings.MAX_FILE_SIZE_BYTES:
            continue
        files.append({
            "path": path,
            "url": f"https://raw.githubusercontent.com/{owner}/{repo}/{quote(branch)}/{path}"
        })

    return files
|
|
|
|
|
|
|
|
|
|
|
|
|
2025-12-06 20:47:28 +08:00
|
|
|
|
async def get_gitlab_files(repo_url: str, branch: str, token: str = None, exclude_patterns: Optional[List[str]] = None) -> List[Dict[str, str]]:
    """List analyzable text files on a GitLab branch.

    Returns dicts with ``path``, a per-file raw-content ``url``, and the
    ``token`` to use when fetching it (so fetch_file_content can authenticate).
    """
    parsed = urlparse(repo_url)

    # Extract a token embedded in the URL, if present:
    #   https://oauth2:<token>@host/...  -> token in the password field
    #   https://<token>@host/...         -> token in the username field
    extracted_token = token
    if parsed.username:
        if parsed.username == 'oauth2' and parsed.password:
            extracted_token = parsed.password
        elif parsed.username and not parsed.password:
            extracted_token = parsed.username

    # Resolve the API base and URL-encode the project path
    repo_info = parse_repository_url(repo_url, "gitlab")
    base_url = repo_info['base_url']  # {base}/api/v4
    project_path = quote(repo_info['project_path'], safe='')

    # Fetch the repository file tree.
    # NOTE(review): per_page=100 with no pagination loop — repositories with
    # more than 100 tree entries are silently truncated; confirm acceptable.
    tree_url = f"{base_url}/projects/{project_path}/repository/tree?ref={quote(branch)}&recursive=true&per_page=100"
    tree_data = await gitlab_api(tree_url, extracted_token)

    files = []
    for item in tree_data:
        # Keep text blobs that are not excluded by the filter patterns.
        if item.get("type") == "blob" and is_text_file(item["path"]) and not should_exclude(item["path"], exclude_patterns):
            files.append({
                "path": item["path"],
                "url": f"{base_url}/projects/{project_path}/repository/files/{quote(item['path'], safe='')}/raw?ref={quote(branch)}",
                "token": extracted_token
            })

    return files
|
|
|
|
|
|
|
|
|
|
|
|
|
2025-12-16 16:36:08 +08:00
|
|
|
|
|
|
|
|
|
|
async def get_gitea_files(repo_url: str, branch: str, token: str = None, exclude_patterns: List[str] = None) -> List[Dict[str, str]]:
    """List analyzable text files on a Gitea branch as {path, url, token} dicts."""
    repo_info = parse_repository_url(repo_url, "gitea")
    base_url = repo_info['base_url']
    owner, repo = repo_info['owner'], repo_info['repo']

    # Gitea tree API accepts the branch name in place of a commit sha:
    # GET /repos/{owner}/{repo}/git/trees/{sha}?recursive=true
    tree_url = f"{base_url}/repos/{quote(owner)}/{quote(repo)}/git/trees/{quote(branch)}?recursive=true"
    tree_data = await gitea_api(tree_url, token)

    files = []
    for item in tree_data.get("tree", []):
        # Gitea marks regular files as 'blob', same as git itself.
        if item.get("type") != "blob":
            continue
        path = item["path"]
        if not is_text_file(path) or should_exclude(path, exclude_patterns):
            continue
        # Raw-content endpoint: GET /repos/{owner}/{repo}/raw/{filepath}?ref={branch}
        files.append({
            "path": path,
            "url": f"{base_url}/repos/{owner}/{repo}/raw/{quote(path)}?ref={quote(branch)}",
            "token": token  # passed along so fetch_file_content can authenticate
        })

    return files
|
2025-11-26 21:11:12 +08:00
|
|
|
|
async def scan_repo_task(task_id: str, db_session_factory, user_config: dict = None):
|
|
|
|
|
|
"""
|
|
|
|
|
|
后台仓库扫描任务
|
|
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
|
task_id: 任务ID
|
|
|
|
|
|
db_session_factory: 数据库会话工厂
|
|
|
|
|
|
user_config: 用户配置字典(包含llmConfig和otherConfig)
|
|
|
|
|
|
"""
|
|
|
|
|
|
async with db_session_factory() as db:
|
|
|
|
|
|
task = await db.get(AuditTask, task_id)
|
|
|
|
|
|
if not task:
|
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
# 1. 更新状态为运行中
|
|
|
|
|
|
task.status = "running"
|
2025-12-09 17:47:34 +08:00
|
|
|
|
task.started_at = datetime.now(timezone.utc)
|
2025-11-26 21:11:12 +08:00
|
|
|
|
await db.commit()
|
|
|
|
|
|
|
|
|
|
|
|
# 创建使用用户配置的LLM服务实例
|
|
|
|
|
|
llm_service = LLMService(user_config=user_config or {})
|
|
|
|
|
|
|
|
|
|
|
|
# 2. 获取项目信息
|
|
|
|
|
|
project = await db.get(Project, task.project_id)
|
2025-11-28 17:38:12 +08:00
|
|
|
|
if not project:
|
|
|
|
|
|
raise Exception("项目不存在")
|
|
|
|
|
|
|
|
|
|
|
|
# 检查项目类型 - 仅支持仓库类型项目
|
|
|
|
|
|
source_type = getattr(project, 'source_type', 'repository')
|
|
|
|
|
|
if source_type == 'zip':
|
|
|
|
|
|
raise Exception("ZIP类型项目请使用ZIP上传扫描接口")
|
|
|
|
|
|
|
|
|
|
|
|
if not project.repository_url:
|
2025-11-26 21:11:12 +08:00
|
|
|
|
raise Exception("仓库地址不存在")
|
|
|
|
|
|
|
|
|
|
|
|
repo_url = project.repository_url
|
|
|
|
|
|
branch = task.branch_name or project.default_branch or "main"
|
|
|
|
|
|
repo_type = project.repository_type or "other"
|
2025-12-06 20:47:28 +08:00
|
|
|
|
|
|
|
|
|
|
# 解析任务的排除模式
|
|
|
|
|
|
import json as json_module
|
|
|
|
|
|
task_exclude_patterns = []
|
|
|
|
|
|
if task.exclude_patterns:
|
|
|
|
|
|
try:
|
|
|
|
|
|
task_exclude_patterns = json_module.loads(task.exclude_patterns)
|
|
|
|
|
|
except:
|
|
|
|
|
|
pass
|
2025-11-26 21:11:12 +08:00
|
|
|
|
|
2025-11-28 17:38:12 +08:00
|
|
|
|
print(f"🚀 开始扫描仓库: {repo_url}, 分支: {branch}, 类型: {repo_type}, 来源: {source_type}")
|
2025-12-06 20:47:28 +08:00
|
|
|
|
if task_exclude_patterns:
|
|
|
|
|
|
print(f"📋 排除模式: {task_exclude_patterns}")
|
2025-11-26 21:11:12 +08:00
|
|
|
|
|
|
|
|
|
|
# 3. 获取文件列表
|
2026-01-05 17:12:47 +08:00
|
|
|
|
# Git Token 始终来自系统默认(.env),逻辑锁定
|
|
|
|
|
|
github_token = settings.GITHUB_TOKEN
|
|
|
|
|
|
gitlab_token = settings.GITLAB_TOKEN
|
|
|
|
|
|
gitea_token = settings.GITEA_TOKEN
|
2025-12-16 16:36:08 +08:00
|
|
|
|
|
|
|
|
|
|
|
2025-12-16 12:34:57 +08:00
|
|
|
|
|
2025-12-24 16:08:56 +08:00
|
|
|
|
# 获取SSH私钥(如果配置了)
|
2026-01-06 10:53:49 +08:00
|
|
|
|
user_other_config = user_config.get('otherConfig', {}) if user_config else {}
|
2025-12-24 16:08:56 +08:00
|
|
|
|
ssh_private_key = None
|
|
|
|
|
|
if 'sshPrivateKey' in user_other_config:
|
|
|
|
|
|
from app.core.encryption import decrypt_sensitive_data
|
|
|
|
|
|
ssh_private_key = decrypt_sensitive_data(user_other_config['sshPrivateKey'])
|
|
|
|
|
|
|
2025-11-26 21:11:12 +08:00
|
|
|
|
files: List[Dict[str, str]] = []
|
|
|
|
|
|
extracted_gitlab_token = None
|
2025-12-16 12:34:57 +08:00
|
|
|
|
|
2025-12-24 16:08:56 +08:00
|
|
|
|
# 检查是否为SSH URL
|
|
|
|
|
|
from app.services.git_ssh_service import GitSSHOperations
|
|
|
|
|
|
is_ssh_url = GitSSHOperations.is_ssh_url(repo_url)
|
2025-12-16 12:34:57 +08:00
|
|
|
|
|
2025-12-24 16:08:56 +08:00
|
|
|
|
if is_ssh_url:
|
|
|
|
|
|
# 使用SSH方式获取文件
|
|
|
|
|
|
if not ssh_private_key:
|
|
|
|
|
|
raise Exception("仓库使用SSH URL,但未配置SSH密钥。请先生成并配置SSH密钥。")
|
2025-12-16 12:34:57 +08:00
|
|
|
|
|
2025-12-24 16:08:56 +08:00
|
|
|
|
print(f"🔐 使用SSH方式访问仓库: {repo_url}")
|
2025-12-16 12:34:57 +08:00
|
|
|
|
try:
|
2025-12-24 16:08:56 +08:00
|
|
|
|
files_with_content = GitSSHOperations.get_repo_files_via_ssh(
|
|
|
|
|
|
repo_url, ssh_private_key, branch, task_exclude_patterns
|
|
|
|
|
|
)
|
|
|
|
|
|
# 转换为统一格式
|
|
|
|
|
|
files = [{'path': f['path'], 'content': f['content']} for f in files_with_content]
|
|
|
|
|
|
actual_branch = branch
|
|
|
|
|
|
print(f"✅ 通过SSH成功获取 {len(files)} 个文件")
|
2025-12-16 12:34:57 +08:00
|
|
|
|
except Exception as e:
|
2025-12-24 16:08:56 +08:00
|
|
|
|
raise Exception(f"SSH方式获取仓库文件失败: {str(e)}")
|
|
|
|
|
|
else:
|
|
|
|
|
|
# 使用API方式获取文件(原有逻辑)
|
|
|
|
|
|
# 构建分支尝试顺序(分支降级机制)
|
|
|
|
|
|
branches_to_try = [branch]
|
|
|
|
|
|
if project.default_branch and project.default_branch != branch:
|
|
|
|
|
|
branches_to_try.append(project.default_branch)
|
|
|
|
|
|
for common_branch in ["main", "master"]:
|
|
|
|
|
|
if common_branch not in branches_to_try:
|
|
|
|
|
|
branches_to_try.append(common_branch)
|
|
|
|
|
|
|
|
|
|
|
|
actual_branch = branch # 实际使用的分支
|
|
|
|
|
|
last_error = None
|
|
|
|
|
|
|
|
|
|
|
|
for try_branch in branches_to_try:
|
|
|
|
|
|
try:
|
|
|
|
|
|
print(f"🔄 尝试获取分支 {try_branch} 的文件列表...")
|
|
|
|
|
|
if repo_type == "github":
|
|
|
|
|
|
files = await get_github_files(repo_url, try_branch, github_token, task_exclude_patterns)
|
|
|
|
|
|
elif repo_type == "gitlab":
|
|
|
|
|
|
files = await get_gitlab_files(repo_url, try_branch, gitlab_token, task_exclude_patterns)
|
|
|
|
|
|
# GitLab文件可能带有token
|
|
|
|
|
|
if files and 'token' in files[0]:
|
|
|
|
|
|
extracted_gitlab_token = files[0].get('token')
|
2025-12-25 14:41:09 +08:00
|
|
|
|
elif repo_type == "gitea":
|
|
|
|
|
|
files = await get_gitea_files(repo_url, try_branch, gitea_token, task_exclude_patterns)
|
2025-12-24 16:08:56 +08:00
|
|
|
|
else:
|
2025-12-25 14:41:09 +08:00
|
|
|
|
raise Exception("不支持的仓库类型,仅支持 GitHub, GitLab 和 Gitea 仓库")
|
2025-12-24 16:08:56 +08:00
|
|
|
|
|
|
|
|
|
|
if files:
|
|
|
|
|
|
actual_branch = try_branch
|
|
|
|
|
|
if try_branch != branch:
|
|
|
|
|
|
print(f"⚠️ 分支 {branch} 不存在或无法访问,已降级到分支 {try_branch}")
|
|
|
|
|
|
break
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
last_error = str(e)
|
|
|
|
|
|
print(f"⚠️ 获取分支 {try_branch} 失败: {last_error[:100]}")
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
|
|
if not files:
|
|
|
|
|
|
error_msg = f"无法获取仓库文件,所有分支尝试均失败"
|
|
|
|
|
|
if last_error:
|
|
|
|
|
|
if "404" in last_error or "Not Found" in last_error:
|
|
|
|
|
|
error_msg = f"仓库或分支不存在: {branch}"
|
|
|
|
|
|
elif "401" in last_error or "403" in last_error:
|
|
|
|
|
|
error_msg = "无访问权限,请检查 Token 配置"
|
|
|
|
|
|
else:
|
|
|
|
|
|
error_msg = f"获取文件失败: {last_error[:100]}"
|
|
|
|
|
|
raise Exception(error_msg)
|
2025-12-16 12:34:57 +08:00
|
|
|
|
|
|
|
|
|
|
print(f"✅ 成功获取分支 {actual_branch} 的文件列表")
|
2025-11-26 21:11:12 +08:00
|
|
|
|
|
2025-12-19 16:08:26 +08:00
|
|
|
|
# 获取分析配置(优先使用用户配置)
|
|
|
|
|
|
analysis_config = get_analysis_config(user_config)
|
|
|
|
|
|
max_analyze_files = analysis_config['max_analyze_files']
|
2026-01-09 17:30:18 +08:00
|
|
|
|
analysis_concurrency = analysis_config['llm_concurrency'] # 并发数
|
2025-12-19 16:08:26 +08:00
|
|
|
|
llm_gap_ms = analysis_config['llm_gap_ms']
|
|
|
|
|
|
|
2025-11-26 21:11:12 +08:00
|
|
|
|
# 限制文件数量
|
2025-12-06 20:47:28 +08:00
|
|
|
|
# 如果指定了特定文件,则只分析这些文件
|
|
|
|
|
|
target_files = (user_config or {}).get('scan_config', {}).get('file_paths', [])
|
|
|
|
|
|
if target_files:
|
|
|
|
|
|
print(f"🎯 指定分析 {len(target_files)} 个文件")
|
|
|
|
|
|
files = [f for f in files if f['path'] in target_files]
|
2025-12-19 16:08:26 +08:00
|
|
|
|
elif max_analyze_files > 0:
|
|
|
|
|
|
files = files[:max_analyze_files]
|
|
|
|
|
|
|
2025-11-26 21:11:12 +08:00
|
|
|
|
task.total_files = len(files)
|
|
|
|
|
|
await db.commit()
|
|
|
|
|
|
|
2025-12-19 16:08:26 +08:00
|
|
|
|
print(f"📊 获取到 {len(files)} 个文件,开始分析 (最大文件数: {max_analyze_files}, 请求间隔: {llm_gap_ms}ms)")
|
2025-11-26 21:11:12 +08:00
|
|
|
|
|
|
|
|
|
|
# 4. 分析文件
|
|
|
|
|
|
total_issues = 0
|
|
|
|
|
|
total_lines = 0
|
|
|
|
|
|
quality_scores = []
|
|
|
|
|
|
scanned_files = 0
|
|
|
|
|
|
failed_files = 0
|
2025-12-06 21:21:36 +08:00
|
|
|
|
skipped_files = 0 # 跳过的文件(空文件、太大等)
|
2025-11-26 21:11:12 +08:00
|
|
|
|
consecutive_failures = 0
|
|
|
|
|
|
MAX_CONSECUTIVE_FAILURES = 5
|
2026-01-06 10:53:49 +08:00
|
|
|
|
last_error = None
|
2025-11-26 21:11:12 +08:00
|
|
|
|
|
2026-01-09 17:30:18 +08:00
|
|
|
|
# 4. 并行分析文件
|
|
|
|
|
|
print(f"🧬 启动并行分析: {len(files)} 个文件, 并发数: {analysis_concurrency}")
|
|
|
|
|
|
|
|
|
|
|
|
semaphore = asyncio.Semaphore(analysis_concurrency)
|
|
|
|
|
|
|
|
|
|
|
|
async def analyze_single_file(file_info):
|
|
|
|
|
|
"""内部函数:分析单个文件并返回结果"""
|
|
|
|
|
|
nonlocal consecutive_failures, last_error
|
|
|
|
|
|
|
|
|
|
|
|
async with semaphore:
|
|
|
|
|
|
if task_control.is_cancelled(task_id):
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
f_path = file_info['path']
|
|
|
|
|
|
MAX_RETRIES = 3
|
|
|
|
|
|
for attempt in range(MAX_RETRIES):
|
|
|
|
|
|
try:
|
|
|
|
|
|
# 4.1 获取文件内容 (仅在第一次尝试或内容获取失败时获取)
|
|
|
|
|
|
if attempt == 0:
|
|
|
|
|
|
if is_ssh_url:
|
|
|
|
|
|
content = file_info.get('content', '')
|
|
|
|
|
|
else:
|
|
|
|
|
|
headers = {}
|
|
|
|
|
|
if repo_type == "gitlab":
|
|
|
|
|
|
token_to_use = file_info.get('token') or gitlab_token
|
|
|
|
|
|
if token_to_use: headers["PRIVATE-TOKEN"] = token_to_use
|
|
|
|
|
|
elif repo_type == "gitea":
|
|
|
|
|
|
token_to_use = file_info.get('token') or gitea_token
|
|
|
|
|
|
if token_to_use: headers["Authorization"] = f"token {token_to_use}"
|
|
|
|
|
|
elif repo_type == "github" and github_token:
|
|
|
|
|
|
headers["Authorization"] = f"Bearer {github_token}"
|
|
|
|
|
|
|
|
|
|
|
|
content = await fetch_file_content(file_info["url"], headers)
|
|
|
|
|
|
|
|
|
|
|
|
if not content or not content.strip():
|
|
|
|
|
|
return {"type": "skip", "reason": "empty", "path": f_path}
|
|
|
|
|
|
|
|
|
|
|
|
if len(content) > settings.MAX_FILE_SIZE_BYTES:
|
|
|
|
|
|
return {"type": "skip", "reason": "too_large", "path": f_path}
|
|
|
|
|
|
|
2026-01-30 14:49:12 +08:00
|
|
|
|
if task_control.is_cancelled(task_id):
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
2026-01-09 17:30:18 +08:00
|
|
|
|
# 4.2 LLM 分析
|
|
|
|
|
|
language = get_language_from_path(f_path)
|
|
|
|
|
|
scan_config = (user_config or {}).get('scan_config', {})
|
|
|
|
|
|
rule_set_id = scan_config.get('rule_set_id')
|
|
|
|
|
|
prompt_template_id = scan_config.get('prompt_template_id')
|
|
|
|
|
|
|
|
|
|
|
|
if rule_set_id or prompt_template_id:
|
|
|
|
|
|
analysis_result = await llm_service.analyze_code_with_rules(
|
|
|
|
|
|
content, language,
|
|
|
|
|
|
rule_set_id=rule_set_id,
|
|
|
|
|
|
prompt_template_id=prompt_template_id,
|
|
|
|
|
|
db_session=None
|
|
|
|
|
|
)
|
|
|
|
|
|
else:
|
|
|
|
|
|
analysis_result = await llm_service.analyze_code(content, language)
|
|
|
|
|
|
|
|
|
|
|
|
return {
|
|
|
|
|
|
"type": "success",
|
|
|
|
|
|
"path": f_path,
|
|
|
|
|
|
"content": content,
|
|
|
|
|
|
"language": language,
|
|
|
|
|
|
"analysis": analysis_result
|
|
|
|
|
|
}
|
2026-01-30 14:49:12 +08:00
|
|
|
|
except asyncio.CancelledError:
|
|
|
|
|
|
# 捕获取消异常,不再重试
|
|
|
|
|
|
return None
|
2026-01-09 17:30:18 +08:00
|
|
|
|
except Exception as e:
|
|
|
|
|
|
if attempt < MAX_RETRIES - 1:
|
|
|
|
|
|
wait_time = (attempt + 1) * 2
|
2026-01-16 10:21:30 +08:00
|
|
|
|
# 特殊处理限流错误提示
|
|
|
|
|
|
error_str = str(e)
|
|
|
|
|
|
if "429" in error_str or "rate limit" in error_str.lower() or "额度不足" in error_str:
|
|
|
|
|
|
print(f"🚫 [限流提示] 仓库扫描任务触发 LLM 频率限制 (429),建议在设置中降低并发数或增加请求间隔。文件: {f_path}")
|
|
|
|
|
|
|
2026-01-09 17:30:18 +08:00
|
|
|
|
print(f"⚠️ 分析文件失败 ({f_path}), 正在进行第 {attempt+1} 次重试... 错误: {e}")
|
|
|
|
|
|
await asyncio.sleep(wait_time)
|
|
|
|
|
|
continue
|
|
|
|
|
|
else:
|
|
|
|
|
|
print(f"❌ 分析文件最终失败 ({f_path}): {e}")
|
|
|
|
|
|
last_error = str(e)
|
|
|
|
|
|
return {"type": "error", "path": f_path, "error": str(e)}
|
|
|
|
|
|
|
2026-01-30 14:49:12 +08:00
|
|
|
|
# 创建所有分析任务对象以便跟踪
|
|
|
|
|
|
task_objects = [asyncio.create_task(analyze_single_file(f)) for f in files]
|
2026-01-09 17:30:18 +08:00
|
|
|
|
|
2026-01-30 14:49:12 +08:00
|
|
|
|
try:
|
|
|
|
|
|
# 使用 as_completed 处理结果,这样可以实时更新进度且安全使用当前 db session
|
|
|
|
|
|
for future in asyncio.as_completed(task_objects):
|
|
|
|
|
|
if task_control.is_cancelled(task_id):
|
|
|
|
|
|
# 停止处理后续完成的任务
|
|
|
|
|
|
print(f"🛑 任务 {task_id} 检测到取消信号,停止主循环")
|
|
|
|
|
|
break
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
res = await future
|
|
|
|
|
|
except asyncio.CancelledError:
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
|
|
if not res: continue
|
|
|
|
|
|
|
|
|
|
|
|
if res["type"] == "skip":
|
|
|
|
|
|
skipped_files += 1
|
|
|
|
|
|
task.total_files = max(0, task.total_files - 1)
|
|
|
|
|
|
elif res["type"] == "error":
|
|
|
|
|
|
failed_files += 1
|
|
|
|
|
|
consecutive_failures += 1
|
|
|
|
|
|
elif res["type"] == "success":
|
|
|
|
|
|
consecutive_failures = 0
|
|
|
|
|
|
scanned_files += 1
|
|
|
|
|
|
|
|
|
|
|
|
f_path = res["path"]
|
|
|
|
|
|
analysis = res["analysis"]
|
|
|
|
|
|
file_lines = res["content"].split('\n')
|
|
|
|
|
|
total_lines += len(file_lines)
|
|
|
|
|
|
|
|
|
|
|
|
# 保存问题
|
|
|
|
|
|
issues = analysis.get("issues", [])
|
|
|
|
|
|
for issue in issues:
|
2026-01-30 15:16:51 +08:00
|
|
|
|
try:
|
|
|
|
|
|
# 防御性检查:确保 issue 是字典
|
|
|
|
|
|
if not isinstance(issue, dict):
|
|
|
|
|
|
print(f"⚠️ 警告: 任务 {task_id} 中文件 {f_path} 的分析结果包含无效的问题格式: {issue}")
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
|
|
line_num = issue.get("line", 1)
|
|
|
|
|
|
code_snippet = issue.get("code_snippet")
|
|
|
|
|
|
if not code_snippet or len(code_snippet.strip()) < 5:
|
|
|
|
|
|
try:
|
|
|
|
|
|
idx = max(0, int(line_num) - 1)
|
|
|
|
|
|
start = max(0, idx - 2)
|
|
|
|
|
|
end = min(len(file_lines), idx + 3)
|
|
|
|
|
|
code_snippet = '\n'.join(file_lines[start:end])
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
code_snippet = ""
|
|
|
|
|
|
|
|
|
|
|
|
audit_issue = AuditIssue(
|
|
|
|
|
|
task_id=task.id,
|
|
|
|
|
|
file_path=f_path,
|
|
|
|
|
|
line_number=line_num,
|
|
|
|
|
|
column_number=issue.get("column"),
|
|
|
|
|
|
issue_type=issue.get("type", "maintainability"),
|
|
|
|
|
|
severity=issue.get("severity", "low"),
|
|
|
|
|
|
title=issue.get("title", "Issue"),
|
|
|
|
|
|
message=issue.get("description") or issue.get("title", "Issue"),
|
|
|
|
|
|
suggestion=issue.get("suggestion"),
|
|
|
|
|
|
code_snippet=code_snippet,
|
|
|
|
|
|
ai_explanation=issue.get("ai_explanation"),
|
|
|
|
|
|
status="open"
|
|
|
|
|
|
)
|
|
|
|
|
|
db.add(audit_issue)
|
|
|
|
|
|
total_issues += 1
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
print(f"⚠️ 处理单个问题时出错 (文件 {f_path}): {e}")
|
|
|
|
|
|
continue
|
2026-01-30 14:49:12 +08:00
|
|
|
|
|
|
|
|
|
|
if "quality_score" in analysis:
|
2026-01-30 15:16:51 +08:00
|
|
|
|
try:
|
|
|
|
|
|
quality_score = float(analysis["quality_score"])
|
|
|
|
|
|
quality_scores.append(quality_score)
|
|
|
|
|
|
except (ValueError, TypeError):
|
|
|
|
|
|
pass
|
2026-01-30 14:49:12 +08:00
|
|
|
|
|
|
|
|
|
|
# 更新主任务进度
|
|
|
|
|
|
processed_count = scanned_files + failed_files
|
|
|
|
|
|
task.scanned_files = processed_count
|
|
|
|
|
|
task.total_lines = total_lines
|
|
|
|
|
|
task.issues_count = total_issues
|
|
|
|
|
|
await db.commit() # 这里的 commit 是在一个协程里按序进行的,是安全的
|
2025-11-26 21:11:12 +08:00
|
|
|
|
|
2026-01-30 14:49:12 +08:00
|
|
|
|
if processed_count % 10 == 0 or processed_count == len(files):
|
|
|
|
|
|
print(f"📈 任务 {task_id}: 进度 {processed_count}/{len(files)} ({int(processed_count/len(files)*100) if len(files) > 0 else 0}%)")
|
|
|
|
|
|
|
|
|
|
|
|
if consecutive_failures >= MAX_CONSECUTIVE_FAILURES:
|
|
|
|
|
|
print(f"❌ 任务 {task_id}: 连续失败 {consecutive_failures} 次,停止分析")
|
|
|
|
|
|
break
|
|
|
|
|
|
finally:
|
|
|
|
|
|
# 无论正常结束、中途 break 还是发生异常,都确保取消所有未完成的任务
|
|
|
|
|
|
pending_count = 0
|
|
|
|
|
|
for t in task_objects:
|
|
|
|
|
|
if not t.done():
|
|
|
|
|
|
t.cancel()
|
|
|
|
|
|
pending_count += 1
|
2026-01-09 17:30:18 +08:00
|
|
|
|
|
2026-01-30 14:49:12 +08:00
|
|
|
|
if pending_count > 0:
|
|
|
|
|
|
print(f"🧹 任务 {task_id}: 已清理 {pending_count} 个后台待处理或执行中的任务")
|
|
|
|
|
|
# 等待一下让取消逻辑执行完毕,但不阻塞太久
|
|
|
|
|
|
await asyncio.gather(*task_objects, return_exceptions=True)
|
2025-11-26 21:11:12 +08:00
|
|
|
|
|
|
|
|
|
|
# 5. 完成任务
|
|
|
|
|
|
avg_quality_score = sum(quality_scores) / len(quality_scores) if quality_scores else 100.0
|
|
|
|
|
|
|
2025-12-06 21:21:36 +08:00
|
|
|
|
# 判断任务状态
|
|
|
|
|
|
# 如果所有文件都被跳过(空文件等),标记为完成但给出提示
|
|
|
|
|
|
if len(files) > 0 and scanned_files == 0 and skipped_files == len(files):
|
|
|
|
|
|
task.status = "completed"
|
2025-12-09 17:47:34 +08:00
|
|
|
|
task.completed_at = datetime.now(timezone.utc)
|
2025-12-06 21:21:36 +08:00
|
|
|
|
task.scanned_files = 0
|
|
|
|
|
|
task.total_lines = 0
|
|
|
|
|
|
task.issues_count = 0
|
|
|
|
|
|
task.quality_score = 100.0
|
|
|
|
|
|
await db.commit()
|
|
|
|
|
|
print(f"⚠️ 任务 {task_id} 完成: 所有 {len(files)} 个文件均为空或被跳过,无需分析")
|
|
|
|
|
|
# 如果有文件需要分析但全部失败(LLM调用失败),标记为失败
|
|
|
|
|
|
elif len(files) > 0 and scanned_files == 0 and failed_files > 0:
|
2025-12-06 20:47:28 +08:00
|
|
|
|
task.status = "failed"
|
2025-12-09 17:47:34 +08:00
|
|
|
|
task.completed_at = datetime.now(timezone.utc)
|
2025-12-06 20:47:28 +08:00
|
|
|
|
task.scanned_files = 0
|
|
|
|
|
|
task.total_lines = total_lines
|
|
|
|
|
|
task.issues_count = 0
|
|
|
|
|
|
task.quality_score = 0
|
2026-01-06 10:53:49 +08:00
|
|
|
|
|
|
|
|
|
|
# 尝试从最后一个错误中获取更详细的系统提示
|
|
|
|
|
|
error_msg = f"{failed_files} 个文件分析失败,请检查 LLM API 配置。最近一个错误: {str(last_error) if 'last_error' in locals() else '未知错误'}"
|
|
|
|
|
|
task.error_message = error_msg
|
2025-12-06 20:47:28 +08:00
|
|
|
|
await db.commit()
|
2026-01-06 10:53:49 +08:00
|
|
|
|
print(f"❌ 任务 {task_id} 失败: {error_msg}")
|
2025-12-06 20:47:28 +08:00
|
|
|
|
else:
|
|
|
|
|
|
task.status = "completed"
|
2025-12-09 17:47:34 +08:00
|
|
|
|
task.completed_at = datetime.now(timezone.utc)
|
2026-01-08 16:15:19 +08:00
|
|
|
|
# 最终显示的已扫描文件数为成功分析的文件数
|
2025-12-06 20:47:28 +08:00
|
|
|
|
task.scanned_files = scanned_files
|
|
|
|
|
|
task.total_lines = total_lines
|
|
|
|
|
|
task.issues_count = total_issues
|
|
|
|
|
|
task.quality_score = avg_quality_score
|
|
|
|
|
|
await db.commit()
|
2026-01-08 16:15:19 +08:00
|
|
|
|
|
|
|
|
|
|
result_msg = f"✅ 任务 {task_id} 完成: 成功分析 {scanned_files} 个文件"
|
|
|
|
|
|
if failed_files > 0:
|
|
|
|
|
|
result_msg += f", {failed_files} 个文件失败"
|
|
|
|
|
|
result_msg += f", 发现 {total_issues} 个问题, 质量分 {avg_quality_score:.1f}"
|
|
|
|
|
|
print(result_msg)
|
2025-11-26 21:11:12 +08:00
|
|
|
|
task_control.cleanup_task(task_id)
|
|
|
|
|
|
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
print(f"❌ 扫描失败: {e}")
|
|
|
|
|
|
task.status = "failed"
|
2025-12-09 17:47:34 +08:00
|
|
|
|
task.completed_at = datetime.now(timezone.utc)
|
2025-11-26 21:11:12 +08:00
|
|
|
|
await db.commit()
|
|
|
|
|
|
task_control.cleanup_task(task_id)
|