From 364b8cea4252b6dc904c2e793df292359574517f Mon Sep 17 00:00:00 2001 From: lintsinghua Date: Tue, 16 Dec 2025 12:34:57 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=A4=8DAgent=E5=AE=A1=E8=AE=A1?= =?UTF-8?q?=E4=BB=BB=E5=8A=A1=E8=AF=86=E5=88=AB=E4=B8=8D=E5=88=B0=E6=96=87?= =?UTF-8?q?=E4=BB=B6=E7=9A=84=E9=94=99=E8=AF=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/Dockerfile | 124 +++++++---- backend/app/api/v1/endpoints/agent_tasks.py | 216 ++++++++++++++++---- backend/app/services/scanner.py | 59 +++++- docker-compose.yml | 14 ++ frontend/Dockerfile | 23 ++- 5 files changed, 336 insertions(+), 100 deletions(-) diff --git a/backend/Dockerfile b/backend/Dockerfile index f2bae6e..d26ef5b 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -1,49 +1,104 @@ -FROM python:3.12-slim +# ============================================ +# 多阶段构建 - 构建阶段 +# ============================================ +FROM python:3.12-slim AS builder WORKDIR /app ENV PYTHONDONTWRITEBYTECODE=1 ENV PYTHONUNBUFFERED=1 -# 清除代理设置,避免容器内网络问题 -ENV http_proxy= -ENV https_proxy= -ENV HTTP_PROXY= -ENV HTTPS_PROXY= -# 配置 apt 重试机制以处理网络不稳定 -RUN echo 'Acquire::Retries "3";' > /etc/apt/apt.conf.d/80-retries +# 彻底清除代理设置 +ENV http_proxy="" +ENV https_proxy="" +ENV HTTP_PROXY="" +ENV HTTPS_PROXY="" +ENV all_proxy="" +ENV ALL_PROXY="" +ENV no_proxy="*" +ENV NO_PROXY="*" -# 安装系统依赖(包含 WeasyPrint 所需的库和中文字体支持) -RUN rm -f /etc/apt/apt.conf.d/proxy.conf 2>/dev/null || true && \ - unset http_proxy https_proxy HTTP_PROXY HTTPS_PROXY all_proxy ALL_PROXY && \ - apt-get update && \ - apt-get install -y --no-install-recommends --fix-missing \ +# 清除 apt 代理配置并配置重试,使用阿里云镜像 +RUN sed -i 's|deb.debian.org|mirrors.aliyun.com|g' /etc/apt/sources.list.d/debian.sources 2>/dev/null || \ + sed -i 's|deb.debian.org|mirrors.aliyun.com|g' /etc/apt/sources.list 2>/dev/null || true && \ + rm -f /etc/apt/apt.conf.d/proxy.conf 2>/dev/null || true && \ + echo 'Acquire::http::Proxy 
"false";' > /etc/apt/apt.conf.d/99-no-proxy && \ + echo 'Acquire::https::Proxy "false";' >> /etc/apt/apt.conf.d/99-no-proxy && \ + echo 'Acquire::Retries "5";' >> /etc/apt/apt.conf.d/99-no-proxy && \ + echo 'Acquire::http::Timeout "60";' >> /etc/apt/apt.conf.d/99-no-proxy + +# 安装构建依赖(gcc 只在构建阶段需要) +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ gcc \ libpq-dev \ - curl \ - libpango-1.0-0 \ - libpangoft2-1.0-0 \ - libpangocairo-1.0-0 \ - libcairo2 \ - libgdk-pixbuf-2.0-0 \ libffi-dev \ - libglib2.0-0 \ - shared-mime-info \ - fonts-noto-cjk \ - fonts-noto-cjk-extra \ - fontconfig \ - && fc-cache -fv \ && rm -rf /var/lib/apt/lists/* # 安装 uv COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv -# 复制依赖文件和 README +# 配置 uv 使用官方 PyPI(国内镜像不稳定时使用) +ENV UV_INDEX_URL=https://pypi.org/simple/ + +# 复制依赖文件 COPY pyproject.toml uv.lock README.md ./ -# 使用 uv 安装依赖(确保无代理) -RUN unset http_proxy https_proxy HTTP_PROXY HTTPS_PROXY all_proxy ALL_PROXY && \ - uv sync --frozen --no-dev +# 安装 Python 依赖到虚拟环境 +RUN uv sync --frozen --no-dev + +# ============================================ +# 多阶段构建 - 运行阶段 +# ============================================ +FROM python:3.12-slim AS runtime + +WORKDIR /app + +ENV PYTHONDONTWRITEBYTECODE=1 +ENV PYTHONUNBUFFERED=1 + +# 彻底清除代理设置 +ENV http_proxy="" +ENV https_proxy="" +ENV HTTP_PROXY="" +ENV HTTPS_PROXY="" +ENV all_proxy="" +ENV ALL_PROXY="" +ENV no_proxy="*" +ENV NO_PROXY="*" + +# 配置使用阿里云 Debian 镜像源 +RUN sed -i 's|deb.debian.org|mirrors.aliyun.com|g' /etc/apt/sources.list.d/debian.sources 2>/dev/null || \ + sed -i 's|deb.debian.org|mirrors.aliyun.com|g' /etc/apt/sources.list 2>/dev/null || true + +# 清除 apt 代理配置 +RUN rm -f /etc/apt/apt.conf.d/proxy.conf 2>/dev/null || true && \ + echo 'Acquire::http::Proxy "false";' > /etc/apt/apt.conf.d/99-no-proxy && \ + echo 'Acquire::https::Proxy "false";' >> /etc/apt/apt.conf.d/99-no-proxy && \ + echo 'Acquire::Retries "5";' >> /etc/apt/apt.conf.d/99-no-proxy && \ + echo 
'Acquire::http::Timeout "60";' >> /etc/apt/apt.conf.d/99-no-proxy + +# 只安装运行时依赖(不需要 gcc) +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + libpq5 \ + curl \ + git \ + libpango-1.0-0 \ + libpangoft2-1.0-0 \ + libpangocairo-1.0-0 \ + libcairo2 \ + libgdk-pixbuf-2.0-0 \ + libglib2.0-0 \ + shared-mime-info \ + fonts-noto-cjk \ + && fc-cache -fv \ + && rm -rf /var/lib/apt/lists/* \ + && apt-get clean + +# 从构建阶段复制虚拟环境和 uv +COPY --from=builder /app/.venv /app/.venv +COPY --from=builder /usr/local/bin/uv /usr/local/bin/uv # 复制应用代码 COPY . . @@ -54,12 +109,5 @@ RUN mkdir -p /app/uploads/zip_files # 暴露端口 EXPOSE 8000 -# 启动命令(开发模式由 docker-compose 覆盖) -CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"] - - - - - - - +# 启动命令 +CMD [".venv/bin/uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/backend/app/api/v1/endpoints/agent_tasks.py b/backend/app/api/v1/endpoints/agent_tasks.py index a98f7cb..4bbec44 100644 --- a/backend/app/api/v1/endpoints/agent_tasks.py +++ b/backend/app/api/v1/endpoints/agent_tasks.py @@ -261,12 +261,23 @@ async def _execute_agent_task(task_id: str): if not project: logger.error(f"Project not found for task {task_id}") return - - # 获取项目根目录 - project_root = await _get_project_root(project, task_id) - - # 获取用户配置 + + # 获取用户配置(需要在获取项目根目录之前,以便传递 token) user_config = await _get_user_config(db, task.created_by) + + # 从用户配置中提取 token(用于私有仓库克隆) + other_config = (user_config or {}).get('otherConfig', {}) + github_token = other_config.get('githubToken') or settings.GITHUB_TOKEN + gitlab_token = other_config.get('gitlabToken') or settings.GITLAB_TOKEN + + # 获取项目根目录(传递任务指定的分支和认证 token) + project_root = await _get_project_root( + project, + task_id, + task.branch_name, + github_token=github_token, + gitlab_token=gitlab_token, + ) # 更新状态为运行中 task.status = AgentTaskStatus.RUNNING @@ -1164,6 +1175,7 @@ async def create_agent_task( current_phase=AgentTaskPhase.PLANNING, 
def _redact_tokens(text: str, *tokens: Optional[str]) -> str:
    """Return *text* with every non-empty token (raw or percent-encoded) masked."""
    from urllib.parse import quote

    for token in tokens:
        if not token:
            continue
        for variant in {token, quote(token, safe="")}:
            text = text.replace(variant, "***")
    return text


def _build_clone_url(
    repo_url: str,
    repo_type: str,
    github_token: Optional[str] = None,
    gitlab_token: Optional[str] = None,
) -> str:
    """Return *repo_url* with credentials embedded when a matching token exists.

    Only http(s) URLs are rewritten; SSH/scp-style or local URLs are returned
    untouched because userinfo injection would corrupt them.
    """
    from urllib.parse import quote
    from urllib.parse import urlparse
    from urllib.parse import urlunparse

    if repo_type == "github" and github_token:
        userinfo = quote(github_token, safe="")
    elif repo_type == "gitlab" and gitlab_token:
        userinfo = f"oauth2:{quote(gitlab_token, safe='')}"
    else:
        return repo_url

    parsed = urlparse(repo_url)
    if parsed.scheme not in ("http", "https") or not parsed.netloc:
        return repo_url

    # Drop any userinfo already present so we never produce "a@b@host".
    host = parsed.netloc.rpartition("@")[2]
    return urlunparse(parsed._replace(netloc=f"{userinfo}@{host}"))


def _clone_branch_candidates(
    branch_name: Optional[str],
    default_branch: Optional[str],
) -> list:
    """Return the ordered, de-duplicated list of branches to try cloning."""
    ordered = [branch_name, default_branch, "main", "master"]
    return list(dict.fromkeys(name for name in ordered if name))


def _explain_clone_failure(stderr: str) -> str:
    """Map git's (already redacted) stderr to a user-facing error message."""
    if "Authentication failed" in stderr or "401" in stderr:
        return "认证失败,请检查 GitHub/GitLab Token 配置"
    if "not found" in stderr.lower() or "404" in stderr:
        return "仓库不存在或无访问权限"
    if "Could not resolve host" in stderr:
        return "无法解析主机名,请检查网络连接"
    if "Permission denied" in stderr or "403" in stderr:
        return "无访问权限,请检查仓库权限或 Token"
    return f"克隆仓库失败: {stderr[:200]}"


def _reset_directory(path: str) -> None:
    """Make *path* exist as an empty directory, removing previous content."""
    import shutil

    if os.path.exists(path):
        shutil.rmtree(path)
    os.makedirs(path, exist_ok=True)


async def _run_git(args: list, timeout: int):
    """Run ``git <args>`` in a worker thread and return the CompletedProcess.

    The call is moved off the event loop because a clone may block for the
    whole timeout. ``GIT_TERMINAL_PROMPT=0`` makes git fail immediately instead
    of waiting for interactive credentials.
    """
    import asyncio
    import subprocess

    env = {**os.environ, "GIT_TERMINAL_PROMPT": "0"}
    return await asyncio.to_thread(
        subprocess.run,
        ["git", *args],
        capture_output=True,
        text=True,
        timeout=timeout,
        env=env,
    )


async def _get_project_root(
    project: Project,
    task_id: str,
    branch_name: Optional[str] = None,
    github_token: Optional[str] = None,
    gitlab_token: Optional[str] = None,
) -> str:
    """Materialise the project's files on disk and return their root directory.

    Two project kinds are handled:

    - ``zip``: the stored archive is extracted into the task directory.
    - ``repository``: the repository is shallow-cloned into the task directory.

    Args:
        project: Project whose ``source_type``, ``repository_url``,
            ``repository_type``, ``default_branch`` and ``id`` are read.
        task_id: Task identifier; the working directory is
            ``/tmp/deepaudit/<task_id>`` and is wiped before use.
        branch_name: Branch requested for this task. Tried first, then
            ``project.default_branch``, ``main``, ``master`` and finally the
            remote's own default branch.
        github_token: GitHub access token used for private repositories.
        gitlab_token: GitLab access token used for private repositories.

    Returns:
        Path of the directory containing the project files.

    Raises:
        RuntimeError: If git is unavailable, the archive is missing or cannot
            be extracted, every clone attempt fails, or the resulting
            directory is empty.
    """
    import subprocess
    import zipfile

    base_path = f"/tmp/deepaudit/{task_id}"

    # Start from a clean directory so stale files from an earlier run of the
    # same task can never be mistaken for project content.
    _reset_directory(base_path)

    if project.source_type == "zip":
        from app.services.zip_storage import load_project_zip

        zip_path = await load_project_zip(project.id)
        if not (zip_path and os.path.exists(zip_path)):
            logger.warning(f"⚠️ ZIP file not found for project {project.id}")
            raise RuntimeError(f"项目 ZIP 文件不存在: {project.id}")

        try:
            with zipfile.ZipFile(zip_path, 'r') as zip_ref:
                zip_ref.extractall(base_path)
        except Exception as e:
            logger.error(f"Failed to extract ZIP {zip_path}: {e}")
            raise RuntimeError(f"无法解压项目文件: {e}") from e
        logger.info(f"✅ Extracted ZIP project {project.id} to {base_path}")

    elif project.source_type == "repository" and project.repository_url:
        repo_url = project.repository_url
        repo_type = project.repository_type or "other"

        # Fail early with a clear message when git is missing from the image.
        try:
            git_check = await _run_git(["--version"], timeout=10)
        except FileNotFoundError as e:
            raise RuntimeError("Git 未安装,无法克隆仓库。请在 Docker 容器中安装 git。") from e
        except subprocess.TimeoutExpired as e:
            raise RuntimeError("Git 检测超时") from e
        if git_check.returncode != 0:
            raise RuntimeError("Git 未安装,无法克隆仓库。请在 Docker 容器中安装 git。")
        logger.debug(f"Git version: {git_check.stdout.strip()}")

        auth_url = _build_clone_url(repo_url, repo_type, github_token, gitlab_token)
        if auth_url != repo_url:
            provider = "GitHub" if repo_type == "github" else "GitLab"
            logger.info(f"🔐 Using {provider} token for authentication")

        clone_success = False
        last_error = ""

        # A trailing None means "do not pass --branch": let the remote decide.
        attempts = [*_clone_branch_candidates(branch_name, project.default_branch), None]
        for branch in attempts:
            # A failed or timed-out attempt may leave partial content behind.
            _reset_directory(base_path)

            if branch is None:
                logger.info(f"🔄 Trying to clone without specifying branch...")
                branch_args = []
            else:
                logger.info(f"🔄 Trying to clone repository (branch: {branch})...")
                branch_args = ["--branch", branch]

            try:
                # "--" stops option parsing so a URL can never act as a flag.
                result = await _run_git(
                    ["clone", "--depth", "1", *branch_args, "--", auth_url, base_path],
                    timeout=120,
                )
            except subprocess.TimeoutExpired:
                if branch is None:
                    last_error = "克隆仓库超时"
                else:
                    last_error = f"克隆分支 {branch} 超时"
                    logger.warning(last_error)
                continue

            if result.returncode == 0:
                shown = "default branch" if branch is None else f"branch: {branch}"
                logger.info(f"✅ Cloned repository {repo_url} ({shown}) to {base_path}")
                clone_success = True
                break

            # git echoes the remote URL (including credentials) in its error
            # output; mask tokens before the text reaches logs or callers.
            last_error = _redact_tokens(result.stderr, github_token, gitlab_token)
            if branch is not None:
                logger.warning(f"Failed to clone branch {branch}: {last_error[:200]}")

        if not clone_success:
            error_msg = _explain_clone_failure(last_error)
            logger.error(f"❌ {error_msg}")
            raise RuntimeError(error_msg)

    # Covers unknown source types as well as "successful" but empty results.
    if not os.listdir(base_path):
        raise RuntimeError(f"项目目录为空,可能是克隆/解压失败: {base_path}")

    return base_path
print(f"🔄 尝试获取分支 {try_branch} 的文件列表...") + if repo_type == "github": + files = await get_github_files(repo_url, try_branch, github_token, task_exclude_patterns) + elif repo_type == "gitlab": + files = await get_gitlab_files(repo_url, try_branch, gitlab_token, task_exclude_patterns) + # GitLab文件可能带有token + if files and 'token' in files[0]: + extracted_gitlab_token = files[0].get('token') + else: + raise Exception("不支持的仓库类型,仅支持 GitHub 和 GitLab 仓库") + + if files: + actual_branch = try_branch + if try_branch != branch: + print(f"⚠️ 分支 {branch} 不存在或无法访问,已降级到分支 {try_branch}") + break + except Exception as e: + last_error = str(e) + print(f"⚠️ 获取分支 {try_branch} 失败: {last_error[:100]}") + continue + + if not files: + error_msg = f"无法获取仓库文件,所有分支尝试均失败" + if last_error: + if "404" in last_error or "Not Found" in last_error: + error_msg = f"仓库或分支不存在: {branch}" + elif "401" in last_error or "403" in last_error: + error_msg = "无访问权限,请检查 Token 配置" + else: + error_msg = f"获取文件失败: {last_error[:100]}" + raise Exception(error_msg) + + print(f"✅ 成功获取分支 {actual_branch} 的文件列表") # 限制文件数量 # 如果指定了特定文件,则只分析这些文件 diff --git a/docker-compose.yml b/docker-compose.yml index 0c9558c..d4e716e 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -32,6 +32,13 @@ services: backend: build: context: ./backend + args: + - http_proxy= + - https_proxy= + - HTTP_PROXY= + - HTTPS_PROXY= + - all_proxy= + - ALL_PROXY= restart: unless-stopped volumes: - backend_uploads:/app/uploads @@ -63,6 +70,13 @@ services: frontend: build: context: ./frontend + args: + - http_proxy= + - https_proxy= + - HTTP_PROXY= + - HTTPS_PROXY= + - all_proxy= + - ALL_PROXY= restart: unless-stopped ports: - "3000:80" # Nginx 监听 80 端口 diff --git a/frontend/Dockerfile b/frontend/Dockerfile index 22ba070..6f75a8c 100644 --- a/frontend/Dockerfile +++ b/frontend/Dockerfile @@ -7,20 +7,25 @@ FROM node:20-alpine AS builder WORKDIR /app -# 清除代理设置 -ENV http_proxy= -ENV https_proxy= -ENV HTTP_PROXY= -ENV HTTPS_PROXY= +# 彻底清除代理设置 +ENV 
http_proxy="" +ENV https_proxy="" +ENV HTTP_PROXY="" +ENV HTTPS_PROXY="" +ENV all_proxy="" +ENV ALL_PROXY="" +ENV no_proxy="*" +ENV NO_PROXY="*" -RUN unset http_proxy https_proxy HTTP_PROXY HTTPS_PROXY all_proxy ALL_PROXY && \ - npm install -g pnpm +# 配置国内 npm 镜像源 +RUN npm config set registry https://registry.npmmirror.com && \ + npm install -g pnpm && \ + pnpm config set registry https://registry.npmmirror.com # 复制依赖文件 COPY package.json pnpm-lock.yaml ./ -RUN unset http_proxy https_proxy HTTP_PROXY HTTPS_PROXY all_proxy ALL_PROXY && \ - pnpm install --no-frozen-lockfile +RUN pnpm install --no-frozen-lockfile # 复制源代码 COPY . .