Fix the bug where the Agent audit task could not find any files

lintsinghua 2025-12-16 12:34:57 +08:00
parent 9f0d774aff
commit 364b8cea42
5 changed files with 336 additions and 100 deletions

View File

@@ -1,49 +1,104 @@
FROM python:3.12-slim
# ============================================
# Multi-stage build - build stage
# ============================================
FROM python:3.12-slim AS builder
WORKDIR /app
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1
# Clear proxy settings to avoid network issues inside the container
ENV http_proxy=
ENV https_proxy=
ENV HTTP_PROXY=
ENV HTTPS_PROXY=
# Configure apt retries to handle unstable networks
RUN echo 'Acquire::Retries "3";' > /etc/apt/apt.conf.d/80-retries
# Completely clear proxy settings
ENV http_proxy=""
ENV https_proxy=""
ENV HTTP_PROXY=""
ENV HTTPS_PROXY=""
ENV all_proxy=""
ENV ALL_PROXY=""
ENV no_proxy="*"
ENV NO_PROXY="*"
# Install system dependencies (libraries required by WeasyPrint plus Chinese font support)
RUN rm -f /etc/apt/apt.conf.d/proxy.conf 2>/dev/null || true && \
unset http_proxy https_proxy HTTP_PROXY HTTPS_PROXY all_proxy ALL_PROXY && \
apt-get update && \
apt-get install -y --no-install-recommends --fix-missing \
# Clear apt proxy config, configure retries, and switch to the Aliyun mirror
RUN sed -i 's|deb.debian.org|mirrors.aliyun.com|g' /etc/apt/sources.list.d/debian.sources 2>/dev/null || \
sed -i 's|deb.debian.org|mirrors.aliyun.com|g' /etc/apt/sources.list 2>/dev/null || true && \
rm -f /etc/apt/apt.conf.d/proxy.conf 2>/dev/null || true && \
echo 'Acquire::http::Proxy "false";' > /etc/apt/apt.conf.d/99-no-proxy && \
echo 'Acquire::https::Proxy "false";' >> /etc/apt/apt.conf.d/99-no-proxy && \
echo 'Acquire::Retries "5";' >> /etc/apt/apt.conf.d/99-no-proxy && \
echo 'Acquire::http::Timeout "60";' >> /etc/apt/apt.conf.d/99-no-proxy
# Install build dependencies (gcc is only needed in the build stage)
RUN apt-get update && \
apt-get install -y --no-install-recommends \
gcc \
libpq-dev \
curl \
libpango-1.0-0 \
libpangoft2-1.0-0 \
libpangocairo-1.0-0 \
libcairo2 \
libgdk-pixbuf-2.0-0 \
libffi-dev \
libglib2.0-0 \
shared-mime-info \
fonts-noto-cjk \
fonts-noto-cjk-extra \
fontconfig \
&& fc-cache -fv \
&& rm -rf /var/lib/apt/lists/*
# Install uv
COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv
# Copy dependency files and README
# Configure uv to use the official PyPI index (used when the domestic mirror is unstable)
ENV UV_INDEX_URL=https://pypi.org/simple/
# Copy dependency files
COPY pyproject.toml uv.lock README.md ./
# Install dependencies with uv (ensure no proxy is set)
RUN unset http_proxy https_proxy HTTP_PROXY HTTPS_PROXY all_proxy ALL_PROXY && \
uv sync --frozen --no-dev
# Install Python dependencies into the virtual environment
RUN uv sync --frozen --no-dev
# ============================================
# Multi-stage build - runtime stage
# ============================================
FROM python:3.12-slim AS runtime
WORKDIR /app
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1
# Completely clear proxy settings
ENV http_proxy=""
ENV https_proxy=""
ENV HTTP_PROXY=""
ENV HTTPS_PROXY=""
ENV all_proxy=""
ENV ALL_PROXY=""
ENV no_proxy="*"
ENV NO_PROXY="*"
# Use the Aliyun Debian mirror
RUN sed -i 's|deb.debian.org|mirrors.aliyun.com|g' /etc/apt/sources.list.d/debian.sources 2>/dev/null || \
sed -i 's|deb.debian.org|mirrors.aliyun.com|g' /etc/apt/sources.list 2>/dev/null || true
# Clear apt proxy configuration
RUN rm -f /etc/apt/apt.conf.d/proxy.conf 2>/dev/null || true && \
echo 'Acquire::http::Proxy "false";' > /etc/apt/apt.conf.d/99-no-proxy && \
echo 'Acquire::https::Proxy "false";' >> /etc/apt/apt.conf.d/99-no-proxy && \
echo 'Acquire::Retries "5";' >> /etc/apt/apt.conf.d/99-no-proxy && \
echo 'Acquire::http::Timeout "60";' >> /etc/apt/apt.conf.d/99-no-proxy
# Install only runtime dependencies (gcc not needed)
RUN apt-get update && \
apt-get install -y --no-install-recommends \
libpq5 \
curl \
git \
libpango-1.0-0 \
libpangoft2-1.0-0 \
libpangocairo-1.0-0 \
libcairo2 \
libgdk-pixbuf-2.0-0 \
libglib2.0-0 \
shared-mime-info \
fonts-noto-cjk \
&& fc-cache -fv \
&& rm -rf /var/lib/apt/lists/* \
&& apt-get clean
# Copy the virtual environment and uv from the build stage
COPY --from=builder /app/.venv /app/.venv
COPY --from=builder /usr/local/bin/uv /usr/local/bin/uv
# Copy application code
COPY . .
@@ -54,12 +109,5 @@ RUN mkdir -p /app/uploads/zip_files
# Expose the port
EXPOSE 8000
# Startup command (overridden by docker-compose in development mode)
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
# Startup command
CMD [".venv/bin/uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]

View File

@@ -261,12 +261,23 @@ async def _execute_agent_task(task_id: str):
if not project:
logger.error(f"Project not found for task {task_id}")
return
# Get the project root directory
project_root = await _get_project_root(project, task_id)
# Get user config
# Get the user config (must happen before resolving the project root so the tokens can be passed)
user_config = await _get_user_config(db, task.created_by)
# Extract tokens from the user config (used for cloning private repositories)
other_config = (user_config or {}).get('otherConfig', {})
github_token = other_config.get('githubToken') or settings.GITHUB_TOKEN
gitlab_token = other_config.get('gitlabToken') or settings.GITLAB_TOKEN
# Get the project root (passing the task-specified branch and auth tokens)
project_root = await _get_project_root(
project,
task_id,
task.branch_name,
github_token=github_token,
gitlab_token=gitlab_token,
)
# Update status to running
task.status = AgentTaskStatus.RUNNING
@@ -1164,6 +1175,7 @@ async def create_agent_task(
current_phase=AgentTaskPhase.PLANNING,
target_vulnerabilities=request.target_vulnerabilities,
verification_level=request.verification_level or "sandbox",
branch_name=request.branch_name,  # save the branch selected by the user
exclude_patterns=request.exclude_patterns,
target_files=request.target_files,
max_iterations=request.max_iterations or 50,
@@ -1835,29 +1847,52 @@ async def update_finding_status(
# ============ Helper Functions ============
async def _get_project_root(project: Project, task_id: str) -> str:
async def _get_project_root(
project: Project,
task_id: str,
branch_name: Optional[str] = None,
github_token: Optional[str] = None,
gitlab_token: Optional[str] = None,
) -> str:
"""
获取项目根目录
支持两种项目类型
- ZIP 项目解压 ZIP 文件到临时目录
- 仓库项目克隆仓库到临时目录
Args:
project: 项目对象
task_id: 任务ID
branch_name: 分支名称仓库项目使用优先于 project.default_branch
github_token: GitHub 访问令牌用于私有仓库
gitlab_token: GitLab 访问令牌用于私有仓库
Returns:
项目根目录路径
Raises:
RuntimeError: 当项目文件获取失败时
"""
import zipfile
import subprocess
import shutil
from urllib.parse import urlparse, urlunparse
base_path = f"/tmp/deepaudit/{task_id}"
# Ensure the directory exists
# Ensure the directory exists and is empty
if os.path.exists(base_path):
shutil.rmtree(base_path)
os.makedirs(base_path, exist_ok=True)
# Handle according to project type
if project.source_type == "zip":
# 🔥 ZIP project: extract the ZIP file
from app.services.zip_storage import load_project_zip
zip_path = await load_project_zip(project.id)
if zip_path and os.path.exists(zip_path):
try:
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
@@ -1865,44 +1900,141 @@ async def _get_project_root(project: Project, task_id: str) -> str:
logger.info(f"✅ Extracted ZIP project {project.id} to {base_path}")
except Exception as e:
logger.error(f"Failed to extract ZIP {zip_path}: {e}")
raise RuntimeError(f"Failed to extract the project files: {e}")
else:
logger.warning(f"⚠️ ZIP file not found for project {project.id}")
raise RuntimeError(f"Project ZIP file not found: {project.id}")
elif project.source_type == "repository" and project.repository_url:
# 🔥 Repository project: clone the repository
repo_url = project.repository_url
repo_type = project.repository_type or "other"
# Check whether git is available (using git --version is more reliable)
try:
branch = project.default_branch or "main"
repo_url = project.repository_url
# Clone the repository
result = subprocess.run(
["git", "clone", "--depth", "1", "--branch", branch, repo_url, base_path],
git_check = subprocess.run(
["git", "--version"],
capture_output=True,
text=True,
timeout=300,
timeout=10
)
if result.returncode == 0:
logger.info(f"✅ Cloned repository {repo_url} (branch: {branch}) to {base_path}")
else:
logger.warning(f"Failed to clone branch {branch}, trying default branch: {result.stderr}")
# If the clone failed, try the default branch
if branch != "main":
result = subprocess.run(
["git", "clone", "--depth", "1", repo_url, base_path],
capture_output=True,
text=True,
timeout=300,
)
if result.returncode == 0:
logger.info(f"✅ Cloned repository {repo_url} (default branch) to {base_path}")
else:
logger.error(f"Failed to clone repository: {result.stderr}")
if git_check.returncode != 0:
raise RuntimeError("Git is not installed; cannot clone the repository. Please install git in the Docker container.")
logger.debug(f"Git version: {git_check.stdout.strip()}")
except FileNotFoundError:
raise RuntimeError("Git is not installed; cannot clone the repository. Please install git in the Docker container.")
except subprocess.TimeoutExpired:
logger.error(f"Git clone timeout for {project.repository_url}")
except Exception as e:
logger.error(f"Failed to clone repository {project.repository_url}: {e}")
raise RuntimeError("Git check timed out")
# Build an authenticated URL (for private repositories)
auth_url = repo_url
if repo_type == "github" and github_token:
parsed = urlparse(repo_url)
auth_url = urlunparse((
parsed.scheme,
f"{github_token}@{parsed.netloc}",
parsed.path,
parsed.params,
parsed.query,
parsed.fragment
))
logger.info(f"🔐 Using GitHub token for authentication")
elif repo_type == "gitlab" and gitlab_token:
parsed = urlparse(repo_url)
auth_url = urlunparse((
parsed.scheme,
f"oauth2:{gitlab_token}@{parsed.netloc}",
parsed.path,
parsed.params,
parsed.query,
parsed.fragment
))
logger.info(f"🔐 Using GitLab token for authentication")
# Build the order of branches to try
branches_to_try = []
if branch_name:
branches_to_try.append(branch_name)
if project.default_branch and project.default_branch not in branches_to_try:
branches_to_try.append(project.default_branch)
# Add common default branches
for common_branch in ["main", "master"]:
if common_branch not in branches_to_try:
branches_to_try.append(common_branch)
clone_success = False
last_error = ""
for branch in branches_to_try:
# Clean up the directory (in case a previous attempt failed)
if os.path.exists(base_path) and os.listdir(base_path):
shutil.rmtree(base_path)
os.makedirs(base_path, exist_ok=True)
logger.info(f"🔄 Trying to clone repository (branch: {branch})...")
try:
result = subprocess.run(
["git", "clone", "--depth", "1", "--branch", branch, auth_url, base_path],
capture_output=True,
text=True,
timeout=120,  # shorter timeout
)
if result.returncode == 0:
logger.info(f"✅ Cloned repository {repo_url} (branch: {branch}) to {base_path}")
clone_success = True
break
else:
last_error = result.stderr
logger.warning(f"Failed to clone branch {branch}: {last_error[:200]}")
except subprocess.TimeoutExpired:
last_error = f"Cloning branch {branch} timed out"
logger.warning(last_error)
# If all branches fail, try cloning without specifying a branch (use the repository's default branch)
if not clone_success:
logger.info(f"🔄 Trying to clone without specifying branch...")
if os.path.exists(base_path) and os.listdir(base_path):
shutil.rmtree(base_path)
os.makedirs(base_path, exist_ok=True)
try:
result = subprocess.run(
["git", "clone", "--depth", "1", auth_url, base_path],
capture_output=True,
text=True,
timeout=120,
)
if result.returncode == 0:
logger.info(f"✅ Cloned repository {repo_url} (default branch) to {base_path}")
clone_success = True
else:
last_error = result.stderr
except subprocess.TimeoutExpired:
last_error = "Repository clone timed out"
if not clone_success:
# Analyze the cause of the failure
error_msg = "Failed to clone the repository"
if "Authentication failed" in last_error or "401" in last_error:
error_msg = "Authentication failed; please check the GitHub/GitLab token configuration"
elif "not found" in last_error.lower() or "404" in last_error:
error_msg = "Repository does not exist or access is denied"
elif "Could not resolve host" in last_error:
error_msg = "Could not resolve the host name; please check the network connection"
elif "Permission denied" in last_error or "403" in last_error:
error_msg = "Access denied; please check the repository permissions or the token"
else:
error_msg = f"Failed to clone the repository: {last_error[:200]}"
logger.error(error_msg)
raise RuntimeError(error_msg)
# Verify the directory is not empty
if not os.listdir(base_path):
raise RuntimeError(f"Project directory is empty; the clone/extraction may have failed: {base_path}")
return base_path
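For reference, the token embedding that the new clone path performs can be written as a small standalone helper. This is a minimal sketch of the same urlparse/urlunparse construction; the function name and the example values are illustrative, not part of the codebase:

from urllib.parse import urlparse, urlunparse

def build_auth_url(repo_url: str, repo_type: str, token: str | None) -> str:
    """Embed an access token into an HTTPS clone URL (illustrative sketch)."""
    if not token:
        return repo_url
    parsed = urlparse(repo_url)
    if repo_type == "github":
        netloc = f"{token}@{parsed.netloc}"         # https://<token>@github.com/owner/repo.git
    elif repo_type == "gitlab":
        netloc = f"oauth2:{token}@{parsed.netloc}"  # https://oauth2:<token>@gitlab.com/owner/repo.git
    else:
        return repo_url
    return urlunparse((parsed.scheme, netloc, parsed.path,
                       parsed.params, parsed.query, parsed.fragment))

# Hypothetical usage:
# build_auth_url("https://github.com/acme/app.git", "github", "ghp_example")
# -> "https://ghp_example@github.com/acme/app.git"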

View File

@@ -293,19 +293,56 @@ async def scan_repo_task(task_id: str, db_session_factory, user_config: dict = N
user_other_config = (user_config or {}).get('otherConfig', {})
github_token = user_other_config.get('githubToken') or settings.GITHUB_TOKEN
gitlab_token = user_other_config.get('gitlabToken') or settings.GITLAB_TOKEN
files: List[Dict[str, str]] = []
extracted_gitlab_token = None
if repo_type == "github":
files = await get_github_files(repo_url, branch, github_token, task_exclude_patterns)
elif repo_type == "gitlab":
files = await get_gitlab_files(repo_url, branch, gitlab_token, task_exclude_patterns)
# GitLab file entries may carry a token
if files and 'token' in files[0]:
extracted_gitlab_token = files[0].get('token')
else:
raise Exception("Unsupported repository type; only GitHub and GitLab repositories are supported")
# Build the order of branches to try (branch fallback mechanism)
branches_to_try = [branch]
if project.default_branch and project.default_branch != branch:
branches_to_try.append(project.default_branch)
for common_branch in ["main", "master"]:
if common_branch not in branches_to_try:
branches_to_try.append(common_branch)
actual_branch = branch  # the branch actually used
last_error = None
for try_branch in branches_to_try:
try:
print(f"🔄 Trying to fetch the file list for branch {try_branch}...")
if repo_type == "github":
files = await get_github_files(repo_url, try_branch, github_token, task_exclude_patterns)
elif repo_type == "gitlab":
files = await get_gitlab_files(repo_url, try_branch, gitlab_token, task_exclude_patterns)
# GitLab file entries may carry a token
if files and 'token' in files[0]:
extracted_gitlab_token = files[0].get('token')
else:
raise Exception("Unsupported repository type; only GitHub and GitLab repositories are supported")
if files:
actual_branch = try_branch
if try_branch != branch:
print(f"⚠️ Branch {branch} does not exist or is not accessible; fell back to branch {try_branch}")
break
except Exception as e:
last_error = str(e)
print(f"⚠️ Failed to fetch branch {try_branch}: {last_error[:100]}")
continue
if not files:
error_msg = "Unable to fetch repository files; all branch attempts failed"
if last_error:
if "404" in last_error or "Not Found" in last_error:
error_msg = f"Repository or branch does not exist: {branch}"
elif "401" in last_error or "403" in last_error:
error_msg = "Access denied; please check the token configuration"
else:
error_msg = f"Failed to fetch files: {last_error[:100]}"
raise Exception(error_msg)
print(f"✅ Successfully fetched the file list for branch {actual_branch}")
# Limit the number of files
# If specific files were requested, only analyze those files
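For reference, the branch fallback order this hunk walks (requested branch, then the project default, then main/master) can be summarized in a standalone helper. A minimal sketch; the function name is illustrative and not part of the module:

def branch_fallback_order(requested: str, default: str | None) -> list[str]:
    """Requested branch first, then the project default, then common defaults."""
    order = [requested]
    if default and default not in order:
        order.append(default)
    for common in ("main", "master"):
        if common not in order:
            order.append(common)
    return order

# Hypothetical usage:
# branch_fallback_order("dev", "develop") -> ["dev", "develop", "main", "master"]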

View File

@@ -32,6 +32,13 @@ services:
backend:
build:
context: ./backend
args:
- http_proxy=
- https_proxy=
- HTTP_PROXY=
- HTTPS_PROXY=
- all_proxy=
- ALL_PROXY=
restart: unless-stopped
volumes:
- backend_uploads:/app/uploads
@@ -63,6 +70,13 @@ services:
frontend:
build:
context: ./frontend
args:
- http_proxy=
- https_proxy=
- HTTP_PROXY=
- HTTPS_PROXY=
- all_proxy=
- ALL_PROXY=
restart: unless-stopped
ports:
- "3000:80" # Nginx listens on port 80

View File

@@ -7,20 +7,25 @@ FROM node:20-alpine AS builder
WORKDIR /app
# Clear proxy settings
ENV http_proxy=
ENV https_proxy=
ENV HTTP_PROXY=
ENV HTTPS_PROXY=
# Completely clear proxy settings
ENV http_proxy=""
ENV https_proxy=""
ENV HTTP_PROXY=""
ENV HTTPS_PROXY=""
ENV all_proxy=""
ENV ALL_PROXY=""
ENV no_proxy="*"
ENV NO_PROXY="*"
RUN unset http_proxy https_proxy HTTP_PROXY HTTPS_PROXY all_proxy ALL_PROXY && \
npm install -g pnpm
# Configure the China npm registry mirror
RUN npm config set registry https://registry.npmmirror.com && \
npm install -g pnpm && \
pnpm config set registry https://registry.npmmirror.com
# Copy dependency files
COPY package.json pnpm-lock.yaml ./
RUN unset http_proxy https_proxy HTTP_PROXY HTTPS_PROXY all_proxy ALL_PROXY && \
pnpm install --no-frozen-lockfile
RUN pnpm install --no-frozen-lockfile
# Copy source code
COPY . .