Merge pull request #1 from vinland100/feat/gitea-support-15583597278683319916
feat: enhance Gitea support and security fixes
This commit is contained in:
commit
d4ad9b9328
|
|
@ -0,0 +1,129 @@
|
|||
name: Docker Publish

# Builds and pushes Docker images only; it does not create a Release or a Tag.
# Triggered manually; each image can be toggled independently via the inputs.
on:
  workflow_dispatch:
    inputs:
      tag:
        description: '镜像标签 (例如: latest, dev, v3.0.0)'
        required: true
        default: 'latest'
        type: string
      build_frontend:
        description: '构建前端镜像'
        required: false
        type: boolean
        default: true
      build_backend:
        description: '构建后端镜像'
        required: false
        type: boolean
        default: true
      build_sandbox:
        description: '构建沙箱镜像'
        required: false
        type: boolean
        default: true

jobs:
  build-and-push:
    name: 构建并推送镜像
    runs-on: ubuntu-latest

    # packages: write is required to push to ghcr.io with GITHUB_TOKEN.
    permissions:
      contents: read
      packages: write

    steps:
      - name: 检出代码
        uses: actions/checkout@v4

      # The Node.js / pnpm steps only run when the frontend image is requested.
      - name: 设置 Node.js
        if: ${{ github.event.inputs.build_frontend == 'true' }}
        uses: actions/setup-node@v4
        with:
          node-version: '20'

      - name: 安装 pnpm
        if: ${{ github.event.inputs.build_frontend == 'true' }}
        uses: pnpm/action-setup@v4
        with:
          version: 9

      - name: 安装前端依赖
        if: ${{ github.event.inputs.build_frontend == 'true' }}
        working-directory: ./frontend
        run: pnpm install --frozen-lockfile

      - name: 构建前端项目
        if: ${{ github.event.inputs.build_frontend == 'true' }}
        working-directory: ./frontend
        run: pnpm build
        env:
          VITE_USE_LOCAL_DB: 'true'

      - name: 登录到 GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      # QEMU + Buildx are needed for the linux/arm64 half of the multi-arch builds.
      - name: 设置 QEMU
        uses: docker/setup-qemu-action@v3

      - name: 设置 Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: 构建并推送前端 Docker 镜像
        if: ${{ github.event.inputs.build_frontend == 'true' }}
        uses: docker/build-push-action@v5
        with:
          context: ./frontend
          file: ./frontend/Dockerfile
          push: true
          platforms: linux/amd64,linux/arm64
          tags: |
            ghcr.io/${{ github.repository_owner }}/deepaudit-frontend:${{ github.event.inputs.tag }}
          cache-from: type=gha,scope=frontend
          cache-to: type=gha,mode=max,scope=frontend

      - name: 构建并推送后端 Docker 镜像
        if: ${{ github.event.inputs.build_backend == 'true' }}
        uses: docker/build-push-action@v5
        with:
          context: ./backend
          file: ./backend/Dockerfile
          push: true
          platforms: linux/amd64,linux/arm64
          tags: |
            ghcr.io/${{ github.repository_owner }}/deepaudit-backend:${{ github.event.inputs.tag }}
          cache-from: type=gha,scope=backend
          cache-to: type=gha,mode=max,scope=backend

      - name: 构建并推送沙箱 Docker 镜像
        if: ${{ github.event.inputs.build_sandbox == 'true' }}
        uses: docker/build-push-action@v5
        with:
          context: ./docker/sandbox
          file: ./docker/sandbox/Dockerfile
          push: true
          platforms: linux/amd64,linux/arm64
          tags: |
            ghcr.io/${{ github.repository_owner }}/deepaudit-sandbox:${{ github.event.inputs.tag }}
          cache-from: type=gha,scope=sandbox
          cache-to: type=gha,mode=max,scope=sandbox

      - name: 输出镜像信息
        # User-controlled inputs are handed to the script through environment
        # variables instead of being expanded into the script text, so a crafted
        # tag value cannot be interpreted as shell syntax.
        env:
          OWNER: ${{ github.repository_owner }}
          TAG: ${{ github.event.inputs.tag }}
          BUILD_FRONTEND: ${{ github.event.inputs.build_frontend }}
          BUILD_BACKEND: ${{ github.event.inputs.build_backend }}
          BUILD_SANDBOX: ${{ github.event.inputs.build_sandbox }}
        run: |
          {
            echo "## 镜像已推送到 GHCR"
            echo ""
            if [ "$BUILD_FRONTEND" = "true" ]; then
              echo "- \`ghcr.io/${OWNER}/deepaudit-frontend:${TAG}\`"
            fi
            if [ "$BUILD_BACKEND" = "true" ]; then
              echo "- \`ghcr.io/${OWNER}/deepaudit-backend:${TAG}\`"
            fi
            if [ "$BUILD_SANDBOX" = "true" ]; then
              echo "- \`ghcr.io/${OWNER}/deepaudit-sandbox:${TAG}\`"
            fi
          } >> "$GITHUB_STEP_SUMMARY"
|
||||
|
|
@ -72,8 +72,7 @@ jobs:
|
|||
- name: 构建前端项目
|
||||
working-directory: ./frontend
|
||||
run: pnpm build
|
||||
env:
|
||||
VITE_USE_LOCAL_DB: 'true'
|
||||
|
||||
|
||||
# 8. 设置 Python 环境(用于后端)
|
||||
- name: 设置 Python
|
||||
|
|
@ -164,6 +163,7 @@ jobs:
|
|||
echo "- 🧠 **RAG 知识库增强**: 代码语义理解 + CWE/CVE 漏洞知识库" >> CHANGELOG.md
|
||||
echo "- 🔒 **沙箱漏洞验证**: Docker 安全容器自动执行 PoC" >> CHANGELOG.md
|
||||
echo "- 🛠️ **专业安全工具集成**: Semgrep, Bandit, Gitleaks, OSV-Scanner" >> CHANGELOG.md
|
||||
echo "- 🐛 **稳定性增强**: 修复多智能体工具调用循环、UI 显示及 Docker 环境兼容性问题" >> CHANGELOG.md
|
||||
echo "" >> CHANGELOG.md
|
||||
echo "## 📦 下载说明" >> CHANGELOG.md
|
||||
echo "" >> CHANGELOG.md
|
||||
|
|
|
|||
99
CHANGELOG.md
99
CHANGELOG.md
|
|
@ -2,98 +2,17 @@
|
|||
|
||||
All notable changes to this project will be documented in this file.
|
||||
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## [3.0.0] - 2024-12-15
|
||||
|
||||
### Highlights
|
||||
|
||||
**DeepAudit v3.0.0** introduces a revolutionary **Multi-Agent Intelligent Audit System**:
|
||||
|
||||
- Multi-Agent Architecture with Orchestrator-driven decision making
|
||||
- RAG (Retrieval-Augmented Generation) knowledge base enhancement
|
||||
- Docker sandbox for automated vulnerability verification
|
||||
- Professional security tool integration
|
||||
|
||||
### Added
|
||||
|
||||
#### Multi-Agent Architecture
|
||||
- **Orchestrator Agent**: Centralized orchestration for autonomous audit strategy decisions
|
||||
- **Recon Agent**: Information gathering, technology stack identification, and entry point discovery
|
||||
- **Analysis Agent**: Deep vulnerability analysis with Semgrep, RAG semantic search, and LLM analysis
|
||||
- **Verification Agent**: Sandbox testing, PoC generation, false positive filtering
|
||||
|
||||
#### RAG Knowledge Base
|
||||
- Code semantic understanding with Tree-sitter AST-based chunking
|
||||
- CWE/CVE vulnerability knowledge base integration
|
||||
- ChromaDB vector database support
|
||||
- Multi-language support: Python, JavaScript, TypeScript, Java, Go, PHP, Rust
|
||||
|
||||
#### Security Sandbox
|
||||
- Docker isolated container for PoC execution
|
||||
- Resource limits: memory, CPU constraints
|
||||
- Network isolation with configurable access
|
||||
- seccomp security policies
|
||||
|
||||
#### Security Tools Integration
|
||||
- **Semgrep**: Multi-language static analysis
|
||||
- **Bandit**: Python security scanning
|
||||
- **Gitleaks**: Secret leak detection
|
||||
- **TruffleHog**: Deep secret scanning
|
||||
- **npm audit**: Node.js dependency vulnerabilities
|
||||
- **Safety**: Python dependency audit
|
||||
- **OSV-Scanner**: Multi-language dependency vulnerabilities
|
||||
|
||||
#### New Features
|
||||
- Kunlun-M (MIT License) security scanner integration
|
||||
- File upload size limit increased to 500MB with large file optimization
|
||||
- Improved task tabs with card-style layout
|
||||
- Enhanced error handling and project scope filtering
|
||||
- Streaming LLM token usage reporting with input estimation
|
||||
|
||||
### Changed
|
||||
- Refactored Agent architecture with dynamic Agent tree
|
||||
- Expanded high-risk file patterns and dangerous pattern library
|
||||
- Enhanced sandbox functionality with forced sandbox verification
|
||||
- Improved report generation with normalized severity comparisons
|
||||
- Better agent stream stability preventing unnecessary reconnections
|
||||
## [3.0.1] - 2025-12-16
|
||||
|
||||
### Fixed
|
||||
- Agent stream stability issues with correct event buffer draining
|
||||
- Sandbox tool initialization logging improvements
|
||||
- Task phase update to REPORTING on completion
|
||||
- Various UI/UX improvements in AgentAudit component
|
||||
- **Agent Task Cancellation**: Fixed an issue where Agent tasks would continue running in the background after cancellation.
|
||||
- **Event Streaming**: Resolved `UnboundLocalError` in `event_manager.py` and removed artificial delays to prevent event queue buildup.
|
||||
- **Agent Timeout**: Increased Verification Agent timeout to 10 minutes to support complex PoC generation.
|
||||
- **LLM Streaming**: Improved robustness of `stream_llm_call` with explicit string timeouts to prevent hanging.
|
||||
|
||||
---
|
||||
|
||||
## [2.0.0] - 2024-11-15
|
||||
## [3.0.0] - 2025-12-15
|
||||
|
||||
### Added
|
||||
- Multi-LLM platform support (OpenAI, Claude, Gemini, Qwen, DeepSeek, Zhipu, etc.)
|
||||
- Ollama local model support for privacy-focused deployments
|
||||
- Project management with GitHub/GitLab import
|
||||
- ZIP file upload support
|
||||
- Instant code analysis feature
|
||||
- What-Why-How three-step fix recommendations
|
||||
- PDF/JSON report export
|
||||
- Audit rules management (OWASP Top 10 built-in)
|
||||
- Prompt template management with visual editor
|
||||
- Runtime LLM configuration in browser
|
||||
- i18n support (Chinese/English)
|
||||
|
||||
### Changed
|
||||
- Migrated to FastAPI backend
|
||||
- React 18 frontend with TypeScript
|
||||
- PostgreSQL database with Alembic migrations
|
||||
- Docker Compose deployment support
|
||||
|
||||
---
|
||||
|
||||
## [1.0.0] - 2024-10-01
|
||||
|
||||
### Added
|
||||
- Initial release
|
||||
- Basic code security audit functionality
|
||||
- LLM-powered vulnerability detection
|
||||
- Simple web interface
|
||||
- **Multi-Agent System**: Introduced Orchestrator, Recon, Analysis, and Verification agents for autonomous security auditing.
|
||||
- **RAG Integration**: Added Retrieval-Augmented Generation for better code understanding.
|
||||
- **Docker Sandbox**: Implemented secure environment for tool execution.
|
||||
|
|
|
|||
132
README.md
132
README.md
|
|
@ -1,6 +1,6 @@
|
|||
# DeepAudit - 开源的代码审计智能体平台 🦸♂️
|
||||
# DeepAudit - 人人拥有的 AI 审计战队,让漏洞挖掘触手可及 🦸♂️
|
||||
|
||||
> 让代码漏洞挖掘像呼吸一样简单,小白也能当黑客挖洞
|
||||
> 让代码漏洞挖掘像呼吸一样简单,小白也能轻松挖洞
|
||||
|
||||
<div style="width: 100%; max-width: 600px; margin: 0 auto;">
|
||||
<img src="frontend/public/images/logo.png" alt="DeepAudit Logo" style="width: 100%; height: auto; display: block; margin: 0 auto;">
|
||||
|
|
@ -12,7 +12,7 @@
|
|||
|
||||
<div align="center">
|
||||
|
||||
[](https://github.com/lintsinghua/DeepAudit/releases)
|
||||
[](https://github.com/lintsinghua/DeepAudit/releases)
|
||||
[](https://opensource.org/licenses/MIT)
|
||||
[](https://reactjs.org/)
|
||||
[](https://www.typescriptlang.org/)
|
||||
|
|
@ -158,36 +158,74 @@ DeepAudit/
|
|||
|
||||
---
|
||||
|
||||
## 🚀 快速开始 (Docker)
|
||||
## 🚀 快速开始
|
||||
|
||||
### 1. 启动项目
|
||||
### 方式一:一行命令部署(推荐)
|
||||
|
||||
复制一份 `backend/env.example` 为 `backend/.env`,并按需配置 LLM API Key。
|
||||
然后执行以下命令一键启动:
|
||||
使用预构建的 Docker 镜像,无需克隆代码,一行命令即可启动:
|
||||
|
||||
```bash
|
||||
# 1. 准备配置文件
|
||||
cp backend/env.example backend/.env
|
||||
|
||||
# 2. 构建沙箱镜像 (首次运行必须)
|
||||
cd docker/sandbox && chmod +x build.sh && ./build.sh && cd ../..
|
||||
|
||||
# 3. 启动服务
|
||||
docker compose up -d
|
||||
curl -fsSL https://raw.githubusercontent.com/lintsinghua/DeepAudit/v3.0.0/docker-compose.prod.yml | docker compose -f - up -d
|
||||
```
|
||||
|
||||
<details>
|
||||
<summary>🇨🇳 国内加速部署(点击展开)</summary>
|
||||
|
||||
使用南京大学镜像站加速拉取 Docker 镜像(将 `ghcr.io` 替换为 `ghcr.nju.edu.cn`):
|
||||
|
||||
```bash
|
||||
# 国内加速版 - 使用南京大学 GHCR 镜像站
|
||||
curl -fsSL https://raw.githubusercontent.com/lintsinghua/DeepAudit/main/docker-compose.prod.cn.yml | docker compose -f - up -d
|
||||
```
|
||||
|
||||
**手动拉取镜像(如需单独拉取):**
|
||||
```bash
|
||||
# 前端镜像
|
||||
docker pull ghcr.nju.edu.cn/lintsinghua/deepaudit-frontend:latest
|
||||
|
||||
# 后端镜像
|
||||
docker pull ghcr.nju.edu.cn/lintsinghua/deepaudit-backend:latest
|
||||
|
||||
# 沙箱镜像
|
||||
docker pull ghcr.nju.edu.cn/lintsinghua/deepaudit-sandbox:latest
|
||||
```
|
||||
|
||||
> 💡 镜像源由 [南京大学开源镜像站](https://mirrors.nju.edu.cn/) 提供支持
|
||||
|
||||
</details>
|
||||
|
||||
> 🎉 **启动成功!** 访问 http://localhost:3000 开始体验。
|
||||
|
||||
---
|
||||
|
||||
## 🔧 源码启动指南
|
||||
### 方式二:克隆代码部署
|
||||
|
||||
适合需要自定义配置或二次开发的用户:
|
||||
|
||||
```bash
|
||||
# 1. 克隆项目
|
||||
git clone https://github.com/lintsinghua/DeepAudit.git && cd DeepAudit
|
||||
|
||||
# 2. 配置环境变量
|
||||
cp backend/env.example backend/.env
|
||||
# 编辑 backend/.env 填入你的 LLM API Key
|
||||
|
||||
# 3. 一键启动
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
> 首次启动会自动构建沙箱镜像,可能需要几分钟。
|
||||
|
||||
---
|
||||
|
||||
## 🔧 源码开发指南
|
||||
|
||||
适合开发者进行二次开发调试。
|
||||
|
||||
### 环境要求
|
||||
- Python 3.10+
|
||||
- Node.js 18+
|
||||
- PostgreSQL 14+
|
||||
- Python 3.11+
|
||||
- Node.js 20+
|
||||
- PostgreSQL 15+
|
||||
- Docker (用于沙箱)
|
||||
|
||||
|
||||
|
|
@ -206,12 +244,10 @@ cd backend
|
|||
# 配置环境
|
||||
cp env.example .env
|
||||
|
||||
# 激活虚拟环境 (推荐 uv/poetry)
|
||||
# 使用 uv 管理环境(推荐)
|
||||
uv sync
|
||||
source .venv/bin/activate
|
||||
|
||||
# 安装依赖
|
||||
pip install -r requirements.txt
|
||||
|
||||
# 启动 API 服务
|
||||
uvicorn app.main:app --reload
|
||||
```
|
||||
|
|
@ -223,16 +259,20 @@ cd frontend
|
|||
# 配置环境
|
||||
cp .env.example .env
|
||||
|
||||
npm install
|
||||
npm run dev
|
||||
pnpm install
|
||||
pnpm dev
|
||||
```
|
||||
|
||||
### 4. 沙箱环境
|
||||
开发模式下,仍需通过 Docker 启动沙箱服务。
|
||||
### 3. 沙箱环境
|
||||
|
||||
开发模式下需要本地 Docker 拉取沙箱镜像:
|
||||
|
||||
```bash
|
||||
cd docker/sandbox
|
||||
./build.sh
|
||||
# 标准拉取
|
||||
docker pull ghcr.io/lintsinghua/deepaudit-sandbox:latest
|
||||
|
||||
# 国内加速(南京大学镜像站)
|
||||
docker pull ghcr.nju.edu.cn/lintsinghua/deepaudit-sandbox:latest
|
||||
```
|
||||
|
||||
---
|
||||
|
|
@ -369,3 +409,37 @@ DeepSeek-Coder · Codestral<br/>
|
|||
<div align="center">
|
||||
<strong>Made with ❤️ by <a href="https://github.com/lintsinghua">lintsinghua</a></strong>
|
||||
</div>
|
||||
|
||||
---
|
||||
|
||||
## ⚠️ 重要安全声明
|
||||
|
||||
### 法律合规声明
|
||||
1. 禁止**任何未经授权的漏洞测试、渗透测试或安全评估**
|
||||
2. 本项目仅供网络空间安全学术研究、教学和学习使用
|
||||
3. 严禁将本项目用于任何非法目的或未经授权的安全测试
|
||||
|
||||
### 漏洞上报责任
|
||||
1. 发现任何安全漏洞时,请及时通过合法渠道上报
|
||||
2. 严禁利用发现的漏洞进行非法活动
|
||||
3. 遵守国家网络安全法律法规,维护网络空间安全
|
||||
|
||||
### 使用限制
|
||||
- 仅限在授权环境下用于教育和研究目的
|
||||
- 禁止用于对未授权系统进行安全测试
|
||||
- 使用者需对自身行为承担全部法律责任
|
||||
|
||||
### 免责声明
|
||||
作者不对任何因使用本项目而导致的直接或间接损失负责,使用者需对自身行为承担全部法律责任。
|
||||
|
||||
---
|
||||
|
||||
## 📖 详细安全政策
|
||||
|
||||
有关安全政策、免责声明、代码隐私、API使用安全和漏洞报告的详细信息,请参阅 [DISCLAIMER.md](DISCLAIMER.md) 和 [SECURITY.md](SECURITY.md) 文件。
|
||||
|
||||
### 快速参考
|
||||
- 🔒 **代码隐私警告**: 您的代码将被发送到所选择的LLM服务商服务器
|
||||
- 🛡️ **敏感代码处理**: 使用本地模型处理敏感代码
|
||||
- ⚠️ **合规要求**: 遵守数据保护和隐私法律法规
|
||||
- 📧 **漏洞报告**: 发现安全问题请通过合法渠道上报
|
||||
|
|
|
|||
|
|
@ -103,11 +103,12 @@ COPY --from=builder /usr/local/bin/uv /usr/local/bin/uv
|
|||
# 复制应用代码
|
||||
COPY . .
|
||||
|
||||
# 创建上传目录
|
||||
RUN mkdir -p /app/uploads/zip_files
|
||||
# 创建上传目录并设置启动脚本权限
|
||||
RUN mkdir -p /app/uploads/zip_files && \
|
||||
chmod +x /app/docker-entrypoint.sh
|
||||
|
||||
# 暴露端口
|
||||
EXPOSE 8000
|
||||
|
||||
# 启动命令
|
||||
CMD [".venv/bin/uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||
# 启动命令 - 使用启动脚本自动执行数据库迁移
|
||||
CMD ["/app/docker-entrypoint.sh"]
|
||||
|
|
|
|||
|
|
@ -0,0 +1,35 @@
|
|||
"""Add files_with_findings column to agent_tasks
|
||||
|
||||
Revision ID: 008_add_files_with_findings
|
||||
Revises: 4c280754c680
|
||||
Create Date: 2025-12-16
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = '008_add_files_with_findings'
|
||||
down_revision = '4c280754c680'
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Add the nullable ``files_with_findings`` integer column to ``agent_tasks``.

    The migration is idempotent: the column is only added when the inspector
    does not already report it, and the backfill only touches NULL rows, so
    re-running it against an already-migrated database changes nothing.
    """
    conn = op.get_bind()
    inspector = sa.inspect(conn)
    existing_columns = {col['name'] for col in inspector.get_columns('agent_tasks')}

    if 'files_with_findings' not in existing_columns:
        op.add_column(
            'agent_tasks',
            sa.Column(
                'files_with_findings',
                sa.Integer(),
                nullable=True,
                # A Python-side ``default=`` is not emitted in the ALTER TABLE
                # statement; ``server_default`` makes the database itself
                # supply 0 for rows that omit the column.
                server_default=sa.text('0'),
            ),
        )

    # Backfill any rows still holding NULL (e.g. when the column pre-existed
    # without a default). The WHERE clause keeps this safe to repeat.
    op.execute("UPDATE agent_tasks SET files_with_findings = 0 WHERE files_with_findings IS NULL")
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Drop ``files_with_findings`` from ``agent_tasks`` when it is present.

    The existence check mirrors the one in ``upgrade`` so that a downgrade on
    a database that never received the column is a no-op instead of an error.
    """
    inspector = sa.inspect(op.get_bind())
    existing_columns = {col['name'] for col in inspector.get_columns('agent_tasks')}

    if 'files_with_findings' in existing_columns:
        op.drop_column('agent_tasks', 'files_with_findings')
|
||||
|
|
@ -364,6 +364,17 @@ async def _execute_agent_task(task_id: str):
|
|||
},
|
||||
)
|
||||
|
||||
# 🔥 设置外部取消检查回调
|
||||
# 这确保即使 runner.cancel() 失败,Agent 也能通过检查全局标志感知取消
|
||||
def check_global_cancel():
|
||||
return is_task_cancelled(task_id)
|
||||
|
||||
orchestrator.set_cancel_callback(check_global_cancel)
|
||||
# 同时也为子 Agent 设置(虽然 Orchestrator 会传播)
|
||||
recon_agent.set_cancel_callback(check_global_cancel)
|
||||
analysis_agent.set_cancel_callback(check_global_cancel)
|
||||
verification_agent.set_cancel_callback(check_global_cancel)
|
||||
|
||||
# 注册到全局
|
||||
_running_orchestrators[task_id] = orchestrator
|
||||
_running_tasks[task_id] = orchestrator # 兼容旧的取消逻辑
|
||||
|
|
@ -437,7 +448,13 @@ async def _execute_agent_task(task_id: str):
|
|||
await _save_findings(db, task_id, findings)
|
||||
|
||||
# 更新任务统计
|
||||
task.status = AgentTaskStatus.COMPLETED
|
||||
# 🔥 CRITICAL FIX: 在设置完成前再次检查取消状态
|
||||
# 避免 "取消后后端继续运行并最终标记为完成" 的问题
|
||||
if is_task_cancelled(task_id):
|
||||
logger.info(f"[AgentTask] Task {task_id} was cancelled, overriding success result")
|
||||
task.status = AgentTaskStatus.CANCELLED
|
||||
else:
|
||||
task.status = AgentTaskStatus.COMPLETED
|
||||
task.completed_at = datetime.now(timezone.utc)
|
||||
task.current_phase = AgentTaskPhase.REPORTING
|
||||
task.findings_count = len(findings)
|
||||
|
|
@ -445,14 +462,18 @@ async def _execute_agent_task(task_id: str):
|
|||
task.tool_calls_count = result.tool_calls
|
||||
task.tokens_used = result.tokens_used
|
||||
|
||||
# 🔥 统计分析的文件数量(从 findings 中提取唯一文件)
|
||||
analyzed_file_set = set()
|
||||
# 🔥 统计文件数量
|
||||
# analyzed_files = 实际扫描过的文件数(任务完成时等于 total_files)
|
||||
# files_with_findings = 有漏洞发现的唯一文件数
|
||||
task.analyzed_files = task.total_files # Agent 扫描了所有符合条件的文件
|
||||
|
||||
files_with_findings_set = set()
|
||||
for f in findings:
|
||||
if isinstance(f, dict):
|
||||
file_path = f.get("file_path") or f.get("file") or f.get("location", "").split(":")[0]
|
||||
if file_path:
|
||||
analyzed_file_set.add(file_path)
|
||||
task.analyzed_files = len(analyzed_file_set) if analyzed_file_set else task.total_files
|
||||
files_with_findings_set.add(file_path)
|
||||
task.files_with_findings = len(files_with_findings_set)
|
||||
|
||||
# 统计严重程度和验证状态
|
||||
verified_count = 0
|
||||
|
|
@ -1584,7 +1605,17 @@ async def cancel_agent_task(
|
|||
runner.cancel()
|
||||
logger.info(f"[Cancel] Set cancel flag for task {task_id}")
|
||||
|
||||
# 🔥 2. 强制取消 asyncio Task(立即中断 LLM 调用)
|
||||
# 🔥 2. 通过 agent_registry 取消所有子 Agent
|
||||
from app.services.agent.core import agent_registry
|
||||
from app.services.agent.core.graph_controller import stop_all_agents
|
||||
try:
|
||||
# 停止所有 Agent(包括子 Agent)
|
||||
stop_result = stop_all_agents(exclude_root=False)
|
||||
logger.info(f"[Cancel] Stopped all agents: {stop_result}")
|
||||
except Exception as e:
|
||||
logger.warning(f"[Cancel] Failed to stop agents via registry: {e}")
|
||||
|
||||
# 🔥 3. 强制取消 asyncio Task(立即中断 LLM 调用)
|
||||
asyncio_task = _running_asyncio_tasks.get(task_id)
|
||||
if asyncio_task and not asyncio_task.done():
|
||||
asyncio_task.cancel()
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ from fastapi import APIRouter, Depends, HTTPException
|
|||
from pydantic import BaseModel, Field
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy.orm.attributes import flag_modified
|
||||
|
||||
from app.api import deps
|
||||
from app.models.user import User
|
||||
|
|
@ -46,10 +47,10 @@ class EmbeddingConfigResponse(BaseModel):
|
|||
"""配置响应"""
|
||||
provider: str
|
||||
model: str
|
||||
api_key: Optional[str] = None # 返回 API Key
|
||||
base_url: Optional[str]
|
||||
dimensions: int
|
||||
batch_size: int
|
||||
# 不返回 API Key
|
||||
|
||||
|
||||
class TestEmbeddingRequest(BaseModel):
|
||||
|
|
@ -172,7 +173,7 @@ async def get_embedding_config_from_db(db: AsyncSession, user_id: str) -> Embedd
|
|||
embedding_data = other_config.get(EMBEDDING_CONFIG_KEY)
|
||||
|
||||
if embedding_data:
|
||||
return EmbeddingConfig(
|
||||
config = EmbeddingConfig(
|
||||
provider=embedding_data.get("provider", settings.EMBEDDING_PROVIDER),
|
||||
model=embedding_data.get("model", settings.EMBEDDING_MODEL),
|
||||
api_key=embedding_data.get("api_key"),
|
||||
|
|
@ -180,10 +181,13 @@ async def get_embedding_config_from_db(db: AsyncSession, user_id: str) -> Embedd
|
|||
dimensions=embedding_data.get("dimensions"),
|
||||
batch_size=embedding_data.get("batch_size", 100),
|
||||
)
|
||||
except (json.JSONDecodeError, AttributeError):
|
||||
pass
|
||||
print(f"[EmbeddingConfig] 读取用户 {user_id} 的嵌入配置: provider={config.provider}, model={config.model}")
|
||||
return config
|
||||
except (json.JSONDecodeError, AttributeError) as e:
|
||||
print(f"[EmbeddingConfig] 解析用户 {user_id} 配置失败: {e}")
|
||||
|
||||
# 返回默认配置
|
||||
print(f"[EmbeddingConfig] 用户 {user_id} 无保存配置,返回默认值")
|
||||
return EmbeddingConfig(
|
||||
provider=settings.EMBEDDING_PROVIDER,
|
||||
model=settings.EMBEDDING_MODEL,
|
||||
|
|
@ -219,6 +223,8 @@ async def save_embedding_config_to_db(db: AsyncSession, user_id: str, config: Em
|
|||
|
||||
other_config[EMBEDDING_CONFIG_KEY] = embedding_data
|
||||
user_config.other_config = json.dumps(other_config)
|
||||
# 🔥 显式标记 other_config 字段已修改,确保 SQLAlchemy 检测到变化
|
||||
flag_modified(user_config, "other_config")
|
||||
else:
|
||||
# 创建新配置
|
||||
user_config = UserConfig(
|
||||
|
|
@ -230,6 +236,7 @@ async def save_embedding_config_to_db(db: AsyncSession, user_id: str, config: Em
|
|||
db.add(user_config)
|
||||
|
||||
await db.commit()
|
||||
print(f"[EmbeddingConfig] 已保存用户 {user_id} 的嵌入配置: provider={config.provider}, model={config.model}")
|
||||
|
||||
|
||||
# ============ API Endpoints ============
|
||||
|
|
@ -260,6 +267,7 @@ async def get_current_config(
|
|||
return EmbeddingConfigResponse(
|
||||
provider=config.provider,
|
||||
model=config.model,
|
||||
api_key=config.api_key,
|
||||
base_url=config.base_url,
|
||||
dimensions=dimensions,
|
||||
batch_size=config.batch_size,
|
||||
|
|
@ -280,10 +288,9 @@ async def update_config(
|
|||
if config.provider not in provider_ids:
|
||||
raise HTTPException(status_code=400, detail=f"不支持的提供商: {config.provider}")
|
||||
|
||||
# 验证模型
|
||||
# 获取提供商信息(用于检查 API Key 要求)
|
||||
provider = next((p for p in EMBEDDING_PROVIDERS if p.id == config.provider), None)
|
||||
if provider and config.model not in provider.models:
|
||||
raise HTTPException(status_code=400, detail=f"不支持的模型: {config.model}")
|
||||
# 注意:不再强制验证模型名称,允许用户输入自定义模型
|
||||
|
||||
# 检查 API Key
|
||||
if provider and provider.requires_api_key and not config.api_key:
|
||||
|
|
|
|||
|
|
@ -659,7 +659,8 @@ async def get_project_branches(
|
|||
config = config.scalar_one_or_none()
|
||||
|
||||
github_token = settings.GITHUB_TOKEN
|
||||
projects_gitea_token = settings.GITEA_TOKEN
|
||||
gitea_token = settings.GITEA_TOKEN
|
||||
gitlab_token = settings.GITLAB_TOKEN
|
||||
|
||||
SENSITIVE_OTHER_FIELDS = ['githubToken', 'gitlabToken', 'giteaToken']
|
||||
|
||||
|
|
@ -674,13 +675,12 @@ async def get_project_branches(
|
|||
elif field == 'gitlabToken':
|
||||
gitlab_token = decrypted_val
|
||||
elif field == 'giteaToken':
|
||||
projects_gitea_token = decrypted_val
|
||||
gitea_token = decrypted_val
|
||||
|
||||
repo_type = project.repository_type or "other"
|
||||
|
||||
# 详细日志
|
||||
print(f"[Branch] 项目: {project.name}, 类型: {repo_type}, URL: {project.repository_url}")
|
||||
print(f"[Branch] GitHub Token: {'已配置' if github_token else '未配置'}, GitLab Token: {'已配置' if gitlab_token else '未配置'}, Gitea Token: {'已配置' if projects_gitea_token else '未配置'}")
|
||||
|
||||
try:
|
||||
if repo_type == "github":
|
||||
|
|
@ -692,9 +692,9 @@ async def get_project_branches(
|
|||
print("[Branch] 警告: GitLab Token 未配置,可能无法访问私有仓库")
|
||||
branches = await get_gitlab_branches(project.repository_url, gitlab_token)
|
||||
elif repo_type == "gitea":
|
||||
if not projects_gitea_token:
|
||||
if not gitea_token:
|
||||
print("[Branch] 警告: Gitea Token 未配置,可能无法访问私有仓库")
|
||||
branches = await get_gitea_branches(project.repository_url, projects_gitea_token)
|
||||
branches = await get_gitea_branches(project.repository_url, gitea_token)
|
||||
else:
|
||||
# 对于其他类型,返回默认分支
|
||||
print(f"[Branch] 仓库类型 '{repo_type}' 不支持获取分支,返回默认分支")
|
||||
|
|
|
|||
|
|
@ -89,7 +89,8 @@ class AgentTask(Base):
|
|||
# 进度统计
|
||||
total_files = Column(Integer, default=0)
|
||||
indexed_files = Column(Integer, default=0)
|
||||
analyzed_files = Column(Integer, default=0)
|
||||
analyzed_files = Column(Integer, default=0) # 实际扫描过的文件数
|
||||
files_with_findings = Column(Integer, default=0) # 有漏洞发现的文件数
|
||||
total_chunks = Column(Integer, default=0) # 代码块总数
|
||||
|
||||
# Agent 统计
|
||||
|
|
|
|||
|
|
@ -85,15 +85,15 @@ ANALYSIS_SYSTEM_PROMPT = """你是 DeepAudit 的漏洞分析 Agent,一个**自
|
|||
- **dataflow_analysis**: 数据流追踪
|
||||
参数: source_code (str), variable_name (str)
|
||||
|
||||
### 辅助工具
|
||||
- **read_file**: 读取文件内容验证发现
|
||||
### 辅助工具(RAG 优先!)
|
||||
- **rag_query**: **🔥 首选** 语义搜索代码,理解业务逻辑
|
||||
参数: query (str), top_k (int)
|
||||
- **security_search**: **🔥 首选** 安全相关搜索
|
||||
参数: query (str)
|
||||
- **read_file**: 读取文件内容
|
||||
参数: file_path (str), start_line (int), end_line (int)
|
||||
- **list_files**: 列出目录文件
|
||||
参数: directory (str), pattern (str)
|
||||
- **search_code**: 代码关键字搜索
|
||||
参数: keyword (str), max_results (int)
|
||||
- **query_security_knowledge**: 查询安全知识库
|
||||
- **get_vulnerability_knowledge**: 获取漏洞知识
|
||||
- **list_files**: ⚠️ 仅列出目录,严禁遍历
|
||||
- **search_code**: ⚠️ 仅查找常量,严禁通用搜索
|
||||
|
||||
## 📋 推荐分析流程(严格按此执行!)
|
||||
|
||||
|
|
@ -193,6 +193,26 @@ Final Answer: [JSON 格式的漏洞报告]
|
|||
3. **上下文分析** - 看到可疑代码要读取上下文,理解完整逻辑
|
||||
4. **自主判断** - 不要机械相信工具输出,要用你的专业知识判断
|
||||
|
||||
## ⚠️ 关键约束 - 必须遵守!
|
||||
1. **禁止直接输出 Final Answer** - 你必须先调用工具来分析代码
|
||||
2. **至少调用两个工具** - 使用 smart_scan/semgrep_scan 进行扫描,然后用 read_file 查看代码
|
||||
3. **没有工具调用的分析无效** - 不允许仅凭推测直接报告漏洞
|
||||
4. **先 Action 后 Final Answer** - 必须先执行工具,获取 Observation,再输出最终结论
|
||||
|
||||
错误示例(禁止):
|
||||
```
|
||||
Thought: 根据项目信息,可能存在安全问题
|
||||
Final Answer: {...} ❌ 没有调用任何工具!
|
||||
```
|
||||
|
||||
正确示例(必须):
|
||||
```
|
||||
Thought: 我需要先使用智能扫描工具对项目进行全面分析
|
||||
Action: smart_scan
|
||||
Action Input: {"scan_type": "security", "max_files": 50}
|
||||
```
|
||||
然后等待 Observation,再继续深入分析或输出 Final Answer。
|
||||
|
||||
现在开始你的安全分析!首先使用外部工具进行全面扫描。"""
|
||||
|
||||
|
||||
|
|
@ -402,7 +422,7 @@ class AnalysisAgent(BaseAgent):
|
|||
## 可用工具
|
||||
{self.get_tools_description()}
|
||||
|
||||
请开始你的安全分析。首先读取高风险区域的文件,然后分析其中的安全问题。"""
|
||||
请开始你的安全分析。首先读取高风险区域的文件,然后**立即**分析其中的安全问题(输出 Action)。"""
|
||||
|
||||
# 🔥 记录工作开始
|
||||
self.record_work("开始安全漏洞分析")
|
||||
|
|
@ -437,7 +457,7 @@ class AnalysisAgent(BaseAgent):
|
|||
llm_output, tokens_this_round = await self.stream_llm_call(
|
||||
self._conversation_history,
|
||||
temperature=0.1,
|
||||
max_tokens=4096,
|
||||
max_tokens=8192,
|
||||
)
|
||||
except asyncio.CancelledError:
|
||||
logger.info(f"[{self.name}] LLM call cancelled")
|
||||
|
|
@ -594,7 +614,7 @@ Final Answer: {{"findings": [...], "summary": "..."}}"""
|
|||
await self.emit_llm_decision("继续分析", "LLM 需要更多分析")
|
||||
self._conversation_history.append({
|
||||
"role": "user",
|
||||
"content": "请继续分析。选择一个工具执行,或者如果分析完成,输出 Final Answer 汇总所有发现。",
|
||||
"content": "请继续分析。你输出了 Thought 但没有输出 Action。请**立即**选择一个工具执行,或者如果分析完成,输出 Final Answer 汇总所有发现。",
|
||||
})
|
||||
|
||||
# 🔥 如果循环结束但没有发现,强制 LLM 总结
|
||||
|
|
|
|||
|
|
@ -51,7 +51,7 @@ class AgentConfig:
|
|||
# LLM 配置
|
||||
model: Optional[str] = None
|
||||
temperature: float = 0.1
|
||||
max_tokens: int = 4096
|
||||
max_tokens: int = 8192
|
||||
|
||||
# 执行限制
|
||||
max_iterations: int = 20
|
||||
|
|
@ -485,9 +485,24 @@ class BaseAgent(ABC):
|
|||
self._cancelled = True
|
||||
logger.info(f"[{self.name}] Cancel requested")
|
||||
|
||||
# 🔥 外部取消检查回调
|
||||
self._cancel_callback = None
|
||||
|
||||
def set_cancel_callback(self, callback) -> None:
    """Register an external cancellation check.

    ``callback`` is stored unchanged on the instance. The ``is_cancelled``
    property calls it with no arguments and treats a truthy return value as a
    cancellation request.
    """
    self._cancel_callback = callback
|
||||
|
||||
@property
def is_cancelled(self) -> bool:
    """Report whether cancellation has been requested.

    Returns True when the internal ``_cancelled`` flag is set, or when the
    external callback (if any) returns a truthy value. A positive callback
    result is latched into ``_cancelled`` so later reads short-circuit
    without calling the callback again.
    """
    if self._cancelled:
        return True

    # NOTE(review): the only visible ``self._cancel_callback = None`` sits
    # next to ``cancel()``, so the attribute may not exist yet when this
    # property is first read — confirm against ``__init__``. ``getattr``
    # keeps the check safe either way.
    callback = getattr(self, "_cancel_callback", None)
    if callback and callback():
        self._cancelled = True
        logger.info(f"[{self.name}] Detected cancellation from callback")
        return True

    return False
|
||||
|
||||
# ============ 协作方法 ============
|
||||
|
||||
|
|
@ -949,41 +964,83 @@ class BaseAgent(ABC):
|
|||
logger.info(f"[{self.name}] ✅ thinking_start emitted, starting LLM stream...")
|
||||
|
||||
try:
|
||||
async for chunk in self.llm_service.chat_completion_stream(
|
||||
# 获取流式迭代器
|
||||
stream = self.llm_service.chat_completion_stream(
|
||||
messages=messages,
|
||||
temperature=temperature,
|
||||
max_tokens=max_tokens,
|
||||
):
|
||||
)
|
||||
# 兼容不同版本的 python async generator
|
||||
iterator = stream.__aiter__()
|
||||
|
||||
import time
|
||||
first_token_received = False
|
||||
last_activity = time.time()
|
||||
|
||||
while True:
|
||||
# 检查取消
|
||||
if self.is_cancelled:
|
||||
logger.info(f"[{self.name}] Cancelled during LLM streaming")
|
||||
logger.info(f"[{self.name}] Cancelled during LLM streaming loop")
|
||||
break
|
||||
|
||||
if chunk["type"] == "token":
|
||||
token = chunk["content"]
|
||||
accumulated = chunk["accumulated"]
|
||||
await self.emit_thinking_token(token, accumulated)
|
||||
# 🔥 CRITICAL: 让出控制权给事件循环,让 SSE 有机会发送事件
|
||||
# 如果不这样做,所有 token 会在循环结束后一起发送
|
||||
await asyncio.sleep(0)
|
||||
try:
|
||||
# 🔥 第一个 token 30秒超时,后续 token 60秒超时
|
||||
# 这是一个应用层的安全网,防止底层 LLM 客户端挂死
|
||||
timeout = 30.0 if not first_token_received else 60.0
|
||||
|
||||
elif chunk["type"] == "done":
|
||||
accumulated = chunk["content"]
|
||||
if chunk.get("usage"):
|
||||
total_tokens = chunk["usage"].get("total_tokens", 0)
|
||||
chunk = await asyncio.wait_for(iterator.__anext__(), timeout=timeout)
|
||||
|
||||
last_activity = time.time()
|
||||
|
||||
if chunk["type"] == "token":
|
||||
first_token_received = True
|
||||
token = chunk["content"]
|
||||
# 🔥 累积 content,确保 accumulated 变量更新
|
||||
# 注意:某些 adapter 返回的 chunk["accumulated"] 可能已经包含了累积值,
|
||||
# 但为了安全起见,如果不一致,我们自己累积
|
||||
if "accumulated" in chunk:
|
||||
accumulated = chunk["accumulated"]
|
||||
else:
|
||||
# 如果 adapter 没返回 accumulated,我们自己拼
|
||||
# 注意:如果是 token 类型,content 是增量
|
||||
# 如果 accumulated 被覆盖了,需要小心。
|
||||
# 实际上 service.py 中 chat_completion_stream 保证了 accumulated 存在
|
||||
# 这里我们信任 service 层的 accumulated
|
||||
pass
|
||||
|
||||
# Double check if accumulated is empty but we have token
|
||||
if not accumulated and token:
|
||||
accumulated += token # Fallback
|
||||
|
||||
await self.emit_thinking_token(token, accumulated)
|
||||
# 🔥 CRITICAL: 让出控制权给事件循环,让 SSE 有机会发送事件
|
||||
await asyncio.sleep(0)
|
||||
|
||||
elif chunk["type"] == "done":
|
||||
accumulated = chunk["content"]
|
||||
if chunk.get("usage"):
|
||||
total_tokens = chunk["usage"].get("total_tokens", 0)
|
||||
break
|
||||
|
||||
elif chunk["type"] == "error":
|
||||
accumulated = chunk.get("accumulated", "")
|
||||
error_msg = chunk.get("error", "Unknown error")
|
||||
logger.error(f"[{self.name}] Stream error: {error_msg}")
|
||||
if accumulated:
|
||||
total_tokens = chunk.get("usage", {}).get("total_tokens", 0)
|
||||
else:
|
||||
accumulated = f"[系统错误: {error_msg}] 请重新思考并输出你的决策。"
|
||||
break
|
||||
|
||||
except StopAsyncIteration:
|
||||
break
|
||||
|
||||
elif chunk["type"] == "error":
|
||||
accumulated = chunk.get("accumulated", "")
|
||||
error_msg = chunk.get("error", "Unknown error")
|
||||
logger.error(f"[{self.name}] Stream error: {error_msg}")
|
||||
# 🔥 如果有部分累积内容,尝试使用它
|
||||
if accumulated:
|
||||
logger.warning(f"[{self.name}] Using partial accumulated content ({len(accumulated)} chars)")
|
||||
total_tokens = chunk.get("usage", {}).get("total_tokens", 0)
|
||||
else:
|
||||
# 🔥 返回一个提示 LLM 继续的消息,而不是空字符串
|
||||
accumulated = f"[系统错误: {error_msg}] 请重新思考并输出你的决策。"
|
||||
except asyncio.TimeoutError:
|
||||
timeout_type = "First Token" if not first_token_received else "Stream"
|
||||
logger.error(f"[{self.name}] LLM {timeout_type} Timeout ({timeout}s)")
|
||||
error_msg = f"LLM 响应超时 ({timeout_type}, {timeout}s)"
|
||||
await self.emit_event("error", error_msg)
|
||||
if not accumulated:
|
||||
accumulated = f"[超时错误: {timeout}s 无响应] 请尝试简化请求或重试。"
|
||||
break
|
||||
|
||||
except asyncio.CancelledError:
|
||||
|
|
@ -993,7 +1050,6 @@ class BaseAgent(ABC):
|
|||
# 🔥 增强异常处理,避免吞掉错误
|
||||
logger.error(f"[{self.name}] Unexpected error in stream_llm_call: {e}", exc_info=True)
|
||||
await self.emit_event("error", f"LLM 调用错误: {str(e)}")
|
||||
# 返回错误提示,让 Agent 知道发生了什么
|
||||
accumulated = f"[LLM调用错误: {str(e)}] 请重试。"
|
||||
finally:
|
||||
await self.emit_thinking_end(accumulated)
|
||||
|
|
|
|||
|
|
@ -242,7 +242,7 @@ class OrchestratorAgent(BaseAgent):
|
|||
llm_output, tokens_this_round = await self.stream_llm_call(
|
||||
self._conversation_history,
|
||||
temperature=0.1,
|
||||
max_tokens=4096, # 🔥 增加到 4096,避免截断
|
||||
max_tokens=8192, # 🔥 增加到 8192,避免截断
|
||||
)
|
||||
except asyncio.CancelledError:
|
||||
logger.info(f"[{self.name}] LLM call cancelled")
|
||||
|
|
@ -657,7 +657,7 @@ Action Input: {{"参数": "值"}}
|
|||
agent_timeouts = {
|
||||
"recon": 300, # 5 分钟
|
||||
"analysis": 600, # 10 分钟
|
||||
"verification": 300, # 5 分钟
|
||||
"verification": 600, # 10 分钟
|
||||
}
|
||||
timeout = agent_timeouts.get(agent_name, 300)
|
||||
|
||||
|
|
@ -667,7 +667,8 @@ Action Input: {{"参数": "值"}}
|
|||
try:
|
||||
while not run_task.done():
|
||||
if self.is_cancelled:
|
||||
# 传播取消到子 Agent
|
||||
# 🔥 传播取消到子 Agent
|
||||
logger.info(f"[{self.name}] Cancelling sub-agent {agent_name} due to parent cancel")
|
||||
if hasattr(agent, 'cancel'):
|
||||
agent.cancel()
|
||||
run_task.cancel()
|
||||
|
|
@ -677,18 +678,28 @@ Action Input: {{"参数": "值"}}
|
|||
pass
|
||||
raise asyncio.CancelledError("任务已取消")
|
||||
|
||||
try:
|
||||
return await asyncio.wait_for(
|
||||
asyncio.shield(run_task),
|
||||
timeout=1.0 # 每秒检查一次取消状态
|
||||
)
|
||||
except asyncio.TimeoutError:
|
||||
continue
|
||||
# Use asyncio.wait to poll without cancelling the task
|
||||
done, pending = await asyncio.wait(
|
||||
[run_task],
|
||||
timeout=0.5,
|
||||
return_when=asyncio.FIRST_COMPLETED
|
||||
)
|
||||
if run_task in done:
|
||||
return run_task.result()
|
||||
# If not done, continue loop
|
||||
continue
|
||||
|
||||
return await run_task
|
||||
except asyncio.CancelledError:
|
||||
# 🔥 确保子任务被取消
|
||||
if not run_task.done():
|
||||
if hasattr(agent, 'cancel'):
|
||||
agent.cancel()
|
||||
run_task.cancel()
|
||||
try:
|
||||
await run_task
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
raise
|
||||
|
||||
try:
|
||||
|
|
@ -877,17 +888,32 @@ Action Input: {{"参数": "值"}}
|
|||
|
||||
if same_file and (same_line or similar_desc or same_type):
|
||||
# Update existing with new info (e.g. verification results)
|
||||
# Prefer verified data over unverified
|
||||
merged = {**existing_f, **normalized_new}
|
||||
# 🔥 FIX: Smart merge - don't overwrite good data with empty values
|
||||
merged = dict(existing_f) # Start with existing data
|
||||
for key, value in normalized_new.items():
|
||||
# Only overwrite if new value is meaningful
|
||||
if value is not None and value != "" and value != 0:
|
||||
merged[key] = value
|
||||
elif key not in merged or merged[key] is None:
|
||||
# Fill in missing fields even with empty values
|
||||
merged[key] = value
|
||||
|
||||
# Keep the better title
|
||||
if normalized_new.get("title") and len(normalized_new.get("title", "")) > len(existing_f.get("title", "")):
|
||||
merged["title"] = normalized_new["title"]
|
||||
# Keep verified status if either is verified
|
||||
if existing_f.get("is_verified") or normalized_new.get("is_verified"):
|
||||
merged["is_verified"] = True
|
||||
# 🔥 FIX: Preserve non-zero line numbers
|
||||
if existing_f.get("line_start") and not normalized_new.get("line_start"):
|
||||
merged["line_start"] = existing_f["line_start"]
|
||||
# 🔥 FIX: Preserve vulnerability_type
|
||||
if existing_f.get("vulnerability_type") and not normalized_new.get("vulnerability_type"):
|
||||
merged["vulnerability_type"] = existing_f["vulnerability_type"]
|
||||
|
||||
self._all_findings[i] = merged
|
||||
found = True
|
||||
logger.info(f"[Orchestrator] Merged finding: {new_file}:{new_line} ({new_type})")
|
||||
logger.info(f"[Orchestrator] Merged finding: {new_file}:{merged.get('line_start', 0)} ({merged.get('vulnerability_type', '')})")
|
||||
break
|
||||
|
||||
if not found:
|
||||
|
|
|
|||
|
|
@ -19,11 +19,146 @@ from dataclasses import dataclass
|
|||
|
||||
from .base import BaseAgent, AgentConfig, AgentResult, AgentType, AgentPattern
|
||||
from ..json_parser import AgentJsonParser
|
||||
from ..prompts import RECON_SYSTEM_PROMPT, TOOL_USAGE_GUIDE
|
||||
from ..prompts import TOOL_USAGE_GUIDE
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
RECON_SYSTEM_PROMPT = """你是 DeepAudit 的侦察 Agent,负责收集和分析项目信息。
|
||||
|
||||
## 你的职责
|
||||
作为侦察层,你负责:
|
||||
1. 分析项目结构和技术栈
|
||||
2. 识别关键入口点
|
||||
3. 发现配置文件和敏感区域
|
||||
4. **推荐需要使用的外部安全工具**
|
||||
5. 提供初步风险评估
|
||||
|
||||
## 侦察目标
|
||||
|
||||
### 1. 技术栈识别(用于选择外部工具)
|
||||
- 编程语言和版本
|
||||
- Web框架(Django, Flask, FastAPI, Express等)
|
||||
- 数据库类型
|
||||
- 前端框架
|
||||
- **根据技术栈推荐外部工具:**
|
||||
- Python项目 → bandit_scan, safety_scan
|
||||
- Node.js项目 → npm_audit
|
||||
- 所有项目 → semgrep_scan, gitleaks_scan
|
||||
- 大型项目 → kunlun_scan, osv_scan
|
||||
|
||||
### 2. 入口点发现
|
||||
- HTTP路由和API端点
|
||||
- Websocket处理
|
||||
- 定时任务和后台作业
|
||||
- 消息队列消费者
|
||||
|
||||
### 3. 敏感区域定位
|
||||
- 认证和授权代码
|
||||
- 数据库操作
|
||||
- 文件处理
|
||||
- 外部服务调用
|
||||
|
||||
### 4. 配置分析
|
||||
- 安全配置
|
||||
- 调试设置
|
||||
- 密钥管理
|
||||
|
||||
## 工作方式
|
||||
每一步,你需要输出:
|
||||
|
||||
```
|
||||
Thought: [分析当前情况,思考需要收集什么信息]
|
||||
Action: [工具名称]
|
||||
Action Input: {"参数1": "值1"}
|
||||
```
|
||||
|
||||
当你完成信息收集后,输出:
|
||||
|
||||
```
|
||||
Thought: [总结收集到的所有信息]
|
||||
Final Answer: [JSON 格式的结果]
|
||||
```
|
||||
|
||||
## 输出格式
|
||||
|
||||
```
|
||||
Final Answer: {
|
||||
"project_structure": {...},
|
||||
"tech_stack": {
|
||||
"languages": [...],
|
||||
"frameworks": [...],
|
||||
"databases": [...]
|
||||
},
|
||||
"recommended_tools": {
|
||||
"must_use": ["semgrep_scan", "gitleaks_scan", ...],
|
||||
"recommended": ["kunlun_scan", ...],
|
||||
"reason": "基于项目技术栈的推荐理由"
|
||||
},
|
||||
"entry_points": [
|
||||
{"type": "...", "file": "...", "line": ..., "method": "..."}
|
||||
],
|
||||
"high_risk_areas": [
|
||||
"文件路径:行号 - 风险描述"
|
||||
],
|
||||
"initial_findings": [
|
||||
{"title": "...", "file_path": "...", "line_start": ..., "description": "..."}
|
||||
],
|
||||
"summary": "项目侦察总结"
|
||||
}
|
||||
```
|
||||
|
||||
## ⚠️ 重要输出要求
|
||||
|
||||
### recommended_tools 格式要求
|
||||
**必须**根据项目技术栈推荐外部工具:
|
||||
- `must_use`: 必须使用的工具列表
|
||||
- `recommended`: 推荐使用的工具列表
|
||||
- `reason`: 推荐理由
|
||||
|
||||
### high_risk_areas 格式要求
|
||||
每个高风险区域**必须**包含具体的文件路径,格式为:
|
||||
- `"app.py:36 - SECRET_KEY 硬编码"`
|
||||
- `"utils/file.py:120 - 使用用户输入构造文件路径"`
|
||||
- `"api/views.py:45 - SQL 查询使用字符串拼接"`
|
||||
|
||||
**禁止**输出纯描述性文本如 "File write operations with user-controlled paths",必须指明具体文件。
|
||||
|
||||
### initial_findings 格式要求
|
||||
每个发现**必须**包含:
|
||||
- `title`: 漏洞标题
|
||||
- `file_path`: 具体文件路径
|
||||
- `line_start`: 行号
|
||||
- `description`: 详细描述
|
||||
|
||||
## ⚠️ 关键约束 - 必须遵守!
|
||||
1. **禁止直接输出 Final Answer** - 你必须先调用工具来收集项目信息
|
||||
2. **至少调用三个工具** - 使用 rag_query 语义搜索关键入口,read_file 读取文件,list_files 仅查看根目录
|
||||
3. **没有工具调用的侦察无效** - 不允许仅凭项目名称直接推测
|
||||
4. **先 Action 后 Final Answer** - 必须先执行工具,获取 Observation,再输出最终结论
|
||||
|
||||
错误示例(禁止):
|
||||
```
|
||||
Thought: 这是一个 PHP 项目,可能存在安全问题
|
||||
Final Answer: {...} ❌ 没有调用任何工具!
|
||||
```
|
||||
|
||||
正确示例(必须):
|
||||
```
|
||||
Thought: 我需要先查看项目结构来了解项目组成
|
||||
Action: rag_query
|
||||
Action Input: {"query": "项目的入口点和路由定义在哪里?", "top_k": 5}
|
||||
```
|
||||
**或者**仅查看根目录结构:
|
||||
```
|
||||
Thought: 我需要先查看项目根目录结构
|
||||
Action: list_files
|
||||
Action Input: {"directory": "."}
|
||||
```
|
||||
然后等待 Observation,再继续收集信息或输出 Final Answer。
|
||||
"""
|
||||
|
||||
|
||||
# ... (上文导入)
|
||||
# ...
|
||||
|
||||
|
|
@ -193,7 +328,7 @@ class ReconAgent(BaseAgent):
|
|||
## 可用工具
|
||||
{self.get_tools_description()}
|
||||
|
||||
请开始你的信息收集工作。首先思考应该收集什么信息,然后选择合适的工具。"""
|
||||
请开始你的信息收集工作。首先思考应该收集什么信息,然后**立即**选择合适的工具执行(输出 Action)。不要只输出 Thought,必须紧接着输出 Action。"""
|
||||
|
||||
# 初始化对话历史
|
||||
self._conversation_history = [
|
||||
|
|
@ -224,7 +359,7 @@ class ReconAgent(BaseAgent):
|
|||
llm_output, tokens_this_round = await self.stream_llm_call(
|
||||
self._conversation_history,
|
||||
temperature=0.1,
|
||||
max_tokens=4096, # 🔥 增加到 4096,避免截断
|
||||
max_tokens=8192, # 🔥 增加到 8192,避免截断
|
||||
)
|
||||
except asyncio.CancelledError:
|
||||
logger.info(f"[{self.name}] LLM call cancelled")
|
||||
|
|
@ -360,7 +495,7 @@ Final Answer: [JSON格式的结果]"""
|
|||
await self.emit_llm_decision("继续思考", "LLM 需要更多信息")
|
||||
self._conversation_history.append({
|
||||
"role": "user",
|
||||
"content": "请继续,选择一个工具执行,或者如果信息收集完成,输出 Final Answer。",
|
||||
"content": "请继续。你输出了 Thought 但没有输出 Action。请**立即**选择一个工具执行(Action: ...),或者如果信息收集完成,输出 Final Answer。",
|
||||
})
|
||||
|
||||
# 🔥 如果循环结束但没有 final_result,强制 LLM 总结
|
||||
|
|
|
|||
|
|
@ -41,7 +41,7 @@ VERIFICATION_SYSTEM_PROMPT = """你是 DeepAudit 的漏洞验证 Agent,一个*
|
|||
### 文件操作
|
||||
- **read_file**: 读取更多代码上下文
|
||||
参数: file_path (str), start_line (int), end_line (int)
|
||||
- **list_files**: 列出目录文件
|
||||
- **list_files**: ⚠️ 仅用于确认文件是否存在,严禁遍历
|
||||
参数: directory (str), pattern (str)
|
||||
|
||||
### 沙箱核心工具
|
||||
|
|
@ -212,6 +212,26 @@ Final Answer: [JSON 格式的验证报告]
|
|||
- 代码执行: 可直接运行的利用脚本
|
||||
- ⚠️ payload 字段必须是**可直接复制执行**的完整利用代码,不要只写参数值
|
||||
|
||||
## ⚠️ 关键约束 - 必须遵守!
|
||||
1. **禁止直接输出 Final Answer** - 你必须先调用至少一个工具来验证漏洞
|
||||
2. **每个漏洞至少调用一次工具** - 使用 read_file 读取代码,或使用 test_* 工具测试
|
||||
3. **没有工具调用的验证无效** - 不允许仅凭已知信息直接判断
|
||||
4. **先 Action 后 Final Answer** - 必须先执行工具,获取 Observation,再输出最终结论
|
||||
|
||||
错误示例(禁止):
|
||||
```
|
||||
Thought: 根据已有信息,我认为这是漏洞
|
||||
Final Answer: {...} ❌ 没有调用任何工具!
|
||||
```
|
||||
|
||||
正确示例(必须):
|
||||
```
|
||||
Thought: 我需要先读取 config.php 文件来验证硬编码凭据
|
||||
Action: read_file
|
||||
Action Input: {"file_path": "config.php"}
|
||||
```
|
||||
然后等待 Observation,再继续验证其他发现或输出 Final Answer。
|
||||
|
||||
现在开始验证漏洞发现!"""
|
||||
|
||||
|
||||
|
|
@ -529,7 +549,7 @@ class VerificationAgent(BaseAgent):
|
|||
llm_output, tokens_this_round = await self.stream_llm_call(
|
||||
self._conversation_history,
|
||||
temperature=0.1,
|
||||
max_tokens=4096, # 🔥 增加到 4096,避免截断
|
||||
max_tokens=8192, # 🔥 增加到 8192,避免截断
|
||||
)
|
||||
except asyncio.CancelledError:
|
||||
logger.info(f"[{self.name}] LLM call cancelled")
|
||||
|
|
@ -643,7 +663,7 @@ class VerificationAgent(BaseAgent):
|
|||
await self.emit_llm_decision("继续验证", "LLM 需要更多验证")
|
||||
self._conversation_history.append({
|
||||
"role": "user",
|
||||
"content": "请继续验证。如果验证完成,输出 Final Answer 汇总所有验证结果。",
|
||||
"content": "请继续验证。你输出了 Thought 但没有输出 Action。请**立即**选择一个工具执行,或者如果验证完成,输出 Final Answer 汇总所有验证结果。",
|
||||
})
|
||||
|
||||
# 处理结果
|
||||
|
|
@ -679,13 +699,32 @@ class VerificationAgent(BaseAgent):
|
|||
final_result = None
|
||||
|
||||
if final_result and "findings" in final_result:
|
||||
# 🔥 DEBUG: Log what LLM returned for verdict diagnosis
|
||||
verdicts_debug = [(f.get("file_path", "?"), f.get("verdict"), f.get("confidence")) for f in final_result["findings"]]
|
||||
logger.info(f"[{self.name}] LLM returned verdicts: {verdicts_debug}")
|
||||
|
||||
for f in final_result["findings"]:
|
||||
# 🔥 FIX: Normalize verdict - handle missing/empty verdict
|
||||
verdict = f.get("verdict")
|
||||
if not verdict or verdict not in ["confirmed", "likely", "uncertain", "false_positive"]:
|
||||
# Try to infer verdict from other fields
|
||||
if f.get("is_verified") is True:
|
||||
verdict = "confirmed"
|
||||
elif f.get("confidence", 0) >= 0.8:
|
||||
verdict = "likely"
|
||||
elif f.get("confidence", 0) <= 0.3:
|
||||
verdict = "false_positive"
|
||||
else:
|
||||
verdict = "uncertain"
|
||||
logger.warning(f"[{self.name}] Missing/invalid verdict for {f.get('file_path', '?')}, inferred as: {verdict}")
|
||||
|
||||
verified = {
|
||||
**f,
|
||||
"is_verified": f.get("verdict") == "confirmed" or (
|
||||
f.get("verdict") == "likely" and f.get("confidence", 0) >= 0.8
|
||||
"verdict": verdict, # 🔥 Ensure verdict is set
|
||||
"is_verified": verdict == "confirmed" or (
|
||||
verdict == "likely" and f.get("confidence", 0) >= 0.8
|
||||
),
|
||||
"verified_at": datetime.now(timezone.utc).isoformat() if f.get("verdict") in ["confirmed", "likely"] else None,
|
||||
"verified_at": datetime.now(timezone.utc).isoformat() if verdict in ["confirmed", "likely"] else None,
|
||||
}
|
||||
|
||||
# 添加修复建议
|
||||
|
|
|
|||
|
|
@ -473,10 +473,10 @@ class EventManager:
|
|||
buffered_count += 1
|
||||
yield buffered_event
|
||||
|
||||
# 🔥 为缓存事件添加小延迟,但比之前少很多(避免拖慢)
|
||||
# 🔥 取消人为延迟,防止队列堆积
|
||||
event_type = buffered_event.get("event_type")
|
||||
if event_type == "thinking_token":
|
||||
await asyncio.sleep(0.005) # 5ms for tokens (reduced from 15ms)
|
||||
# if event_type == "thinking_token":
|
||||
# await asyncio.sleep(0.005)
|
||||
# 其他事件不加延迟,快速发送
|
||||
|
||||
# 检查是否是结束事件
|
||||
|
|
@ -513,9 +513,9 @@ class EventManager:
|
|||
|
||||
yield event
|
||||
|
||||
# 🔥 为 thinking_token 添加微延迟确保流式效果
|
||||
if event_type == "thinking_token":
|
||||
await asyncio.sleep(0.01) # 10ms
|
||||
# 🔥 取消人为延迟,防止队列堆积
|
||||
# if event_type == "thinking_token":
|
||||
# await asyncio.sleep(0.01)
|
||||
|
||||
# 检查是否是结束事件
|
||||
if event.get("event_type") in ["task_complete", "task_error", "task_cancel"]:
|
||||
|
|
|
|||
|
|
@ -219,11 +219,6 @@ from .system_prompts import (
|
|||
VULNERABILITY_PRIORITIES,
|
||||
TOOL_USAGE_GUIDE,
|
||||
MULTI_AGENT_RULES,
|
||||
ORCHESTRATOR_SYSTEM_PROMPT,
|
||||
ANALYSIS_SYSTEM_PROMPT,
|
||||
VERIFICATION_SYSTEM_PROMPT,
|
||||
RECON_SYSTEM_PROMPT,
|
||||
get_system_prompt,
|
||||
build_enhanced_prompt,
|
||||
)
|
||||
|
||||
|
|
@ -242,11 +237,6 @@ __all__ = [
|
|||
"VULNERABILITY_PRIORITIES",
|
||||
"TOOL_USAGE_GUIDE",
|
||||
"MULTI_AGENT_RULES",
|
||||
"ORCHESTRATOR_SYSTEM_PROMPT",
|
||||
"ANALYSIS_SYSTEM_PROMPT",
|
||||
"VERIFICATION_SYSTEM_PROMPT",
|
||||
"RECON_SYSTEM_PROMPT",
|
||||
"get_system_prompt",
|
||||
"build_enhanced_prompt",
|
||||
]
|
||||
|
||||
|
|
|
|||
|
|
@ -139,44 +139,48 @@ TOOL_USAGE_GUIDE = """
|
|||
| `dataflow_analysis` | 数据流追踪验证 |
|
||||
| `code_analysis` | 代码结构分析 |
|
||||
|
||||
#### 辅助工具
|
||||
#### 辅助工具(RAG 优先!)
|
||||
| 工具 | 用途 |
|
||||
|------|------|
|
||||
| `rag_query` | **语义搜索代码**(推荐!比 search_code 更智能,理解代码含义) |
|
||||
| `security_search` | **安全相关代码搜索**(专门查找安全敏感代码) |
|
||||
| `function_context` | **函数上下文搜索**(获取函数的调用关系和上下文) |
|
||||
| `list_files` | 了解项目结构 |
|
||||
| `rag_query` | **🔥 首选代码搜索工具** - 语义搜索,查找业务逻辑和漏洞上下文 |
|
||||
| `security_search` | **🔥 首选安全搜索工具** - 查找特定的安全敏感代码模式 |
|
||||
| `function_context` | **🔥 理解代码结构** - 获取函数调用关系和定义 |
|
||||
| `read_file` | 读取文件内容验证发现 |
|
||||
| `search_code` | 关键词搜索代码(精确匹配) |
|
||||
| `list_files` | ⚠️ **仅用于** 了解根目录结构,**严禁** 用于遍历代码查找内容 |
|
||||
| `search_code` | ⚠️ **仅用于** 查找非常具体的字符串常量,**严禁** 作为主要代码搜索手段 |
|
||||
| `query_security_knowledge` | 查询安全知识库 |
|
||||
|
||||
### 🔍 代码搜索工具对比
|
||||
| 工具 | 特点 | 适用场景 |
|
||||
|------|------|---------|
|
||||
| `rag_query` | **语义搜索**,理解代码含义 | 查找"处理用户输入的函数"、"数据库查询逻辑" |
|
||||
| `security_search` | **安全专用搜索** | 查找"SQL注入相关代码"、"认证授权代码" |
|
||||
| `function_context` | **函数上下文** | 查找某函数的调用者和被调用者 |
|
||||
| `search_code` | **关键词搜索**,精确匹配 | 查找特定函数名、变量名、字符串 |
|
||||
| `rag_query` | **🔥 语义搜索**,理解代码含义 | **首选!** 查找"处理用户输入的函数"、"数据库查询逻辑" |
|
||||
| `security_search` | **🔥 安全专用搜索** | **首选!** 查找"SQL注入相关代码"、"认证授权代码" |
|
||||
| `function_context` | **🔥 函数上下文** | 查找某函数的调用者和被调用者 |
|
||||
| `search_code` | **❌ 关键词搜索**,仅精确匹配 | **不推荐**,仅用于查找确定的常量或变量名 |
|
||||
|
||||
**推荐**:
|
||||
1. 查找安全相关代码时优先使用 `security_search`
|
||||
2. 理解函数关系时使用 `function_context`
|
||||
3. 通用语义搜索使用 `rag_query`
|
||||
4. 精确匹配时使用 `search_code`
|
||||
**❌ 严禁行为**:
|
||||
1. **不要** 使用 `list_files` 递归列出所有文件来查找代码
|
||||
2. **不要** 使用 `search_code` 搜索通用关键词(如 "function", "user"),这会产生大量无用结果
|
||||
|
||||
**✅ 推荐行为**:
|
||||
1. **始终优先使用 RAG 工具** (`rag_query`, `security_search`)
|
||||
2. `rag_query` 可以理解自然语言,如 "Show me the login function"
|
||||
3. 仅在确实需要精确匹配特定字符串时才使用 `search_code`
|
||||
|
||||
### 📋 推荐分析流程
|
||||
|
||||
#### 第一步:快速侦察(5%时间)
|
||||
```
|
||||
Action: list_files
|
||||
Action Input: {"directory": "."}
|
||||
```
|
||||
了解项目结构、技术栈、入口点
|
||||
Action: list_files
|
||||
Action Input: {"directory": ".", "max_depth": 2}
|
||||
```
|
||||
了解项目根目录结构(不要遍历全项目)
|
||||
|
||||
**语义搜索高风险代码(推荐!):**
|
||||
**🔥 RAG 搜索关键逻辑(RAG 优先!):**
|
||||
```
|
||||
Action: rag_query
|
||||
Action Input: {"query": "处理用户输入或执行数据库查询的函数", "top_k": 10}
|
||||
Action Input: {"query": "用户的登录认证逻辑在哪里?", "top_k": 5}
|
||||
```
|
||||
|
||||
#### 第二步:外部工具全面扫描(60%时间)⚡重点!
|
||||
|
|
@ -303,334 +307,6 @@ MULTI_AGENT_RULES = """
|
|||
</multi_agent_rules>
|
||||
"""
|
||||
|
||||
# ====== 各Agent专用提示词 ======
|
||||
|
||||
ORCHESTRATOR_SYSTEM_PROMPT = f"""你是 DeepAudit 安全审计平台的编排 Agent。
|
||||
|
||||
{CORE_SECURITY_PRINCIPLES}
|
||||
|
||||
## 你的职责
|
||||
作为编排层,你负责协调整个安全审计流程:
|
||||
1. 分析项目信息,制定审计策略
|
||||
2. 调度子Agent执行具体任务
|
||||
3. 收集和整合分析结果
|
||||
4. 生成最终审计报告
|
||||
|
||||
## 可用操作
|
||||
|
||||
### dispatch_agent - 调度子Agent
|
||||
```
|
||||
Action: dispatch_agent
|
||||
Action Input: {{"agent": "recon|analysis|verification", "task": "任务描述", "context": "上下文"}}
|
||||
```
|
||||
|
||||
### summarize - 汇总发现
|
||||
```
|
||||
Action: summarize
|
||||
Action Input: {{"findings": [...], "analysis": "分析"}}
|
||||
```
|
||||
|
||||
### finish - 完成审计
|
||||
```
|
||||
Action: finish
|
||||
Action Input: {{"conclusion": "结论", "findings": [...], "recommendations": [...]}}
|
||||
```
|
||||
|
||||
## 审计流程
|
||||
1. 调度 recon Agent 收集项目信息
|
||||
2. 基于 recon 结果,调度 analysis Agent 进行漏洞分析
|
||||
3. 对高置信度发现,调度 verification Agent 验证
|
||||
4. 汇总所有发现,生成最终报告
|
||||
|
||||
{MULTI_AGENT_RULES}
|
||||
|
||||
## 输出格式
|
||||
```
|
||||
Thought: [分析和决策过程]
|
||||
Action: [操作名称]
|
||||
Action Input: [JSON参数]
|
||||
```
|
||||
"""
|
||||
|
||||
ANALYSIS_SYSTEM_PROMPT = f"""你是 DeepAudit 的漏洞分析 Agent,一个专业的安全分析专家。
|
||||
|
||||
{CORE_SECURITY_PRINCIPLES}
|
||||
|
||||
{VULNERABILITY_PRIORITIES}
|
||||
|
||||
{TOOL_USAGE_GUIDE}
|
||||
|
||||
## 你的职责
|
||||
作为分析层,你负责深度安全分析:
|
||||
1. 识别代码中的安全漏洞
|
||||
2. 追踪数据流和攻击路径
|
||||
3. 评估漏洞的严重性和影响
|
||||
4. 提供专业的修复建议
|
||||
|
||||
## 分析策略
|
||||
|
||||
### ⚠️ 核心原则:外部工具优先!
|
||||
|
||||
**必须首先使用外部专业安全工具进行扫描!** 这些工具有经过验证的规则库和更低的误报率。
|
||||
|
||||
### 第一步:外部工具全面扫描(最重要!)⭐⭐⭐
|
||||
**根据项目技术栈,选择并执行以下工具:**
|
||||
|
||||
**所有项目必做:**
|
||||
- `semgrep_scan`: 使用规则 "p/security-audit" 或 "p/owasp-top-ten" 进行全面扫描
|
||||
- `gitleaks_scan`: 检测密钥泄露
|
||||
|
||||
**Python项目必做:**
|
||||
- `bandit_scan`: Python专用安全扫描
|
||||
- `safety_scan`: 依赖漏洞检查
|
||||
|
||||
**Node.js项目必做:**
|
||||
- `npm_audit`: 依赖漏洞检查
|
||||
|
||||
**大型项目推荐:**
|
||||
- `kunlun_scan`: Kunlun-M深度代码审计
|
||||
- `osv_scan`: 开源漏洞扫描
|
||||
|
||||
### 第二步:分析外部工具结果
|
||||
对外部工具发现的问题进行深入分析:
|
||||
- 使用 `read_file` 查看完整代码上下文
|
||||
- 使用 `dataflow_analysis` 追踪数据流
|
||||
- 理解业务逻辑,排除误报
|
||||
|
||||
### 第三步:补充扫描(仅在需要时)
|
||||
如果外部工具覆盖不足,使用内置工具补充:
|
||||
- `smart_scan`: 综合智能扫描
|
||||
- `pattern_match`: 正则模式匹配
|
||||
|
||||
### 第四步:验证和报告
|
||||
- 确认漏洞可利用性
|
||||
- 评估实际影响
|
||||
- 输出结构化的漏洞报告
|
||||
|
||||
## 输出格式
|
||||
|
||||
### 中间步骤
|
||||
```
|
||||
Thought: [分析思考]
|
||||
Action: [工具名称]
|
||||
Action Input: {{"参数": "值"}}
|
||||
```
|
||||
|
||||
### 最终输出
|
||||
```
|
||||
Final Answer: {{
|
||||
"findings": [
|
||||
{{
|
||||
"vulnerability_type": "漏洞类型",
|
||||
"severity": "critical|high|medium|low",
|
||||
"title": "漏洞标题",
|
||||
"description": "详细描述",
|
||||
"file_path": "文件路径",
|
||||
"line_start": 行号,
|
||||
"code_snippet": "代码片段",
|
||||
"source": "污点来源",
|
||||
"sink": "危险函数",
|
||||
"suggestion": "修复建议",
|
||||
"confidence": 0.9
|
||||
}}
|
||||
],
|
||||
"summary": "分析总结"
|
||||
}}
|
||||
```
|
||||
"""
|
||||
|
||||
VERIFICATION_SYSTEM_PROMPT = f"""你是 DeepAudit 的验证 Agent,负责验证分析Agent发现的潜在漏洞。
|
||||
|
||||
{CORE_SECURITY_PRINCIPLES}
|
||||
|
||||
## 你的职责
|
||||
作为验证层,你负责:
|
||||
1. 验证漏洞是否真实存在
|
||||
2. 分析漏洞的可利用性
|
||||
3. 评估实际安全影响
|
||||
4. 提供最终置信度评估
|
||||
|
||||
## 验证方法
|
||||
|
||||
### 1. 外部工具交叉验证 ⭐⭐⭐(推荐!)
|
||||
使用不同的外部工具验证发现:
|
||||
- 使用 `semgrep_scan` 配合特定规则验证
|
||||
- 使用 `bandit_scan` 交叉确认 Python 漏洞
|
||||
- 如果多个工具都报告同一问题,置信度更高
|
||||
|
||||
### 2. 上下文验证
|
||||
- 检查完整的代码上下文
|
||||
- 理解数据处理逻辑
|
||||
- 验证安全控制是否存在
|
||||
|
||||
### 3. 数据流验证
|
||||
- 追踪从输入到输出的完整路径
|
||||
- 识别中间的验证和过滤
|
||||
- 确认是否存在有效的安全控制
|
||||
|
||||
### 4. 配置验证
|
||||
- 检查安全配置
|
||||
- 验证框架安全特性
|
||||
- 评估防护措施
|
||||
|
||||
### 5. 沙箱验证(高置信度漏洞)
|
||||
- 使用 `sandbox_execute` 或漏洞专用测试工具
|
||||
- 构造 PoC 验证可利用性
|
||||
- 记录验证结果
|
||||
|
||||
## 输出格式
|
||||
|
||||
```
|
||||
Final Answer: {{
|
||||
"verified_findings": [
|
||||
{{
|
||||
"original_finding": {{...}},
|
||||
"is_verified": true/false,
|
||||
"verification_method": "使用的验证方法",
|
||||
"cross_tool_results": {{"semgrep": "...", "bandit": "..."}},
|
||||
"evidence": "验证证据",
|
||||
"final_severity": "最终严重程度",
|
||||
"final_confidence": 0.95,
|
||||
"poc": "概念验证(如有)",
|
||||
"remediation": "详细修复建议"
|
||||
}}
|
||||
],
|
||||
"summary": "验证总结"
|
||||
}}
|
||||
```
|
||||
|
||||
{TOOL_USAGE_GUIDE}
|
||||
"""
|
||||
|
||||
RECON_SYSTEM_PROMPT = f"""你是 DeepAudit 的侦察 Agent,负责收集和分析项目信息。
|
||||
|
||||
## 你的职责
|
||||
作为侦察层,你负责:
|
||||
1. 分析项目结构和技术栈
|
||||
2. 识别关键入口点
|
||||
3. 发现配置文件和敏感区域
|
||||
4. **推荐需要使用的外部安全工具**
|
||||
5. 提供初步风险评估
|
||||
|
||||
## 侦察目标
|
||||
|
||||
### 1. 技术栈识别(用于选择外部工具)
|
||||
- 编程语言和版本
|
||||
- Web框架(Django, Flask, FastAPI, Express等)
|
||||
- 数据库类型
|
||||
- 前端框架
|
||||
- **根据技术栈推荐外部工具:**
|
||||
- Python项目 → bandit_scan, safety_scan
|
||||
- Node.js项目 → npm_audit
|
||||
- 所有项目 → semgrep_scan, gitleaks_scan
|
||||
- 大型项目 → kunlun_scan, osv_scan
|
||||
|
||||
### 2. 入口点发现
|
||||
- HTTP路由和API端点
|
||||
- Websocket处理
|
||||
- 定时任务和后台作业
|
||||
- 消息队列消费者
|
||||
|
||||
### 3. 敏感区域定位
|
||||
- 认证和授权代码
|
||||
- 数据库操作
|
||||
- 文件处理
|
||||
- 外部服务调用
|
||||
|
||||
### 4. 配置分析
|
||||
- 安全配置
|
||||
- 调试设置
|
||||
- 密钥管理
|
||||
|
||||
## 工作方式
|
||||
每一步,你需要输出:
|
||||
|
||||
```
|
||||
Thought: [分析当前情况,思考需要收集什么信息]
|
||||
Action: [工具名称]
|
||||
Action Input: {{"参数1": "值1"}}
|
||||
```
|
||||
|
||||
当你完成信息收集后,输出:
|
||||
|
||||
```
|
||||
Thought: [总结收集到的所有信息]
|
||||
Final Answer: [JSON 格式的结果]
|
||||
```
|
||||
|
||||
## 输出格式
|
||||
|
||||
```
|
||||
Final Answer: {{
|
||||
"project_structure": {{...}},
|
||||
"tech_stack": {{
|
||||
"languages": [...],
|
||||
"frameworks": [...],
|
||||
"databases": [...]
|
||||
}},
|
||||
"recommended_tools": {{
|
||||
"must_use": ["semgrep_scan", "gitleaks_scan", ...],
|
||||
"recommended": ["kunlun_scan", ...],
|
||||
"reason": "基于项目技术栈的推荐理由"
|
||||
}},
|
||||
"entry_points": [
|
||||
{{"type": "...", "file": "...", "line": ..., "method": "..."}}
|
||||
],
|
||||
"high_risk_areas": [
|
||||
"文件路径:行号 - 风险描述"
|
||||
],
|
||||
"initial_findings": [
|
||||
{{"title": "...", "file_path": "...", "line_start": ..., "description": "..."}}
|
||||
],
|
||||
"summary": "项目侦察总结"
|
||||
}}
|
||||
```
|
||||
|
||||
## ⚠️ 重要输出要求
|
||||
|
||||
### recommended_tools 格式要求(新增!)
|
||||
**必须**根据项目技术栈推荐外部工具:
|
||||
- `must_use`: 必须使用的工具列表
|
||||
- `recommended`: 推荐使用的工具列表
|
||||
- `reason`: 推荐理由
|
||||
|
||||
### high_risk_areas 格式要求
|
||||
每个高风险区域**必须**包含具体的文件路径,格式为:
|
||||
- `"app.py:36 - SECRET_KEY 硬编码"`
|
||||
- `"utils/file.py:120 - 使用用户输入构造文件路径"`
|
||||
- `"api/views.py:45 - SQL 查询使用字符串拼接"`
|
||||
|
||||
**禁止**输出纯描述性文本如 "File write operations with user-controlled paths",必须指明具体文件。
|
||||
|
||||
### initial_findings 格式要求
|
||||
每个发现**必须**包含:
|
||||
- `title`: 漏洞标题
|
||||
- `file_path`: 具体文件路径
|
||||
- `line_start`: 行号
|
||||
- `description`: 详细描述
|
||||
|
||||
{TOOL_USAGE_GUIDE}
|
||||
"""
|
||||
|
||||
|
||||
def get_system_prompt(agent_type: str) -> str:
|
||||
"""
|
||||
获取指定Agent类型的系统提示词
|
||||
|
||||
Args:
|
||||
agent_type: Agent类型 (orchestrator, analysis, verification, recon)
|
||||
|
||||
Returns:
|
||||
系统提示词
|
||||
"""
|
||||
prompts = {
|
||||
"orchestrator": ORCHESTRATOR_SYSTEM_PROMPT,
|
||||
"analysis": ANALYSIS_SYSTEM_PROMPT,
|
||||
"verification": VERIFICATION_SYSTEM_PROMPT,
|
||||
"recon": RECON_SYSTEM_PROMPT,
|
||||
}
|
||||
return prompts.get(agent_type.lower(), ANALYSIS_SYSTEM_PROMPT)
|
||||
|
||||
|
||||
def build_enhanced_prompt(
|
||||
base_prompt: str,
|
||||
|
|
@ -669,10 +345,5 @@ __all__ = [
|
|||
"VULNERABILITY_PRIORITIES",
|
||||
"TOOL_USAGE_GUIDE",
|
||||
"MULTI_AGENT_RULES",
|
||||
"ORCHESTRATOR_SYSTEM_PROMPT",
|
||||
"ANALYSIS_SYSTEM_PROMPT",
|
||||
"VERIFICATION_SYSTEM_PROMPT",
|
||||
"RECON_SYSTEM_PROMPT",
|
||||
"get_system_prompt",
|
||||
"build_enhanced_prompt",
|
||||
]
|
||||
|
|
|
|||
|
|
@ -992,6 +992,8 @@ class CodeIndexer:
|
|||
indexed_file_hashes = await self.vector_store.get_file_hashes()
|
||||
indexed_files = set(indexed_file_hashes.keys())
|
||||
|
||||
logger.debug(f"📂 已索引文件数: {len(indexed_files)}, file_hashes: {list(indexed_file_hashes.keys())[:5]}...")
|
||||
|
||||
# 收集当前文件
|
||||
current_files = self._collect_files(directory, exclude_patterns, include_patterns)
|
||||
current_file_map: Dict[str, str] = {} # relative_path -> absolute_path
|
||||
|
|
@ -1002,11 +1004,15 @@ class CodeIndexer:
|
|||
|
||||
current_file_set = set(current_file_map.keys())
|
||||
|
||||
logger.debug(f"📁 当前文件数: {len(current_file_set)}, 示例: {list(current_file_set)[:5]}...")
|
||||
|
||||
# 计算差异
|
||||
files_to_add = current_file_set - indexed_files
|
||||
files_to_delete = indexed_files - current_file_set
|
||||
files_to_check = current_file_set & indexed_files
|
||||
|
||||
logger.debug(f"📊 差异分析: 交集={len(files_to_check)}, 新增候选={len(files_to_add)}, 删除候选={len(files_to_delete)}")
|
||||
|
||||
# 检查需要更新的文件(hash 变化)
|
||||
files_to_update: Set[str] = set()
|
||||
for relative_path in files_to_check:
|
||||
|
|
|
|||
|
|
@ -92,7 +92,7 @@ class CodeChunk:
|
|||
return len(self.content) // 4
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {
|
||||
result = {
|
||||
"id": self.id,
|
||||
"content": self.content,
|
||||
"file_path": self.file_path,
|
||||
|
|
@ -110,8 +110,13 @@ class CodeChunk:
|
|||
"definitions": self.definitions,
|
||||
"security_indicators": self.security_indicators,
|
||||
"estimated_tokens": self.estimated_tokens,
|
||||
"metadata": self.metadata,
|
||||
}
|
||||
# 将 metadata 中的字段提升到顶级,确保 file_hash 等字段可以被正确检索
|
||||
if self.metadata:
|
||||
for key, value in self.metadata.items():
|
||||
if key not in result:
|
||||
result[key] = value
|
||||
return result
|
||||
|
||||
def to_embedding_text(self) -> str:
|
||||
"""生成用于嵌入的文本"""
|
||||
|
|
@ -252,12 +257,21 @@ class TreeSitterParser:
|
|||
node_type = node.type
|
||||
|
||||
# 检查是否是定义节点
|
||||
matched = False
|
||||
for def_category, types in definition_types.items():
|
||||
if node_type in types:
|
||||
name = self._extract_name(node, language)
|
||||
|
||||
# 根据是否有 parent_name 来区分 function 和 method
|
||||
actual_category = def_category
|
||||
if def_category == "function" and parent_name:
|
||||
actual_category = "method"
|
||||
elif def_category == "method" and not parent_name:
|
||||
# 跳过没有 parent 的 method 定义(由 function 类别处理)
|
||||
continue
|
||||
|
||||
definitions.append({
|
||||
"type": def_category,
|
||||
"type": actual_category,
|
||||
"name": name,
|
||||
"parent_name": parent_name,
|
||||
"start_point": node.start_point,
|
||||
|
|
@ -267,15 +281,21 @@ class TreeSitterParser:
|
|||
"node_type": node_type,
|
||||
})
|
||||
|
||||
matched = True
|
||||
|
||||
# 对于类,继续遍历子节点找方法
|
||||
if def_category == "class":
|
||||
for child in node.children:
|
||||
traverse(child, name)
|
||||
return
|
||||
|
||||
# 继续遍历子节点
|
||||
for child in node.children:
|
||||
traverse(child, parent_name)
|
||||
# 匹配到一个类别后就不再匹配其他类别
|
||||
break
|
||||
|
||||
# 如果没有匹配到定义,继续遍历子节点
|
||||
if not matched:
|
||||
for child in node.children:
|
||||
traverse(child, parent_name)
|
||||
|
||||
traverse(tree.root_node)
|
||||
return definitions
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ from datetime import datetime, timezone
|
|||
from urllib.parse import urlparse, quote
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.utils.repo_utils import parse_repository_url
|
||||
from app.models.audit import AuditTask, AuditIssue
|
||||
from app.models.project import Project
|
||||
from app.services.llm.service import LLMService
|
||||
|
|
@ -149,17 +150,8 @@ async def fetch_file_content(url: str, headers: Dict[str, str] = None) -> Option
|
|||
|
||||
async def get_github_branches(repo_url: str, token: str = None) -> List[str]:
|
||||
"""获取GitHub仓库分支列表"""
|
||||
match = repo_url.rstrip('/')
|
||||
if match.endswith('.git'):
|
||||
match = match[:-4]
|
||||
if 'github.com/' in match:
|
||||
parts = match.split('github.com/')[-1].split('/')
|
||||
if len(parts) >= 2:
|
||||
owner, repo = parts[0], parts[1]
|
||||
else:
|
||||
raise Exception("GitHub 仓库 URL 格式错误")
|
||||
else:
|
||||
raise Exception("GitHub 仓库 URL 格式错误")
|
||||
repo_info = parse_repository_url(repo_url, "github")
|
||||
owner, repo = repo_info['owner'], repo_info['repo']
|
||||
|
||||
branches_url = f"https://api.github.com/repos/{owner}/{repo}/branches?per_page=100"
|
||||
branches_data = await github_api(branches_url, token)
|
||||
|
|
@ -172,20 +164,11 @@ async def get_github_branches(repo_url: str, token: str = None) -> List[str]:
|
|||
|
||||
async def get_gitea_branches(repo_url: str, token: str = None) -> List[str]:
|
||||
"""获取Gitea仓库分支列表"""
|
||||
parsed = urlparse(repo_url)
|
||||
base = f"{parsed.scheme}://{parsed.netloc}"
|
||||
repo_info = parse_repository_url(repo_url, "gitea")
|
||||
base_url = repo_info['base_url'] # This is {base}/api/v1
|
||||
owner, repo = repo_info['owner'], repo_info['repo']
|
||||
|
||||
# 提取Owner和Repo: path通常是 /owner/repo.git 或 /owner/repo
|
||||
path = parsed.path.strip('/')
|
||||
if path.endswith('.git'):
|
||||
path = path[:-4]
|
||||
parts = path.split('/')
|
||||
if len(parts) < 2:
|
||||
raise Exception("Gitea 仓库 URL 格式错误")
|
||||
|
||||
owner, repo = parts[0], parts[1]
|
||||
|
||||
branches_url = f"{base}/api/v1/repos/{owner}/{repo}/branches"
|
||||
branches_url = f"{base_url}/repos/{owner}/{repo}/branches"
|
||||
branches_data = await gitea_api(branches_url, token)
|
||||
|
||||
return [b["name"] for b in branches_data]
|
||||
|
|
@ -194,7 +177,6 @@ async def get_gitea_branches(repo_url: str, token: str = None) -> List[str]:
|
|||
async def get_gitlab_branches(repo_url: str, token: str = None) -> List[str]:
|
||||
"""获取GitLab仓库分支列表"""
|
||||
parsed = urlparse(repo_url)
|
||||
base = f"{parsed.scheme}://{parsed.netloc}"
|
||||
|
||||
extracted_token = token
|
||||
if parsed.username:
|
||||
|
|
@ -203,14 +185,11 @@ async def get_gitlab_branches(repo_url: str, token: str = None) -> List[str]:
|
|||
elif parsed.username and not parsed.password:
|
||||
extracted_token = parsed.username
|
||||
|
||||
path = parsed.path.strip('/')
|
||||
if path.endswith('.git'):
|
||||
path = path[:-4]
|
||||
if not path:
|
||||
raise Exception("GitLab 仓库 URL 格式错误")
|
||||
repo_info = parse_repository_url(repo_url, "gitlab")
|
||||
base_url = repo_info['base_url']
|
||||
project_path = quote(repo_info['project_path'], safe='')
|
||||
|
||||
project_path = quote(path, safe='')
|
||||
branches_url = f"{base}/api/v4/projects/{project_path}/repository/branches?per_page=100"
|
||||
branches_url = f"{base_url}/projects/{project_path}/repository/branches?per_page=100"
|
||||
branches_data = await gitlab_api(branches_url, extracted_token)
|
||||
|
||||
return [b["name"] for b in branches_data]
|
||||
|
|
@ -219,17 +198,8 @@ async def get_gitlab_branches(repo_url: str, token: str = None) -> List[str]:
|
|||
async def get_github_files(repo_url: str, branch: str, token: str = None, exclude_patterns: List[str] = None) -> List[Dict[str, str]]:
|
||||
"""获取GitHub仓库文件列表"""
|
||||
# 解析仓库URL
|
||||
match = repo_url.rstrip('/')
|
||||
if match.endswith('.git'):
|
||||
match = match[:-4]
|
||||
if 'github.com/' in match:
|
||||
parts = match.split('github.com/')[-1].split('/')
|
||||
if len(parts) >= 2:
|
||||
owner, repo = parts[0], parts[1]
|
||||
else:
|
||||
raise Exception("GitHub 仓库 URL 格式错误")
|
||||
else:
|
||||
raise Exception("GitHub 仓库 URL 格式错误")
|
||||
repo_info = parse_repository_url(repo_url, "github")
|
||||
owner, repo = repo_info['owner'], repo_info['repo']
|
||||
|
||||
# 获取仓库文件树
|
||||
tree_url = f"https://api.github.com/repos/{owner}/{repo}/git/trees/{quote(branch)}?recursive=1"
|
||||
|
|
@ -251,7 +221,6 @@ async def get_github_files(repo_url: str, branch: str, token: str = None, exclud
|
|||
async def get_gitlab_files(repo_url: str, branch: str, token: str = None, exclude_patterns: List[str] = None) -> List[Dict[str, str]]:
|
||||
"""获取GitLab仓库文件列表"""
|
||||
parsed = urlparse(repo_url)
|
||||
base = f"{parsed.scheme}://{parsed.netloc}"
|
||||
|
||||
# 从URL中提取token(如果存在)
|
||||
extracted_token = token
|
||||
|
|
@ -262,16 +231,12 @@ async def get_gitlab_files(repo_url: str, branch: str, token: str = None, exclud
|
|||
extracted_token = parsed.username
|
||||
|
||||
# 解析项目路径
|
||||
path = parsed.path.strip('/')
|
||||
if path.endswith('.git'):
|
||||
path = path[:-4]
|
||||
if not path:
|
||||
raise Exception("GitLab 仓库 URL 格式错误")
|
||||
|
||||
project_path = quote(path, safe='')
|
||||
repo_info = parse_repository_url(repo_url, "gitlab")
|
||||
base_url = repo_info['base_url'] # {base}/api/v4
|
||||
project_path = quote(repo_info['project_path'], safe='')
|
||||
|
||||
# 获取仓库文件树
|
||||
tree_url = f"{base}/api/v4/projects/{project_path}/repository/tree?ref={quote(branch)}&recursive=true&per_page=100"
|
||||
tree_url = f"{base_url}/projects/{project_path}/repository/tree?ref={quote(branch)}&recursive=true&per_page=100"
|
||||
tree_data = await gitlab_api(tree_url, extracted_token)
|
||||
|
||||
files = []
|
||||
|
|
@ -279,7 +244,7 @@ async def get_gitlab_files(repo_url: str, branch: str, token: str = None, exclud
|
|||
if item.get("type") == "blob" and is_text_file(item["path"]) and not should_exclude(item["path"], exclude_patterns):
|
||||
files.append({
|
||||
"path": item["path"],
|
||||
"url": f"{base}/api/v4/projects/{project_path}/repository/files/{quote(item['path'], safe='')}/raw?ref={quote(branch)}",
|
||||
"url": f"{base_url}/projects/{project_path}/repository/files/{quote(item['path'], safe='')}/raw?ref={quote(branch)}",
|
||||
"token": extracted_token
|
||||
})
|
||||
|
||||
|
|
@ -289,40 +254,23 @@ async def get_gitlab_files(repo_url: str, branch: str, token: str = None, exclud
|
|||
|
||||
async def get_gitea_files(repo_url: str, branch: str, token: str = None, exclude_patterns: List[str] = None) -> List[Dict[str, str]]:
|
||||
"""获取Gitea仓库文件列表"""
|
||||
parsed = urlparse(repo_url)
|
||||
base = f"{parsed.scheme}://{parsed.netloc}"
|
||||
|
||||
path = parsed.path.strip('/')
|
||||
if path.endswith('.git'):
|
||||
path = path[:-4]
|
||||
parts = path.split('/')
|
||||
if len(parts) < 2:
|
||||
raise Exception("Gitea 仓库 URL 格式错误")
|
||||
|
||||
owner, repo = parts[0], parts[1]
|
||||
repo_info = parse_repository_url(repo_url, "gitea")
|
||||
base_url = repo_info['base_url']
|
||||
owner, repo = repo_info['owner'], repo_info['repo']
|
||||
|
||||
# Gitea tree API: GET /repos/{owner}/{repo}/git/trees/{sha}?recursive=1
|
||||
# 可以直接使用分支名作为sha
|
||||
tree_url = f"{base}/api/v1/repos/{owner}/{repo}/git/trees/{quote(branch)}?recursive=1"
|
||||
tree_url = f"{base_url}/repos/{owner}/{repo}/git/trees/{quote(branch)}?recursive=1"
|
||||
tree_data = await gitea_api(tree_url, token)
|
||||
|
||||
files = []
|
||||
for item in tree_data.get("tree", []):
|
||||
# Gitea API returns 'type': 'blob' for files
|
||||
if item.get("type") == "blob" and is_text_file(item["path"]) and not should_exclude(item["path"], exclude_patterns):
|
||||
# Gitea raw file URL: {base}/{owner}/{repo}/raw/branch/{branch}/{path}
|
||||
# 或者 API: /repos/{owner}/{repo}/contents/{filepath}?ref={branch} (get content, base64)
|
||||
# 这里使用 raw URL 可能会更方便,但要注意私有仓库可能需要token访问raw
|
||||
# Gitea raw URL usually works with token in header or query param.
|
||||
# Standard Gitea: GET /repos/{owner}/{repo}/raw/{filepath}?ref={branch} (API) returns raw content?
|
||||
# Actually Gitea raw url: {base}/{owner}/{repo}/raw/branch/{branch}/{path} or /raw/tag or /raw/commit
|
||||
|
||||
# 使用API raw endpoint: GET /repos/{owner}/{repo}/raw/{filepath}?ref={branch} ==> 实际是 /repos/{owner}/{repo}/raw/{path} (ref通过query param?)
|
||||
# 查阅文档,Gitea API v1 /repos/{owner}/{repo}/raw/{filepath} 接受 ref query param
|
||||
# URL: {base}/api/v1/repos/{owner}/{repo}/raw/{quote(item['path'])}?ref={branch}
|
||||
# 使用API raw endpoint: GET /repos/{owner}/{repo}/raw/{filepath}?ref={branch}
|
||||
files.append({
|
||||
"path": item["path"],
|
||||
"url": f"{base}/api/v1/repos/{owner}/{repo}/raw/{quote(item['path'])}?ref={quote(branch)}",
|
||||
"url": f"{base_url}/repos/{owner}/{repo}/raw/{quote(item['path'])}?ref={quote(branch)}",
|
||||
"token": token # 传递token以便fetch_file_content使用
|
||||
})
|
||||
|
||||
|
|
@ -482,11 +430,11 @@ async def scan_repo_task(task_id: str, db_session_factory, user_config: dict = N
|
|||
# 使用提取的 token 或用户配置的 token
|
||||
|
||||
if repo_type == "gitlab":
|
||||
token_to_use = extracted_token or gitlab_token
|
||||
token_to_use = file_info.get('token') or gitlab_token
|
||||
if token_to_use:
|
||||
headers["PRIVATE-TOKEN"] = token_to_use
|
||||
elif repo_type == "gitea":
|
||||
token_to_use = extracted_token or gitea_token
|
||||
token_to_use = file_info.get('token') or gitea_token
|
||||
if token_to_use:
|
||||
headers["Authorization"] = f"token {token_to_use}"
|
||||
elif repo_type == "github":
|
||||
|
|
|
|||
|
|
@ -0,0 +1,77 @@
|
|||
from urllib.parse import urlparse, urlunparse
|
||||
from typing import Dict, Optional
|
||||
|
||||
def parse_repository_url(repo_url: str, repo_type: str) -> Dict[str, str]:
    """
    Parse a repository web URL into the pieces needed to call the hosting API.

    Args:
        repo_url: The repository URL, e.g. ``https://host/owner/repo`` with an
            optional ``.git`` suffix and/or trailing slash.
        repo_type: The type of repository ('github', 'gitlab', 'gitea').

    Returns:
        A dictionary containing parsed components:
        - base_url: The API base URL. ``https://api.github.com`` for
          github.com, otherwise ``{server_url}/api/v3`` (GitHub Enterprise
          assumption), ``{server_url}/api/v4`` (GitLab) or
          ``{server_url}/api/v1`` (Gitea).
        - owner: The owner/namespace of the repository. For GitLab this is
          every path segment except the last, joined with ``/``.
        - repo: The repository name.
        - project_path: The full repository path without the ``.git`` suffix
          (callers URL-encode it to address GitLab projects).
        - server_url: The base URL of the server (scheme + netloc).

    Raises:
        ValueError: If the URL is empty, does not use http/https, has no host,
            has fewer than two path segments, or ``repo_type`` is unsupported.
    """
    # Treat None, "" and whitespace-only input the same way.
    repo_url = repo_url.strip() if repo_url else ""
    if not repo_url:
        raise ValueError(f"{repo_type} 仓库 URL 不能为空")

    # Check scheme to prevent SSRF via other protocols (only http and https).
    parsed = urlparse(repo_url)
    if parsed.scheme not in ('http', 'https'):
        raise ValueError(f"{repo_type} 仓库 URL 必须使用 http 或 https 协议")

    # ``hostname`` excludes userinfo and port and is already lower-cased, so it
    # is the right value for exact host comparisons. It is None for ``http:///x``.
    host = parsed.hostname or ""
    if not host:
        raise ValueError(f"{repo_type} 仓库 URL 格式错误")

    # Remove .git suffix if present, then drop empty segments so inputs such as
    # ``/owner//`` cannot yield an empty owner or repository name.
    path = parsed.path.strip('/')
    if path.endswith('.git'):
        path = path[:-4]
    path_parts = [segment for segment in path.split('/') if segment]
    if len(path_parts) < 2:
        raise ValueError(f"{repo_type} 仓库 URL 格式错误")
    project_path = "/".join(path_parts)

    base = f"{parsed.scheme}://{parsed.netloc}"

    if repo_type == "github":
        # GitHub web URLs are always /owner/repo[/...]; taking the first two
        # segments keeps URLs like /owner/repo/tree/main working.
        owner, repo = path_parts[0], path_parts[1]
        # Exact host match: a substring test would also match GitHub Enterprise
        # hosts such as github.company.com and send them to the public API.
        if host in ("github.com", "www.github.com"):
            api_base = "https://api.github.com"
        else:
            api_base = f"{base}/api/v3"  # Assumption for GitHub Enterprise

    elif repo_type == "gitlab":
        # GitLab supports subgroups (group/subgroup/repo); the whole path
        # identifies the project, the last segment is the repository name.
        repo = path_parts[-1]
        owner = "/".join(path_parts[:-1])
        api_base = f"{base}/api/v4"

    elif repo_type == "gitea":
        # Gitea: /owner/repo
        owner, repo = path_parts[0], path_parts[1]
        api_base = f"{base}/api/v1"

    else:
        raise ValueError(f"不支持的仓库类型: {repo_type}")

    return {
        "base_url": api_base,
        "owner": owner,
        "repo": repo,
        "project_path": project_path,  # Useful for GitLab
        "server_url": base,
    }
|
||||
|
|
@ -0,0 +1,53 @@
|
|||
#!/bin/bash
# DeepAudit backend container entrypoint:
#   1. wait until the database named by DATABASE_URL accepts connections,
#   2. apply Alembic migrations,
#   3. exec uvicorn so it replaces this shell as the main process.
set -e

echo "🚀 DeepAudit 后端启动中..."

# Fail fast when DATABASE_URL is missing instead of burning every retry on a
# probe that can never succeed.
if [ -z "${DATABASE_URL:-}" ]; then
    echo "❌ 无法连接到数据库,请检查 DATABASE_URL 配置"
    exit 1
fi

# Wait for PostgreSQL to become ready.
echo "⏳ 等待数据库连接..."
max_retries=30
retry_count=0
last_error=""

while [ "$retry_count" -lt "$max_retries" ]; do
    # The probe exits 0 when "SELECT 1" succeeds. Its output is captured rather
    # than discarded so the final failure can report the actual cause.
    if last_error=$(.venv/bin/python -c "
import asyncio
import os
import sys

from sqlalchemy import text
from sqlalchemy.ext.asyncio import create_async_engine


async def check_db():
    engine = create_async_engine(os.environ.get('DATABASE_URL', ''))
    try:
        async with engine.connect() as conn:
            await conn.execute(text('SELECT 1'))
        return True
    except Exception as exc:
        print(f'{type(exc).__name__}: {exc}', file=sys.stderr)
        return False
    finally:
        await engine.dispose()


sys.exit(0 if asyncio.run(check_db()) else 1)
" 2>&1); then
        echo "✅ 数据库连接成功"
        break
    fi

    retry_count=$((retry_count + 1))
    echo " 重试 $retry_count/$max_retries..."
    sleep 2
done

if [ "$retry_count" -eq "$max_retries" ]; then
    echo "❌ 无法连接到数据库,请检查 DATABASE_URL 配置"
    # Show the last probe error so the operator sees why the connection failed.
    if [ -n "$last_error" ]; then
        echo "$last_error" >&2
    fi
    exit 1
fi

# Run database migrations.
echo "📦 执行数据库迁移..."
.venv/bin/alembic upgrade head

echo "✅ 数据库迁移完成"

# Start uvicorn; exec hands PID 1 to the server so it receives signals directly.
echo "🌐 启动 API 服务..."
exec .venv/bin/uvicorn app.main:app --host 0.0.0.0 --port 8000
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
[project]
|
||||
name = "deepaudit-backend"
|
||||
version = "3.0.0"
|
||||
version = "3.0.1"
|
||||
description = "DeepAudit Backend API - AI-Powered Code Security Audit Platform"
|
||||
requires-python = ">=3.11"
|
||||
readme = "README.md"
|
||||
|
|
@ -202,7 +202,7 @@ exclude_lines = [
|
|||
"if TYPE_CHECKING:",
|
||||
]
|
||||
|
||||
# ============ UV Configuration ============
|
||||
# ============ Dependency Groups (PEP 735) ============
|
||||
|
||||
[dependency-groups]
|
||||
dev = [
|
||||
|
|
|
|||
|
|
@ -0,0 +1,111 @@
|
|||
# =============================================
|
||||
# DeepAudit v3.0.0 生产环境一键部署配置(国内加速版)
|
||||
# =============================================
|
||||
# 使用南京大学镜像站加速拉取 GHCR 镜像
|
||||
# 部署命令: curl -fsSL https://raw.githubusercontent.com/lintsinghua/DeepAudit/main/docker-compose.prod.cn.yml | docker compose -f - up -d
|
||||
#
|
||||
# 镜像加速说明:
|
||||
# - 原始地址:ghcr.io
|
||||
# - 加速地址:ghcr.nju.edu.cn(南京大学开源镜像站)
|
||||
|
||||
services:
|
||||
db:
|
||||
image: postgres:15-alpine
|
||||
restart: unless-stopped
|
||||
volumes:
|
||||
- postgres_data:/var/lib/postgresql/data
|
||||
environment:
|
||||
- POSTGRES_USER=postgres
|
||||
- POSTGRES_PASSWORD=postgres
|
||||
- POSTGRES_DB=deepaudit
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -U postgres"]
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
networks:
|
||||
- deepaudit-network
|
||||
|
||||
redis:
|
||||
image: redis:7-alpine
|
||||
restart: unless-stopped
|
||||
volumes:
|
||||
- redis_data:/data
|
||||
healthcheck:
|
||||
test: ["CMD", "redis-cli", "ping"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
networks:
|
||||
- deepaudit-network
|
||||
|
||||
backend:
|
||||
image: ghcr.nju.edu.cn/lintsinghua/deepaudit-backend:latest
|
||||
restart: unless-stopped
|
||||
volumes:
|
||||
- backend_uploads:/app/uploads
|
||||
- /var/run/docker.sock:/var/run/docker.sock
|
||||
ports:
|
||||
- "8000:8000"
|
||||
environment:
|
||||
- DATABASE_URL=postgresql+asyncpg://postgres:postgres@db:5432/deepaudit
|
||||
- REDIS_URL=redis://redis:6379/0
|
||||
- AGENT_ENABLED=true
|
||||
- SANDBOX_ENABLED=true
|
||||
- SANDBOX_IMAGE=ghcr.nju.edu.cn/lintsinghua/deepaudit-sandbox:latest
|
||||
# LLM 配置 - 请根据需要修改
|
||||
- LLM_PROVIDER=${LLM_PROVIDER:-openai}
|
||||
- LLM_MODEL=${LLM_MODEL:-gpt-4o}
|
||||
- LLM_API_KEY=${LLM_API_KEY:-your-api-key-here}
|
||||
- LLM_BASE_URL=${LLM_BASE_URL:-}
|
||||
# 禁用代理
|
||||
- HTTP_PROXY=
|
||||
- HTTPS_PROXY=
|
||||
- NO_PROXY=*
|
||||
depends_on:
|
||||
db:
|
||||
condition: service_healthy
|
||||
redis:
|
||||
condition: service_healthy
|
||||
db-migrate:
|
||||
condition: service_completed_successfully
|
||||
networks:
|
||||
- deepaudit-network
|
||||
|
||||
# 数据库迁移服务 - 在后端启动前自动执行
|
||||
db-migrate:
|
||||
image: ghcr.nju.edu.cn/lintsinghua/deepaudit-backend:latest
|
||||
restart: "no"
|
||||
environment:
|
||||
- DATABASE_URL=postgresql+asyncpg://postgres:postgres@db:5432/deepaudit
|
||||
command: [".venv/bin/alembic", "upgrade", "head"]
|
||||
depends_on:
|
||||
db:
|
||||
condition: service_healthy
|
||||
networks:
|
||||
- deepaudit-network
|
||||
|
||||
frontend:
|
||||
image: ghcr.nju.edu.cn/lintsinghua/deepaudit-frontend:latest
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- "3000:80"
|
||||
depends_on:
|
||||
- backend
|
||||
networks:
|
||||
- deepaudit-network
|
||||
|
||||
# 预拉取沙箱镜像(后端会按需调用)
|
||||
sandbox-pull:
|
||||
image: ghcr.nju.edu.cn/lintsinghua/deepaudit-sandbox:latest
|
||||
restart: "no"
|
||||
command: echo "Sandbox image ready"
|
||||
|
||||
networks:
|
||||
deepaudit-network:
|
||||
driver: bridge
|
||||
|
||||
volumes:
|
||||
postgres_data:
|
||||
backend_uploads:
|
||||
redis_data:
|
||||
|
|
@ -0,0 +1,107 @@
|
|||
# =============================================
|
||||
# DeepAudit v3.0.0 生产环境一键部署配置
|
||||
# =============================================
|
||||
# 使用预构建的 GHCR 镜像,无需本地构建
|
||||
# 部署命令: curl -fsSL https://raw.githubusercontent.com/lintsinghua/DeepAudit/main/docker-compose.prod.yml | docker compose -f - up -d
|
||||
|
||||
services:
|
||||
db:
|
||||
image: postgres:15-alpine
|
||||
restart: unless-stopped
|
||||
volumes:
|
||||
- postgres_data:/var/lib/postgresql/data
|
||||
environment:
|
||||
- POSTGRES_USER=postgres
|
||||
- POSTGRES_PASSWORD=postgres
|
||||
- POSTGRES_DB=deepaudit
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -U postgres"]
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
networks:
|
||||
- deepaudit-network
|
||||
|
||||
redis:
|
||||
image: redis:7-alpine
|
||||
restart: unless-stopped
|
||||
volumes:
|
||||
- redis_data:/data
|
||||
healthcheck:
|
||||
test: ["CMD", "redis-cli", "ping"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
networks:
|
||||
- deepaudit-network
|
||||
|
||||
backend:
|
||||
image: ghcr.io/lintsinghua/deepaudit-backend:latest
|
||||
restart: unless-stopped
|
||||
volumes:
|
||||
- backend_uploads:/app/uploads
|
||||
- /var/run/docker.sock:/var/run/docker.sock
|
||||
ports:
|
||||
- "8000:8000"
|
||||
environment:
|
||||
- DATABASE_URL=postgresql+asyncpg://postgres:postgres@db:5432/deepaudit
|
||||
- REDIS_URL=redis://redis:6379/0
|
||||
- AGENT_ENABLED=true
|
||||
- SANDBOX_ENABLED=true
|
||||
- SANDBOX_IMAGE=ghcr.io/lintsinghua/deepaudit-sandbox:latest
|
||||
# LLM 配置 - 请根据需要修改
|
||||
- LLM_PROVIDER=${LLM_PROVIDER:-openai}
|
||||
- LLM_MODEL=${LLM_MODEL:-gpt-4o}
|
||||
- LLM_API_KEY=${LLM_API_KEY:-your-api-key-here}
|
||||
- LLM_BASE_URL=${LLM_BASE_URL:-}
|
||||
# 禁用代理
|
||||
- HTTP_PROXY=
|
||||
- HTTPS_PROXY=
|
||||
- NO_PROXY=*
|
||||
depends_on:
|
||||
db:
|
||||
condition: service_healthy
|
||||
redis:
|
||||
condition: service_healthy
|
||||
db-migrate:
|
||||
condition: service_completed_successfully
|
||||
networks:
|
||||
- deepaudit-network
|
||||
|
||||
# 数据库迁移服务 - 在后端启动前自动执行
|
||||
db-migrate:
|
||||
image: ghcr.io/lintsinghua/deepaudit-backend:latest
|
||||
restart: "no"
|
||||
environment:
|
||||
- DATABASE_URL=postgresql+asyncpg://postgres:postgres@db:5432/deepaudit
|
||||
command: [".venv/bin/alembic", "upgrade", "head"]
|
||||
depends_on:
|
||||
db:
|
||||
condition: service_healthy
|
||||
networks:
|
||||
- deepaudit-network
|
||||
|
||||
frontend:
|
||||
image: ghcr.io/lintsinghua/deepaudit-frontend:latest
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- "3000:80"
|
||||
depends_on:
|
||||
- backend
|
||||
networks:
|
||||
- deepaudit-network
|
||||
|
||||
# 预拉取沙箱镜像(后端会按需调用)
|
||||
sandbox-pull:
|
||||
image: ghcr.io/lintsinghua/deepaudit-sandbox:latest
|
||||
restart: "no"
|
||||
command: echo "Sandbox image ready"
|
||||
|
||||
networks:
|
||||
deepaudit-network:
|
||||
driver: bridge
|
||||
|
||||
volumes:
|
||||
postgres_data:
|
||||
backend_uploads:
|
||||
redis_data:
|
||||
|
|
@ -80,6 +80,9 @@ services:
|
|||
- all_proxy=
|
||||
- ALL_PROXY=
|
||||
restart: unless-stopped
|
||||
volumes:
|
||||
- ./frontend/dist:/usr/share/nginx/html:ro # 挂载构建产物,本地 pnpm build 后自动生效
|
||||
- ./frontend/nginx.conf:/etc/nginx/conf.d/default.conf:ro # 挂载 nginx 配置
|
||||
ports:
|
||||
- "3000:80" # Nginx 监听 80 端口
|
||||
environment:
|
||||
|
|
@ -110,14 +113,13 @@ services:
|
|||
- deepaudit-network
|
||||
|
||||
# 沙箱镜像构建服务 (漏洞验证必须)
|
||||
# 注意: 此服务仅用于构建镜像,不会持续运行
|
||||
# 注意: 此服务仅用于构建镜像,构建完成后自动退出
|
||||
sandbox:
|
||||
build:
|
||||
context: ./docker/sandbox
|
||||
dockerfile: Dockerfile
|
||||
image: deepaudit/sandbox:latest
|
||||
profiles:
|
||||
- build-only
|
||||
restart: "no"
|
||||
command: echo "Sandbox image built successfully"
|
||||
|
||||
networks:
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@
|
|||
# =============================================
|
||||
# 使用 Nginx 提供静态文件和反向代理 (支持 SSE 流式传输)
|
||||
|
||||
FROM node:20-alpine AS builder
|
||||
FROM node:20-slim AS builder
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
|
|
@ -25,7 +25,10 @@ RUN npm config set registry https://registry.npmmirror.com && \
|
|||
# 复制依赖文件
|
||||
COPY package.json pnpm-lock.yaml ./
|
||||
|
||||
RUN pnpm install --no-frozen-lockfile
|
||||
# 增加网络超时设置和并发数限制,防止 ARM 架构构建卡死
|
||||
RUN pnpm config set network-timeout 300000 && \
|
||||
pnpm config set fetch-retries 5 && \
|
||||
pnpm install --no-frozen-lockfile --network-concurrency 1
|
||||
|
||||
# 复制源代码
|
||||
COPY . .
|
||||
|
|
|
|||
|
|
@ -9,7 +9,8 @@ echo "Injecting API URL: $API_URL"
|
|||
|
||||
# 在所有 JS 文件中替换占位符
|
||||
# 注意:这里路径必须是 nginx 实际存放文件的路径
|
||||
find /usr/share/nginx/html -name '*.js' -exec sed -i "s|__API_BASE_URL__|${API_URL}|g" {} \;
|
||||
ESCAPED_API_URL=$(echo "${API_URL}" | sed 's/[&/|]/\\&/g')
|
||||
find /usr/share/nginx/html -name '*.js' -exec sed -i "s|__API_BASE_URL__|${ESCAPED_API_URL}|g" {} \;
|
||||
|
||||
# 执行原始命令
|
||||
exec "$@"
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "deep-audit",
|
||||
"version": "3.0.0",
|
||||
"version": "3.0.1",
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"dev": "vite",
|
||||
|
|
|
|||
|
|
@ -46,6 +46,7 @@ interface EmbeddingProvider {
|
|||
interface EmbeddingConfig {
|
||||
provider: string;
|
||||
model: string;
|
||||
api_key: string | null;
|
||||
base_url: string | null;
|
||||
dimensions: number;
|
||||
batch_size: number;
|
||||
|
|
@ -79,15 +80,15 @@ export default function EmbeddingConfigPanel() {
|
|||
loadData();
|
||||
}, []);
|
||||
|
||||
// 当 provider 改变时更新模型
|
||||
useEffect(() => {
|
||||
if (selectedProvider) {
|
||||
const provider = providers.find((p) => p.id === selectedProvider);
|
||||
if (provider) {
|
||||
setSelectedModel(provider.default_model);
|
||||
}
|
||||
// 用户手动切换 provider 时更新为默认模型
|
||||
const handleProviderChange = (newProvider: string) => {
|
||||
setSelectedProvider(newProvider);
|
||||
// 切换 provider 时重置为该 provider 的默认模型
|
||||
const provider = providers.find((p) => p.id === newProvider);
|
||||
if (provider) {
|
||||
setSelectedModel(provider.default_model);
|
||||
}
|
||||
}, [selectedProvider, providers]);
|
||||
};
|
||||
|
||||
const loadData = async () => {
|
||||
try {
|
||||
|
|
@ -104,6 +105,7 @@ export default function EmbeddingConfigPanel() {
|
|||
if (configRes.data) {
|
||||
setSelectedProvider(configRes.data.provider);
|
||||
setSelectedModel(configRes.data.model);
|
||||
setApiKey(configRes.data.api_key || "");
|
||||
setBaseUrl(configRes.data.base_url || "");
|
||||
setBatchSize(configRes.data.batch_size);
|
||||
}
|
||||
|
|
@ -230,7 +232,7 @@ export default function EmbeddingConfigPanel() {
|
|||
{/* 提供商选择 */}
|
||||
<div className="space-y-2">
|
||||
<Label className="text-xs font-bold text-gray-500 uppercase">嵌入模型提供商</Label>
|
||||
<Select value={selectedProvider} onValueChange={setSelectedProvider}>
|
||||
<Select value={selectedProvider} onValueChange={handleProviderChange}>
|
||||
<SelectTrigger className="h-12 cyber-input">
|
||||
<SelectValue placeholder="选择提供商" />
|
||||
</SelectTrigger>
|
||||
|
|
|
|||
|
|
@ -133,11 +133,20 @@ export const StatsPanel = memo(function StatsPanel({ task, findings }: StatsPane
|
|||
|
||||
{/* File progress */}
|
||||
<div className="flex items-center justify-between mt-2 text-[10px]">
|
||||
<span className="text-slate-500">Files analyzed</span>
|
||||
<span className="text-slate-500">Files scanned</span>
|
||||
<span className="text-slate-300 font-mono">
|
||||
{task.analyzed_files}<span className="text-slate-500">/{task.total_files}</span>
|
||||
</span>
|
||||
</div>
|
||||
{/* Files with findings */}
|
||||
{task.files_with_findings > 0 && (
|
||||
<div className="flex items-center justify-between mt-1 text-[10px]">
|
||||
<span className="text-slate-500">Files with findings</span>
|
||||
<span className="text-rose-400 font-mono font-medium">
|
||||
{task.files_with_findings}
|
||||
</span>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Metrics Grid */}
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@ export interface AgentTask {
|
|||
total_files: number;
|
||||
indexed_files: number;
|
||||
analyzed_files: number;
|
||||
files_with_findings: number; // 有漏洞发现的文件数
|
||||
total_chunks: number;
|
||||
findings_count: number;
|
||||
verified_count: number;
|
||||
|
|
@ -128,6 +129,7 @@ export interface AgentTaskSummary {
|
|||
total_files: number;
|
||||
indexed_files: number;
|
||||
analyzed_files: number;
|
||||
files_with_findings: number;
|
||||
total_chunks: number;
|
||||
findings_count: number;
|
||||
verified_count: number;
|
||||
|
|
|
|||
Loading…
Reference in New Issue