"""
|
|
|
|
|
|
DeepAudit LangGraph Runner
|
|
|
|
|
|
基于 LangGraph 的 Agent 审计执行器
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
import asyncio
|
|
|
|
|
|
import logging
|
|
|
|
|
|
import os
|
|
|
|
|
|
import uuid
|
|
|
|
|
|
from datetime import datetime, timezone
|
|
|
|
|
|
from typing import Dict, List, Optional, Any, AsyncGenerator
|
|
|
|
|
|
|
|
|
|
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
|
|
|
|
|
|
|
|
|
|
from langgraph.graph import StateGraph, END
|
|
|
|
|
|
from langgraph.checkpoint.memory import MemorySaver
|
|
|
|
|
|
|
2025-12-11 20:33:46 +08:00
|
|
|
|
from app.services.agent.streaming import StreamHandler, StreamEvent, StreamEventType
|
2025-12-11 19:09:10 +08:00
|
|
|
|
from app.models.agent_task import (
|
|
|
|
|
|
AgentTask, AgentEvent, AgentFinding,
|
|
|
|
|
|
AgentTaskStatus, AgentTaskPhase, AgentEventType,
|
|
|
|
|
|
VulnerabilitySeverity, VulnerabilityType, FindingStatus,
|
|
|
|
|
|
)
|
|
|
|
|
|
from app.services.agent.event_manager import EventManager, AgentEventEmitter
|
|
|
|
|
|
from app.services.agent.tools import (
|
|
|
|
|
|
RAGQueryTool, SecurityCodeSearchTool, FunctionContextTool,
|
|
|
|
|
|
PatternMatchTool, CodeAnalysisTool, DataFlowAnalysisTool, VulnerabilityValidationTool,
|
|
|
|
|
|
FileReadTool, FileSearchTool, ListFilesTool,
|
|
|
|
|
|
SandboxTool, SandboxHttpTool, VulnerabilityVerifyTool, SandboxManager,
|
|
|
|
|
|
SemgrepTool, BanditTool, GitleaksTool, NpmAuditTool, SafetyTool,
|
|
|
|
|
|
TruffleHogTool, OSVScannerTool,
|
|
|
|
|
|
)
|
|
|
|
|
|
from app.services.rag import CodeIndexer, CodeRetriever, EmbeddingService
|
|
|
|
|
|
from app.core.config import settings
|
|
|
|
|
|
|
|
|
|
|
|
from .audit_graph import AuditState, create_audit_graph
|
|
|
|
|
|
from .nodes import ReconNode, AnalysisNode, VerificationNode, ReportNode
|
|
|
|
|
|
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
|
|
|
|
2025-12-11 23:29:04 +08:00
|
|
|
|
# 🔥 使用系统统一的 LLMService(支持用户配置)
|
|
|
|
|
|
from app.services.llm.service import LLMService
|
2025-12-11 19:09:10 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class AgentRunner:
    """
    DeepAudit LangGraph Agent Runner

    Audit executor driven by a LangGraph state graph.

    Workflow:
    START → Recon → Analysis ⟲ → Verification → Report → END
    """

    def __init__(
        self,
        db: AsyncSession,
        task: AgentTask,
        project_root: str,
        user_config: Optional[Dict[str, Any]] = None,
    ):
        self.db = db
        self.task = task
        self.project_root = project_root

        # 🔥 Keep the user configuration around for RAG initialization.
        self.user_config = user_config or {}

        # Event management - pass db_session_factory so events are persisted.
        from app.db.session import async_session_factory
        self.event_manager = EventManager(db_session_factory=async_session_factory)
        self.event_emitter = AgentEventEmitter(task.id, self.event_manager)

        # 🔥 CRITICAL: create the event queue immediately, so it exists before the
        # agent starts executing. Even if the frontend SSE connection attaches a
        # little late, token events are not lost.
        self.event_manager.create_queue(task.id)

        # 🔥 LLM service - uses the user configuration (from the settings page).
        self.llm_service = LLMService(user_config=self.user_config)

        # Tool set
        self.tools: Dict[str, Any] = {}

        # RAG components
        self.retriever: Optional[CodeRetriever] = None
        self.indexer: Optional[CodeIndexer] = None

        # Sandbox
        self.sandbox_manager: Optional[SandboxManager] = None

        # LangGraph
        self.graph: Optional[StateGraph] = None
        self.checkpointer = MemorySaver()

        # State
        self._cancelled = False
        self._running_task: Optional[asyncio.Task] = None

        # Agent references (used to propagate cancellation)
        self._agents: List[Any] = []

        # Streaming handler
        self.stream_handler = StreamHandler(task.id)

    def cancel(self):
        """Cancel the task."""
        self._cancelled = True

        # 🔥 Cancel every agent.
        for agent in self._agents:
            if hasattr(agent, 'cancel'):
                agent.cancel()
                logger.debug(f"Cancelled agent: {agent.name if hasattr(agent, 'name') else 'unknown'}")

        # Cancel the running asyncio task.
        if self._running_task and not self._running_task.done():
            self._running_task.cancel()

        logger.info(f"Task {self.task.id} cancellation requested")

    @property
    def is_cancelled(self) -> bool:
        """Whether cancellation has been requested."""
        return self._cancelled

    async def initialize(self):
        """Initialize the runner."""
        await self.event_emitter.emit_info("🚀 Initializing DeepAudit LangGraph agent...")

        # 1. Initialize the RAG system.
        await self._initialize_rag()

        # 2. Initialize the tools.
        await self._initialize_tools()

        # 3. Build the LangGraph.
        await self._build_graph()

        await self.event_emitter.emit_info("✅ LangGraph system initialized")

    async def _initialize_rag(self):
        """Initialize the RAG system."""
        await self.event_emitter.emit_info("📚 Initializing the RAG code retrieval system...")

        try:
            # 🔥 Read the LLM configuration from the user config (used for the
            # embedding API key). Priority: user config > environment variables.
            user_llm_config = self.user_config.get('llmConfig', {})

            # Embedding configuration (prefer the user-configured LLM API key).
            embedding_provider = getattr(settings, 'EMBEDDING_PROVIDER', 'openai')
            embedding_model = getattr(settings, 'EMBEDDING_MODEL', 'text-embedding-3-small')

            # 🔥 API key priority: user config > environment variables.
            embedding_api_key = (
                user_llm_config.get('llmApiKey') or
                getattr(settings, 'LLM_API_KEY', '') or
                ''
            )

            # 🔥 Base URL priority: user config > environment variables.
            embedding_base_url = (
                user_llm_config.get('llmBaseUrl') or
                getattr(settings, 'LLM_BASE_URL', None) or
                None
            )

            embedding_service = EmbeddingService(
                provider=embedding_provider,
                model=embedding_model,
                api_key=embedding_api_key,
                base_url=embedding_base_url,
            )

            self.indexer = CodeIndexer(
                collection_name=f"project_{self.task.project_id}",
                embedding_service=embedding_service,
                persist_directory=settings.VECTOR_DB_PATH,
            )

            self.retriever = CodeRetriever(
                collection_name=f"project_{self.task.project_id}",
                embedding_service=embedding_service,
                persist_directory=settings.VECTOR_DB_PATH,
            )

        except Exception as e:
            logger.warning(f"RAG initialization failed: {e}")
            await self.event_emitter.emit_warning(f"RAG system initialization failed: {e}")

    async def _initialize_tools(self):
        """Initialize the agent tool sets."""
        await self.event_emitter.emit_info("Initializing agent tool sets...")

        # 🔥 Import the newer tools.
        from app.services.agent.tools import (
            ThinkTool, ReflectTool,
            CreateVulnerabilityReportTool,
        )
        # 🔥 Import the knowledge query tools.
        from app.services.agent.knowledge import (
            SecurityKnowledgeQueryTool,
            GetVulnerabilityKnowledgeTool,
        )

        # 🔥 Resolve exclusion patterns and target files.
        exclude_patterns = self.task.exclude_patterns or []
        target_files = self.task.target_files or None

        # ============ Base tools (shared by all agents) ============
        base_tools = {
            "read_file": FileReadTool(self.project_root, exclude_patterns, target_files),
            "list_files": ListFilesTool(self.project_root, exclude_patterns, target_files),
            # 🔥 New: thinking tool (available to every agent)
            "think": ThinkTool(),
        }

        # ============ Recon agent tools ============
        # Responsibilities: information gathering, project structure analysis,
        # tech stack identification.
        self.recon_tools = {
            **base_tools,
            "search_code": FileSearchTool(self.project_root, exclude_patterns, target_files),
            # 🔥 New: reflection tool
            "reflect": ReflectTool(),
        }

        # RAG tool (Recon uses it for semantic search)
        if self.retriever:
            self.recon_tools["rag_query"] = RAGQueryTool(self.retriever)

        # ============ Analysis agent tools ============
        # Responsibilities: vulnerability analysis, code auditing, pattern matching.
        self.analysis_tools = {
            **base_tools,
            "search_code": FileSearchTool(self.project_root, exclude_patterns, target_files),
            # Pattern matching and code analysis
            "pattern_match": PatternMatchTool(self.project_root),
            # TODO: the code_analysis tool is disabled for now because its LLM calls fail frequently.
            # "code_analysis": CodeAnalysisTool(self.llm_service),
            "dataflow_analysis": DataFlowAnalysisTool(self.llm_service),
            # External static analysis tools
            "semgrep_scan": SemgrepTool(self.project_root),
            "bandit_scan": BanditTool(self.project_root),
            "gitleaks_scan": GitleaksTool(self.project_root),
            "trufflehog_scan": TruffleHogTool(self.project_root),
            "npm_audit": NpmAuditTool(self.project_root),
            "safety_scan": SafetyTool(self.project_root),
            "osv_scan": OSVScannerTool(self.project_root),
            # 🔥 New: reflection tool
            "reflect": ReflectTool(),
            # 🔥 New: security knowledge query tools (RAG-backed)
            "query_security_knowledge": SecurityKnowledgeQueryTool(),
            "get_vulnerability_knowledge": GetVulnerabilityKnowledgeTool(),
        }

        # RAG tools (Analysis uses them for security-focused code search)
        if self.retriever:
            self.analysis_tools["security_search"] = SecurityCodeSearchTool(self.retriever)
            self.analysis_tools["function_context"] = FunctionContextTool(self.retriever)

        # ============ Verification agent tools ============
        # Responsibilities: vulnerability verification, PoC execution,
        # false-positive elimination.
        self.verification_tools = {
            **base_tools,
            # Verification tools - the legacy vulnerability_validation and
            # dataflow_analysis tools were removed; sandbox verification is mandatory.
            # 🔥 New: vulnerability report tool (Verification only)
            "create_vulnerability_report": CreateVulnerabilityReportTool(),
            # 🔥 New: reflection tool
            "reflect": ReflectTool(),
        }

        # Sandbox tools (Verification agent only)
        self.sandbox_manager = None
        try:
            from app.services.agent.tools.sandbox_tool import SandboxConfig
            sandbox_config = SandboxConfig(
                image=settings.SANDBOX_IMAGE,
                memory_limit=settings.SANDBOX_MEMORY_LIMIT,
                cpu_limit=settings.SANDBOX_CPU_LIMIT,
                timeout=settings.SANDBOX_TIMEOUT,
                network_mode=settings.SANDBOX_NETWORK_MODE,
            )
            self.sandbox_manager = SandboxManager(config=sandbox_config)
        except Exception as e:
            logger.warning(f"❌ Sandbox Manager initialization failed: {e}")
            import traceback
            logger.warning(f"Traceback: {traceback.format_exc()}")
            # Try a default manager as a fallback.
            try:
                self.sandbox_manager = SandboxManager()
                logger.info("⚠️ Created fallback SandboxManager (Docker might be unavailable)")
            except Exception as e2:
                logger.error(f"❌ Failed to create fallback SandboxManager: {e2}")

        # Always register the sandbox tools, even if Docker is unavailable
        # (each tool checks availability internally).
        if self.sandbox_manager:
            self.verification_tools["sandbox_exec"] = SandboxTool(self.sandbox_manager)
            self.verification_tools["sandbox_http"] = SandboxHttpTool(self.sandbox_manager)
            self.verification_tools["verify_vulnerability"] = VulnerabilityVerifyTool(self.sandbox_manager)
            logger.info(f"✅ Sandbox tools initialized (Docker available: {self.sandbox_manager.is_available})")
        else:
            logger.error("❌ Sandbox tools NOT initialized due to critical manager failure")

        logger.info(f"✅ Verification tools: {list(self.verification_tools.keys())}")

        # Count the distinct tools across all agent tool sets.
        total_tools = len(set(
            list(self.recon_tools.keys()) +
            list(self.analysis_tools.keys()) +
            list(self.verification_tools.keys())
        ))
        await self.event_emitter.emit_info(f"Loaded {total_tools} tools")

    async def _build_graph(self):
        """Build the LangGraph audit graph."""
        await self.event_emitter.emit_info("📊 Building the LangGraph audit workflow...")

        # Import the agents.
        from app.services.agent.agents import ReconAgent, AnalysisAgent, VerificationAgent

        # Create the agent instances (each agent gets its dedicated tool set).
        recon_agent = ReconAgent(
            llm_service=self.llm_service,
            tools=self.recon_tools,  # Recon-specific tools
            event_emitter=self.event_emitter,
        )

        analysis_agent = AnalysisAgent(
            llm_service=self.llm_service,
            tools=self.analysis_tools,  # Analysis-specific tools
            event_emitter=self.event_emitter,
        )

        verification_agent = VerificationAgent(
            llm_service=self.llm_service,
            tools=self.verification_tools,  # Verification-specific tools
            event_emitter=self.event_emitter,
        )

        # 🔥 Keep agent references so cancellation can be propagated.
        self._agents = [recon_agent, analysis_agent, verification_agent]

        # Create the nodes.
        recon_node = ReconNode(recon_agent, self.event_emitter)
        analysis_node = AnalysisNode(analysis_agent, self.event_emitter)
        verification_node = VerificationNode(verification_agent, self.event_emitter)
        report_node = ReportNode(None, self.event_emitter)

        # Build the graph.
        self.graph = create_audit_graph(
            recon_node=recon_node,
            analysis_node=analysis_node,
            verification_node=verification_node,
            report_node=report_node,
            checkpointer=self.checkpointer,
        )

        await self.event_emitter.emit_info("✅ LangGraph workflow built")

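    # For reference, a minimal sketch of the wiring create_audit_graph is expected
    # to perform, inferred from the workflow documented in the class docstring
    # (an assumption, not a copy of audit_graph.py):
    #
    #     builder = StateGraph(AuditState)
    #     builder.add_node("recon", recon_node)
    #     builder.add_node("analysis", analysis_node)
    #     builder.add_node("verification", verification_node)
    #     builder.add_node("report", report_node)
    #     builder.set_entry_point("recon")
    #     builder.add_edge("recon", "analysis")
    #     builder.add_conditional_edges(
    #         "analysis",
    #         lambda s: "analysis" if s["should_continue_analysis"] else "verification",
    #     )
    #     builder.add_edge("verification", "report")
    #     builder.add_edge("report", END)
    #     return builder.compile(checkpointer=checkpointer)
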
    async def run(self) -> Dict[str, Any]:
        """
        Run the LangGraph audit.

        Returns:
            The final state.
        """
        final_state = {}
        try:
            async for event in self.run_with_streaming():
                # Collect the final state.
                if event.event_type == StreamEventType.TASK_COMPLETE:
                    final_state = event.data
                elif event.event_type == StreamEventType.TASK_ERROR:
                    final_state = {"success": False, "error": event.data.get("error")}
        except Exception as e:
            logger.error(f"Agent run failed: {e}", exc_info=True)
            final_state = {"success": False, "error": str(e)}

        return final_state

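    # Callers that need live output consume run_with_streaming() directly instead
    # of run(). A minimal sketch, assuming a FastAPI SSE endpoint and a
    # StreamEvent.to_dict() helper (both are assumptions, not defined here):
    #
    #     @router.get("/tasks/{task_id}/stream")
    #     async def stream_task(task_id: str):
    #         async def gen():
    #             async for ev in runner.run_with_streaming():
    #                 yield f"data: {json.dumps(ev.to_dict())}\n\n"
    #         return StreamingResponse(gen(), media_type="text/event-stream")
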
    async def run_with_streaming(self) -> AsyncGenerator[StreamEvent, None]:
        """
        Run the audit with streaming output.

        Yields:
            StreamEvent: streaming events (LLM thinking, tool calls, etc.)
        """
        import time
        start_time = time.time()

        try:
            # Initialize.
            await self.initialize()

            # Update the task status.
            await self._update_task_status(AgentTaskStatus.RUNNING)

            # Emit the task-start event.
            yield StreamEvent(
                event_type=StreamEventType.TASK_START,
                sequence=self.stream_handler._next_sequence(),
                data={"task_id": self.task.id, "message": "🚀 Audit task started"},
            )

            # 1. Index the code.
            await self._index_code()

            if self._cancelled:
                yield StreamEvent(
                    event_type=StreamEventType.TASK_CANCEL,
                    sequence=self.stream_handler._next_sequence(),
                    data={"message": "Task cancelled"},
                )
                return

            # 2. Collect project information.
            project_info = await self._collect_project_info()

            # 3. Build the initial state.
            task_config = {
                "target_vulnerabilities": self.task.target_vulnerabilities or [],
                "verification_level": self.task.verification_level or "sandbox",
                "exclude_patterns": self.task.exclude_patterns or [],
                "target_files": self.task.target_files or [],
                "max_iterations": self.task.max_iterations or 50,
                "timeout_seconds": self.task.timeout_seconds or 1800,
            }

            initial_state: AuditState = {
                "project_root": self.project_root,
                "project_info": project_info,
                "config": task_config,
                "task_id": self.task.id,
                "tech_stack": {},
                "entry_points": [],
                "high_risk_areas": [],
                "dependencies": {},
                "findings": [],
                "verified_findings": [],
                "false_positives": [],
                "_verified_findings_update": None,  # 🔥 NEW: findings update produced by verification
                "current_phase": "start",
                "iteration": 0,
                "max_iterations": self.task.max_iterations or 50,
                "should_continue_analysis": False,
                # 🔥 Agent collaboration handoffs
                "recon_handoff": None,
                "analysis_handoff": None,
                "verification_handoff": None,
                "messages": [],
                "events": [],
                "summary": None,
                "security_score": None,
                "error": None,
            }

            # 4. Run the LangGraph with astream_events.
            await self.event_emitter.emit_phase_start("langgraph", "🔄 Starting the LangGraph workflow")

            run_config = {
                "configurable": {
                    "thread_id": self.task.id,
                }
            }

            final_state = None

            # Use astream_events to get a detailed event stream.
            try:
                async for event in self.graph.astream_events(
                    initial_state,
                    config=run_config,
                    version="v2",
                ):
                    if self._cancelled:
                        break

                    # Process the LangGraph event.
                    stream_event = await self.stream_handler.process_langgraph_event(event)
                    if stream_event:
                        # Mirror it to the event_emitter so it gets persisted.
                        await self._sync_stream_event_to_db(stream_event)
                        yield stream_event

                    # Track the final state.
                    if event.get("event") == "on_chain_end":
                        output = event.get("data", {}).get("output")
                        if isinstance(output, dict):
                            final_state = output

            except Exception as e:
                # If astream_events is unavailable, fall back to astream.
                logger.warning(f"astream_events not available, falling back to astream: {e}")
                async for event in self.graph.astream(initial_state, config=run_config):
                    if self._cancelled:
                        break

                    for node_name, node_output in event.items():
                        await self._handle_node_output(node_name, node_output)

                        # Emit the node event.
                        yield StreamEvent(
                            event_type=StreamEventType.NODE_END,
                            sequence=self.stream_handler._next_sequence(),
                            node_name=node_name,
                            data={"message": f"Node {node_name} finished"},
                        )

                        phase_map = {
                            "recon": AgentTaskPhase.RECONNAISSANCE,
                            "analysis": AgentTaskPhase.ANALYSIS,
                            "verification": AgentTaskPhase.VERIFICATION,
                            "report": AgentTaskPhase.REPORTING,
                        }
                        if node_name in phase_map:
                            await self._update_task_phase(phase_map[node_name])

                        final_state = node_output

            # 5. Fetch the final state.
            # 🔥 CRITICAL FIX: always read the full accumulated state from the graph.
            # Each node only returns its own output, and fields such as findings are
            # accumulated via operator.add, so using node_output directly would drop
            # the findings accumulated by earlier nodes.
            graph_state = self.graph.get_state(run_config)
            if graph_state and graph_state.values:
                # Merge the full state with the last node's output.
                full_state = graph_state.values
                if final_state:
                    # Keep the last node's output (e.g. summary, security_score).
                    full_state = {**full_state, **final_state}
                final_state = full_state
                logger.info(f"[Runner] Got full state from graph with {len(final_state.get('findings', []))} findings")
            elif not final_state:
                final_state = {}
                logger.warning("[Runner] No final state available from graph")

            # 🔥 CRITICAL FIX: if a verified findings update exists, use it to replace
            # the raw findings. LangGraph's operator.add accumulator cannot update
            # existing findings in place.
            verified_findings_update = final_state.get("_verified_findings_update")
            if verified_findings_update:
                logger.info(f"[Runner] Using verified findings update: {len(verified_findings_update)} findings")
                final_state["findings"] = verified_findings_update
            else:
                # 🔥 FALLBACK: without _verified_findings_update, try merging from verified_findings.
                findings = final_state.get("findings", [])
                verified_findings = final_state.get("verified_findings", [])

                if verified_findings and findings:
                    # Build the merged findings list.
                    merged_findings = self._merge_findings_with_verification(findings, verified_findings)
                    final_state["findings"] = merged_findings
                    logger.info(f"[Runner] Merged findings: {len(merged_findings)} total")
                elif verified_findings and not findings:
                    # Only verified_findings exist; use them directly.
                    final_state["findings"] = verified_findings
                    logger.info(f"[Runner] Using verified_findings directly: {len(verified_findings)}")

            logger.info(f"[Runner] Final findings count: {len(final_state.get('findings', []))}")

            # 🔥 Check for an error in the final state.
            error = final_state.get("error")
            if error:
                # Detect LLM authentication errors.
                error_str = str(error)
                if "AuthenticationError" in error_str or "API key" in error_str or "invalid_api_key" in error_str:
                    error_message = "LLM API key misconfigured. Check the LLM_API_KEY environment variable or the API key in the settings."
                    logger.error(f"LLM authentication error: {error}")
                else:
                    error_message = error_str

                duration_ms = int((time.time() - start_time) * 1000)

                # Mark the task as failed.
                await self._update_task_status(AgentTaskStatus.FAILED, error_message)
                await self.event_emitter.emit_task_error(error_message)

                yield StreamEvent(
                    event_type=StreamEventType.TASK_ERROR,
                    sequence=self.stream_handler._next_sequence(),
                    data={
                        "error": error_message,
                        "message": f"❌ Task failed: {error_message}",
                    },
                )
                return

            # 6. Save the findings.
            findings = final_state.get("findings", [])
            await self._save_findings(findings)

            # Emit finding events.
            for finding in findings[:10]:  # cap the count
                yield self.stream_handler.create_finding_event(
                    finding,
                    is_verified=finding.get("is_verified", False),
                )

            # 7. Update the task summary.
            summary = final_state.get("summary", {})
            security_score = final_state.get("security_score", 100)

            await self._update_task_summary(
                total_findings=len(findings),
                verified_count=len(final_state.get("verified_findings", [])),
                security_score=security_score,
            )

            # 8. Finish.
            duration_ms = int((time.time() - start_time) * 1000)

            await self._update_task_status(AgentTaskStatus.COMPLETED)
            await self.event_emitter.emit_task_complete(
                findings_count=len(findings),
                duration_ms=duration_ms,
            )

            yield StreamEvent(
                event_type=StreamEventType.TASK_COMPLETE,
                sequence=self.stream_handler._next_sequence(),
                data={
                    "findings_count": len(findings),
                    "verified_count": len(final_state.get("verified_findings", [])),
                    "security_score": security_score,
                    "duration_ms": duration_ms,
                    "message": f"✅ Audit complete! Found {len(findings)} vulnerabilities",
                },
            )

        except asyncio.CancelledError:
            await self._update_task_status(AgentTaskStatus.CANCELLED)
            yield StreamEvent(
                event_type=StreamEventType.TASK_CANCEL,
                sequence=self.stream_handler._next_sequence(),
                data={"message": "Task cancelled"},
            )

        except Exception as e:
            logger.error(f"LangGraph run failed: {e}", exc_info=True)
            await self._update_task_status(AgentTaskStatus.FAILED, str(e))
            await self.event_emitter.emit_error(str(e))

            yield StreamEvent(
                event_type=StreamEventType.TASK_ERROR,
                sequence=self.stream_handler._next_sequence(),
                data={"error": str(e), "message": f"❌ Audit failed: {e}"},
            )

        finally:
            await self._cleanup()

    async def _sync_stream_event_to_db(self, event: StreamEvent):
        """Persist a streaming event to the database."""
        try:
            # Convert the StreamEvent into AgentEventData.
            await self.event_manager.add_event(
                task_id=self.task.id,
                event_type=event.event_type.value,
                sequence=event.sequence,
                phase=event.phase,
                message=event.data.get("message"),
                tool_name=event.tool_name,
                tool_input=event.data.get("input") or event.data.get("input_params"),
                tool_output=event.data.get("output") or event.data.get("output_data"),
                tool_duration_ms=event.data.get("duration_ms"),
                metadata=event.data,
            )
        except Exception as e:
            logger.warning(f"Failed to sync stream event to db: {e}")

    async def _handle_node_output(self, node_name: str, output: Dict[str, Any]):
        """Handle a node's output."""
        # Emit the node events.
        events = output.get("events", [])
        for evt in events:
            await self.event_emitter.emit_info(
                f"[{node_name}] {evt.get('type', 'event')}: {evt.get('data', {})}"
            )

        # Handle new findings.
        if node_name == "analysis":
            new_findings = output.get("findings", [])
            if new_findings:
                for finding in new_findings[:5]:  # cap the event count
                    await self.event_emitter.emit_finding(
                        title=finding.get("title", "Unknown"),
                        severity=finding.get("severity", "medium"),
                        file_path=finding.get("file_path"),
                    )

        # Handle verification results.
        if node_name == "verification":
            verified = output.get("verified_findings", [])
            for v in verified[:5]:
                await self.event_emitter.emit_info(
                    f"✅ Verified: {v.get('title', 'Unknown')}"
                )

        # Handle errors.
        if output.get("error"):
            await self.event_emitter.emit_error(output["error"])

    async def _index_code(self):
        """Index the project code."""
        if not self.indexer:
            await self.event_emitter.emit_warning("RAG not initialized; skipping code indexing")
            return

        await self._update_task_phase(AgentTaskPhase.INDEXING)
        await self.event_emitter.emit_phase_start("indexing", "📝 Starting code indexing")

        try:
            async for progress in self.indexer.index_directory(self.project_root):
                if self._cancelled:
                    return

                await self.event_emitter.emit_progress(
                    progress.processed_files,
                    progress.total_files,
                    f"Indexing: {progress.current_file or 'N/A'}"
                )

            await self.event_emitter.emit_phase_complete("indexing", "✅ Code indexing complete")

        except Exception as e:
            logger.warning(f"Code indexing failed: {e}")
            await self.event_emitter.emit_warning(f"Code indexing failed: {e}")

    async def _collect_project_info(self) -> Dict[str, Any]:
        """Collect basic project information."""
        info = {
            "name": self.task.project.name if self.task.project else "unknown",
            "root": self.project_root,
            "languages": [],
            "file_count": 0,
        }

        try:
            exclude_dirs = {
                "node_modules", "__pycache__", ".git", "venv", ".venv",
                "build", "dist", "target", ".idea", ".vscode",
            }

            for root, dirs, files in os.walk(self.project_root):
                dirs[:] = [d for d in dirs if d not in exclude_dirs]
                info["file_count"] += len(files)

                lang_map = {
                    ".py": "Python", ".js": "JavaScript", ".ts": "TypeScript",
                    ".java": "Java", ".go": "Go", ".php": "PHP",
                    ".rb": "Ruby", ".rs": "Rust", ".c": "C", ".cpp": "C++",
                }

                for f in files:
                    ext = os.path.splitext(f)[1].lower()
                    if ext in lang_map and lang_map[ext] not in info["languages"]:
                        info["languages"].append(lang_map[ext])

        except Exception as e:
            logger.warning(f"Failed to collect project info: {e}")

        return info

    async def _save_findings(self, findings: List[Dict]):
        """Save findings to the database."""
        logger.info(f"[Runner] Saving {len(findings)} findings to database for task {self.task.id}")

        if not findings:
            logger.info("[Runner] No findings to save")
            return

        severity_map = {
            "critical": VulnerabilitySeverity.CRITICAL,
            "high": VulnerabilitySeverity.HIGH,
            "medium": VulnerabilitySeverity.MEDIUM,
            "low": VulnerabilitySeverity.LOW,
            "info": VulnerabilitySeverity.INFO,
        }

        type_map = {
            "sql_injection": VulnerabilityType.SQL_INJECTION,
            "nosql_injection": VulnerabilityType.NOSQL_INJECTION,
            "xss": VulnerabilityType.XSS,
            "command_injection": VulnerabilityType.COMMAND_INJECTION,
            "code_injection": VulnerabilityType.CODE_INJECTION,
            "path_traversal": VulnerabilityType.PATH_TRAVERSAL,
            "file_inclusion": VulnerabilityType.FILE_INCLUSION,
            "ssrf": VulnerabilityType.SSRF,
            "xxe": VulnerabilityType.XXE,
            "deserialization": VulnerabilityType.DESERIALIZATION,
            "auth_bypass": VulnerabilityType.AUTH_BYPASS,
            "idor": VulnerabilityType.IDOR,
            "sensitive_data_exposure": VulnerabilityType.SENSITIVE_DATA_EXPOSURE,
            "hardcoded_secret": VulnerabilityType.HARDCODED_SECRET,
            "weak_crypto": VulnerabilityType.WEAK_CRYPTO,
            "race_condition": VulnerabilityType.RACE_CONDITION,
            "business_logic": VulnerabilityType.BUSINESS_LOGIC,
            "memory_corruption": VulnerabilityType.MEMORY_CORRUPTION,
        }

        for finding in findings:
            try:
                # Make sure the finding is a dict.
                if not isinstance(finding, dict):
                    logger.warning(f"Skipping invalid finding (not a dict): {finding}")
                    continue

                db_finding = AgentFinding(
                    id=str(uuid.uuid4()),
                    task_id=self.task.id,
                    vulnerability_type=type_map.get(
                        finding.get("vulnerability_type", "other"),
                        VulnerabilityType.OTHER
                    ),
                    severity=severity_map.get(
                        finding.get("severity", "medium"),
                        VulnerabilitySeverity.MEDIUM
                    ),
                    title=finding.get("title", "Unknown"),
                    description=finding.get("description", ""),
                    file_path=finding.get("file_path"),
                    line_start=finding.get("line_start"),
                    line_end=finding.get("line_end"),
                    code_snippet=finding.get("code_snippet"),
                    source=finding.get("source"),
                    sink=finding.get("sink"),
                    suggestion=finding.get("suggestion") or finding.get("recommendation"),
                    is_verified=finding.get("is_verified", False),
                    confidence=finding.get("confidence", 0.5),
                    poc=finding.get("poc"),
                    status=FindingStatus.VERIFIED if finding.get("is_verified") else FindingStatus.NEW,
                )

                self.db.add(db_finding)

            except Exception as e:
                logger.warning(f"Failed to save finding: {e}")

        try:
            await self.db.commit()
            logger.info(f"[Runner] Successfully saved {len(findings)} findings to database")
        except Exception as e:
            logger.error(f"Failed to commit findings: {e}")
            await self.db.rollback()

    async def _update_task_status(
        self,
        status: AgentTaskStatus,
        error: Optional[str] = None
    ):
        """Update the task status."""
        self.task.status = status

        if status == AgentTaskStatus.RUNNING:
            self.task.started_at = datetime.now(timezone.utc)
        elif status in [AgentTaskStatus.COMPLETED, AgentTaskStatus.FAILED, AgentTaskStatus.CANCELLED]:
            self.task.finished_at = datetime.now(timezone.utc)

        if error:
            self.task.error_message = error

        try:
            await self.db.commit()
        except Exception as e:
            logger.error(f"Failed to update task status: {e}")

    async def _update_task_phase(self, phase: AgentTaskPhase):
        """Update the task phase."""
        self.task.current_phase = phase
        try:
            await self.db.commit()
        except Exception as e:
            logger.error(f"Failed to update task phase: {e}")

    async def _update_task_summary(
        self,
        total_findings: int,
        verified_count: int,
        security_score: int,
    ):
        """Update the task summary."""
        self.task.total_findings = total_findings
        self.task.verified_findings = verified_count
        self.task.security_score = security_score

        try:
            await self.db.commit()
        except Exception as e:
            logger.error(f"Failed to update task summary: {e}")

    def _merge_findings_with_verification(
        self,
        findings: List[Dict],
        verified_findings: List[Dict],
    ) -> List[Dict]:
        """
        Merge the raw findings with the verification results.

        Args:
            findings: the raw findings list
            verified_findings: the verified findings list

        Returns:
            The merged findings list.
        """
        # Build a lookup map for the verification results.
        verified_map = {}
        for vf in verified_findings:
            if not isinstance(vf, dict):
                continue
            key = (
                vf.get("file_path", ""),
                vf.get("line_start", 0),
                vf.get("vulnerability_type", ""),
            )
            verified_map[key] = vf

        merged = []
        seen_keys = set()

        # Process the raw findings first.
        for f in findings:
            if not isinstance(f, dict):
                continue

            key = (
                f.get("file_path", ""),
                f.get("line_start", 0),
                f.get("vulnerability_type", ""),
            )

            if key in verified_map:
                # Use the verified version (carries is_verified, poc, etc.).
                merged.append(verified_map[key])
            else:
                # Keep the raw finding.
                merged.append(f)

            seen_keys.add(key)

        # Append findings that appeared only during verification, if any.
        for key, vf in verified_map.items():
            if key not in seen_keys:
                merged.append(vf)

        return merged

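    # Worked example of the merge key above (the paths are illustrative only):
    # a raw finding keyed ("app/api.py", 42, "sql_injection") is replaced by the
    # verified entry sharing that key, picking up is_verified and poc, while a
    # verified-only key such as ("app/auth.py", 10, "idor") is appended at the end.
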
    async def _cleanup(self):
        """Clean up resources."""
        try:
            if self.sandbox_manager:
                await self.sandbox_manager.cleanup()
            await self.event_manager.close()
        except Exception as e:
            logger.warning(f"Cleanup error: {e}")


# Convenience wrapper
async def run_agent_task(
    db: AsyncSession,
    task: AgentTask,
    project_root: str,
) -> Dict[str, Any]:
    """
    Run an agent audit task.

    Args:
        db: database session
        task: the agent task
        project_root: project root directory

    Returns:
        The audit result.
    """
    runner = AgentRunner(db, task, project_root)
    return await runner.run()
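
# Minimal usage sketch. The async_session_factory import matches the one used
# inside AgentRunner.__init__; the task id and project path below are
# placeholders, not values from this codebase:
#
#     import asyncio
#     from app.db.session import async_session_factory
#
#     async def main():
#         async with async_session_factory() as db:
#             task = await db.get(AgentTask, "<task-id>")
#             result = await run_agent_task(db, task, "/path/to/project")
#             print(result.get("security_score"))
#
#     asyncio.run(main())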