feat(agent): 新增多语言代码测试和漏洞验证工具并增强错误处理

- 新增 PHP、Python、JavaScript 等多语言代码测试工具和命令注入、SQL 注入等专用漏洞验证工具
- 优化错误处理逻辑，提供更详细的错误信息和堆栈跟踪
- 增强 JSON 解析器，优先使用 json-repair 库处理复杂格式
- 改进 Agent 响应解析逻辑，更健壮地提取思考内容和操作指令
- 完善沙箱管理器的初始化和错误处理机制
2025-12-14 17:20:54 +08:00 · 2025-12-14 17:20:54 +08:00 · c64eddac7c
parent a85cdb687d
commit c64eddac7c
16 changed files with 4557 additions and 446 deletions
--- a/backend/app/api/v1/endpoints/agent_tasks.py
+++ b/backend/app/api/v1/endpoints/agent_tasks.py
@ -597,11 +597,50 @@ async def _initialize_tools(
    }
    
    # Verification 工具
+    # 🔥 导入沙箱工具
+    from app.services.agent.tools import (
+        SandboxTool, SandboxHttpTool, VulnerabilityVerifyTool, SandboxManager,
+        # 多语言代码测试工具
+        PhpTestTool, PythonTestTool, JavaScriptTestTool, JavaTestTool,
+        GoTestTool, RubyTestTool, ShellTestTool, UniversalCodeTestTool,
+        # 漏洞验证专用工具
+        CommandInjectionTestTool, SqlInjectionTestTool, XssTestTool,
+        PathTraversalTestTool, SstiTestTool, DeserializationTestTool,
+        UniversalVulnTestTool,
+    )
+
+    # 🔥 初始化沙箱管理器
+    sandbox_manager = SandboxManager()
+    await sandbox_manager.initialize()
+    logger.info(f"✅ Sandbox initialized (available: {sandbox_manager.is_available})")
+
    verification_tools = {
        **base_tools,
-        # 强制使用沙箱工具，移除 LLM 模拟验证工具
-        # "vulnerability_validation": VulnerabilityValidationTool(llm_service),
-        # "dataflow_analysis": DataFlowAnalysisTool(llm_service),
+        # 🔥 沙箱验证工具
+        "sandbox_exec": SandboxTool(sandbox_manager),
+        "sandbox_http": SandboxHttpTool(sandbox_manager),
+        "verify_vulnerability": VulnerabilityVerifyTool(sandbox_manager),
+
+        # 🔥 多语言代码测试工具
+        "php_test": PhpTestTool(sandbox_manager, project_root),
+        "python_test": PythonTestTool(sandbox_manager, project_root),
+        "javascript_test": JavaScriptTestTool(sandbox_manager, project_root),
+        "java_test": JavaTestTool(sandbox_manager, project_root),
+        "go_test": GoTestTool(sandbox_manager, project_root),
+        "ruby_test": RubyTestTool(sandbox_manager, project_root),
+        "shell_test": ShellTestTool(sandbox_manager, project_root),
+        "universal_code_test": UniversalCodeTestTool(sandbox_manager, project_root),
+
+        # 🔥 漏洞验证专用工具
+        "test_command_injection": CommandInjectionTestTool(sandbox_manager, project_root),
+        "test_sql_injection": SqlInjectionTestTool(sandbox_manager, project_root),
+        "test_xss": XssTestTool(sandbox_manager, project_root),
+        "test_path_traversal": PathTraversalTestTool(sandbox_manager, project_root),
+        "test_ssti": SstiTestTool(sandbox_manager, project_root),
+        "test_deserialization": DeserializationTestTool(sandbox_manager, project_root),
+        "universal_vuln_test": UniversalVulnTestTool(sandbox_manager, project_root),
+
+        # 报告工具
        "create_vulnerability_report": CreateVulnerabilityReportTool(),
    }
    
--- a/backend/app/services/agent/agents/analysis.py
+++ b/backend/app/services/agent/agents/analysis.py
@ -190,19 +190,15 @@ class AnalysisAgent(BaseAgent):

    
    def _parse_llm_response(self, response: str) -> AnalysisStep:
-        """解析 LLM 响应"""
+        """解析 LLM 响应 - 增强版，更健壮地提取思考内容"""
        step = AnalysisStep(thought="")
-        
-        # 提取 Thought
+
+        # 🔥 首先尝试提取明确的 Thought 标记
        thought_match = re.search(r'Thought:\s*(.*?)(?=Action:|Final Answer:|$)', response, re.DOTALL)
        if thought_match:
            step.thought = thought_match.group(1).strip()
-        elif not re.search(r'Action:|Final Answer:', response):
-             # 🔥 Fallback: If no markers found, treat the whole response as Thought
-             if response.strip():
-                 step.thought = response.strip()
-        
-        # 检查是否是最终答案
+
+        # 🔥 检查是否是最终答案
        final_match = re.search(r'Final Answer:\s*(.*?)$', response, re.DOTALL)
        if final_match:
            step.is_final = True
@ -211,23 +207,40 @@ class AnalysisAgent(BaseAgent):
            answer_text = re.sub(r'```\s*', '', answer_text)
            # 使用增强的 JSON 解析器
            step.final_answer = AgentJsonParser.parse(
-                answer_text, 
+                answer_text,
                default={"findings": [], "raw_answer": answer_text}
            )
            # 确保 findings 格式正确
            if "findings" in step.final_answer:
                step.final_answer["findings"] = [
-                    f for f in step.final_answer["findings"] 
+                    f for f in step.final_answer["findings"]
                    if isinstance(f, dict)
                ]
+
+            # 🔥 如果没有提取到 thought，使用 Final Answer 前的内容作为思考
+            if not step.thought:
+                before_final = response[:response.find('Final Answer:')].strip()
+                if before_final:
+                    before_final = re.sub(r'^Thought:\s*', '', before_final)
+                    step.thought = before_final[:500] if len(before_final) > 500 else before_final
+
            return step
-        
-        # 提取 Action
+
+        # 🔥 提取 Action
        action_match = re.search(r'Action:\s*(\w+)', response)
        if action_match:
            step.action = action_match.group(1).strip()
-        
-        # 提取 Action Input
+
+            # 🔥 如果没有提取到 thought，提取 Action 之前的内容作为思考
+            if not step.thought:
+                action_pos = response.find('Action:')
+                if action_pos > 0:
+                    before_action = response[:action_pos].strip()
+                    before_action = re.sub(r'^Thought:\s*', '', before_action)
+                    if before_action:
+                        step.thought = before_action[:500] if len(before_action) > 500 else before_action
+
+        # 🔥 提取 Action Input
        input_match = re.search(r'Action Input:\s*(.*?)(?=Thought:|Action:|Observation:|$)', response, re.DOTALL)
        if input_match:
            input_text = input_match.group(1).strip()
@ -238,7 +251,12 @@ class AnalysisAgent(BaseAgent):
                input_text,
                default={"raw_input": input_text}
            )
-        
+
+        # 🔥 最后的 fallback：如果整个响应没有任何标记，整体作为思考
+        if not step.thought and not step.action and not step.is_final:
+            if response.strip():
+                step.thought = response.strip()[:500]
+
        return step
    

@ -304,9 +322,12 @@ class AnalysisAgent(BaseAgent):
 """
        
        initial_message += f"""{handoff_context if handoff_context else f'''## 上下文信息
-### 高风险区域
+### ⚠️ 高风险区域（来自 Recon Agent，必须优先分析）
+以下是 Recon Agent 识别的高风险区域，请**务必优先**读取和分析这些文件：
 {json.dumps(high_risk_areas[:20], ensure_ascii=False)}

+**重要**: 请使用 read_file 工具读取上述高风险文件，不要假设文件路径或使用其他路径。
+
 ### 入口点 (前10个)
 {json.dumps(entry_points[:10], ensure_ascii=False, indent=2)}

@ -316,13 +337,20 @@ class AnalysisAgent(BaseAgent):
 ## 任务
 {task_context or task or '进行全面的安全漏洞分析，发现代码中的安全问题。'}

+## ⚠️ 分析策略要求
+1. **首先**：使用 read_file 读取上面列出的高风险文件
+2. **然后**：分析这些文件中的安全问题
+3. **最后**：如果需要，使用 smart_scan 或其他工具扩展分析
+
+**禁止**：不要跳过高风险区域直接做全局扫描
+
 ## 目标漏洞类型
 {config.get('target_vulnerabilities', ['all'])}

 ## 可用工具
 {self.get_tools_description()}

-请开始你的安全分析。首先思考分析策略，然后选择合适的工具开始分析。"""
+请开始你的安全分析。首先读取高风险区域的文件，然后分析其中的安全问题。"""
        
        # 🔥 记录工作开始
        self.record_work("开始安全漏洞分析")
--- a/backend/app/services/agent/agents/base.py
+++ b/backend/app/services/agent/agents/base.py
@ -16,6 +16,7 @@ from dataclasses import dataclass, field
 from enum import Enum
 from datetime import datetime, timezone
 import asyncio
+import json
 import logging
 import uuid

@ -1036,26 +1037,49 @@ class BaseAgent(ABC):
            
            if result.success:
                output = str(result.data)
-                
+
                # 包含 metadata 中的额外信息
                if result.metadata:
                    if "issues" in result.metadata:
-                        import json
                        output += f"\n\n发现的问题:\n{json.dumps(result.metadata['issues'], ensure_ascii=False, indent=2)}"
                    if "findings" in result.metadata:
-                        import json
                        output += f"\n\n发现:\n{json.dumps(result.metadata['findings'][:10], ensure_ascii=False, indent=2)}"
-                
+
                # 截断过长输出
                if len(output) > 6000:
                    output = output[:6000] + f"\n\n... [输出已截断，共 {len(str(result.data))} 字符]"
                return output
            else:
-                return f"工具执行失败: {result.error}"
-                
+                # 🔥 输出详细的错误信息，包括原始错误
+                error_msg = f"""⚠️ 工具执行失败
+
+**工具**: {tool_name}
+**参数**: {json.dumps(tool_input, ensure_ascii=False, indent=2) if tool_input else '无'}
+**错误**: {result.error}
+
+请根据错误信息调整参数或尝试其他方法。"""
+                return error_msg
+
        except Exception as e:
+            import traceback
            logger.error(f"Tool execution error: {e}")
-            return f"工具执行错误: {str(e)}"
+            # 🔥 输出完整的原始错误信息，包括堆栈跟踪
+            error_msg = f"""❌ 工具执行异常
+
+**工具**: {tool_name}
+**参数**: {json.dumps(tool_input, ensure_ascii=False, indent=2) if tool_input else '无'}
+**错误类型**: {type(e).__name__}
+**错误信息**: {str(e)}
+**堆栈跟踪**:
+```
+{traceback.format_exc()}
+```
+
+请分析错误原因，可能需要：
+1. 检查参数格式是否正确
+2. 尝试使用其他工具
+3. 如果是权限或资源问题，跳过该操作"""
+            return error_msg
    
    def get_tools_description(self) -> str:
        """生成工具描述文本（用于 prompt）"""
--- a/backend/app/services/agent/agents/orchestrator.py
+++ b/backend/app/services/agent/agents/orchestrator.py
@ -159,6 +159,9 @@ class OrchestratorAgent(BaseAgent):
        
        # 🔥 跟踪已调度的 Agent 任务，避免重复调度
        self._dispatched_tasks: Dict[str, int] = {}  # agent_name -> dispatch_count
+
+        # 🔥 保存各个 Agent 的完整结果，用于传递给后续 Agent
+        self._agent_results: Dict[str, Dict[str, Any]] = {}  # agent_name -> full result data
    
    def register_sub_agent(self, name: str, agent: BaseAgent):
        """注册子 Agent"""
@ -216,6 +219,7 @@ class OrchestratorAgent(BaseAgent):
        
        self._steps = []
        self._all_findings = []
+        self._agent_results = {}  # 🔥 重置 Agent 结果缓存
        final_result = None
        error_message = None  # 🔥 跟踪错误信息
        
@ -625,16 +629,23 @@ Action Input: {{"参数": "值"}}
            # 确保 project_info 包含 root 路径
            if "root" not in project_info:
                project_info["root"] = self._runtime_context.get("project_root", ".")
-            
+
+            # 🔥 FIX: 构建完整的 previous_results，包含所有已执行 Agent 的结果
+            previous_results = {
+                "findings": self._all_findings,  # 传递已收集的发现
+            }
+
+            # 🔥 将之前 Agent 的完整结果传递给后续 Agent
+            for prev_agent, prev_data in self._agent_results.items():
+                previous_results[prev_agent] = {"data": prev_data}
+
            sub_input = {
                "task": task,
                "task_context": context,
                "project_info": project_info,
                "config": self._runtime_context.get("config", {}),
                "project_root": self._runtime_context.get("project_root", "."),
-                "previous_results": {
-                    "findings": self._all_findings,  # 传递已收集的发现
-                },
+                "previous_results": previous_results,
            }
            
            # 🔥 执行子 Agent 前检查取消状态
@ -647,11 +658,18 @@ Action Input: {{"参数": "值"}}
            # 🔥 执行后再次检查取消状态
            if self.is_cancelled:
                return f"## {agent_name} Agent 执行中断\n\n任务已被用户取消"
-            
+
            # 🔥 处理子 Agent 结果 - 不同 Agent 返回不同的数据结构
+            # 🔥 DEBUG: 添加诊断日志
+            logger.info(f"[Orchestrator] Processing {agent_name} result: success={result.success}, data_type={type(result.data).__name__}, data_keys={list(result.data.keys()) if isinstance(result.data, dict) else 'N/A'}")
+
            if result.success and result.data:
                data = result.data

+                # 🔥 FIX: 保存 Agent 的完整结果，供后续 Agent 使用
+                self._agent_results[agent_name] = data
+                logger.info(f"[Orchestrator] Saved {agent_name} result with keys: {list(data.keys())}")
+
                # 🔥 CRITICAL FIX: 收集发现 - 支持多种字段名
                # findings 字段通常来自 Analysis/Verification Agent
                # initial_findings 来自 Recon Agent
@ -662,21 +680,112 @@ Action Input: {{"参数": "值"}}
                # 即使 findings 为空列表，也检查 initial_findings
                if "initial_findings" in data:
                    initial = data.get("initial_findings", [])
-                    logger.info(f"[Orchestrator] {agent_name} has {len(initial)} initial_findings")
+                    logger.info(f"[Orchestrator] {agent_name} has {len(initial)} initial_findings, types: {[type(f).__name__ for f in initial[:3]]}")
                    for f in initial:
                        if isinstance(f, dict):
                            # 🔥 Normalize finding format - 处理 Recon 返回的格式
                            normalized = self._normalize_finding(f)
                            if normalized not in raw_findings:
                                raw_findings.append(normalized)
-                        elif isinstance(f, str):
-                            # String finding from Recon - skip, it's just an observation
-                            logger.debug(f"[Orchestrator] Skipping string finding: {f[:50]}...")
+                                logger.info(f"[Orchestrator] Added dict finding from initial_findings")
+                        elif isinstance(f, str) and f.strip():
+                            # 🔥 FIX: Convert string finding to dict format instead of skipping
+                            # Recon Agent 有时候会返回字符串格式的发现
+                            # 尝试从字符串中提取文件路径（格式如 "app.py:36 - 描述"）
+                            file_path = ""
+                            line_start = 0
+                            if ":" in f:
+                                parts = f.split(":", 1)
+                                potential_file = parts[0].strip()
+                                # 检查是否像文件路径
+                                if "." in potential_file and "/" not in potential_file[:3]:
+                                    file_path = potential_file
+                                    # 尝试提取行号
+                                    if len(parts) > 1:
+                                        remaining = parts[1].strip()
+                                        line_match = remaining.split()[0] if remaining else ""
+                                        if line_match.isdigit():
+                                            line_start = int(line_match)
+
+                            string_finding = {
+                                "title": f[:100] if len(f) > 100 else f,
+                                "description": f,
+                                "file_path": file_path,
+                                "line_start": line_start,
+                                "severity": "medium",  # 默认中等严重度，Analysis 会重新评估
+                                "vulnerability_type": "potential_issue",
+                                "source": "recon",
+                                "needs_verification": True,
+                                "confidence": 0.5,  # 较低置信度，需要进一步分析
+                            }
+                            logger.info(f"[Orchestrator] Converted string finding to dict: {f[:80]}... (file={file_path}, line={line_start})")
+                            raw_findings.append(string_finding)
+                else:
+                    logger.info(f"[Orchestrator] {agent_name} has no 'initial_findings' key in data")

                # 🔥 Also check high_risk_areas from Recon for potential findings
                if agent_name == "recon" and "high_risk_areas" in data:
                    high_risk = data.get("high_risk_areas", [])
                    logger.info(f"[Orchestrator] {agent_name} identified {len(high_risk)} high risk areas")
+                    # 🔥 FIX: 将 high_risk_areas 也转换为发现
+                    for area in high_risk:
+                        if isinstance(area, str) and area.strip():
+                            # 尝试从描述中提取文件路径和漏洞类型
+                            file_path = ""
+                            line_start = 0
+                            vuln_type = "potential_issue"
+
+                            # 🔥 FIX: 改进文件路径提取逻辑
+                            # 格式1: "file.py:36 - 描述" -> 提取 file.py 和 36
+                            # 格式2: "描述性文本" -> 不提取文件路径
+                            if ":" in area:
+                                parts = area.split(":", 1)
+                                potential_file = parts[0].strip()
+                                # 只有当 parts[0] 看起来像文件路径时才提取
+                                # 文件路径通常包含 . 且没有空格（或只在结尾有扩展名）
+                                if ("." in potential_file and
+                                    " " not in potential_file and
+                                    len(potential_file) < 100 and
+                                    any(potential_file.endswith(ext) for ext in ['.py', '.js', '.ts', '.java', '.go', '.php', '.rb', '.c', '.cpp', '.h'])):
+                                    file_path = potential_file
+                                    # 尝试提取行号
+                                    if len(parts) > 1:
+                                        remaining = parts[1].strip()
+                                        line_match = remaining.split()[0] if remaining else ""
+                                        if line_match.isdigit():
+                                            line_start = int(line_match)
+
+                            # 推断漏洞类型
+                            area_lower = area.lower()
+                            if "command" in area_lower or "命令" in area_lower or "subprocess" in area_lower:
+                                vuln_type = "command_injection"
+                            elif "sql" in area_lower:
+                                vuln_type = "sql_injection"
+                            elif "xss" in area_lower:
+                                vuln_type = "xss"
+                            elif "path" in area_lower or "traversal" in area_lower or "路径" in area_lower:
+                                vuln_type = "path_traversal"
+                            elif "ssrf" in area_lower:
+                                vuln_type = "ssrf"
+                            elif "secret" in area_lower or "密钥" in area_lower or "key" in area_lower:
+                                vuln_type = "hardcoded_secret"
+
+                            high_risk_finding = {
+                                "title": area[:100] if len(area) > 100 else area,
+                                "description": area,
+                                "file_path": file_path,
+                                "line_start": line_start,
+                                "severity": "high",  # 高风险区域默认高严重度
+                                "vulnerability_type": vuln_type,
+                                "source": "recon_high_risk",
+                                "needs_verification": True,
+                                "confidence": 0.6,
+                            }
+                            raw_findings.append(high_risk_finding)
+                            logger.info(f"[Orchestrator] Converted high_risk_area to finding: {area[:60]}... (file={file_path}, type={vuln_type})")
+
+                # 🔥 初始化 valid_findings，确保后续代码可以访问
+                valid_findings = []

                if raw_findings:
                    # 只添加字典格式的发现
--- a/backend/app/services/agent/agents/recon.py
+++ b/backend/app/services/agent/agents/recon.py
@ -70,20 +70,15 @@ class ReconAgent(BaseAgent):
        self._steps: List[ReconStep] = []
    
    def _parse_llm_response(self, response: str) -> ReconStep:
-        """解析 LLM 响应"""
+        """解析 LLM 响应 - 增强版，更健壮地提取思考内容"""
        step = ReconStep(thought="")
-        
-        # 提取 Thought
+
+        # 🔥 首先尝试提取明确的 Thought 标记
        thought_match = re.search(r'Thought:\s*(.*?)(?=Action:|Final Answer:|$)', response, re.DOTALL)
        if thought_match:
            step.thought = thought_match.group(1).strip()
-        elif not re.search(r'Action:|Final Answer:', response):
-             # 🔥 Fallback: If no markers found, treat the whole response as Thought
-             # This prevents empty steps loops "Decision: Continue Thinking"
-             if response.strip():
-                 step.thought = response.strip()
-        
-        # 检查是否是最终答案
+
+        # 🔥 检查是否是最终答案
        final_match = re.search(r'Final Answer:\s*(.*?)$', response, re.DOTALL)
        if final_match:
            step.is_final = True
@ -92,29 +87,45 @@ class ReconAgent(BaseAgent):
            answer_text = re.sub(r'```\s*', '', answer_text)
            # 使用增强的 JSON 解析器
            step.final_answer = AgentJsonParser.parse(
-                answer_text, 
+                answer_text,
                default={"raw_answer": answer_text}
            )
            # 确保 findings 格式正确
            if "initial_findings" in step.final_answer:
                step.final_answer["initial_findings"] = [
-                    f for f in step.final_answer["initial_findings"] 
+                    f for f in step.final_answer["initial_findings"]
                    if isinstance(f, dict)
                ]
+
+            # 🔥 如果没有提取到 thought，使用 Final Answer 前的内容作为思考
+            if not step.thought:
+                before_final = response[:response.find('Final Answer:')].strip()
+                if before_final:
+                    # 移除可能的 Thought: 前缀
+                    before_final = re.sub(r'^Thought:\s*', '', before_final)
+                    step.thought = before_final[:500] if len(before_final) > 500 else before_final
+
            return step
-        
-        # 提取 Action
+
+        # 🔥 提取 Action
        action_match = re.search(r'Action:\s*(\w+)', response)
        if action_match:
            step.action = action_match.group(1).strip()
-        
-        # 提取 Action Input
+
+            # 🔥 如果没有提取到 thought，提取 Action 之前的内容作为思考
+            if not step.thought:
+                action_pos = response.find('Action:')
+                if action_pos > 0:
+                    before_action = response[:action_pos].strip()
+                    # 移除可能的 Thought: 前缀
+                    before_action = re.sub(r'^Thought:\s*', '', before_action)
+                    if before_action:
+                        step.thought = before_action[:500] if len(before_action) > 500 else before_action
+
+        # 🔥 提取 Action Input
        input_match = re.search(r'Action Input:\s*(.*?)(?=Thought:|Action:|Observation:|$)', response, re.DOTALL)
        if input_match:
            input_text = input_match.group(1).strip()
-    
-
-
            input_text = re.sub(r'```json\s*', '', input_text)
            input_text = re.sub(r'```\s*', '', input_text)
            # 使用增强的 JSON 解析器
@ -122,7 +133,12 @@ class ReconAgent(BaseAgent):
                input_text,
                default={"raw_input": input_text}
            )
-        
+
+        # 🔥 最后的 fallback：如果整个响应没有任何标记，整体作为思考
+        if not step.thought and not step.action and not step.is_final:
+            if response.strip():
+                step.thought = response.strip()[:500]
+
        return step
    

--- a/backend/app/services/agent/agents/verification.py
+++ b/backend/app/services/agent/agents/verification.py
@ -44,13 +44,70 @@ VERIFICATION_SYSTEM_PROMPT = """你是 DeepAudit 的漏洞验证 Agent，一个*
 - **list_files**: 列出目录文件
  参数: directory (str), pattern (str)

-### 沙箱验证 (必须使用)
+### 沙箱核心工具
 - **sandbox_exec**: 在沙箱中执行命令
  参数: command (str), timeout (int)
 - **sandbox_http**: 发送 HTTP 请求测试
  参数: method (str), url (str), data (dict), headers (dict)
 - **verify_vulnerability**: 自动化漏洞验证
-  参数: vulnerability_type (str), target (str), payload (str)
+  参数: vulnerability_type (str), target_url (str), payload (str), expected_pattern (str)
+
+### 🔥 多语言代码测试工具 (按语言选择)
+- **php_test**: 测试 PHP 代码，支持模拟 GET/POST 参数
+  参数: file_path (str), php_code (str), get_params (dict), post_params (dict), timeout (int)
+  示例: {"file_path": "vuln.php", "get_params": {"cmd": "whoami"}}
+
+- **python_test**: 测试 Python 代码，支持模拟 Flask/Django 请求
+  参数: file_path (str), code (str), request_params (dict), form_data (dict), timeout (int)
+  示例: {"code": "import os; os.system(params['cmd'])", "request_params": {"cmd": "id"}}
+
+- **javascript_test**: 测试 JavaScript/Node.js 代码
+  参数: file_path (str), code (str), req_query (dict), req_body (dict), timeout (int)
+  示例: {"code": "exec(req.query.cmd)", "req_query": {"cmd": "id"}}
+
+- **java_test**: 测试 Java 代码，支持模拟 Servlet 请求
+  参数: file_path (str), code (str), request_params (dict), timeout (int)
+
+- **go_test**: 测试 Go 代码
+  参数: file_path (str), code (str), args (list), timeout (int)
+
+- **ruby_test**: 测试 Ruby 代码，支持模拟 Rails 请求
+  参数: file_path (str), code (str), params (dict), timeout (int)
+
+- **shell_test**: 测试 Shell/Bash 脚本
+  参数: file_path (str), code (str), args (list), env (dict), timeout (int)
+
+- **universal_code_test**: 通用多语言测试工具 (自动检测语言)
+  参数: language (str), file_path (str), code (str), params (dict), timeout (int)
+
+### 🔥 漏洞验证专用工具 (按漏洞类型选择，推荐使用)
+- **test_command_injection**: 专门测试命令注入漏洞
+  参数: target_file (str), param_name (str), test_command (str), language (str)
+  示例: {"target_file": "vuln.php", "param_name": "cmd", "test_command": "whoami"}
+
+- **test_sql_injection**: 专门测试 SQL 注入漏洞
+  参数: target_file (str), param_name (str), db_type (str), injection_type (str)
+  示例: {"target_file": "login.php", "param_name": "username", "db_type": "mysql"}
+
+- **test_xss**: 专门测试 XSS 漏洞
+  参数: target_file (str), param_name (str), xss_type (str), context (str)
+  示例: {"target_file": "search.php", "param_name": "q", "xss_type": "reflected"}
+
+- **test_path_traversal**: 专门测试路径遍历漏洞
+  参数: target_file (str), param_name (str), target_path (str)
+  示例: {"target_file": "download.php", "param_name": "file", "target_path": "/etc/passwd"}
+
+- **test_ssti**: 专门测试模板注入漏洞
+  参数: target_file (str), param_name (str), template_engine (str)
+  示例: {"target_file": "render.py", "param_name": "name", "template_engine": "jinja2"}
+
+- **test_deserialization**: 专门测试反序列化漏洞
+  参数: target_file (str), language (str), serialization_format (str)
+  示例: {"target_file": "api.php", "language": "php", "serialization_format": "php_serialize"}
+
+- **universal_vuln_test**: 通用漏洞测试工具 (自动选择测试策略)
+  参数: vuln_type (str), target_file (str), param_name (str), additional_params (dict)
+  支持: command_injection, sql_injection, xss, path_traversal, ssti, deserialization

 ## 工作方式
 你将收到一批待验证的漏洞发现。对于每个发现，你需要：
@ -82,7 +139,7 @@ Final Answer: [JSON 格式的验证报告]
            "poc": {
                "description": "PoC 描述",
                "steps": ["步骤1", "步骤2"],
-                "payload": "测试 payload"
+                "payload": "curl 'http://target/vuln.php?cmd=id' 或完整利用代码"
            },
            "impact": "实际影响分析",
            "recommendation": "修复建议"
@ -104,20 +161,56 @@ Final Answer: [JSON 格式的验证报告]
 - **false_positive**: 确认是误报，有明确理由

 ## 验证策略建议
+
+### 对于命令注入漏洞
+1. 使用 **test_command_injection** 工具，它会自动构建测试环境
+2. 或使用对应语言的测试工具 (php_test, python_test 等)
+3. 检查命令输出是否包含 uid=, root, www-data 等特征
+
+### 对于 SQL 注入漏洞
+1. 使用 **test_sql_injection** 工具
+2. 提供数据库类型 (mysql, postgresql, sqlite)
+3. 检查是否能执行 UNION 查询或提取数据
+
+### 对于 XSS 漏洞
+1. 使用 **test_xss** 工具
+2. 指定 XSS 类型 (reflected, stored, dom)
+3. 检查 payload 是否在输出中未转义
+
+### 对于路径遍历漏洞
+1. 使用 **test_path_traversal** 工具
+2. 尝试读取 /etc/passwd 或其他已知文件
+3. 检查是否能访问目标文件
+
+### 对于模板注入 (SSTI) 漏洞
+1. 使用 **test_ssti** 工具
+2. 指定模板引擎 (jinja2, twig, freemarker 等)
+3. 检查数学表达式是否被执行
+
+### 对于反序列化漏洞
+1. 使用 **test_deserialization** 工具
+2. 指定语言和序列化格式
+3. 检查是否能执行任意代码
+
+### 对于其他漏洞
 1. **上下文分析**: 用 read_file 获取更多代码上下文
-2. **数据流追踪**: 用 dataflow_analysis 确认污点传播
-3. **LLM 深度分析**: 用 vulnerability_validation 进行专业分析
-4. **沙箱测试**: 对高危漏洞用沙箱进行安全测试
+2. **通用测试**: 使用 universal_vuln_test 或 universal_code_test
+3. **沙箱测试**: 对高危漏洞用沙箱进行安全测试

 ## 重要原则
 1. **质量优先** - 宁可漏报也不要误报太多
 2. **深入理解** - 理解代码逻辑，不要表面判断
 3. **证据支撑** - 判定要有依据
 4. **安全第一** - 沙箱测试要谨慎
-5. **🔥 PoC 生成** - 对于 confirmed 和 likely 的漏洞，**必须**生成 PoC:
+5. **🔥 PoC 生成** - 对于 confirmed 和 likely 的漏洞，**必须**生成完整的 PoC:
   - poc.description: 简要描述这个 PoC 的作用
   - poc.steps: 详细的复现步骤列表
-   - poc.payload: 实际的攻击载荷或测试代码
+   - poc.payload: **完整的**利用代码或命令，例如:
+     - Web漏洞: 完整URL如 `http://target/path?param=<payload>`
+     - 命令注入: 完整的 curl 命令或 HTTP 请求
+     - SQL注入: 完整的利用语句或请求
+     - 代码执行: 可直接运行的利用脚本
+   - ⚠️ payload 字段必须是**可直接复制执行**的完整利用代码，不要只写参数值

 现在开始验证漏洞发现！"""

@ -168,19 +261,15 @@ class VerificationAgent(BaseAgent):

    
    def _parse_llm_response(self, response: str) -> VerificationStep:
-        """解析 LLM 响应"""
+        """解析 LLM 响应 - 增强版，更健壮地提取思考内容"""
        step = VerificationStep(thought="")
-        
-        # 提取 Thought
+
+        # 🔥 首先尝试提取明确的 Thought 标记
        thought_match = re.search(r'Thought:\s*(.*?)(?=Action:|Final Answer:|$)', response, re.DOTALL)
        if thought_match:
            step.thought = thought_match.group(1).strip()
-        elif not re.search(r'Action:|Final Answer:', response):
-             # 🔥 Fallback: If no markers found, treat the whole response as Thought
-             if response.strip():
-                 step.thought = response.strip()
-        
-        # 检查是否是最终答案
+
+        # 🔥 检查是否是最终答案
        final_match = re.search(r'Final Answer:\s*(.*?)$', response, re.DOTALL)
        if final_match:
            step.is_final = True
@ -189,23 +278,40 @@ class VerificationAgent(BaseAgent):
            answer_text = re.sub(r'```\s*', '', answer_text)
            # 使用增强的 JSON 解析器
            step.final_answer = AgentJsonParser.parse(
-                answer_text, 
+                answer_text,
                default={"findings": [], "raw_answer": answer_text}
            )
            # 确保 findings 格式正确
            if "findings" in step.final_answer:
                step.final_answer["findings"] = [
-                    f for f in step.final_answer["findings"] 
+                    f for f in step.final_answer["findings"]
                    if isinstance(f, dict)
                ]
+
+            # 🔥 如果没有提取到 thought，使用 Final Answer 前的内容作为思考
+            if not step.thought:
+                before_final = response[:response.find('Final Answer:')].strip()
+                if before_final:
+                    before_final = re.sub(r'^Thought:\s*', '', before_final)
+                    step.thought = before_final[:500] if len(before_final) > 500 else before_final
+
            return step
-        
-        # 提取 Action
+
+        # 🔥 提取 Action
        action_match = re.search(r'Action:\s*(\w+)', response)
        if action_match:
            step.action = action_match.group(1).strip()
-        
-        # 提取 Action Input
+
+            # 🔥 如果没有提取到 thought，提取 Action 之前的内容作为思考
+            if not step.thought:
+                action_pos = response.find('Action:')
+                if action_pos > 0:
+                    before_action = response[:action_pos].strip()
+                    before_action = re.sub(r'^Thought:\s*', '', before_action)
+                    if before_action:
+                        step.thought = before_action[:500] if len(before_action) > 500 else before_action
+
+        # 🔥 提取 Action Input
        input_match = re.search(r'Action Input:\s*(.*?)(?=Thought:|Action:|Observation:|$)', response, re.DOTALL)
        if input_match:
            input_text = input_match.group(1).strip()
@ -216,7 +322,12 @@ class VerificationAgent(BaseAgent):
                input_text,
                default={"raw_input": input_text}
            )
-        
+
+        # 🔥 最后的 fallback：如果整个响应没有任何标记，整体作为思考
+        if not step.thought and not step.action and not step.is_final:
+            if response.strip():
+                step.thought = response.strip()[:500]
+
        return step
    
    async def run(self, input_data: Dict[str, Any]) -> AgentResult:
@ -297,7 +408,24 @@ class VerificationAgent(BaseAgent):
        
        # 去重
        findings_to_verify = self._deduplicate(findings_to_verify)
-        
+
+        # 🔥 FIX: 优先处理有明确文件路径的发现，将没有文件路径的发现放到后面
+        # 这确保 Analysis 的具体发现优先于 Recon 的泛化描述
+        def has_valid_file_path(finding: Dict) -> bool:
+            """Return True when the finding names a concrete file path.
+
+            Missing, blank, non-string and placeholder values ("unknown",
+            "n/a", in any letter case) are treated as "no path".
+            """
+            file_path = finding.get("file_path", "")
+            if not isinstance(file_path, str):
+                return False
+            # Strip before comparing so padded placeholders such as " N/A "
+            # are not mistaken for real paths.
+            normalized = file_path.strip().lower()
+            return bool(normalized) and normalized not in ("unknown", "n/a")
+
+        findings_with_path = [f for f in findings_to_verify if has_valid_file_path(f)]
+        findings_without_path = [f for f in findings_to_verify if not has_valid_file_path(f)]
+
+        # 合并：有路径的在前，没路径的在后
+        findings_to_verify = findings_with_path + findings_without_path
+
+        if findings_with_path:
+            logger.info(f"[Verification] 优先处理 {len(findings_with_path)} 个有明确文件路径的发现")
+        if findings_without_path:
+            logger.info(f"[Verification] 还有 {len(findings_without_path)} 个发现需要自行定位文件")
+
        if not findings_to_verify:
            logger.warning(f"[Verification] 没有需要验证的发现! previous_results keys: {list(previous_results.keys()) if isinstance(previous_results, dict) else 'not dict'}")
            await self.emit_event("warning", "没有需要验证的发现 - 可能是数据格式问题")
@ -322,11 +450,25 @@ class VerificationAgent(BaseAgent):
        
        findings_summary = []
        for i, f in enumerate(findings_to_verify):
+            # 🔥 FIX: 正确处理 file_path 格式，可能包含行号 (如 "app.py:36")
+            file_path = f.get('file_path', 'unknown')
+            line_start = f.get('line_start', 0)
+
+            # 如果 file_path 已包含行号，提取出来
+            if isinstance(file_path, str) and ':' in file_path:
+                parts = file_path.split(':', 1)
+                if len(parts) == 2 and parts[1].split()[0].isdigit():
+                    file_path = parts[0]
+                    try:
+                        line_start = int(parts[1].split()[0])
+                    except ValueError:
+                        pass
+
            findings_summary.append(f"""
 ### 发现 {i+1}: {f.get('title', 'Unknown')}
 - 类型: {f.get('vulnerability_type', 'unknown')}
 - 严重度: {f.get('severity', 'medium')}
- 文件: {f.get('file_path', 'unknown')}:{f.get('line_start', 0)}
+- 文件: {file_path} (行 {line_start})
 - 代码:
 ```
 {f.get('code_snippet', 'N/A')[:500]}
@ -341,13 +483,22 @@ class VerificationAgent(BaseAgent):
 ## 待验证发现
 {''.join(findings_summary)}

+## ⚠️ 重要验证指南
+1. **直接使用上面列出的文件路径** - 不要猜测或搜索其他路径
+2. **如果文件路径包含冒号和行号** (如 "app.py:36"), 请提取文件名 "app.py" 并使用 read_file 读取
+3. **先读取文件内容，再判断漏洞是否存在**
+4. **不要假设文件在子目录中** - 使用发现中提供的精确路径
+
 ## 验证要求
 - 验证级别: {config.get('verification_level', 'standard')}

 ## 可用工具
 {self.get_tools_description()}

-请开始验证。对于每个发现，思考如何验证它，使用合适的工具获取更多信息，然后判断是否为真实漏洞。
+请开始验证。对于每个发现：
+1. 首先使用 read_file 读取发现中指定的文件（使用精确路径）
+2. 分析代码上下文
+3. 判断是否为真实漏洞
 {f"特别注意 Analysis Agent 提到的关注点。" if handoff_context else ""}"""

        # 初始化对话历史
--- a/backend/app/services/agent/graph/runner.py
+++ b/backend/app/services/agent/graph/runner.py
@ -197,6 +197,13 @@ class AgentRunner:
        from app.services.agent.tools import (
            ThinkTool, ReflectTool,
            CreateVulnerabilityReportTool,
+            # 多语言代码测试工具
+            PhpTestTool, PythonTestTool, JavaScriptTestTool, JavaTestTool,
+            GoTestTool, RubyTestTool, ShellTestTool, UniversalCodeTestTool,
+            # 漏洞验证专用工具
+            CommandInjectionTestTool, SqlInjectionTestTool, XssTestTool,
+            PathTraversalTestTool, SstiTestTool, DeserializationTestTool,
+            UniversalVulnTestTool,
        )
        # 🔥 导入知识查询工具
        from app.services.agent.knowledge import (
@ -282,6 +289,8 @@ class AgentRunner:
                network_mode=settings.SANDBOX_NETWORK_MODE,
            )
            self.sandbox_manager = SandboxManager(config=sandbox_config)
+            # 🔥 必须调用 initialize() 来连接 Docker
+            await self.sandbox_manager.initialize()
        except Exception as e:
            logger.warning(f"❌ Sandbox Manager initialization failed: {e}")
            import traceback
@ -289,15 +298,38 @@ class AgentRunner:
            # 尝试创建默认管理器作为后备
            try:
                self.sandbox_manager = SandboxManager()
+                # 🔥 同样需要调用 initialize()
+                await self.sandbox_manager.initialize()
                logger.info("⚠️ Created fallback SandboxManager (Docker might be unavailable)")
            except Exception as e2:
                logger.error(f"❌ Failed to create fallback SandboxManager: {e2}")

        # 始终注册沙箱工具，即使 Docker 不可用（工具内部会检查）
        if self.sandbox_manager:
+            # 🔥 沙箱核心工具
            self.verification_tools["sandbox_exec"] = SandboxTool(self.sandbox_manager)
            self.verification_tools["sandbox_http"] = SandboxHttpTool(self.sandbox_manager)
            self.verification_tools["verify_vulnerability"] = VulnerabilityVerifyTool(self.sandbox_manager)
+
+            # 🔥 多语言代码测试工具
+            self.verification_tools["php_test"] = PhpTestTool(self.sandbox_manager, self.project_root)
+            self.verification_tools["python_test"] = PythonTestTool(self.sandbox_manager, self.project_root)
+            self.verification_tools["javascript_test"] = JavaScriptTestTool(self.sandbox_manager, self.project_root)
+            self.verification_tools["java_test"] = JavaTestTool(self.sandbox_manager, self.project_root)
+            self.verification_tools["go_test"] = GoTestTool(self.sandbox_manager, self.project_root)
+            self.verification_tools["ruby_test"] = RubyTestTool(self.sandbox_manager, self.project_root)
+            self.verification_tools["shell_test"] = ShellTestTool(self.sandbox_manager, self.project_root)
+            self.verification_tools["universal_code_test"] = UniversalCodeTestTool(self.sandbox_manager, self.project_root)
+
+            # 🔥 漏洞验证专用工具
+            self.verification_tools["test_command_injection"] = CommandInjectionTestTool(self.sandbox_manager, self.project_root)
+            self.verification_tools["test_sql_injection"] = SqlInjectionTestTool(self.sandbox_manager, self.project_root)
+            self.verification_tools["test_xss"] = XssTestTool(self.sandbox_manager, self.project_root)
+            self.verification_tools["test_path_traversal"] = PathTraversalTestTool(self.sandbox_manager, self.project_root)
+            self.verification_tools["test_ssti"] = SstiTestTool(self.sandbox_manager, self.project_root)
+            self.verification_tools["test_deserialization"] = DeserializationTestTool(self.sandbox_manager, self.project_root)
+            self.verification_tools["universal_vuln_test"] = UniversalVulnTestTool(self.sandbox_manager, self.project_root)
+
            logger.info(f"✅ Sandbox tools initialized (Docker available: {self.sandbox_manager.is_available})")
        else:
             logger.error("❌ Sandbox tools NOT initialized due to critical manager failure")
--- a/backend/app/services/agent/json_parser.py
+++ b/backend/app/services/agent/json_parser.py
@ -1,6 +1,6 @@
 """
 Agent JSON 解析工具
-从 LLM 响应中安全地解析 JSON，参考 llm/service.py 的实现
+从 LLM 响应中安全地解析 JSON，优先使用 json-repair 库
 """

 import json
@ -14,14 +14,15 @@ logger = logging.getLogger(__name__)
 try:
    from json_repair import repair_json
    JSON_REPAIR_AVAILABLE = True
+    logger.info("✅ json-repair 库已加载")
 except ImportError:
    JSON_REPAIR_AVAILABLE = False
-    logger.debug("json-repair library not available")
+    logger.warning("⚠️ json-repair 库未安装，将使用备用解析方法")


 class AgentJsonParser:
-    """Agent 专用的 JSON 解析器"""
-    
+    """Agent 专用的 JSON 解析器 - 优先使用 json-repair"""
+
    @staticmethod
    def clean_text(text: str) -> str:
        """清理文本中的控制字符"""
@ -30,7 +31,7 @@ class AgentJsonParser:
        # 移除 BOM 和零宽字符
        text = text.replace('\ufeff', '').replace('\u200b', '').replace('\u200c', '').replace('\u200d', '')
        return text
-    
+
    @staticmethod
    def fix_json_format(text: str) -> str:
        """修复常见的 JSON 格式问题"""
@ -40,7 +41,66 @@ class AgentJsonParser:
        # 修复未转义的换行符（在字符串值中）
        text = re.sub(r':\s*"([^"]*)\n([^"]*)"', r': "\1\\n\2"', text)
        return text
-    
+
+    @classmethod
+    def extract_json_string(cls, text: str) -> str:
+        """Extract the part of ``text`` most likely to contain JSON.
+
+        The content of the first markdown code fence wins. Otherwise the
+        result spans from the first ``{`` / ``[`` to the last ``}`` / ``]``.
+
+        Args:
+            text: Raw text that may embed a JSON document.
+
+        Returns:
+            The candidate JSON substring. ``text`` is returned unchanged when
+            it contains no opening brace or bracket; the tail starting at the
+            first opener is returned when no closer follows it.
+        """
+        # Prefer the body of a markdown code block when one is present.
+        md_match = re.search(r'```(?:json)?\s*([\s\S]*?)\s*```', text)
+        if md_match:
+            return md_match.group(1).strip()
+
+        # Earliest opening brace or bracket.
+        openers = [idx for idx in (text.find('{'), text.find('[')) if idx != -1]
+        if not openers:
+            return text  # nothing that looks like JSON
+        start_idx = min(openers)
+
+        # One past the last closing brace or bracket (0 when neither exists).
+        end_idx = max(text.rfind('}'), text.rfind(']')) + 1
+
+        # A closer that sits before the opener cannot terminate the document;
+        # keep the (probably truncated) tail instead of an empty slice.
+        if end_idx <= start_idx:
+            return text[start_idx:]
+
+        return text[start_idx:end_idx]
+
+    @classmethod
+    def repair_with_library(cls, text: str) -> Dict[str, Any]:
+        """Repair and parse JSON from ``text`` using the json-repair library.
+
+        Args:
+            text: Raw text that may embed a (possibly malformed) JSON document.
+
+        Returns:
+            The parsed dictionary. A top-level list is wrapped as
+            ``{"items": [...]}``.
+
+        Raises:
+            ValueError: If json-repair is not installed, no JSON content is
+                found, or the repaired value is neither an object nor a list
+                (``json.JSONDecodeError`` is a ``ValueError`` subclass).
+        """
+        if not JSON_REPAIR_AVAILABLE:
+            raise ValueError("json-repair library not available")
+
+        # Narrow the input down to the JSON-looking part first.
+        json_str = cls.extract_json_string(text)
+        if not json_str.strip():
+            raise ValueError("No JSON content found")
+
+        repaired = repair_json(json_str, return_objects=True)
+
+        # A string result is given one more decoding pass; the decoded value
+        # then goes through the same type checks as a direct result so the
+        # declared Dict return type always holds.
+        if isinstance(repaired, str):
+            repaired = json.loads(repaired)
+
+        if isinstance(repaired, dict):
+            return repaired
+        if isinstance(repaired, list):
+            # Wrap lists so callers can rely on a dictionary.
+            return {"items": repaired}
+
+        raise ValueError(f"json-repair returned unexpected type: {type(repaired)}")
+
    @classmethod
    def extract_from_markdown(cls, text: str) -> Dict[str, Any]:
        """从 markdown 代码块提取 JSON"""
@ -48,35 +108,35 @@ class AgentJsonParser:
        if match:
            return json.loads(match.group(1))
        raise ValueError("No markdown code block found")
-    
+
    @classmethod
    def extract_json_object(cls, text: str) -> Dict[str, Any]:
        """智能提取 JSON 对象"""
        start_idx = text.find('{')
        if start_idx == -1:
            raise ValueError("No JSON object found")
-        
+
        # 考虑字符串内的花括号和转义字符
        brace_count = 0
        in_string = False
        escape_next = False
        end_idx = -1
-        
+
        for i in range(start_idx, len(text)):
            char = text[i]
-            
+
            if escape_next:
                escape_next = False
                continue
-            
+
            if char == '\\':
                escape_next = True
                continue
-            
+
            if char == '"' and not escape_next:
                in_string = not in_string
                continue
-            
+
            if not in_string:
                if char == '{':
                    brace_count += 1
@ -85,7 +145,7 @@ class AgentJsonParser:
                    if brace_count == 0:
                        end_idx = i + 1
                        break
-        
+
        if end_idx == -1:
            # 如果找不到完整的 JSON，尝试使用最后一个 }
            last_brace = text.rfind('}')
@ -93,68 +153,45 @@ class AgentJsonParser:
                end_idx = last_brace + 1
            else:
                raise ValueError("Incomplete JSON object")
-        
+
        json_str = text[start_idx:end_idx]
        # 修复格式问题
        json_str = re.sub(r',(\s*[}\]])', r'\1', json_str)
-        
+
        return json.loads(json_str)
-    
+
    @classmethod
    def fix_truncated_json(cls, text: str) -> Dict[str, Any]:
        """修复截断的 JSON"""
        start_idx = text.find('{')
        if start_idx == -1:
            raise ValueError("Cannot fix truncated JSON")
-        
+
        json_str = text[start_idx:]
-        
+
        # 计算缺失的闭合符号
        open_braces = json_str.count('{')
        close_braces = json_str.count('}')
        open_brackets = json_str.count('[')
        close_brackets = json_str.count(']')
-        
+
        # 补全缺失的闭合符号
        json_str += ']' * max(0, open_brackets - close_brackets)
        json_str += '}' * max(0, open_braces - close_braces)
-        
+
        # 修复格式
        json_str = re.sub(r',(\s*[}\]])', r'\1', json_str)
        return json.loads(json_str)
-    
-    @classmethod
-    def repair_with_library(cls, text: str) -> Dict[str, Any]:
-        """使用 json-repair 库修复损坏的 JSON"""
-        if not JSON_REPAIR_AVAILABLE:
-            raise ValueError("json-repair library not available")
-        
-        start_idx = text.find('{')
-        if start_idx == -1:
-            raise ValueError("No JSON object found for repair")
-        
-        end_idx = text.rfind('}')
-        if end_idx > start_idx:
-            json_str = text[start_idx:end_idx + 1]
-        else:
-            json_str = text[start_idx:]
-        
-        repaired = repair_json(json_str, return_objects=True)
-        
-        if isinstance(repaired, dict):
-            return repaired
-        
-        raise ValueError(f"json-repair returned unexpected type: {type(repaired)}")
-    
+
    @classmethod
    def parse(cls, text: str, default: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """
-        从 LLM 响应中解析 JSON（增强版）
-        
+        从 LLM 响应中解析 JSON（优先使用 json-repair）
+
        Args:
            text: LLM 响应文本
            default: 解析失败时返回的默认值，如果为 None 则抛出异常
-            
+
        Returns:
            解析后的字典
        """
@ -163,19 +200,25 @@ class AgentJsonParser:
                logger.warning("LLM 响应为空，返回默认值")
                return default
            raise ValueError("LLM 响应内容为空")
-        
+
        clean = cls.clean_text(text)
-        
-        # 尝试多种方式解析
-        attempts = [
+
+        # 🔥 优先使用 json-repair，它能处理大多数格式问题
+        attempts = []
+
+        # 如果 json-repair 可用，优先使用它
+        if JSON_REPAIR_AVAILABLE:
+            attempts.append(("json-repair", lambda: cls.repair_with_library(text)))
+
+        # 然后尝试其他方法作为后备
+        attempts.extend([
            ("直接解析", lambda: json.loads(text)),
            ("清理后解析", lambda: json.loads(cls.fix_json_format(clean))),
            ("Markdown 提取", lambda: cls.extract_from_markdown(text)),
            ("智能提取", lambda: cls.extract_json_object(clean)),
            ("截断修复", lambda: cls.fix_truncated_json(clean)),
-            ("json-repair", lambda: cls.repair_with_library(text)),
-        ]
-        
+        ])
+
        last_error = None
        for name, attempt in attempts:
            try:
@ -187,30 +230,30 @@ class AgentJsonParser:
            except Exception as e:
                last_error = e
                logger.debug(f"JSON 解析方法 '{name}' 失败: {e}")
-        
+
        # 所有尝试都失败
        if default is not None:
            logger.warning(f"JSON 解析失败，返回默认值。原始内容: {text[:200]}...")
            return default
-        
+
        logger.error(f"❌ 无法解析 JSON，原始内容: {text[:500]}...")
        raise ValueError(f"无法解析 JSON: {last_error}")
-    
+
    @classmethod
    def parse_findings(cls, text: str) -> List[Dict[str, Any]]:
        """
        专门解析 findings 列表
-        
+
        Args:
            text: LLM 响应文本
-            
+
        Returns:
            findings 列表（每个元素都是字典）
        """
        try:
            result = cls.parse(text, default={"findings": []})
            findings = result.get("findings", [])
-            
+
            # 确保每个 finding 都是字典
            valid_findings = []
            for f in findings:
@ -219,33 +262,70 @@ class AgentJsonParser:
                elif isinstance(f, str):
                    # 尝试将字符串解析为 JSON
                    try:
-                        parsed = json.loads(f)
+                        # 优先使用 json-repair
+                        if JSON_REPAIR_AVAILABLE:
+                            parsed = repair_json(f, return_objects=True)
+                        else:
+                            parsed = json.loads(f)
                        if isinstance(parsed, dict):
                            valid_findings.append(parsed)
-                    except json.JSONDecodeError:
+                    except Exception:
                        logger.warning(f"跳过无效的 finding（字符串）: {f[:100]}...")
                else:
                    logger.warning(f"跳过无效的 finding（类型: {type(f)}）")
-            
+
            return valid_findings
-            
+
        except Exception as e:
            logger.error(f"解析 findings 失败: {e}")
            return []
-    
+
    @classmethod
    def safe_get(cls, data: Union[Dict, str, Any], key: str, default: Any = None) -> Any:
        """
        安全地从数据中获取值
-        
+
        Args:
            data: 可能是字典或其他类型
            key: 要获取的键
            default: 默认值
-            
+
        Returns:
            获取的值或默认值
        """
        if isinstance(data, dict):
            return data.get(key, default)
        return default
+
+    @classmethod
+    def parse_any(cls, text: str, default: Any = None) -> Any:
+        """Parse any JSON value (object, array, string, ...) from ``text``.
+
+        Args:
+            text: LLM response text.
+            default: Value returned when ``text`` is blank or cannot be parsed.
+
+        Returns:
+            The parsed Python object, or ``default`` on failure.
+        """
+        if not text or not text.strip():
+            return default
+
+        json_str = cls.extract_json_string(cls.clean_text(text))
+
+        # json-repair goes first when it is installed.
+        if JSON_REPAIR_AVAILABLE:
+            try:
+                repaired = repair_json(json_str, return_objects=True)
+            except Exception as e:
+                logger.debug(f"json-repair 解析失败: {e}")
+            else:
+                # NOTE(review): per the json-repair README, unsalvageable
+                # input comes back as an empty string rather than an error.
+                # Treat that as a failed attempt so the strict parser and
+                # `default` still get their turn.
+                if repaired != "":
+                    return repaired
+
+        # Fallback: strict standard-library parsing.
+        try:
+            return json.loads(json_str)
+        except Exception as e:
+            logger.debug(f"标准 JSON 解析失败: {e}")
+
+        return default
--- a/backend/app/services/agent/prompts/system_prompts.py
+++ b/backend/app/services/agent/prompts/system_prompts.py
@ -130,6 +130,34 @@ Action: 工具名称
 Action Input: {"参数1": "值1", "参数2": "值2"}
 ```

+### 错误处理指南
+
+当工具执行返回错误时，你会收到详细的错误信息，包括：
+- 工具名称和参数
+- 错误类型和错误信息
+- 堆栈跟踪（如有）
+
+**错误处理策略**：
+
+1. **参数错误** - 检查并修正参数格式
+   - 确保 JSON 格式正确
+   - 检查必填参数是否提供
+   - 验证参数类型（字符串、数字、列表等）
+
+2. **资源不存在** - 调整目标
+   - 文件不存在：使用 list_files 确认路径
+   - 工具不可用：使用其他替代工具
+
+3. **权限/超时错误** - 跳过或简化
+   - 记录问题，继续其他分析
+   - 尝试更小范围的操作
+
+4. **沙箱错误** - 检查环境
+   - Docker 不可用时使用代码分析替代
+   - 记录无法验证的原因
+
+**重要**：遇到错误时，不要放弃！分析错误原因，尝试其他方法完成任务。
+
 ### 完成输出格式

 ```
@ -379,6 +407,22 @@ RECON_SYSTEM_PROMPT = f"""你是 DeepAudit 的侦察 Agent，负责收集和分
 - 调试设置
 - 密钥管理

+## 工作方式
+每一步，你需要输出：
+
+```
+Thought: [分析当前情况，思考需要收集什么信息]
+Action: [工具名称]
+Action Input: {{"参数1": "值1"}}
+```
+
+当你完成信息收集后，输出：
+
+```
+Thought: [总结收集到的所有信息]
+Final Answer: [JSON 格式的结果]
+```
+
 ## 输出格式

 ```
@ -392,12 +436,33 @@ Final Answer: {{
    "entry_points": [
        {{"type": "...", "file": "...", "line": ..., "method": "..."}}
    ],
-    "high_risk_areas": [...],
-    "initial_findings": [...],
+    "high_risk_areas": [
+        "文件路径:行号 - 风险描述"
+    ],
+    "initial_findings": [
+        {{"title": "...", "file_path": "...", "line_start": ..., "description": "..."}}
+    ],
    "summary": "项目侦察总结"
 }}
 ```

+## ⚠️ 重要输出要求
+
+### high_risk_areas 格式要求
+每个高风险区域**必须**包含具体的文件路径，格式为：
+- `"app.py:36 - SECRET_KEY 硬编码"`
+- `"utils/file.py:120 - 使用用户输入构造文件路径"`
+- `"api/views.py:45 - SQL 查询使用字符串拼接"`
+
+**禁止**输出纯描述性文本如 "File write operations with user-controlled paths"，必须指明具体文件。
+
+### initial_findings 格式要求
+每个发现**必须**包含：
+- `title`: 漏洞标题
+- `file_path`: 具体文件路径
+- `line_start`: 行号
+- `description`: 详细描述
+
 {TOOL_USAGE_GUIDE}
 """

--- a/backend/app/services/agent/tools/__init__.py
+++ b/backend/app/services/agent/tools/__init__.py
@ -15,7 +15,35 @@ from .rag_tool import RAGQueryTool, SecurityCodeSearchTool, FunctionContextTool
 from .pattern_tool import PatternMatchTool
 from .code_analysis_tool import CodeAnalysisTool, DataFlowAnalysisTool, VulnerabilityValidationTool
 from .file_tool import FileReadTool, FileSearchTool, ListFilesTool
-from .sandbox_tool import SandboxTool, SandboxHttpTool, VulnerabilityVerifyTool, SandboxManager
+from .sandbox_tool import (
+    SandboxTool,
+    SandboxHttpTool,
+    VulnerabilityVerifyTool,
+    SandboxManager,
+)
+
+# 🔥 多语言代码测试工具
+from .sandbox_language import (
+    PhpTestTool,
+    PythonTestTool,
+    JavaScriptTestTool,
+    JavaTestTool,
+    GoTestTool,
+    RubyTestTool,
+    ShellTestTool,
+    UniversalCodeTestTool,
+)
+
+# 🔥 漏洞验证专用工具
+from .sandbox_vuln import (
+    CommandInjectionTestTool,
+    SqlInjectionTestTool,
+    XssTestTool,
+    PathTraversalTestTool,
+    SstiTestTool,
+    DeserializationTestTool,
+    UniversalVulnTestTool,
+)

 # 外部安全工具
 from .external_tools import (
@ -77,6 +105,25 @@ __all__ = [
    "SandboxHttpTool",
    "VulnerabilityVerifyTool",
    "SandboxManager",
+
+    # 🔥 多语言代码测试工具
+    "PhpTestTool",
+    "PythonTestTool",
+    "JavaScriptTestTool",
+    "JavaTestTool",
+    "GoTestTool",
+    "RubyTestTool",
+    "ShellTestTool",
+    "UniversalCodeTestTool",
+
+    # 🔥 漏洞验证专用工具
+    "CommandInjectionTestTool",
+    "SqlInjectionTestTool",
+    "XssTestTool",
+    "PathTraversalTestTool",
+    "SstiTestTool",
+    "DeserializationTestTool",
+    "UniversalVulnTestTool",
    
    # 外部安全工具
    "SemgrepTool",
--- a/backend/app/services/agent/tools/sandbox_language.py
+++ b/backend/app/services/agent/tools/sandbox_language.py
--- a/backend/app/services/agent/tools/sandbox_tool.py
+++ b/backend/app/services/agent/tools/sandbox_tool.py
@ -44,17 +44,24 @@ class SandboxManager:
    async def initialize(self):
        """初始化 Docker 客户端"""
        if self._initialized:
+            logger.info("✅ SandboxManager already initialized")
            return
-        
+
        try:
            import docker
+            logger.info("🔄 Attempting to connect to Docker...")
            self._docker_client = docker.from_env()
            # 测试连接
            self._docker_client.ping()
            self._initialized = True
-            logger.info("Docker sandbox manager initialized")
+            logger.info("✅ Docker sandbox manager initialized successfully")
+        except ImportError as e:
+            logger.error(f"❌ Docker library not installed: {e}")
+            self._docker_client = None
        except Exception as e:
-            logger.warning(f"Docker not available: {e}")
+            logger.warning(f"❌ Docker not available: {e}")
+            import traceback
+            logger.warning(f"Docker connection traceback: {traceback.format_exc()}")
            self._docker_client = None
    
    @property
@ -462,12 +469,13 @@ class SandboxTool(AgentTool):
    沙箱执行工具
    在安全隔离的环境中执行代码和命令
    """
-    
+
    # 允许的命令前缀
    ALLOWED_COMMANDS = [
        "python", "python3", "node", "curl", "wget",
        "cat", "head", "tail", "grep", "find", "ls",
        "echo", "printf", "test", "id", "whoami",
+        "php",  # 🔥 添加 PHP 支持
    ]
    
    def __init__(self, sandbox_manager: Optional[SandboxManager] = None):
@ -763,3 +771,398 @@ class VulnerabilityVerifyTool(AgentTool):
            }
        )

+
+# ============ PHP 测试工具 ============
+
+class PhpTestInput(BaseModel):
+    """Input schema for the PHP test tool (returned by ``PhpTestTool.args_schema``)."""
+    # Inline PHP source to execute; optional, the alternative to file_path.
+    php_code: Optional[str] = Field(default=None, description="要执行的 PHP 代码（可选，与 file_path 二选一）")
+    # Path of the PHP file to test; optional, the alternative to php_code.
+    file_path: Optional[str] = Field(default=None, description="要测试的 PHP 文件路径（可选，与 php_code 二选一）")
+    # Simulated GET parameters, e.g. {'cmd': 'whoami'}.
+    get_params: Optional[Dict[str, str]] = Field(default=None, description="模拟的 GET 参数，如 {'cmd': 'whoami'}")
+    # Simulated POST parameters.
+    post_params: Optional[Dict[str, str]] = Field(default=None, description="模拟的 POST 参数")
+    # Execution timeout in seconds; defaults to 30.
+    timeout: int = Field(default=30, description="超时时间（秒）")
+
+
+class PhpTestTool(AgentTool):
+    """
+    PHP code test tool.
+
+    Executes PHP source in the sandbox through ``php -r`` and can pre-populate
+    ``$_GET`` / ``$_POST`` so request-driven code paths can be exercised.
+
+    Only one leading opening tag and one trailing closing tag are stripped
+    from the supplied source before it is handed to ``php -r``.
+    """
+
+    def __init__(self, sandbox_manager: Optional[SandboxManager] = None, project_root: str = "."):
+        super().__init__()
+        self.sandbox_manager = sandbox_manager or SandboxManager()
+        self.project_root = project_root
+
+    @property
+    def name(self) -> str:
+        return "php_test"
+
+    @property
+    def description(self) -> str:
+        return """在沙箱中测试 PHP 代码，支持模拟 GET/POST 参数。
+专门用于验证 PHP 漏洞（如命令注入、SQL 注入等）。
+
+输入 (二选一):
+- php_code: 直接提供要执行的 PHP 代码
+- file_path: 项目中的 PHP 文件路径
+
+模拟参数:
+- get_params: 模拟 $_GET 参数，如 {"cmd": "whoami", "id": "1"}
+- post_params: 模拟 $_POST 参数
+
+示例:
+1. 测试命令注入:
+   {"file_path": "vuln.php", "get_params": {"cmd": "whoami"}}
+
+2. 直接测试代码:
+   {"php_code": "<?php echo shell_exec($_GET['cmd']); ?>", "get_params": {"cmd": "id"}}
+
+⚠️ 在沙箱中执行，不影响真实环境。"""
+
+    @property
+    def args_schema(self):
+        return PhpTestInput
+
+    @staticmethod
+    def _php_quote(value) -> str:
+        """Render ``value`` as a PHP single-quoted string literal."""
+        # Backslashes are escaped before quotes; otherwise a value ending in
+        # "\" would swallow the closing quote and break out of the literal.
+        escaped = str(value).replace("\\", "\\\\").replace("'", "\\'")
+        return f"'{escaped}'"
+
+    @classmethod
+    def _superglobal_assignments(cls, array_name: str, params: Optional[Dict[str, str]]) -> list:
+        """Build PHP statements that store ``params`` in ``array_name``."""
+        if not params:
+            return []
+        # Keys are quoted as well as values: both come from tool input.
+        return [
+            f"{array_name}[{cls._php_quote(key)}] = {cls._php_quote(value)};"
+            for key, value in params.items()
+        ]
+
+    @staticmethod
+    def _strip_php_tags(php_code: str) -> str:
+        """Remove a leading ``<?php`` / ``<?`` tag and a trailing ``?>`` tag."""
+        clean_code = php_code.strip()
+        if clean_code.startswith("<?php"):
+            clean_code = clean_code[5:].strip()
+        if clean_code.startswith("<?"):
+            clean_code = clean_code[2:].strip()
+        if clean_code.endswith("?>"):
+            clean_code = clean_code[:-2].strip()
+        return clean_code
+
+    def _resolve_project_file(self, file_path: str) -> Optional[str]:
+        """Return the real path of ``file_path``, or None if it leaves project_root."""
+        import os
+
+        root = os.path.realpath(self.project_root)
+        candidate = os.path.realpath(os.path.join(root, file_path))
+        try:
+            inside = os.path.commonpath([root, candidate]) == root
+        except ValueError:
+            # commonpath rejects paths it cannot compare (e.g. different drives).
+            inside = False
+        return candidate if inside else None
+
+    async def _execute(
+        self,
+        php_code: Optional[str] = None,
+        file_path: Optional[str] = None,
+        get_params: Optional[Dict[str, str]] = None,
+        post_params: Optional[Dict[str, str]] = None,
+        timeout: int = 30,
+        **kwargs
+    ) -> ToolResult:
+        """Run the PHP test in the sandbox.
+
+        Args:
+            php_code: Inline PHP source; ignored when ``file_path`` is given.
+            file_path: PHP file to load, relative to ``project_root``.
+            get_params: Values injected into ``$_GET`` before the code runs.
+            post_params: Values injected into ``$_POST`` before the code runs.
+            timeout: Timeout in seconds forwarded to the sandbox.
+
+        Returns:
+            A ``ToolResult``. ``success`` is False when the sandbox is
+            unavailable, the file is missing or outside the project, or no
+            code was supplied. Otherwise ``data`` holds the formatted report
+            and ``metadata`` the exit code, verdict, evidence and truncated
+            stdout.
+        """
+        try:
+            await self.sandbox_manager.initialize()
+        except Exception as e:
+            logger.warning(f"Sandbox init failed: {e}")
+
+        if not self.sandbox_manager.is_available:
+            return ToolResult(
+                success=False,
+                error="沙箱环境不可用 (Docker Unavailable)",
+            )
+
+        # Load the code from the project when a file path was supplied.
+        if file_path:
+            import os
+
+            full_path = self._resolve_project_file(file_path)
+            if full_path is None:
+                # "../" segments or absolute paths must not expose files
+                # outside the audited project.
+                return ToolResult(
+                    success=False,
+                    error=f"文件路径超出项目目录: {file_path}",
+                )
+            if not os.path.isfile(full_path):
+                return ToolResult(
+                    success=False,
+                    error=f"文件不存在: {file_path}",
+                )
+            with open(full_path, 'r', encoding='utf-8', errors='ignore') as f:
+                php_code = f.read()
+
+        if not php_code:
+            return ToolResult(
+                success=False,
+                error="必须提供 php_code 或 file_path",
+            )
+
+        # `php -r` takes bare statements: the PHP manual notes that opening or
+        # closing tags there cause a parse error, so the wrapper adds none.
+        wrapper_parts = []
+        wrapper_parts.extend(self._superglobal_assignments("$_GET", get_params))
+        wrapper_parts.extend(self._superglobal_assignments("$_POST", post_params))
+        wrapper_parts.append(self._strip_php_tags(php_code))
+
+        full_php_code = "\n".join(wrapper_parts)
+
+        # Quote the whole program as a single shell argument.
+        import shlex
+        command = f"php -r {shlex.quote(full_php_code)}"
+
+        result = await self.sandbox_manager.execute_command(
+            command=command,
+            timeout=timeout,
+        )
+
+        # Format the report.
+        output_parts = ["🐘 PHP 测试结果\n"]
+
+        if get_params:
+            output_parts.append(f"模拟 GET 参数: {get_params}")
+        if post_params:
+            output_parts.append(f"模拟 POST 参数: {post_params}")
+
+        output_parts.append(f"\n退出码: {result['exit_code']}")
+
+        if result["stdout"]:
+            stdout = result["stdout"][:3000]
+            output_parts.append(f"\n输出:\n```\n{stdout}\n```")
+
+        if result["stderr"]:
+            stderr = result["stderr"][:1000]
+            output_parts.append(f"\n错误:\n```\n{stderr}\n```")
+
+        # Heuristic verdict: only a simulated "cmd" GET parameter is analysed.
+        is_vulnerable = False
+        evidence = None
+
+        if result["exit_code"] == 0 and result["stdout"]:
+            stdout_lower = result["stdout"].lower()
+            if get_params and "cmd" in get_params:
+                cmd_value = get_params["cmd"].lower()
+                if cmd_value in ["whoami", "id"]:
+                    # Typical markers printed by whoami / id.
+                    if "root" in stdout_lower or "uid=" in stdout_lower or "www-data" in stdout_lower:
+                        is_vulnerable = True
+                        evidence = f"命令 '{get_params['cmd']}' 执行成功，输出: {result['stdout'][:200]}"
+                elif cmd_value.startswith("echo "):
+                    expected = cmd_value[5:].lower()
+                    if expected in stdout_lower:
+                        is_vulnerable = True
+                        evidence = "Echo 命令执行成功"
+                else:
+                    # Generic check: any non-blank output counts as a
+                    # possible success.
+                    if len(result["stdout"].strip()) > 0:
+                        is_vulnerable = True
+                        evidence = f"命令可能执行成功，输出: {result['stdout'][:200]}"
+
+        if is_vulnerable:
+            output_parts.append(f"\n🔴 **漏洞确认**: {evidence}")
+        else:
+            output_parts.append("\n🟡 未能确认漏洞执行（可能需要检查输出）")
+
+        return ToolResult(
+            success=True,
+            data="\n".join(output_parts),
+            metadata={
+                "exit_code": result["exit_code"],
+                "is_vulnerable": is_vulnerable,
+                "evidence": evidence,
+                "stdout": result["stdout"][:500] if result["stdout"] else None,
+            }
+        )
+
+
+# ============ 命令注入专用测试工具 ============
+
+class CommandInjectionTestInput(BaseModel):
+    """Input schema for the command injection test tool."""
+    # Path of the file under test (e.g. 'vuln.php'); required.
+    target_file: str = Field(description="目标文件路径（如 'vuln.php'）")
+    # Name of the request parameter that receives the payload; defaults to 'cmd'.
+    param_name: str = Field(default="cmd", description="注入参数名（默认 'cmd'）")
+    # Command used as the injection probe; defaults to 'id'.
+    test_command: str = Field(default="id", description="测试命令（默认 'id'）")
+    # Language of the target file; defaults to 'php'.
+    # NOTE(review): the description lists node, but the dispatch visible in
+    # CommandInjectionTestTool._execute only handles php and python — confirm.
+    language: str = Field(default="php", description="目标语言 (php, python, node)")
+
+
+class CommandInjectionTestTool(AgentTool):
+    """
+    命令注入专用测试工具
+    智能检测和验证命令注入漏洞
+    """
+
+    def __init__(self, sandbox_manager: Optional[SandboxManager] = None, project_root: str = "."):
+        super().__init__()
+        self.sandbox_manager = sandbox_manager or SandboxManager()
+        self.project_root = project_root
+
+    @property
+    def name(self) -> str:
+        return "test_command_injection"
+
+    @property
+    def description(self) -> str:
+        return """专门用于测试命令注入漏洞的工具。
+
+输入:
+- target_file: 目标文件路径
+- param_name: 注入参数名（默认 'cmd'）
+- test_command: 测试命令（默认 'id'，也可用 'whoami', 'echo test'）
+- language: 目标语言（php, python, node）
+
+示例:
+{"target_file": "ttt/t.php", "param_name": "cmd", "test_command": "whoami"}
+
+自动执行:
+1. 读取目标文件代码
+2. 构建包含测试命令的执行环境
+3. 在沙箱中执行并分析结果
+4. 判断命令注入是否成功"""
+
+    @property
+    def args_schema(self):
+        return CommandInjectionTestInput
+
+    async def _execute(
+        self,
+        target_file: str,
+        param_name: str = "cmd",
+        test_command: str = "id",
+        language: str = "php",
+        **kwargs
+    ) -> ToolResult:
+        """执行命令注入测试"""
+        try:
+            await self.sandbox_manager.initialize()
+        except Exception as e:
+            logger.warning(f"Sandbox init failed: {e}")
+
+        if not self.sandbox_manager.is_available:
+            return ToolResult(
+                success=False,
+                error="沙箱环境不可用 (Docker Unavailable)",
+            )
+
+        import os
+        full_path = os.path.join(self.project_root, target_file)
+
+        if not os.path.exists(full_path):
+            return ToolResult(
+                success=False,
+                error=f"文件不存在: {target_file}",
+            )
+
+        # 读取文件内容
+        with open(full_path, 'r', encoding='utf-8', errors='ignore') as f:
+            code_content = f.read()
+
+        output_parts = ["🎯 命令注入测试\n"]
+        output_parts.append(f"目标文件: {target_file}")
+        output_parts.append(f"注入参数: {param_name}")
+        output_parts.append(f"测试命令: {test_command}")
+        output_parts.append(f"语言: {language}")
+
+        # 根据语言构建测试
+        if language.lower() == "php":
+            result = await self._test_php_injection(code_content, param_name, test_command)
+        elif language.lower() == "python":
+            result = await self._test_python_injection(code_content, param_name, test_command)
+        else:
+            return ToolResult(
+                success=False,
+                error=f"暂不支持语言: {language}",
+            )
+
+        output_parts.append(f"\n退出码: {result['exit_code']}")
+
+        if result.get("stdout"):
+            output_parts.append(f"\n命令输出:\n```\n{result['stdout'][:2000]}\n```")
+
+        if result.get("stderr"):
+            output_parts.append(f"\n错误输出:\n```\n{result['stderr'][:500]}\n```")
+
+        # 分析结果
+        is_vulnerable = False
+        evidence = None
+        poc = None
+
+        if result["exit_code"] == 0 and result.get("stdout"):
+            stdout = result["stdout"].strip()
+            # 检查命令执行特征
+            if test_command in ["id", "whoami"]:
+                if "uid=" in stdout or "root" in stdout or "www-data" in stdout or stdout.strip():
+                    is_vulnerable = True
+                    evidence = f"命令 '{test_command}' 成功执行，输出: {stdout[:200]}"
+                    poc = f"curl 'http://target/{target_file}?{param_name}={test_command}'"
+            elif test_command.startswith("echo "):
+                expected = test_command[5:]
+                if expected in stdout:
+                    is_vulnerable = True
+                    evidence = f"Echo 测试成功"
+                    poc = f"curl 'http://target/{target_file}?{param_name}=echo+test'"
+            else:
+                if len(stdout) > 0:
+                    is_vulnerable = True
+                    evidence = f"命令可能执行成功，输出: {stdout[:200]}"
+                    poc = f"curl 'http://target/{target_file}?{param_name}={test_command}'"
+
+        if is_vulnerable:
+            output_parts.append(f"\n\n🔴 **漏洞已确认!**")
+            output_parts.append(f"证据: {evidence}")
+            output_parts.append(f"\nPoC: `{poc}`")
+        else:
+            output_parts.append(f"\n\n🟡 未能确认漏洞")
+            if result.get("stderr"):
+                output_parts.append(f"可能原因: 执行错误或参数未正确传递")
+
+        return ToolResult(
+            success=True,
+            data="\n".join(output_parts),
+            metadata={
+                "is_vulnerable": is_vulnerable,
+                "evidence": evidence,
+                "poc": poc,
+                "exit_code": result["exit_code"],
+            }
+        )
+
+    async def _test_php_injection(self, code: str, param_name: str, test_command: str) -> Dict[str, Any]:
+        """测试 PHP 命令注入"""
+        # 构建模拟环境
+        wrapper = f"""<?php
+$_GET['{param_name}'] = '{test_command}';
+$_POST['{param_name}'] = '{test_command}';
+$_REQUEST['{param_name}'] = '{test_command}';
+"""
+        # 移除原代码的 PHP 标签
+        clean_code = code.strip()
+        if clean_code.startswith("<?php"):
+            clean_code = clean_code[5:]
+        elif clean_code.startswith("<?"):
+            clean_code = clean_code[2:]
+        if clean_code.endswith("?>"):
+            clean_code = clean_code[:-2]
+
+        full_code = wrapper + clean_code + "\n?>"
+
+        # 转义并执行
+        escaped_code = full_code.replace("'", "'\"'\"'")
+        command = f"php -r '{escaped_code}'"
+
+        return await self.sandbox_manager.execute_command(command, timeout=30)
+
+    async def _test_python_injection(self, code: str, param_name: str, test_command: str) -> Dict[str, Any]:
+        """测试 Python 命令注入"""
+        # 模拟 request.args.get
+        wrapper = f"""
+import sys
+class MockArgs:
+    def get(self, key, default=None):
+        if key == '{param_name}':
+            return '{test_command}'
+        return default
+
+class MockRequest:
+    args = MockArgs()
+    form = MockArgs()
+
+request = MockRequest()
+sys.argv = ['script.py', '{test_command}']
+
+"""
+        full_code = wrapper + code
+
+        escaped_code = full_code.replace("'", "'\"'\"'")
+        command = f"python3 -c '{escaped_code}'"
+
+        return await self.sandbox_manager.execute_command(command, timeout=30)
--- a/backend/app/services/agent/tools/sandbox_vuln.py
+++ b/backend/app/services/agent/tools/sandbox_vuln.py
--- a/frontend/src/assets/styles/globals.css
+++ b/frontend/src/assets/styles/globals.css
--- a/frontend/src/components/audit/TerminalProgressDialog.tsx
+++ b/frontend/src/components/audit/TerminalProgressDialog.tsx
@ -432,12 +432,13 @@ export default function TerminalProgressDialog({
    return (
        <Dialog open={open} onOpenChange={onOpenChange}>
            <DialogPortal>
-                <DialogOverlay className="bg-black/80 backdrop-blur-sm" />
+                <DialogOverlay className="bg-black/85 backdrop-blur-md" />
                <DialogPrimitive.Content
                    className={cn(
                        "fixed left-[50%] top-[50%] z-50 translate-x-[-50%] translate-y-[-50%]",
                        "w-[95vw] max-w-[1000px] h-[85vh] max-h-[700px]",
-                        "cyber-card p-0 gap-0 overflow-hidden",
+                        "bg-[#08090d] border border-[#1a2535] rounded overflow-hidden",
+                        "shadow-[0_0_60px_rgba(0,0,0,0.8),inset_0_1px_0_rgba(255,255,255,0.02)]",
                        "data-[state=open]:animate-in data-[state=closed]:animate-out",
                        "data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0",
                        "data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95",
@ -453,30 +454,44 @@ export default function TerminalProgressDialog({
                        </DialogPrimitive.Description>
                    </VisuallyHidden.Root>

+                    {/* Scanline overlay */}
+                    <div className="absolute inset-0 pointer-events-none z-20 opacity-30"
+                        style={{
+                            backgroundImage: "repeating-linear-gradient(0deg, transparent, transparent 2px, rgba(0,0,0,0.1) 2px, rgba(0,0,0,0.1) 4px)",
+                        }}
+                    />
+
                    {/* Header */}
-                    <div className="cyber-card-header justify-between">
+                    <div className="flex items-center justify-between px-4 py-3 bg-[#0a0c10] border-b border-[#1a2535]"
+                        style={{ backgroundImage: "linear-gradient(90deg, rgba(255, 95, 31, 0.05) 0%, transparent 50%, rgba(14, 181, 196, 0.05) 100%)" }}>
                        <div className="flex items-center gap-3">
-                            <Terminal className="w-5 h-5 text-primary" />
+                            <Terminal className="w-5 h-5 text-primary" style={{ filter: "drop-shadow(0 0 8px rgba(255, 95, 31, 0.5))" }} />
                            <div>
-                                <span className="text-lg font-bold uppercase tracking-wider text-white">AUDIT_TERMINAL</span>
-                                <span className="text-xs text-gray-500 ml-2">v2.0</span>
+                                <span className="text-lg font-bold uppercase tracking-[0.15em] text-[#f0e6d3]" style={{ textShadow: "0 0 20px rgba(255, 95, 31, 0.3)" }}>AUDIT_TERMINAL</span>
+                                <span className="text-[10px] text-[#5a6577] ml-2 tracking-wider">v3.0</span>
                            </div>
                        </div>

                        <div className="flex items-center gap-4">
                            {/* 状态指示灯 */}
-                            <div className="flex items-center gap-2 px-3 py-1 bg-gray-900 rounded border border-gray-800">
-                                <div className={`w-2 h-2 rounded-full ${!isCompleted && !isFailed && !isCancelled ? 'bg-emerald-400 shadow-[0_0_6px_rgba(52,211,153,0.6)] animate-pulse' : 'bg-gray-600'}`} />
-                                <div className={`w-2 h-2 rounded-full ${isFailed ? 'bg-rose-400 shadow-[0_0_6px_rgba(251,113,133,0.6)]' : 'bg-gray-600'}`} />
-                                <div className={`w-2 h-2 rounded-full ${isCompleted ? 'bg-sky-400 shadow-[0_0_6px_rgba(56,189,248,0.6)]' : 'bg-gray-600'}`} />
+                            <div className="flex items-center gap-2.5 px-3 py-1.5 bg-[#060810] rounded border border-[#1a2535]">
+                                <div className={`w-2.5 h-2.5 rounded-full transition-all duration-300 ${!isCompleted && !isFailed && !isCancelled
+                                    ? 'bg-[#3dd68c] shadow-[0_0_10px_rgba(61,214,140,0.7)] animate-pulse'
+                                    : 'bg-[#3a4555]'}`} />
+                                <div className={`w-2.5 h-2.5 rounded-full transition-all duration-300 ${isFailed
+                                    ? 'bg-[#f87171] shadow-[0_0_10px_rgba(248,113,113,0.7)]'
+                                    : 'bg-[#3a4555]'}`} />
+                                <div className={`w-2.5 h-2.5 rounded-full transition-all duration-300 ${isCompleted
+                                    ? 'bg-[#22d3ee] shadow-[0_0_10px_rgba(34,211,238,0.7)]'
+                                    : 'bg-[#3a4555]'}`} />
                            </div>

                            <button
                                type="button"
-                                className="w-8 h-8 flex items-center justify-center hover:bg-rose-500/20 rounded transition-colors"
+                                className="w-8 h-8 flex items-center justify-center hover:bg-[#e53935]/20 rounded transition-all duration-200 group"
                                onClick={() => onOpenChange(false)}
                            >
-                                <XIcon className="w-5 h-5 text-gray-400 hover:text-rose-400" />
+                                <XIcon className="w-5 h-5 text-[#6a7587] group-hover:text-[#f87171] transition-colors" />
                            </button>
                        </div>
                    </div>
@ -484,19 +499,22 @@ export default function TerminalProgressDialog({
                    {/* Main Content */}
                    <div className="flex h-[calc(100%-56px)]">
                        {/* Left Sidebar - Task Info */}
-                        <div className="w-48 p-4 border-r border-gray-800 bg-gray-900/30 flex flex-col gap-4">
-                            <div className="space-y-1">
-                                <div className="text-[10px] font-bold text-gray-600 uppercase tracking-wider">Task ID</div>
-                                <div className="text-xs font-mono text-primary truncate bg-gray-900 p-2 rounded border border-gray-800">
+                        <div className="w-48 p-4 border-r border-[#1a2535] bg-[#060810] flex flex-col gap-4">
+                            <div className="space-y-1.5">
+                                <div className="text-[9px] font-bold text-[#5a6577] uppercase tracking-[0.15em]">Task ID</div>
+                                <div className="text-xs font-mono text-primary truncate bg-[#0a0c10] p-2.5 rounded border border-[#1a2535]"
+                                    style={{ textShadow: "0 0 10px rgba(255, 95, 31, 0.3)" }}>
                                    {taskId?.slice(0, 8)}...
                                </div>
                            </div>

-                            <div className="space-y-1">
-                                <div className="text-[10px] font-bold text-gray-600 uppercase tracking-wider">Type</div>
-                                <div className="flex items-center gap-2 bg-gray-900 p-2 rounded border border-gray-800">
-                                    {taskType === 'repository' ? <Cpu className="w-3 h-3 text-sky-400" /> : <HardDrive className="w-3 h-3 text-amber-400" />}
-                                    <span className="text-xs font-bold text-gray-300 uppercase">{taskType}</span>
+                            <div className="space-y-1.5">
+                                <div className="text-[9px] font-bold text-[#5a6577] uppercase tracking-[0.15em]">Type</div>
+                                <div className="flex items-center gap-2 bg-[#0a0c10] p-2.5 rounded border border-[#1a2535]">
+                                    {taskType === 'repository'
+                                        ? <Cpu className="w-3.5 h-3.5 text-[#22d3ee]" style={{ filter: "drop-shadow(0 0 6px rgba(34, 211, 238, 0.5))" }} />
+                                        : <HardDrive className="w-3.5 h-3.5 text-[#fbbf24]" style={{ filter: "drop-shadow(0 0 6px rgba(251, 191, 36, 0.5))" }} />}
+                                    <span className="text-xs font-bold text-[#d0d8e8] uppercase tracking-wider">{taskType}</span>
                                </div>
                            </div>

@ -504,7 +522,7 @@ export default function TerminalProgressDialog({

                            {/* Status Badge */}
                            <div className="space-y-2">
-                                <div className="text-[10px] font-bold text-gray-600 uppercase tracking-wider">Status</div>
+                                <div className="text-[9px] font-bold text-[#5a6577] uppercase tracking-[0.15em]">Status</div>
                                {isCancelled ? (
                                    <Badge className="w-full justify-center cyber-badge-warning">CANCELLED</Badge>
                                ) : isCompleted ? (
@ -520,17 +538,18 @@ export default function TerminalProgressDialog({
                        {/* Terminal Screen */}
                        <div className="flex-1 flex flex-col">
                            {/* Terminal Output */}
-                            <div className="flex-1 bg-[#0a0a0f] p-4 overflow-y-auto font-mono text-sm custom-scrollbar relative">
+                            <div className="flex-1 bg-[#050608] p-4 overflow-y-auto font-mono text-sm custom-scrollbar relative">
                                {/* Grid background */}
-                                <div className="absolute inset-0 cyber-grid-subtle pointer-events-none opacity-30" />
+                                <div className="absolute inset-0 cyber-grid-subtle pointer-events-none opacity-40" />

                                <div className="relative z-10 space-y-0.5 pb-10">
                                    {logs.map((log) => (
-                                        <div key={log.id} className="flex items-start gap-3 hover:bg-white/5 px-2 py-0.5 transition-colors group rounded">
-                                            <span className="text-gray-600 text-xs flex-shrink-0 w-20 font-mono">
+                                        <div key={log.id} className="flex items-start gap-3 hover:bg-[#ffffff]/[0.03] px-2 py-0.5 transition-colors group rounded">
+                                            <span className="text-[#4a5565] text-xs flex-shrink-0 w-20 font-mono">
                                                {log.timestamp}
                                            </span>
-                                            <span className={`${getLogColor(log.type)} flex-1 font-mono text-sm`}>
+                                            <span className={`${getLogColor(log.type)} flex-1 font-mono text-sm`}
+                                                style={{ textShadow: log.type === 'success' ? '0 0 8px rgba(61, 214, 140, 0.3)' : log.type === 'error' ? '0 0 8px rgba(248, 113, 113, 0.3)' : log.type === 'warning' ? '0 0 8px rgba(251, 191, 36, 0.3)' : 'none' }}>
                                                {log.message}
                                            </span>
                                        </div>
@ -538,8 +557,8 @@ export default function TerminalProgressDialog({

                                    {!isCompleted && !isFailed && !isCancelled && (
                                        <div className="flex items-center gap-3 mt-4 px-2">
-                                            <span className="text-gray-600 text-xs w-20 font-mono">{currentTime}</span>
-                                            <span className="text-primary animate-pulse font-bold">_</span>
+                                            <span className="text-[#4a5565] text-xs w-20 font-mono">{currentTime}</span>
+                                            <span className="text-primary animate-pulse font-bold" style={{ textShadow: "0 0 10px rgba(255, 95, 31, 0.5)" }}>_</span>
                                        </div>
                                    )}
                                    <div ref={logsEndRef} />
@ -547,11 +566,11 @@ export default function TerminalProgressDialog({
                            </div>

                            {/* Bottom Controls */}
-                            <div className="h-14 px-4 border-t border-gray-800 bg-gray-900/50 flex items-center justify-between">
-                                <div className="flex items-center gap-2 text-xs text-gray-500 font-mono">
-                                    <Activity className="w-3 h-3" />
+                            <div className="h-14 px-4 border-t border-[#1a2535] bg-[#0a0c10]/90 flex items-center justify-between">
+                                <div className="flex items-center gap-2 text-xs text-[#6a7587] font-mono tracking-wide">
+                                    <Activity className="w-3.5 h-3.5" />
                                    <span>
-                                        {isCompleted ? "任务已完成" : isFailed ? "任务失败" : isCancelled ? "任务已取消" : "正在执行..."}
+                                        {isCompleted ? "TASK COMPLETED" : isFailed ? "TASK FAILED" : isCancelled ? "TASK CANCELLED" : "EXECUTING..."}
                                    </span>
                                </div>

@ -561,9 +580,9 @@ export default function TerminalProgressDialog({
                                            size="sm"
                                            variant="outline"
                                            onClick={handleCancel}
-                                            className="h-8 cyber-btn-outline text-amber-400 border-amber-500/30 hover:bg-amber-500/10 hover:border-amber-500/50"
+                                            className="h-8 bg-transparent border-[#fbbf24]/40 text-[#fbbf24] hover:bg-[#fbbf24]/10 hover:border-[#fbbf24]/60 font-mono uppercase tracking-wider text-[10px]"
                                        >
-                                            <AlertTriangle className="w-3 h-3 mr-1" />
+                                            <AlertTriangle className="w-3 h-3 mr-1.5" />
                                            取消任务
                                        </Button>
                                    )}
@ -573,9 +592,9 @@ export default function TerminalProgressDialog({
                                            size="sm"
                                            variant="outline"
                                            onClick={() => window.open('/logs', '_blank')}
-                                            className="h-8 cyber-btn-outline"
+                                            className="h-8 bg-transparent border-[#6a7587]/40 text-[#a8b0c0] hover:bg-[#1a2030]/50 hover:border-[#6a7587]/60 font-mono uppercase tracking-wider text-[10px]"
                                        >
-                                            <Activity className="w-3 h-3 mr-1" />
+                                            <Activity className="w-3 h-3 mr-1.5" />
                                            查看日志
                                        </Button>
                                    )}
@ -584,9 +603,9 @@ export default function TerminalProgressDialog({
                                        <Button
                                            size="sm"
                                            onClick={() => onOpenChange(false)}
-                                            className="h-8 cyber-btn-primary"
+                                            className="h-8 cyber-btn-primary font-mono uppercase tracking-wider text-[10px]"
                                        >
-                                            <CheckCircle2 className="w-3 h-3 mr-1" />
+                                            <CheckCircle2 className="w-3 h-3 mr-1.5" />
                                            确认
                                        </Button>
                                    )}
--- a/frontend/src/pages/AgentAudit/index.tsx
+++ b/frontend/src/pages/AgentAudit/index.tsx
@ -673,37 +673,25 @@ function AgentAuditPageContent() {

  if (isLoading && !task) {
    return (
-      <div className="h-screen bg-[#0a0a0f] flex items-center justify-center relative overflow-hidden">
+      <div className="h-screen bg-[#08090d] flex items-center justify-center relative overflow-hidden">
        {/* Grid background */}
-        <div className="absolute inset-0 opacity-[0.02]"
-          style={{
-            backgroundImage: `
-              linear-gradient(rgba(255,107,44,0.5) 1px, transparent 1px),
-              linear-gradient(90deg, rgba(255,107,44,0.5) 1px, transparent 1px)
-            `,
-            backgroundSize: '32px 32px',
-          }}
-        />
-        <div className="flex items-center gap-3 text-gray-400 relative z-10">
+        <div className="absolute inset-0 cyber-grid opacity-30" />
+        {/* Vignette */}
+        <div className="absolute inset-0 vignette pointer-events-none" />
+        <div className="flex items-center gap-3 text-[#8a95a5] relative z-10">
          <Loader2 className="w-5 h-5 animate-spin text-primary" />
-          <span className="font-mono text-sm">Loading audit task...</span>
+          <span className="font-mono text-sm tracking-wide">LOADING AUDIT TASK...</span>
        </div>
      </div>
    );
  }

  return (
-    <div className="h-screen bg-[#0a0a0f] flex flex-col overflow-hidden relative">
+    <div className="h-screen bg-[#08090d] flex flex-col overflow-hidden relative">
      {/* Subtle grid background */}
-      <div className="absolute inset-0 opacity-[0.015] pointer-events-none"
-        style={{
-          backgroundImage: `
-            linear-gradient(rgba(255,107,44,0.5) 1px, transparent 1px),
-            linear-gradient(90deg, rgba(255,107,44,0.5) 1px, transparent 1px)
-          `,
-          backgroundSize: '48px 48px',
-        }}
-      />
+      <div className="absolute inset-0 cyber-grid-subtle opacity-40 pointer-events-none" />
+      {/* Scanline effect */}
+      <div className="absolute inset-0 scanline-overlay pointer-events-none opacity-50" />

      {/* Header */}
      <Header
@ -718,24 +706,24 @@ function AgentAuditPageContent() {
      {/* Main content */}
      <div className="flex-1 flex overflow-hidden relative">
        {/* Left Panel - Activity Log */}
-        <div className="w-3/4 flex flex-col border-r border-gray-800/50">
+        <div className="w-3/4 flex flex-col border-r border-[#1a2535]">
          {/* Log header */}
-          <div className="flex-shrink-0 h-11 border-b border-gray-800/50 flex items-center justify-between px-4 bg-[#0d0d12]/80 backdrop-blur-sm">
-            <div className="flex items-center gap-3 text-xs text-gray-400">
+          <div className="flex-shrink-0 h-11 border-b border-[#1a2535] flex items-center justify-between px-4 bg-[#0a0c10]/90 backdrop-blur-sm">
+            <div className="flex items-center gap-3 text-xs text-[#8a95a5]">
              <div className="flex items-center gap-2">
-                <Terminal className="w-4 h-4 text-gray-500" />
-                <span className="uppercase font-bold tracking-wider text-gray-300">Activity Log</span>
+                <Terminal className="w-4 h-4 text-[#5a6577]" />
+                <span className="uppercase font-bold tracking-[0.15em] text-[#d0d8e8]">Activity Log</span>
              </div>
              {isConnected && (
-                <div className="flex items-center gap-1.5 text-green-400">
+                <div className="flex items-center gap-1.5 text-[#3dd68c]">
                  <span className="relative flex h-2 w-2">
-                    <span className="animate-ping absolute inline-flex h-full w-full rounded-full bg-green-400 opacity-75"></span>
-                    <span className="relative inline-flex rounded-full h-2 w-2 bg-green-400"></span>
+                    <span className="animate-ping absolute inline-flex h-full w-full rounded-full bg-[#3dd68c] opacity-75"></span>
+                    <span className="relative inline-flex rounded-full h-2 w-2 bg-[#3dd68c] shadow-[0_0_8px_rgba(61,214,140,0.5)]"></span>
                  </span>
-                  <span className="text-[10px] font-mono uppercase">Live</span>
+                  <span className="text-[10px] font-mono uppercase tracking-wider">Live</span>
                </div>
              )}
-              <Badge variant="outline" className="h-5 px-1.5 text-[10px] border-gray-700/50 text-gray-500 font-mono">
+              <Badge variant="outline" className="h-5 px-1.5 text-[10px] border-[#2a3545] text-[#6a7587] font-mono bg-[#0d1015]">
                {filteredLogs.length}{!showAllLogs && logs.length !== filteredLogs.length ? ` / ${logs.length}` : ''}
              </Badge>
            </div>
@ -743,11 +731,11 @@ function AgentAuditPageContent() {
            <button
              onClick={() => setAutoScroll(!isAutoScroll)}
              className={`
-                flex items-center gap-1.5 text-[10px] px-2 py-1 rounded font-mono uppercase tracking-wider
+                flex items-center gap-1.5 text-[10px] px-2.5 py-1 rounded font-mono uppercase tracking-wider
                transition-all duration-200
                ${isAutoScroll
-                  ? 'bg-primary/20 text-primary border border-primary/30'
-                  : 'text-gray-500 hover:text-gray-300 border border-transparent hover:border-gray-700'
+                  ? 'bg-primary/15 text-primary border border-primary/40 shadow-[0_0_10px_rgba(255,95,31,0.15)]'
+                  : 'text-[#6a7587] hover:text-[#a8b0c0] border border-transparent hover:border-[#2a3545] hover:bg-[#1a2030]/50'
                }
              `}
            >
@ -757,17 +745,17 @@ function AgentAuditPageContent() {
          </div>

          {/* Log content */}
-          <div className="flex-1 overflow-y-auto p-4 custom-scrollbar">
+          <div className="flex-1 overflow-y-auto p-4 custom-scrollbar bg-[#060810]/50">
            {/* Filter indicator */}
            {selectedAgentId && !showAllLogs && (
-              <div className="mb-3 px-3 py-2 bg-primary/5 border border-primary/20 rounded flex items-center justify-between">
+              <div className="mb-3 px-3 py-2 bg-primary/8 border border-primary/25 rounded flex items-center justify-between">
                <div className="flex items-center gap-2 text-xs text-primary">
                  <Filter className="w-3.5 h-3.5" />
-                  <span>Filtering logs for selected agent</span>
+                  <span className="tracking-wide">Filtering logs for selected agent</span>
                </div>
                <button
                  onClick={() => selectAgent(null)}
-                  className="text-[10px] text-gray-400 hover:text-white transition-colors font-mono uppercase"
+                  className="text-[10px] text-[#6a7587] hover:text-[#d0d8e8] transition-colors font-mono uppercase tracking-wider"
                >
                  Clear
                </button>
@ -777,21 +765,21 @@ function AgentAuditPageContent() {
            {/* Logs */}
            {filteredLogs.length === 0 ? (
              <div className="h-full flex items-center justify-center">
-                <div className="text-center text-gray-600">
+                <div className="text-center text-[#4a5565]">
                  {isRunning ? (
                    <div className="flex flex-col items-center gap-3">
-                      <Loader2 className="w-6 h-6 animate-spin text-gray-500" />
-                      <span className="text-sm">
+                      <Loader2 className="w-6 h-6 animate-spin text-[#5a6577]" />
+                      <span className="text-sm font-mono tracking-wide">
                        {selectedAgentId && !showAllLogs
-                          ? 'Waiting for activity from selected agent...'
-                          : 'Waiting for agent activity...'}
+                          ? 'WAITING FOR ACTIVITY FROM SELECTED AGENT...'
+                          : 'WAITING FOR AGENT ACTIVITY...'}
                      </span>
                    </div>
                  ) : (
-                    <span className="text-sm">
+                    <span className="text-sm font-mono tracking-wide">
                      {selectedAgentId && !showAllLogs
-                        ? 'No activity from selected agent'
-                        : 'No activity yet'}
+                        ? 'NO ACTIVITY FROM SELECTED AGENT'
+                        : 'NO ACTIVITY YET'}
                    </span>
                  )}
                </div>
@ -813,36 +801,36 @@ function AgentAuditPageContent() {

          {/* Status bar */}
          {task && (
-            <div className="flex-shrink-0 h-9 border-t border-gray-800/50 flex items-center justify-between px-4 text-xs bg-[#0d0d12]/80 backdrop-blur-sm">
+            <div className="flex-shrink-0 h-9 border-t border-[#1a2535] flex items-center justify-between px-4 text-xs bg-[#0a0c10]/90 backdrop-blur-sm">
              <span>
                {isRunning ? (
-                  <span className="flex items-center gap-2 text-green-400">
+                  <span className="flex items-center gap-2 text-[#3dd68c]">
                    <span className="relative flex h-1.5 w-1.5">
-                      <span className="animate-ping absolute inline-flex h-full w-full rounded-full bg-green-400 opacity-75"></span>
-                      <span className="relative inline-flex rounded-full h-1.5 w-1.5 bg-green-400"></span>
+                      <span className="animate-ping absolute inline-flex h-full w-full rounded-full bg-[#3dd68c] opacity-75"></span>
+                      <span className="relative inline-flex rounded-full h-1.5 w-1.5 bg-[#3dd68c] shadow-[0_0_6px_rgba(61,214,140,0.5)]"></span>
                    </span>
-                    <span className="font-mono">{statusVerb}{'.'.repeat(statusDots)}</span>
+                    <span className="font-mono tracking-wide">{statusVerb}{'.'.repeat(statusDots)}</span>
                  </span>
                ) : isComplete ? (
-                  <span className="text-gray-500 font-mono">Audit {task.status}</span>
+                  <span className="text-[#6a7587] font-mono tracking-wide">AUDIT {task.status?.toUpperCase()}</span>
                ) : (
-                  <span className="text-gray-600 font-mono">Ready</span>
+                  <span className="text-[#4a5565] font-mono tracking-wide">READY</span>
                )}
              </span>
-              <div className="flex items-center gap-4 font-mono text-gray-500">
+              <div className="flex items-center gap-4 font-mono text-[#6a7587]">
                <span>
-                  <span className="text-primary">{task.progress_percentage?.toFixed(0) || 0}</span>
-                  <span className="text-gray-600">%</span>
+                  <span className="text-primary text-glow-primary">{task.progress_percentage?.toFixed(0) || 0}</span>
+                  <span className="text-[#4a5565]">%</span>
                </span>
-                <span className="text-gray-700">|</span>
+                <span className="text-[#2a3545]">│</span>
                <span>
-                  <span className="text-gray-400">{task.analyzed_files}</span>
-                  <span className="text-gray-600">/{task.total_files} files</span>
+                  <span className="text-[#a8b0c0]">{task.analyzed_files}</span>
+                  <span className="text-[#4a5565]">/{task.total_files} files</span>
                </span>
-                <span className="text-gray-700">|</span>
+                <span className="text-[#2a3545]">│</span>
                <span>
-                  <span className="text-gray-400">{task.tool_calls_count || 0}</span>
-                  <span className="text-gray-600"> tools</span>
+                  <span className="text-[#a8b0c0]">{task.tool_calls_count || 0}</span>
+                  <span className="text-[#4a5565]"> tools</span>
                </span>
              </div>
            </div>
@ -850,16 +838,16 @@ function AgentAuditPageContent() {
        </div>

        {/* Right Panel - Agent Tree + Stats */}
-        <div className="w-1/4 flex flex-col bg-[#0b0b10]">
+        <div className="w-1/4 flex flex-col bg-[#080a0e]">
          {/* Agent Tree section */}
-          <div className="flex-1 flex flex-col border-b border-gray-800/50 overflow-hidden">
+          <div className="flex-1 flex flex-col border-b border-[#1a2535] overflow-hidden">
            {/* Tree header */}
-            <div className="flex-shrink-0 h-11 border-b border-gray-800/50 flex items-center justify-between px-4 bg-[#0d0d12]/80">
-              <div className="flex items-center gap-2 text-xs text-gray-400">
-                <Bot className="w-4 h-4 text-gray-500" />
-                <span className="uppercase font-bold tracking-wider text-gray-300">Agent Tree</span>
+            <div className="flex-shrink-0 h-11 border-b border-[#1a2535] flex items-center justify-between px-4 bg-[#0a0c10]/90">
+              <div className="flex items-center gap-2 text-xs text-[#8a95a5]">
+                <Bot className="w-4 h-4 text-[#5a6577]" />
+                <span className="uppercase font-bold tracking-[0.15em] text-[#d0d8e8]">Agent Tree</span>
                {agentTree && (
-                  <Badge variant="outline" className="h-5 px-1.5 text-[10px] border-gray-700/50 text-gray-500 font-mono">
+                  <Badge variant="outline" className="h-5 px-1.5 text-[10px] border-[#2a3545] text-[#6a7587] font-mono bg-[#0d1015]">
                    {agentTree.total_agents}
                  </Badge>
                )}
@ -868,16 +856,16 @@ function AgentAuditPageContent() {
                {selectedAgentId && !showAllLogs && (
                  <button
                    onClick={() => selectAgent(null)}
-                    className="text-[10px] text-primary hover:text-primary/80 transition-colors font-mono uppercase"
+                    className="text-[10px] text-primary hover:text-primary/80 transition-colors font-mono uppercase tracking-wider"
                  >
                    Show All
                  </button>
                )}
                {agentTree && agentTree.running_agents > 0 && (
-                  <div className="flex items-center gap-1.5 text-green-400">
+                  <div className="flex items-center gap-1.5 text-[#3dd68c]">
                    <span className="relative flex h-1.5 w-1.5">
-                      <span className="animate-ping absolute inline-flex h-full w-full rounded-full bg-green-400 opacity-75"></span>
-                      <span className="relative inline-flex rounded-full h-1.5 w-1.5 bg-green-400"></span>
+                      <span className="animate-ping absolute inline-flex h-full w-full rounded-full bg-[#3dd68c] opacity-75"></span>
+                      <span className="relative inline-flex rounded-full h-1.5 w-1.5 bg-[#3dd68c] shadow-[0_0_6px_rgba(61,214,140,0.5)]"></span>
                    </span>
                    <span className="text-[10px] font-mono">{agentTree.running_agents}</span>
                  </div>
@ -886,7 +874,7 @@ function AgentAuditPageContent() {
            </div>

            {/* Tree content */}
-            <div className="flex-1 overflow-y-auto p-2 custom-scrollbar">
+            <div className="flex-1 overflow-y-auto p-2 custom-scrollbar bg-[#060810]/50">
              {treeNodes.length > 0 ? (
                treeNodes.map(node => (
                  <AgentTreeNodeItem
@ -897,14 +885,14 @@ function AgentAuditPageContent() {
                  />
                ))
              ) : (
-                <div className="h-full flex items-center justify-center text-gray-600 text-xs">
+                <div className="h-full flex items-center justify-center text-[#4a5565] text-xs">
                  {isRunning ? (
                    <div className="flex items-center gap-2">
                      <Loader2 className="w-3 h-3 animate-spin" />
-                      <span>Initializing agents...</span>
+                      <span className="font-mono tracking-wide">INITIALIZING AGENTS...</span>
                    </div>
                  ) : (
-                    'No agents yet'
+                    <span className="font-mono tracking-wide">NO AGENTS YET</span>
                  )}
                </div>
              )}