""" Tracer - 审计追踪器 提供完整的审计过程追踪,包括: - Agent 创建和状态变化 - 工具执行记录 - 漏洞报告管理 - 最终扫描结果 - 数据持久化 """ import csv import json import logging from app.core.timezone import get_now, get_now_iso from pathlib import Path from typing import Any, Callable, Dict, List, Optional from uuid import uuid4 logger = logging.getLogger(__name__) # 全局 Tracer 实例 _global_tracer: Optional["Tracer"] = None def get_global_tracer() -> Optional["Tracer"]: """获取全局 Tracer 实例""" return _global_tracer def set_global_tracer(tracer: "Tracer") -> None: """设置全局 Tracer 实例""" global _global_tracer _global_tracer = tracer class Tracer: """ 审计追踪器 追踪整个审计过程,支持: - Agent 生命周期追踪 - 工具执行记录 - 漏洞报告收集 - 数据持久化到文件 """ def __init__( self, run_name: Optional[str] = None, output_dir: Optional[Path] = None, ): # 运行标识 self.run_name = run_name self.run_id = run_name or f"run-{uuid4().hex[:8]}" self.start_time = get_now_iso() self.end_time: Optional[str] = None # 追踪数据 self.agents: Dict[str, Dict[str, Any]] = {} self.tool_executions: Dict[int, Dict[str, Any]] = {} self.chat_messages: List[Dict[str, Any]] = [] # 漏洞报告 self.vulnerability_reports: List[Dict[str, Any]] = [] self.final_scan_result: Optional[str] = None # 扫描配置和结果 self.scan_config: Optional[Dict[str, Any]] = None self.scan_results: Optional[Dict[str, Any]] = None # 元数据 self.run_metadata: Dict[str, Any] = { "run_id": self.run_id, "run_name": self.run_name, "start_time": self.start_time, "end_time": None, "status": "running", } # 输出目录 self._output_dir = output_dir self._run_dir: Optional[Path] = None # 计数器 self._next_execution_id = 1 self._next_message_id = 1 self._saved_vuln_ids: set = set() # 回调函数 self.vulnerability_found_callback: Optional[Callable[[str, str, str, str], None]] = None self.agent_status_callback: Optional[Callable[[str, str], None]] = None def set_run_name(self, run_name: str) -> None: """设置运行名称""" self.run_name = run_name self.run_id = run_name self.run_metadata["run_name"] = run_name self.run_metadata["run_id"] = run_name def get_run_dir(self) -> Path: """获取运行输出目录""" if self._run_dir is None: if self._output_dir: base_dir = self._output_dir else: base_dir = Path.cwd() / "audit_runs" base_dir.mkdir(exist_ok=True) run_dir_name = self.run_name or self.run_id # 清理非法字符 run_dir_name = "".join( c if c.isalnum() or c in "-_" else "_" for c in run_dir_name ) self._run_dir = base_dir / run_dir_name self._run_dir.mkdir(exist_ok=True) return self._run_dir def set_scan_config(self, config: Dict[str, Any]) -> None: """设置扫描配置""" self.scan_config = config self.run_metadata.update({ "project_name": config.get("project_name", ""), "scan_type": config.get("scan_type", ""), "files_count": len(config.get("files", [])), }) # 初始化输出目录 self.get_run_dir() # ============ Agent 追踪 ============ def log_agent_creation( self, agent_id: str, name: str, task: str, parent_id: Optional[str] = None, agent_type: str = "generic", ) -> None: """记录 Agent 创建""" agent_data = { "id": agent_id, "name": name, "task": task, "type": agent_type, "status": "running", "parent_id": parent_id, "created_at": get_now_iso(), "updated_at": get_now_iso(), "tool_executions": [], "findings_count": 0, } self.agents[agent_id] = agent_data logger.debug(f"Tracer: Agent created - {name} ({agent_id})") def update_agent_status( self, agent_id: str, status: str, error_message: Optional[str] = None, ) -> None: """更新 Agent 状态""" if agent_id in self.agents: self.agents[agent_id]["status"] = status self.agents[agent_id]["updated_at"] = get_now_iso() if error_message: self.agents[agent_id]["error_message"] = error_message if status in ["completed", "failed", "stopped"]: self.agents[agent_id]["finished_at"] = get_now_iso() # 触发回调 if self.agent_status_callback: try: self.agent_status_callback(agent_id, status) except Exception as e: logger.warning(f"Agent status callback failed: {e}") # ============ 工具执行追踪 ============ def log_tool_execution_start( self, agent_id: str, tool_name: str, args: Dict[str, Any], ) -> int: """记录工具执行开始""" execution_id = self._next_execution_id self._next_execution_id += 1 now = get_now_iso() # 清理过大的参数 cleaned_args = self._clean_args(args) execution_data = { "execution_id": execution_id, "agent_id": agent_id, "tool_name": tool_name, "args": cleaned_args, "status": "running", "result": None, "started_at": now, "completed_at": None, } self.tool_executions[execution_id] = execution_data # 关联到 Agent if agent_id in self.agents: self.agents[agent_id]["tool_executions"].append(execution_id) return execution_id def update_tool_execution( self, execution_id: int, status: str, result: Any = None, ) -> None: """更新工具执行状态""" if execution_id in self.tool_executions: self.tool_executions[execution_id]["status"] = status self.tool_executions[execution_id]["completed_at"] = get_now_iso() # 清理过大的结果 if result is not None: self.tool_executions[execution_id]["result"] = self._clean_result(result) def _clean_args(self, args: Dict[str, Any], max_length: int = 1000) -> Dict[str, Any]: """清理参数,限制长度""" cleaned = {} for key, value in args.items(): if isinstance(value, str) and len(value) > max_length: cleaned[key] = value[:max_length] + "... [truncated]" elif isinstance(value, (list, dict)): try: serialized = json.dumps(value, ensure_ascii=False) if len(serialized) > max_length: cleaned[key] = f"[{type(value).__name__} with {len(value)} items, truncated]" else: cleaned[key] = value except (TypeError, ValueError): cleaned[key] = str(value)[:max_length] else: cleaned[key] = value return cleaned def _clean_result(self, result: Any, max_length: int = 2000) -> Any: """清理结果,限制长度""" if isinstance(result, str): if len(result) > max_length: return result[:max_length] + "... [truncated]" return result if isinstance(result, dict): cleaned = {} for key, value in result.items(): cleaned[key] = self._clean_result(value, max_length // 2) return cleaned if isinstance(result, list): if len(result) > 20: return [self._clean_result(item, max_length // 4) for item in result[:20]] + [ f"... and {len(result) - 20} more items" ] return [self._clean_result(item, max_length // 2) for item in result] return result # ============ 消息追踪 ============ def log_chat_message( self, content: str, role: str, agent_id: Optional[str] = None, metadata: Optional[Dict[str, Any]] = None, ) -> int: """记录聊天消息""" message_id = self._next_message_id self._next_message_id += 1 # 清理过长的内容 if len(content) > 5000: content = content[:5000] + "... [truncated]" message_data = { "message_id": message_id, "content": content, "role": role, "agent_id": agent_id, "timestamp": get_now_iso(), "metadata": metadata or {}, } self.chat_messages.append(message_data) return message_id # ============ 漏洞报告 ============ def add_vulnerability_report( self, title: str, content: str, severity: str, agent_id: Optional[str] = None, vulnerability_type: Optional[str] = None, file_path: Optional[str] = None, ) -> str: """添加漏洞报告""" report_id = f"vuln-{len(self.vulnerability_reports) + 1:04d}" report = { "id": report_id, "title": title.strip(), "content": content.strip(), "severity": severity.lower().strip(), "vulnerability_type": vulnerability_type, "file_path": file_path, "agent_id": agent_id, "timestamp": get_now().strftime("%Y-%m-%d %H:%M:%S") + " CST", } self.vulnerability_reports.append(report) logger.info(f"Tracer: Vulnerability report added - {report_id}: {title}") # 更新 Agent 统计 if agent_id and agent_id in self.agents: self.agents[agent_id]["findings_count"] = ( self.agents[agent_id].get("findings_count", 0) + 1 ) # 触发回调 if self.vulnerability_found_callback: try: self.vulnerability_found_callback( report_id, title.strip(), content.strip(), severity.lower().strip(), ) except Exception as e: logger.warning(f"Vulnerability callback failed: {e}") # 自动保存 self._save_vulnerability_reports() return report_id def set_final_scan_result( self, content: str, success: bool = True, ) -> None: """设置最终扫描结果""" self.final_scan_result = content.strip() self.scan_results = { "scan_completed": True, "content": content, "success": success, "completed_at": get_now_iso(), "total_vulnerabilities": len(self.vulnerability_reports), } self.run_metadata["status"] = "completed" if success else "failed" self.end_time = get_now_iso() self.run_metadata["end_time"] = self.end_time logger.info(f"Tracer: Final scan result set, success={success}") # 保存所有数据 self.save_run_data(mark_complete=True) # ============ 数据持久化 ============ def save_run_data(self, mark_complete: bool = False) -> None: """保存运行数据""" try: run_dir = self.get_run_dir() if mark_complete: self.end_time = get_now_iso() self.run_metadata["end_time"] = self.end_time # 保存最终报告 if self.final_scan_result: self._save_final_report(run_dir) # 保存漏洞报告 self._save_vulnerability_reports() # 保存运行元数据 self._save_metadata(run_dir) logger.info(f"Tracer: Run data saved to {run_dir}") except Exception as e: logger.exception(f"Failed to save run data: {e}") def _save_final_report(self, run_dir: Path) -> None: """保存最终报告""" report_file = run_dir / "security_audit_report.md" with report_file.open("w", encoding="utf-8") as f: f.write("# 安全审计报告\n\n") f.write(f"**生成时间:** {get_now().strftime('%Y-%m-%d %H:%M:%S')} CST\n") f.write(f"**运行ID:** {self.run_id}\n\n") # 统计信息 f.write("## 审计概述\n\n") f.write(f"- 发现漏洞数: {len(self.vulnerability_reports)}\n") f.write(f"- 参与Agent数: {len(self.agents)}\n") f.write(f"- 工具调用数: {len(self.tool_executions)}\n\n") # 漏洞统计 if self.vulnerability_reports: severity_counts = {} for vuln in self.vulnerability_reports: severity = vuln.get("severity", "unknown") severity_counts[severity] = severity_counts.get(severity, 0) + 1 f.write("### 漏洞严重性分布\n\n") for severity, count in sorted(severity_counts.items()): f.write(f"- {severity.upper()}: {count}\n") f.write("\n") f.write("---\n\n") f.write(f"{self.final_scan_result}\n") logger.info(f"Saved final report to: {report_file}") def _save_vulnerability_reports(self) -> None: """保存漏洞报告""" if not self.vulnerability_reports: return try: run_dir = self.get_run_dir() vuln_dir = run_dir / "vulnerabilities" vuln_dir.mkdir(exist_ok=True) # 只保存新的报告 new_reports = [ report for report in self.vulnerability_reports if report["id"] not in self._saved_vuln_ids ] for report in new_reports: vuln_file = vuln_dir / f"{report['id']}.md" with vuln_file.open("w", encoding="utf-8") as f: f.write(f"# {report['title']}\n\n") f.write(f"**ID:** {report['id']}\n") f.write(f"**严重性:** {report['severity'].upper()}\n") f.write(f"**发现时间:** {report['timestamp']}\n") if report.get("vulnerability_type"): f.write(f"**漏洞类型:** {report['vulnerability_type']}\n") if report.get("file_path"): f.write(f"**文件位置:** {report['file_path']}\n") f.write("\n## 详细描述\n\n") f.write(f"{report['content']}\n") self._saved_vuln_ids.add(report["id"]) # 保存漏洞索引 CSV if self.vulnerability_reports: csv_file = run_dir / "vulnerabilities.csv" severity_order = {"critical": 0, "high": 1, "medium": 2, "low": 3, "info": 4} sorted_reports = sorted( self.vulnerability_reports, key=lambda x: (severity_order.get(x["severity"], 5), x["timestamp"]), ) with csv_file.open("w", encoding="utf-8", newline="") as f: fieldnames = ["id", "title", "severity", "type", "file", "timestamp"] writer = csv.DictWriter(f, fieldnames=fieldnames) writer.writeheader() for report in sorted_reports: writer.writerow({ "id": report["id"], "title": report["title"], "severity": report["severity"].upper(), "type": report.get("vulnerability_type", ""), "file": report.get("file_path", ""), "timestamp": report["timestamp"], }) if new_reports: logger.info(f"Saved {len(new_reports)} new vulnerability reports to {vuln_dir}") except Exception as e: logger.warning(f"Failed to save vulnerability reports: {e}") def _save_metadata(self, run_dir: Path) -> None: """保存运行元数据""" metadata_file = run_dir / "run_metadata.json" metadata = { **self.run_metadata, "agents_count": len(self.agents), "tool_executions_count": len(self.tool_executions), "vulnerabilities_count": len(self.vulnerability_reports), "duration_seconds": self._calculate_duration(), } with metadata_file.open("w", encoding="utf-8") as f: json.dump(metadata, f, ensure_ascii=False, indent=2) def _calculate_duration(self) -> float: """计算运行时长""" try: start = datetime.fromisoformat(self.start_time) if self.end_time: end = datetime.fromisoformat(self.end_time) else: end = get_now() return (end - start).total_seconds() except (ValueError, TypeError): return 0.0 # ============ 统计和查询 ============ def get_agent_tools(self, agent_id: str) -> List[Dict[str, Any]]: """获取 Agent 的工具执行记录""" return [ exec_data for exec_data in self.tool_executions.values() if exec_data.get("agent_id") == agent_id ] def get_real_tool_count(self) -> int: """获取实际工具执行次数(排除系统工具)""" system_tools = {"scan_start_info", "subagent_start_info"} return sum( 1 for exec_data in self.tool_executions.values() if exec_data.get("tool_name") not in system_tools ) def get_statistics(self) -> Dict[str, Any]: """获取统计信息""" agent_stats = {"running": 0, "completed": 0, "failed": 0, "stopped": 0} for agent in self.agents.values(): status = agent.get("status", "unknown") if status in agent_stats: agent_stats[status] += 1 vuln_stats = {"critical": 0, "high": 0, "medium": 0, "low": 0, "info": 0} for vuln in self.vulnerability_reports: severity = vuln.get("severity", "medium") if severity in vuln_stats: vuln_stats[severity] += 1 return { "agents": agent_stats, "vulnerabilities": vuln_stats, "total_agents": len(self.agents), "total_vulnerabilities": len(self.vulnerability_reports), "total_tool_executions": self.get_real_tool_count(), "duration_seconds": self._calculate_duration(), } def cleanup(self) -> None: """清理并保存最终数据""" self.save_run_data(mark_complete=True)