# CodeReview/backend/app/services/agent/telemetry/tracer.py
#
# 573 lines
# 20 KiB
# Python

"""
Tracer - 审计追踪器
提供完整的审计过程追踪,包括:
- Agent 创建和状态变化
- 工具执行记录
- 漏洞报告管理
- 最终扫描结果
- 数据持久化
"""
import csv
import json
import logging
from datetime import datetime
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional
from uuid import uuid4

from app.core.timezone import get_now, get_now_iso
logger = logging.getLogger(__name__)
# Module-level Tracer instance; None until set_global_tracer() is called.
_global_tracer: Optional["Tracer"] = None
def get_global_tracer() -> Optional["Tracer"]:
    """Return the module-level Tracer instance, or None if none has been set."""
    return _global_tracer
def set_global_tracer(tracer: "Tracer") -> None:
    """Install *tracer* as the module-level Tracer instance.

    Args:
        tracer: The instance that get_global_tracer() should return from now on.
    """
    global _global_tracer
    _global_tracer = tracer
class Tracer:
    """Audit tracer.

    Tracks a whole audit run and supports:
    - agent lifecycle tracking
    - tool execution records
    - vulnerability report collection
    - persisting the collected data to files
    """

    # Agent statuses that stamp ``finished_at`` on the agent record.
    _TERMINAL_STATUSES = ("completed", "failed", "stopped")
    # Lower bound for the per-level truncation budget in ``_clean_result``;
    # without it the budget halves down to 0 for deeply nested results.
    _MIN_CLEAN_LENGTH = 64

    def __init__(
        self,
        run_name: Optional[str] = None,
        output_dir: Optional[Path] = None,
    ):
        """Create an empty trace for one audit run.

        Args:
            run_name: Optional run name. When given it is also used as the
                run id; otherwise a random ``run-<8 hex chars>`` id is used.
            output_dir: Optional base directory for persisted files. When
                omitted, ``<cwd>/audit_runs`` is used (see ``get_run_dir``).
        """
        # Run identity
        self.run_name = run_name
        self.run_id = run_name or f"run-{uuid4().hex[:8]}"
        self.start_time = get_now_iso()
        self.end_time: Optional[str] = None

        # Trace data
        self.agents: Dict[str, Dict[str, Any]] = {}
        self.tool_executions: Dict[int, Dict[str, Any]] = {}
        self.chat_messages: List[Dict[str, Any]] = []

        # Vulnerability reports
        self.vulnerability_reports: List[Dict[str, Any]] = []
        self.final_scan_result: Optional[str] = None

        # Scan configuration and results
        self.scan_config: Optional[Dict[str, Any]] = None
        self.scan_results: Optional[Dict[str, Any]] = None

        # Metadata written to run_metadata.json
        self.run_metadata: Dict[str, Any] = {
            "run_id": self.run_id,
            "run_name": self.run_name,
            "start_time": self.start_time,
            "end_time": None,
            "status": "running",
        }

        # Output directory (the run directory is created on first use)
        self._output_dir = output_dir
        self._run_dir: Optional[Path] = None

        # Counters and bookkeeping
        self._next_execution_id = 1
        self._next_message_id = 1
        self._saved_vuln_ids: set = set()

        # Callbacks
        # (report_id, title, content, severity)
        self.vulnerability_found_callback: Optional[Callable[[str, str, str, str], None]] = None
        # (agent_id, status)
        self.agent_status_callback: Optional[Callable[[str, str], None]] = None

    def set_run_name(self, run_name: str) -> None:
        """Set the run name; the run id and metadata are updated to match."""
        self.run_name = run_name
        self.run_id = run_name
        self.run_metadata["run_name"] = run_name
        self.run_metadata["run_id"] = run_name

    def get_run_dir(self) -> Path:
        """Return the output directory of this run, creating it on first use.

        The directory lives under ``output_dir`` (or ``<cwd>/audit_runs``) and
        is named after the run name / run id with every character other than
        alphanumerics, ``-`` and ``_`` replaced by ``_``. The result is cached.
        """
        if self._run_dir is None:
            if self._output_dir:
                base_dir = Path(self._output_dir)
            else:
                base_dir = Path.cwd() / "audit_runs"
            # parents=True: a nested output_dir that does not exist yet must
            # not make every later save fail.
            base_dir.mkdir(parents=True, exist_ok=True)

            run_dir_name = self.run_name or self.run_id
            # Replace characters that are not safe in a directory name.
            run_dir_name = "".join(
                c if c.isalnum() or c in "-_" else "_"
                for c in run_dir_name
            )
            self._run_dir = base_dir / run_dir_name
            self._run_dir.mkdir(exist_ok=True)
        return self._run_dir

    def set_scan_config(self, config: Dict[str, Any]) -> None:
        """Store the scan configuration and copy key fields into the metadata.

        Also creates the run output directory.
        """
        self.scan_config = config
        self.run_metadata.update({
            "project_name": config.get("project_name", ""),
            "scan_type": config.get("scan_type", ""),
            # ``or []`` tolerates an explicit ``"files": None``.
            "files_count": len(config.get("files") or []),
        })
        # Initialise the output directory
        self.get_run_dir()

    # ============ Agent tracking ============

    def log_agent_creation(
        self,
        agent_id: str,
        name: str,
        task: str,
        parent_id: Optional[str] = None,
        agent_type: str = "generic",
    ) -> None:
        """Record a newly created agent with status ``running``.

        An existing record with the same ``agent_id`` is replaced.
        """
        now = get_now_iso()
        self.agents[agent_id] = {
            "id": agent_id,
            "name": name,
            "task": task,
            "type": agent_type,
            "status": "running",
            "parent_id": parent_id,
            "created_at": now,
            "updated_at": now,
            "tool_executions": [],
            "findings_count": 0,
        }
        logger.debug("Tracer: Agent created - %s (%s)", name, agent_id)

    def update_agent_status(
        self,
        agent_id: str,
        status: str,
        error_message: Optional[str] = None,
    ) -> None:
        """Update the status of a known agent and notify the status callback.

        Args:
            agent_id: Id of the agent; unknown ids leave the records untouched.
            status: New status. ``completed``, ``failed`` and ``stopped``
                additionally stamp ``finished_at``.
            error_message: Optional error text stored on the agent record.
        """
        agent = self.agents.get(agent_id)
        if agent is not None:
            agent["status"] = status
            agent["updated_at"] = get_now_iso()
            if error_message:
                agent["error_message"] = error_message
            if status in self._TERMINAL_STATUSES:
                agent["finished_at"] = get_now_iso()

        # Fire the callback; a failing listener must not break tracing.
        # NOTE(review): fired even when agent_id is unknown to this tracer,
        # mirroring add_vulnerability_report - confirm the intended nesting.
        if self.agent_status_callback:
            try:
                self.agent_status_callback(agent_id, status)
            except Exception as e:
                logger.warning("Agent status callback failed: %s", e)

    # ============ Tool execution tracking ============

    def log_tool_execution_start(
        self,
        agent_id: str,
        tool_name: str,
        args: Dict[str, Any],
    ) -> int:
        """Record the start of a tool execution.

        Returns:
            The execution id to pass to ``update_tool_execution``.
        """
        execution_id = self._next_execution_id
        self._next_execution_id += 1

        self.tool_executions[execution_id] = {
            "execution_id": execution_id,
            "agent_id": agent_id,
            "tool_name": tool_name,
            # Oversized arguments are truncated before being stored.
            "args": self._clean_args(args),
            "status": "running",
            "result": None,
            "started_at": get_now_iso(),
            "completed_at": None,
        }

        # Link the execution to its agent, if the agent is known.
        if agent_id in self.agents:
            self.agents[agent_id]["tool_executions"].append(execution_id)
        return execution_id

    def update_tool_execution(
        self,
        execution_id: int,
        status: str,
        result: Any = None,
    ) -> None:
        """Set the status (and optionally the result) of a tool execution.

        Unknown execution ids are ignored. A ``None`` result leaves any
        previously stored result in place.
        """
        execution = self.tool_executions.get(execution_id)
        if execution is None:
            return
        execution["status"] = status
        execution["completed_at"] = get_now_iso()
        if result is not None:
            # Oversized results are truncated before being stored.
            execution["result"] = self._clean_result(result)

    def _clean_args(self, args: Dict[str, Any], max_length: int = 1000) -> Dict[str, Any]:
        """Return a copy of *args* with oversized values shortened.

        Strings longer than *max_length* are cut and marked as truncated.
        Lists/dicts whose JSON form exceeds *max_length* are replaced by a
        short summary string; if they cannot be serialised, their ``str()``
        form cut to *max_length* is stored. Other values are kept as-is.
        ``None`` or empty *args* yield an empty dict.
        """
        if not args:
            return {}

        cleaned: Dict[str, Any] = {}
        for key, value in args.items():
            if isinstance(value, str):
                if len(value) > max_length:
                    cleaned[key] = value[:max_length] + "... [truncated]"
                else:
                    cleaned[key] = value
            elif isinstance(value, (list, dict)):
                try:
                    serialized = json.dumps(value, ensure_ascii=False)
                except (TypeError, ValueError):
                    cleaned[key] = str(value)[:max_length]
                    continue
                if len(serialized) > max_length:
                    cleaned[key] = f"[{type(value).__name__} with {len(value)} items, truncated]"
                else:
                    cleaned[key] = value
            else:
                cleaned[key] = value
        return cleaned

    def _clean_result(self, result: Any, max_length: int = 2000) -> Any:
        """Return *result* with long strings and long lists shortened.

        Strings are cut to *max_length*. Dict values and list items are
        cleaned recursively with half the budget (a quarter for lists longer
        than 20 items, which are also cut to their first 20 items plus a
        summary entry). The per-level budget never drops below
        ``_MIN_CLEAN_LENGTH`` (or *max_length* itself, if that is smaller).
        Values of any other type are returned unchanged.
        """
        if isinstance(result, str):
            if len(result) > max_length:
                return result[:max_length] + "... [truncated]"
            return result

        # Children never get a larger budget than their parent.
        floor = min(max_length, self._MIN_CLEAN_LENGTH)

        if isinstance(result, dict):
            child_limit = max(max_length // 2, floor)
            return {
                key: self._clean_result(value, child_limit)
                for key, value in result.items()
            }

        if isinstance(result, list):
            if len(result) > 20:
                item_limit = max(max_length // 4, floor)
                head = [self._clean_result(item, item_limit) for item in result[:20]]
                return head + [f"... and {len(result) - 20} more items"]
            item_limit = max(max_length // 2, floor)
            return [self._clean_result(item, item_limit) for item in result]

        return result

    # ============ Message tracking ============

    def log_chat_message(
        self,
        content: str,
        role: str,
        agent_id: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> int:
        """Record a chat message and return its message id.

        Content longer than 5000 characters is truncated before storage.
        """
        message_id = self._next_message_id
        self._next_message_id += 1

        if len(content) > 5000:
            content = content[:5000] + "... [truncated]"

        self.chat_messages.append({
            "message_id": message_id,
            "content": content,
            "role": role,
            "agent_id": agent_id,
            "timestamp": get_now_iso(),
            "metadata": metadata or {},
        })
        return message_id

    # ============ Vulnerability reports ============

    def add_vulnerability_report(
        self,
        title: str,
        content: str,
        severity: str,
        agent_id: Optional[str] = None,
        vulnerability_type: Optional[str] = None,
        file_path: Optional[str] = None,
    ) -> str:
        """Add a vulnerability report, notify the callback and persist it.

        Title and content are stripped; severity is lower-cased and stripped.

        Returns:
            The generated report id (``vuln-NNNN``, numbered by insertion).
        """
        report_id = f"vuln-{len(self.vulnerability_reports) + 1:04d}"
        clean_title = title.strip()
        clean_content = content.strip()
        clean_severity = severity.lower().strip()

        self.vulnerability_reports.append({
            "id": report_id,
            "title": clean_title,
            "content": clean_content,
            "severity": clean_severity,
            "vulnerability_type": vulnerability_type,
            "file_path": file_path,
            "agent_id": agent_id,
            "timestamp": get_now().strftime("%Y-%m-%d %H:%M:%S") + " CST",
        })
        logger.info("Tracer: Vulnerability report added - %s: %s", report_id, title)

        # Update the per-agent findings counter.
        if agent_id and agent_id in self.agents:
            agent = self.agents[agent_id]
            agent["findings_count"] = agent.get("findings_count", 0) + 1

        # Fire the callback; a failing listener must not break tracing.
        if self.vulnerability_found_callback:
            try:
                self.vulnerability_found_callback(
                    report_id,
                    clean_title,
                    clean_content,
                    clean_severity,
                )
            except Exception as e:
                logger.warning("Vulnerability callback failed: %s", e)

        # Persist immediately so reports survive an aborted run.
        self._save_vulnerability_reports()
        return report_id

    def set_final_scan_result(
        self,
        content: str,
        success: bool = True,
    ) -> None:
        """Store the final scan result, mark the run finished and save it.

        Args:
            content: Final report text (stored stripped as
                ``final_scan_result``, unmodified inside ``scan_results``).
            success: Whether the scan succeeded; selects the run status
                ``completed`` or ``failed``.
        """
        self.final_scan_result = content.strip()
        self.scan_results = {
            "scan_completed": True,
            "content": content,
            "success": success,
            "completed_at": get_now_iso(),
            "total_vulnerabilities": len(self.vulnerability_reports),
        }
        self.run_metadata["status"] = "completed" if success else "failed"
        self.end_time = get_now_iso()
        self.run_metadata["end_time"] = self.end_time
        logger.info("Tracer: Final scan result set, success=%s", success)

        # Save everything
        self.save_run_data(mark_complete=True)

    # ============ Persistence ============

    def save_run_data(self, mark_complete: bool = False) -> None:
        """Write the final report, vulnerability reports and metadata to disk.

        Args:
            mark_complete: When true, the run's end time is set to now first.

        Failures are logged with a traceback and not re-raised.
        """
        try:
            run_dir = self.get_run_dir()
            if mark_complete:
                self.end_time = get_now_iso()
                self.run_metadata["end_time"] = self.end_time

            # Final report (only once a final result exists)
            if self.final_scan_result:
                self._save_final_report(run_dir)
            # Vulnerability reports
            self._save_vulnerability_reports()
            # Run metadata
            self._save_metadata(run_dir)
            logger.info("Tracer: Run data saved to %s", run_dir)
        except Exception as e:
            logger.exception("Failed to save run data: %s", e)

    def _save_final_report(self, run_dir: Path) -> None:
        """Write ``security_audit_report.md`` into *run_dir*.

        The file holds a header, summary counts, the per-severity
        distribution (when reports exist) and the final scan result.
        """
        report_file = run_dir / "security_audit_report.md"
        with report_file.open("w", encoding="utf-8") as f:
            f.write("# 安全审计报告\n\n")
            f.write(f"**生成时间:** {get_now().strftime('%Y-%m-%d %H:%M:%S')} CST\n")
            f.write(f"**运行ID:** {self.run_id}\n\n")

            # Summary counts
            f.write("## 审计概述\n\n")
            f.write(f"- 发现漏洞数: {len(self.vulnerability_reports)}\n")
            f.write(f"- 参与Agent数: {len(self.agents)}\n")
            f.write(f"- 工具调用数: {len(self.tool_executions)}\n\n")

            # Severity distribution
            if self.vulnerability_reports:
                severity_counts: Dict[str, int] = {}
                for vuln in self.vulnerability_reports:
                    severity = vuln.get("severity", "unknown")
                    severity_counts[severity] = severity_counts.get(severity, 0) + 1
                f.write("### 漏洞严重性分布\n\n")
                for severity, count in sorted(severity_counts.items()):
                    f.write(f"- {severity.upper()}: {count}\n")
                f.write("\n")

            f.write("---\n\n")
            f.write(f"{self.final_scan_result}\n")
        logger.info("Saved final report to: %s", report_file)

    def _save_vulnerability_reports(self) -> None:
        """Write one Markdown file per new report and rewrite the CSV index.

        Reports already written (tracked in ``_saved_vuln_ids``) are not
        rewritten; ``vulnerabilities.csv`` is regenerated in full, ordered by
        severity and then timestamp. Failures are logged, not raised.
        """
        if not self.vulnerability_reports:
            return
        try:
            run_dir = self.get_run_dir()
            vuln_dir = run_dir / "vulnerabilities"
            vuln_dir.mkdir(exist_ok=True)

            # Only write reports that have not been saved yet.
            new_reports = [
                report for report in self.vulnerability_reports
                if report["id"] not in self._saved_vuln_ids
            ]
            for report in new_reports:
                vuln_file = vuln_dir / f"{report['id']}.md"
                with vuln_file.open("w", encoding="utf-8") as f:
                    f.write(f"# {report['title']}\n\n")
                    f.write(f"**ID:** {report['id']}\n")
                    f.write(f"**严重性:** {report['severity'].upper()}\n")
                    f.write(f"**发现时间:** {report['timestamp']}\n")
                    if report.get("vulnerability_type"):
                        f.write(f"**漏洞类型:** {report['vulnerability_type']}\n")
                    if report.get("file_path"):
                        f.write(f"**文件位置:** {report['file_path']}\n")
                    f.write("\n## 详细描述\n\n")
                    f.write(f"{report['content']}\n")
                self._saved_vuln_ids.add(report["id"])

            # CSV index of all reports; unknown severities sort last.
            csv_file = run_dir / "vulnerabilities.csv"
            severity_order = {"critical": 0, "high": 1, "medium": 2, "low": 3, "info": 4}
            sorted_reports = sorted(
                self.vulnerability_reports,
                key=lambda x: (severity_order.get(x["severity"], 5), x["timestamp"]),
            )
            with csv_file.open("w", encoding="utf-8", newline="") as f:
                fieldnames = ["id", "title", "severity", "type", "file", "timestamp"]
                writer = csv.DictWriter(f, fieldnames=fieldnames)
                writer.writeheader()
                for report in sorted_reports:
                    writer.writerow({
                        "id": report["id"],
                        "title": report["title"],
                        "severity": report["severity"].upper(),
                        "type": report.get("vulnerability_type") or "",
                        "file": report.get("file_path") or "",
                        "timestamp": report["timestamp"],
                    })

            if new_reports:
                logger.info(
                    "Saved %s new vulnerability reports to %s",
                    len(new_reports),
                    vuln_dir,
                )
        except Exception as e:
            logger.warning("Failed to save vulnerability reports: %s", e)

    def _save_metadata(self, run_dir: Path) -> None:
        """Write ``run_metadata.json`` (run metadata plus counts and duration)."""
        metadata_file = run_dir / "run_metadata.json"
        metadata = {
            **self.run_metadata,
            "agents_count": len(self.agents),
            "tool_executions_count": len(self.tool_executions),
            "vulnerabilities_count": len(self.vulnerability_reports),
            "duration_seconds": self._calculate_duration(),
        }
        with metadata_file.open("w", encoding="utf-8") as f:
            json.dump(metadata, f, ensure_ascii=False, indent=2)

    def _calculate_duration(self) -> float:
        """Return the run duration in seconds.

        Measured from ``start_time`` to ``end_time``, or to now while the run
        is still open. Returns ``0.0`` when a timestamp cannot be parsed or
        the two values cannot be subtracted.
        """
        try:
            start = datetime.fromisoformat(self.start_time)
            if self.end_time:
                end = datetime.fromisoformat(self.end_time)
            else:
                end = get_now()
            return (end - start).total_seconds()
        except (ValueError, TypeError):
            return 0.0

    # ============ Statistics and queries ============

    def get_agent_tools(self, agent_id: str) -> List[Dict[str, Any]]:
        """Return the tool execution records that belong to *agent_id*."""
        return [
            exec_data for exec_data in self.tool_executions.values()
            if exec_data.get("agent_id") == agent_id
        ]

    def get_real_tool_count(self) -> int:
        """Return the number of tool executions, excluding system tools."""
        system_tools = {"scan_start_info", "subagent_start_info"}
        return sum(
            1 for exec_data in self.tool_executions.values()
            if exec_data.get("tool_name") not in system_tools
        )

    def get_statistics(self) -> Dict[str, Any]:
        """Return summary statistics for the run.

        Agents are counted per known status and reports per known severity;
        values outside those fixed sets only appear in the totals.
        """
        agent_stats = {"running": 0, "completed": 0, "failed": 0, "stopped": 0}
        for agent in self.agents.values():
            status = agent.get("status", "unknown")
            if status in agent_stats:
                agent_stats[status] += 1

        vuln_stats = {"critical": 0, "high": 0, "medium": 0, "low": 0, "info": 0}
        for vuln in self.vulnerability_reports:
            severity = vuln.get("severity", "medium")
            if severity in vuln_stats:
                vuln_stats[severity] += 1

        return {
            "agents": agent_stats,
            "vulnerabilities": vuln_stats,
            "total_agents": len(self.agents),
            "total_vulnerabilities": len(self.vulnerability_reports),
            "total_tool_executions": self.get_real_tool_count(),
            "duration_seconds": self._calculate_duration(),
        }

    def cleanup(self) -> None:
        """Mark the run complete and save the final data."""
        self.save_run_data(mark_complete=True)