573 lines
20 KiB
Python
573 lines
20 KiB
Python
"""
Tracer - audit tracer.

Provides complete tracing of the audit process, including:
- Agent creation and status changes
- Tool execution records
- Vulnerability report management
- Final scan results
- Data persistence
"""
|
|
|
|
import csv
|
|
import json
|
|
import logging
|
|
from app.core.timezone import get_now, get_now_iso
|
|
from pathlib import Path
|
|
from typing import Any, Callable, Dict, List, Optional
|
|
from uuid import uuid4
|
|
|
|
logger = logging.getLogger(__name__)

# Module-wide Tracer instance; starts as None and is replaced by
# set_global_tracer().
_global_tracer: Optional["Tracer"] = None
|
|
|
|
|
|
def get_global_tracer() -> Optional["Tracer"]:
    """Return the module-wide Tracer instance.

    Returns:
        The value currently stored in ``_global_tracer``; this is ``None``
        until a tracer has been installed.
    """
    return _global_tracer
|
|
|
|
|
|
def set_global_tracer(tracer: "Tracer") -> None:
    """Install ``tracer`` as the module-wide Tracer instance.

    Args:
        tracer: The tracer to store in the module-level ``_global_tracer``
            variable, replacing whatever was stored before.
    """
    global _global_tracer
    _global_tracer = tracer
|
|
|
|
|
|
class Tracer:
    """Audit tracer that records what happens during a single audit run.

    The tracer keeps, in memory:

    - agent records (creation, status changes, per-agent finding counts)
    - tool execution records
    - chat messages
    - vulnerability reports
    - the final scan result and run metadata

    and persists reports and metadata to files under a per-run directory
    (see ``get_run_dir``).

    Two optional callbacks may be assigned by the owner of the tracer:

    - ``vulnerability_found_callback(report_id, title, content, severity)``
      is invoked after a vulnerability report is added.
    - ``agent_status_callback(agent_id, status)`` is invoked after a known
      agent's status is updated.

    Exceptions raised by either callback are logged and swallowed.
    """

    def __init__(
        self,
        run_name: Optional[str] = None,
        output_dir: Optional[Path] = None,
    ):
        """Create a tracer for a new run.

        Args:
            run_name: Optional human-chosen name. When given it is also used
                as the run id; otherwise a random ``run-xxxxxxxx`` id is
                generated.
            output_dir: Base directory for persisted files. When omitted,
                ``<cwd>/audit_runs`` is used.
        """
        # Run identity
        self.run_name = run_name
        self.run_id = run_name or f"run-{uuid4().hex[:8]}"
        # presumably an ISO-8601 string; _calculate_duration parses it with
        # datetime.fromisoformat
        self.start_time = get_now_iso()
        self.end_time: Optional[str] = None

        # Tracked data
        self.agents: Dict[str, Dict[str, Any]] = {}
        self.tool_executions: Dict[int, Dict[str, Any]] = {}
        self.chat_messages: List[Dict[str, Any]] = []

        # Vulnerability reports
        self.vulnerability_reports: List[Dict[str, Any]] = []
        self.final_scan_result: Optional[str] = None

        # Scan configuration and results
        self.scan_config: Optional[Dict[str, Any]] = None
        self.scan_results: Optional[Dict[str, Any]] = None

        # Metadata written to run_metadata.json
        self.run_metadata: Dict[str, Any] = {
            "run_id": self.run_id,
            "run_name": self.run_name,
            "start_time": self.start_time,
            "end_time": None,
            "status": "running",
        }

        # Output directory; the concrete run directory is created lazily
        self._output_dir = output_dir
        self._run_dir: Optional[Path] = None

        # Counters
        self._next_execution_id = 1
        self._next_message_id = 1
        # Ids of reports whose Markdown file has already been written
        self._saved_vuln_ids: set = set()

        # Callbacks
        self.vulnerability_found_callback: Optional[Callable[[str, str, str, str], None]] = None
        self.agent_status_callback: Optional[Callable[[str, str], None]] = None

    def set_run_name(self, run_name: str) -> None:
        """Set the run name; the run id is set to the same value.

        Note that a run directory that was already created by
        ``get_run_dir`` is not recomputed.
        """
        self.run_name = run_name
        self.run_id = run_name
        self.run_metadata["run_name"] = run_name
        self.run_metadata["run_id"] = run_name

    def get_run_dir(self) -> Path:
        """Return the output directory for this run, creating it on first use.

        The directory is ``<output_dir or cwd/audit_runs>/<sanitized name>``,
        where the name is ``run_name`` (or ``run_id``) with every character
        that is not alphanumeric, ``-`` or ``_`` replaced by ``_``.

        Returns:
            The (cached) run directory path.

        Raises:
            OSError: If a directory cannot be created.
        """
        if self._run_dir is None:
            if self._output_dir:
                # Path() also accepts a plain string path.
                base_dir = Path(self._output_dir)
            else:
                base_dir = Path.cwd() / "audit_runs"

            # parents=True so a nested output_dir whose parents do not exist
            # yet does not fail with FileNotFoundError.
            base_dir.mkdir(parents=True, exist_ok=True)

            raw_name = self.run_name or self.run_id
            # Replace characters that are unsafe in a directory name
            safe_name = "".join(
                ch if ch.isalnum() or ch in "-_" else "_"
                for ch in raw_name
            )
            self._run_dir = base_dir / safe_name
            self._run_dir.mkdir(exist_ok=True)

        return self._run_dir

    def set_scan_config(self, config: Dict[str, Any]) -> None:
        """Store the scan configuration and copy summary fields to metadata.

        Reads ``project_name``, ``scan_type`` and ``files`` from ``config``
        (all optional) and then makes sure the run directory exists.
        """
        self.scan_config = config
        # "files" may be missing or explicitly None; both count as zero files.
        files = config.get("files") or []
        self.run_metadata.update({
            "project_name": config.get("project_name", ""),
            "scan_type": config.get("scan_type", ""),
            "files_count": len(files),
        })
        # Initialize the output directory
        self.get_run_dir()

    # ============ Agent tracking ============

    def log_agent_creation(
        self,
        agent_id: str,
        name: str,
        task: str,
        parent_id: Optional[str] = None,
        agent_type: str = "generic",
    ) -> None:
        """Record a newly created agent with status ``"running"``.

        An existing record with the same ``agent_id`` is overwritten.
        """
        now = get_now_iso()
        self.agents[agent_id] = {
            "id": agent_id,
            "name": name,
            "task": task,
            "type": agent_type,
            "status": "running",
            "parent_id": parent_id,
            "created_at": now,
            "updated_at": now,
            "tool_executions": [],
            "findings_count": 0,
        }
        logger.debug(f"Tracer: Agent created - {name} ({agent_id})")

    def update_agent_status(
        self,
        agent_id: str,
        status: str,
        error_message: Optional[str] = None,
    ) -> None:
        """Update the status of a known agent; unknown ids are ignored.

        For the terminal statuses ``completed``, ``failed`` and ``stopped``
        a ``finished_at`` timestamp is recorded as well. Afterwards
        ``agent_status_callback`` (if set) is invoked; its exceptions are
        logged, not propagated.
        """
        if agent_id not in self.agents:
            return

        agent = self.agents[agent_id]
        now = get_now_iso()
        agent["status"] = status
        agent["updated_at"] = now

        if error_message:
            agent["error_message"] = error_message

        if status in ("completed", "failed", "stopped"):
            agent["finished_at"] = now

        # Fire the callback
        if self.agent_status_callback:
            try:
                self.agent_status_callback(agent_id, status)
            except Exception as e:
                logger.warning(f"Agent status callback failed: {e}")

    # ============ Tool execution tracking ============

    def log_tool_execution_start(
        self,
        agent_id: str,
        tool_name: str,
        args: Dict[str, Any],
    ) -> int:
        """Record the start of a tool execution.

        Args:
            agent_id: Id of the agent running the tool. The execution is
                linked to the agent record only if that agent is known.
            tool_name: Name of the tool.
            args: Tool arguments; oversized values are truncated before
                being stored (see ``_clean_args``).

        Returns:
            The sequential execution id assigned to this execution.
        """
        execution_id = self._next_execution_id
        self._next_execution_id += 1

        self.tool_executions[execution_id] = {
            "execution_id": execution_id,
            "agent_id": agent_id,
            "tool_name": tool_name,
            "args": self._clean_args(args),
            "status": "running",
            "result": None,
            "started_at": get_now_iso(),
            "completed_at": None,
        }

        # Link to the agent
        if agent_id in self.agents:
            self.agents[agent_id]["tool_executions"].append(execution_id)

        return execution_id

    def update_tool_execution(
        self,
        execution_id: int,
        status: str,
        result: Any = None,
    ) -> None:
        """Update status (and optionally result) of a recorded execution.

        Unknown execution ids are ignored. ``completed_at`` is refreshed on
        every update. A ``None`` result leaves the stored result untouched.
        """
        if execution_id not in self.tool_executions:
            return

        execution = self.tool_executions[execution_id]
        execution["status"] = status
        execution["completed_at"] = get_now_iso()

        # Truncate oversized results before storing them
        if result is not None:
            execution["result"] = self._clean_result(result)

    def _clean_args(self, args: Dict[str, Any], max_length: int = 1000) -> Dict[str, Any]:
        """Return a copy of ``args`` with oversized values shortened.

        - Strings longer than ``max_length`` are cut and suffixed with
          ``"... [truncated]"``.
        - Lists/dicts whose JSON serialization exceeds ``max_length`` are
          replaced by a short placeholder string; if they cannot be
          serialized, ``str(value)`` cut to ``max_length`` is stored.
        - Everything else is kept as-is.
        """
        cleaned: Dict[str, Any] = {}
        for key, value in args.items():
            if isinstance(value, str) and len(value) > max_length:
                cleaned[key] = value[:max_length] + "... [truncated]"
            elif isinstance(value, (list, dict)):
                try:
                    serialized = json.dumps(value, ensure_ascii=False)
                except (TypeError, ValueError):
                    cleaned[key] = str(value)[:max_length]
                else:
                    if len(serialized) > max_length:
                        cleaned[key] = f"[{type(value).__name__} with {len(value)} items, truncated]"
                    else:
                        cleaned[key] = value
            else:
                cleaned[key] = value
        return cleaned

    def _clean_result(self, result: Any, max_length: int = 2000) -> Any:
        """Return ``result`` with long strings and long lists shortened.

        - Strings longer than ``max_length`` are cut and suffixed with
          ``"... [truncated]"``.
        - Dict values are cleaned recursively with half the length budget.
        - Lists with more than 20 items keep the first 20 (cleaned with a
          quarter of the budget) plus a summary string; shorter lists are
          cleaned item by item with half the budget.
        - Any other value is returned unchanged.
        """
        if isinstance(result, str):
            if len(result) > max_length:
                return result[:max_length] + "... [truncated]"
            return result

        if isinstance(result, dict):
            return {
                key: self._clean_result(value, max_length // 2)
                for key, value in result.items()
            }

        if isinstance(result, list):
            if len(result) > 20:
                head = [self._clean_result(item, max_length // 4) for item in result[:20]]
                return head + [f"... and {len(result) - 20} more items"]
            return [self._clean_result(item, max_length // 2) for item in result]

        return result

    # ============ Message tracking ============

    def log_chat_message(
        self,
        content: str,
        role: str,
        agent_id: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> int:
        """Record a chat message and return its sequential message id.

        Content longer than 5000 characters is truncated.
        """
        message_id = self._next_message_id
        self._next_message_id += 1

        # Truncate overly long content
        if len(content) > 5000:
            content = content[:5000] + "... [truncated]"

        self.chat_messages.append({
            "message_id": message_id,
            "content": content,
            "role": role,
            "agent_id": agent_id,
            "timestamp": get_now_iso(),
            "metadata": metadata or {},
        })
        return message_id

    # ============ Vulnerability reports ============

    def add_vulnerability_report(
        self,
        title: str,
        content: str,
        severity: str,
        agent_id: Optional[str] = None,
        vulnerability_type: Optional[str] = None,
        file_path: Optional[str] = None,
    ) -> str:
        """Add a vulnerability report and persist it immediately.

        Title and content are stripped; severity is lower-cased and
        stripped. If ``agent_id`` refers to a known agent its
        ``findings_count`` is incremented. ``vulnerability_found_callback``
        (if set) is invoked with ``(report_id, title, content, severity)``;
        its exceptions are logged, not propagated.

        Returns:
            The generated report id (``vuln-0001``, ``vuln-0002``, ...).
        """
        report_id = f"vuln-{len(self.vulnerability_reports) + 1:04d}"
        clean_title = title.strip()
        clean_content = content.strip()
        clean_severity = severity.lower().strip()

        self.vulnerability_reports.append({
            "id": report_id,
            "title": clean_title,
            "content": clean_content,
            "severity": clean_severity,
            "vulnerability_type": vulnerability_type,
            "file_path": file_path,
            "agent_id": agent_id,
            # NOTE(review): the "CST" label is hard-coded; assumes get_now()
            # returns time in that zone - confirm.
            "timestamp": get_now().strftime("%Y-%m-%d %H:%M:%S") + " CST",
        })
        logger.info(f"Tracer: Vulnerability report added - {report_id}: {title}")

        # Update per-agent statistics
        if agent_id and agent_id in self.agents:
            agent = self.agents[agent_id]
            agent["findings_count"] = agent.get("findings_count", 0) + 1

        # Fire the callback
        if self.vulnerability_found_callback:
            try:
                self.vulnerability_found_callback(
                    report_id,
                    clean_title,
                    clean_content,
                    clean_severity,
                )
            except Exception as e:
                logger.warning(f"Vulnerability callback failed: {e}")

        # Auto-save
        self._save_vulnerability_reports()

        return report_id

    def set_final_scan_result(
        self,
        content: str,
        success: bool = True,
    ) -> None:
        """Store the final scan result, mark the run finished and save.

        The run status becomes ``"completed"`` when ``success`` is true and
        ``"failed"`` otherwise.
        """
        self.final_scan_result = content.strip()

        self.scan_results = {
            "scan_completed": True,
            "content": content,
            "success": success,
            "completed_at": get_now_iso(),
            "total_vulnerabilities": len(self.vulnerability_reports),
        }

        self.run_metadata["status"] = "completed" if success else "failed"
        self.end_time = get_now_iso()
        self.run_metadata["end_time"] = self.end_time

        logger.info(f"Tracer: Final scan result set, success={success}")

        # Persist everything
        self.save_run_data(mark_complete=True)

    # ============ Persistence ============

    def save_run_data(self, mark_complete: bool = False) -> None:
        """Write the final report, vulnerability reports and metadata.

        This is best-effort: any exception is logged and not propagated.

        Args:
            mark_complete: When true, the run end time is set to now before
                saving.
        """
        try:
            run_dir = self.get_run_dir()

            if mark_complete:
                self.end_time = get_now_iso()
                self.run_metadata["end_time"] = self.end_time

            # Final report
            if self.final_scan_result:
                self._save_final_report(run_dir)

            # Vulnerability reports
            self._save_vulnerability_reports()

            # Run metadata
            self._save_metadata(run_dir)

            logger.info(f"Tracer: Run data saved to {run_dir}")

        except Exception as e:
            logger.exception(f"Failed to save run data: {e}")

    def _save_final_report(self, run_dir: Path) -> None:
        """Write ``security_audit_report.md`` into ``run_dir``.

        The file contains a header, summary counts, a per-severity
        breakdown (when there are reports) and the final scan result text.
        """
        report_file = run_dir / "security_audit_report.md"

        with report_file.open("w", encoding="utf-8") as f:
            f.write("# 安全审计报告\n\n")
            f.write(f"**生成时间:** {get_now().strftime('%Y-%m-%d %H:%M:%S')} CST\n")
            f.write(f"**运行ID:** {self.run_id}\n\n")

            # Summary counts
            f.write("## 审计概述\n\n")
            f.write(f"- 发现漏洞数: {len(self.vulnerability_reports)}\n")
            f.write(f"- 参与Agent数: {len(self.agents)}\n")
            f.write(f"- 工具调用数: {len(self.tool_executions)}\n\n")

            # Severity breakdown
            if self.vulnerability_reports:
                severity_counts: Dict[str, int] = {}
                for vuln in self.vulnerability_reports:
                    severity = vuln.get("severity", "unknown")
                    severity_counts[severity] = severity_counts.get(severity, 0) + 1

                f.write("### 漏洞严重性分布\n\n")
                for severity, count in sorted(severity_counts.items()):
                    f.write(f"- {severity.upper()}: {count}\n")
                f.write("\n")

            f.write("---\n\n")
            f.write(f"{self.final_scan_result}\n")

        logger.info(f"Saved final report to: {report_file}")

    def _save_vulnerability_reports(self) -> None:
        """Persist vulnerability reports under the run directory.

        Writes one Markdown file per not-yet-saved report into
        ``vulnerabilities/`` and rewrites the ``vulnerabilities.csv`` index
        with all reports. Does nothing when there are no reports. This is
        best-effort: failures are logged as warnings.
        """
        if not self.vulnerability_reports:
            return

        try:
            run_dir = self.get_run_dir()
            vuln_dir = run_dir / "vulnerabilities"
            vuln_dir.mkdir(exist_ok=True)

            # Only write Markdown files for reports not saved before
            new_reports = [
                report for report in self.vulnerability_reports
                if report["id"] not in self._saved_vuln_ids
            ]
            for report in new_reports:
                self._write_vulnerability_markdown(vuln_dir, report)
                self._saved_vuln_ids.add(report["id"])

            # The CSV index is regenerated in full on every save
            self._write_vulnerability_index(run_dir)

            if new_reports:
                logger.info(f"Saved {len(new_reports)} new vulnerability reports to {vuln_dir}")

        except Exception as e:
            logger.warning(f"Failed to save vulnerability reports: {e}")

    def _write_vulnerability_markdown(self, vuln_dir: Path, report: Dict[str, Any]) -> None:
        """Write a single report as ``<vuln_dir>/<report id>.md``."""
        lines = [
            f"# {report['title']}\n\n",
            f"**ID:** {report['id']}\n",
            f"**严重性:** {report['severity'].upper()}\n",
            f"**发现时间:** {report['timestamp']}\n",
        ]
        if report.get("vulnerability_type"):
            lines.append(f"**漏洞类型:** {report['vulnerability_type']}\n")
        if report.get("file_path"):
            lines.append(f"**文件位置:** {report['file_path']}\n")
        lines.append("\n## 详细描述\n\n")
        lines.append(f"{report['content']}\n")

        vuln_file = vuln_dir / f"{report['id']}.md"
        with vuln_file.open("w", encoding="utf-8") as f:
            f.writelines(lines)

    def _write_vulnerability_index(self, run_dir: Path) -> None:
        """Write ``vulnerabilities.csv`` listing all reports.

        Rows are ordered by severity (critical, high, medium, low, info,
        then anything else) and, within a severity, by timestamp string.
        """
        severity_order = {"critical": 0, "high": 1, "medium": 2, "low": 3, "info": 4}
        sorted_reports = sorted(
            self.vulnerability_reports,
            key=lambda r: (severity_order.get(r["severity"], 5), r["timestamp"]),
        )

        csv_file = run_dir / "vulnerabilities.csv"
        # newline="" is what the csv module expects for files it writes to
        with csv_file.open("w", encoding="utf-8", newline="") as f:
            fieldnames = ["id", "title", "severity", "type", "file", "timestamp"]
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()
            for report in sorted_reports:
                writer.writerow({
                    "id": report["id"],
                    "title": report["title"],
                    "severity": report["severity"].upper(),
                    "type": report.get("vulnerability_type", ""),
                    "file": report.get("file_path", ""),
                    "timestamp": report["timestamp"],
                })

    def _save_metadata(self, run_dir: Path) -> None:
        """Write ``run_metadata.json`` (run metadata plus summary counts)."""
        metadata = {
            **self.run_metadata,
            "agents_count": len(self.agents),
            "tool_executions_count": len(self.tool_executions),
            "vulnerabilities_count": len(self.vulnerability_reports),
            "duration_seconds": self._calculate_duration(),
        }

        metadata_file = run_dir / "run_metadata.json"
        with metadata_file.open("w", encoding="utf-8") as f:
            json.dump(metadata, f, ensure_ascii=False, indent=2)

    def _calculate_duration(self) -> float:
        """Return the run duration in seconds.

        Uses ``end_time`` when set, otherwise the current time. Returns
        ``0.0`` when the timestamps cannot be parsed or subtracted.
        """
        # Imported here because the module's import block does not import
        # datetime; without this the call below raised NameError, which the
        # except clause does not catch.
        from datetime import datetime

        try:
            start = datetime.fromisoformat(self.start_time)
            if self.end_time:
                end = datetime.fromisoformat(self.end_time)
            else:
                end = get_now()
            return (end - start).total_seconds()
        except (ValueError, TypeError):
            # ValueError: malformed timestamp; TypeError: non-string value or
            # mixing naive and timezone-aware datetimes.
            return 0.0

    # ============ Statistics and queries ============

    def get_agent_tools(self, agent_id: str) -> List[Dict[str, Any]]:
        """Return the tool execution records belonging to ``agent_id``."""
        return [
            exec_data for exec_data in self.tool_executions.values()
            if exec_data.get("agent_id") == agent_id
        ]

    def get_real_tool_count(self) -> int:
        """Count tool executions, excluding the system tools.

        ``scan_start_info`` and ``subagent_start_info`` are not counted.
        """
        system_tools = {"scan_start_info", "subagent_start_info"}
        return sum(
            1 for exec_data in self.tool_executions.values()
            if exec_data.get("tool_name") not in system_tools
        )

    def get_statistics(self) -> Dict[str, Any]:
        """Return summary statistics for the run.

        Agents and vulnerabilities are bucketed by status / severity;
        values outside the known buckets are left out of the buckets but
        still included in the totals.
        """
        agent_stats = {"running": 0, "completed": 0, "failed": 0, "stopped": 0}
        for agent in self.agents.values():
            status = agent.get("status", "unknown")
            if status in agent_stats:
                agent_stats[status] += 1

        vuln_stats = {"critical": 0, "high": 0, "medium": 0, "low": 0, "info": 0}
        for vuln in self.vulnerability_reports:
            severity = vuln.get("severity", "medium")
            if severity in vuln_stats:
                vuln_stats[severity] += 1

        return {
            "agents": agent_stats,
            "vulnerabilities": vuln_stats,
            "total_agents": len(self.agents),
            "total_vulnerabilities": len(self.vulnerability_reports),
            "total_tool_executions": self.get_real_tool_count(),
            "duration_seconds": self._calculate_duration(),
        }

    def cleanup(self) -> None:
        """Mark the run complete and save the final data."""
        self.save_run_data(mark_complete=True)
|