feat(agent): enhance error handling and project scope filtering

- Downgrade Python version from 3.13 to 3.11.12 for compatibility
- Improve empty LLM response handling with better diagnostics and retry logic in AnalysisAgent and ReconAgent
- Add detailed logging for empty response retries with token count and iteration tracking
- After three consecutive empty responses, emit a warning and break out to fallback result generation instead of emitting an error event
- Enhance stream error handling with partial content recovery and error message propagation
- Add comprehensive exception handling in stream_llm_call to prevent error suppression
- Implement project scope filtering to ensure consistent filtered views across Orchestrator and sub-agents
- Track filtered files and directories separately when target_files are specified
- Add scope_limited flag and scope_message to project structure for transparency
- Remove manual progress_percentage setting and rely on computed property for COMPLETED status
- Improve code comments with diagnostic markers (🔥) for critical sections
This commit is contained in:
lintsinghua 2025-12-12 16:36:39 +08:00
parent 31dc476015
commit 3db20a3afb
6 changed files with 149 additions and 23 deletions

View File

@ -1 +1 @@
3.13 3.11.12

View File

@ -389,7 +389,8 @@ async def _execute_agent_task(task_id: str):
# 计算安全评分 # 计算安全评分
task.security_score = _calculate_security_score(findings) task.security_score = _calculate_security_score(findings)
task.progress_percentage = 100.0 # 🔥 注意: progress_percentage 是计算属性,不需要手动设置
# 当 status = COMPLETED 时会自动返回 100.0
await db.commit() await db.commit()
@ -580,6 +581,9 @@ async def _collect_project_info(
project_name: 项目名称 project_name: 项目名称
exclude_patterns: 排除模式列表 exclude_patterns: 排除模式列表
target_files: 目标文件列表 target_files: 目标文件列表
🔥 重要:当指定了 target_files,返回的项目结构应该只包含目标文件相关的信息,
以确保 Orchestrator 和子 Agent 看到的是一致的过滤后的视图
""" """
import fnmatch import fnmatch
@ -615,6 +619,10 @@ async def _collect_project_info(
".rb": "Ruby", ".rs": "Rust", ".c": "C", ".cpp": "C++", ".rb": "Ruby", ".rs": "Rust", ".c": "C", ".cpp": "C++",
} }
# 🔥 收集过滤后的文件列表
filtered_files = []
filtered_dirs = set()
for root, dirs, files in os.walk(project_root): for root, dirs, files in os.walk(project_root):
dirs[:] = [d for d in dirs if d not in exclude_dirs] dirs[:] = [d for d in dirs if d not in exclude_dirs]
@ -636,20 +644,40 @@ async def _collect_project_info(
continue continue
info["file_count"] += 1 info["file_count"] += 1
filtered_files.append(relative_path)
# 🔥 收集文件所在的目录
dir_path = os.path.dirname(relative_path)
if dir_path:
# 添加目录及其父目录
parts = dir_path.split(os.sep)
for i in range(len(parts)):
filtered_dirs.add(os.sep.join(parts[:i+1]))
ext = os.path.splitext(f)[1].lower() ext = os.path.splitext(f)[1].lower()
if ext in lang_map and lang_map[ext] not in info["languages"]: if ext in lang_map and lang_map[ext] not in info["languages"]:
info["languages"].append(lang_map[ext]) info["languages"].append(lang_map[ext])
# 收集顶层目录结构 # 🔥 根据是否有目标文件限制,生成不同的结构信息
try: if target_files_set:
top_items = os.listdir(project_root) # 当指定了目标文件时,只显示目标文件和相关目录
info["structure"] = { info["structure"] = {
"directories": [d for d in top_items if os.path.isdir(os.path.join(project_root, d)) and d not in exclude_dirs], "directories": sorted(list(filtered_dirs))[:20],
"files": [f for f in top_items if os.path.isfile(os.path.join(project_root, f))][:20], "files": filtered_files[:30],
"scope_limited": True, # 🔥 标记这是限定范围的视图
"scope_message": f"审计范围限定为 {len(filtered_files)} 个指定文件",
} }
except Exception: else:
pass # 全项目审计时,显示顶层目录结构
try:
top_items = os.listdir(project_root)
info["structure"] = {
"directories": [d for d in top_items if os.path.isdir(os.path.join(project_root, d)) and d not in exclude_dirs],
"files": [f for f in top_items if os.path.isfile(os.path.join(project_root, f))][:20],
"scope_limited": False,
}
except Exception:
pass
except Exception as e: except Exception as e:
logger.warning(f"Failed to collect project info: {e}") logger.warning(f"Failed to collect project info: {e}")

View File

@ -353,19 +353,40 @@ class AnalysisAgent(BaseAgent):
self._total_tokens += tokens_this_round self._total_tokens += tokens_this_round
# 🔥 Handle empty LLM response to prevent loops # 🔥 Enhanced: Handle empty LLM response with better diagnostics
if not llm_output or not llm_output.strip(): if not llm_output or not llm_output.strip():
logger.warning(f"[{self.name}] Empty LLM response in iteration {self._iteration}")
empty_retry_count = getattr(self, '_empty_retry_count', 0) + 1 empty_retry_count = getattr(self, '_empty_retry_count', 0) + 1
self._empty_retry_count = empty_retry_count self._empty_retry_count = empty_retry_count
# 🔥 记录更详细的诊断信息
logger.warning(
f"[{self.name}] Empty LLM response in iteration {self._iteration} "
f"(retry {empty_retry_count}/3, tokens_this_round={tokens_this_round})"
)
if empty_retry_count >= 3: if empty_retry_count >= 3:
logger.error(f"[{self.name}] Too many empty responses, stopping") logger.error(f"[{self.name}] Too many empty responses, generating fallback result")
error_message = "连续收到空响应,停止分析" error_message = "连续收到空响应,使用回退结果"
await self.emit_event("error", error_message) await self.emit_event("warning", error_message)
# 🔥 不是直接 break,而是尝试生成一个回退结果
break break
# 🔥 更有针对性的重试提示
retry_prompt = f"""收到空响应。请根据以下格式输出你的思考和行动:
Thought: [你对当前安全分析情况的思考]
Action: [工具名称 read_file, search_code, pattern_match, semgrep_scan]
Action Input: {{"参数名": "参数值"}}
可用工具: {', '.join(self.tools.keys())}
如果你已完成分析请输出
Thought: [总结所有发现]
Final Answer: {{"findings": [...], "summary": "..."}}"""
self._conversation_history.append({ self._conversation_history.append({
"role": "user", "role": "user",
"content": "Received empty response. Please output your Thought and Action.", "content": retry_prompt,
}) })
continue continue

View File

@ -951,15 +951,33 @@ class BaseAgent(ABC):
elif chunk["type"] == "error": elif chunk["type"] == "error":
accumulated = chunk.get("accumulated", "") accumulated = chunk.get("accumulated", "")
logger.error(f"Stream error: {chunk.get('error')}") error_msg = chunk.get("error", "Unknown error")
logger.error(f"[{self.name}] Stream error: {error_msg}")
# 🔥 如果有部分累积内容,尝试使用它
if accumulated:
logger.warning(f"[{self.name}] Using partial accumulated content ({len(accumulated)} chars)")
total_tokens = chunk.get("usage", {}).get("total_tokens", 0)
else:
# 🔥 返回一个提示 LLM 继续的消息,而不是空字符串
accumulated = f"[系统错误: {error_msg}] 请重新思考并输出你的决策。"
break break
except asyncio.CancelledError: except asyncio.CancelledError:
logger.info(f"[{self.name}] LLM call cancelled") logger.info(f"[{self.name}] LLM call cancelled")
raise raise
except Exception as e:
# 🔥 增强异常处理,避免吞掉错误
logger.error(f"[{self.name}] Unexpected error in stream_llm_call: {e}", exc_info=True)
await self.emit_event("error", f"LLM 调用错误: {str(e)}")
# 返回错误提示,让 Agent 知道发生了什么
accumulated = f"[LLM调用错误: {str(e)}] 请重试。"
finally: finally:
await self.emit_thinking_end(accumulated) await self.emit_thinking_end(accumulated)
# 🔥 记录空响应警告,帮助调试
if not accumulated or not accumulated.strip():
logger.warning(f"[{self.name}] Empty LLM response returned (total_tokens: {total_tokens})")
return accumulated, total_tokens return accumulated, total_tokens
async def execute_tool(self, tool_name: str, tool_input: Dict) -> str: async def execute_tool(self, tool_name: str, tool_input: Dict) -> str:

View File

@ -440,14 +440,52 @@ class OrchestratorAgent(BaseAgent):
config: Dict[str, Any], config: Dict[str, Any],
) -> str: ) -> str:
"""构建初始消息""" """构建初始消息"""
structure = project_info.get('structure', {})
# 🔥 检查是否是限定范围的审计
scope_limited = structure.get('scope_limited', False)
scope_message = structure.get('scope_message', '')
msg = f"""请开始对以下项目进行安全审计。 msg = f"""请开始对以下项目进行安全审计。
## 项目信息 ## 项目信息
- 名称: {project_info.get('name', 'unknown')} - 名称: {project_info.get('name', 'unknown')}
- 语言: {project_info.get('languages', [])} - 语言: {project_info.get('languages', [])}
- 文件数量: {project_info.get('file_count', 0)} - 文件数量: {project_info.get('file_count', 0)}
- 目录结构: {json.dumps(project_info.get('structure', {}), ensure_ascii=False, indent=2)} """
# 🔥 根据是否限定范围显示不同的结构信息
if scope_limited:
msg += f"""
## ⚠️ 审计范围限定
**{scope_message}**
### 目标文件列表
"""
for f in structure.get('files', []):
msg += f"- {f}\n"
if structure.get('directories'):
msg += f"""
### 相关目录
{structure.get('directories', [])}
"""
else:
msg += f"""
## 目录结构
{json.dumps(structure, ensure_ascii=False, indent=2)}
"""
# 🔥 如果配置了 target_files,也明确显示
target_files = config.get('target_files', [])
if target_files:
msg += f"""
## ⚠️ 重要提示
用户指定了 **{len(target_files)}** 个目标文件进行审计
请确保你的分析集中在这些指定的文件上不要浪费时间分析其他文件
"""
msg += f"""
## 用户配置 ## 用户配置
- 目标漏洞: {config.get('target_vulnerabilities', ['all'])} - 目标漏洞: {config.get('target_vulnerabilities', ['all'])}
- 验证级别: {config.get('verification_level', 'sandbox')} - 验证级别: {config.get('verification_level', 'sandbox')}

View File

@ -299,19 +299,40 @@ class ReconAgent(BaseAgent):
self._total_tokens += tokens_this_round self._total_tokens += tokens_this_round
# 🔥 Handle empty LLM response to prevent loops # 🔥 Enhanced: Handle empty LLM response with better diagnostics
if not llm_output or not llm_output.strip(): if not llm_output or not llm_output.strip():
logger.warning(f"[{self.name}] Empty LLM response in iteration {self._iteration}")
empty_retry_count = getattr(self, '_empty_retry_count', 0) + 1 empty_retry_count = getattr(self, '_empty_retry_count', 0) + 1
self._empty_retry_count = empty_retry_count self._empty_retry_count = empty_retry_count
# 🔥 记录更详细的诊断信息
logger.warning(
f"[{self.name}] Empty LLM response in iteration {self._iteration} "
f"(retry {empty_retry_count}/3, tokens_this_round={tokens_this_round})"
)
if empty_retry_count >= 3: if empty_retry_count >= 3:
logger.error(f"[{self.name}] Too many empty responses, stopping") logger.error(f"[{self.name}] Too many empty responses, generating fallback result")
error_message = "连续收到空响应,停止信息收集" error_message = "连续收到空响应,使用回退结果"
await self.emit_event("error", error_message) await self.emit_event("warning", error_message)
# 🔥 不是直接 break,而是尝试生成一个回退结果
break break
# 🔥 更有针对性的重试提示
retry_prompt = f"""收到空响应。请根据以下格式输出你的思考和行动:
Thought: [你对当前情况的分析]
Action: [工具名称 list_files, read_file, search_code]
Action Input: {{"参数名": "参数值"}}
可用工具: {', '.join(self.tools.keys())}
如果你认为信息收集已经完成请输出
Thought: [总结收集到的信息]
Final Answer: [JSON格式的结果]"""
self._conversation_history.append({ self._conversation_history.append({
"role": "user", "role": "user",
"content": "Received empty response. Please output your Thought and Action.", "content": retry_prompt,
}) })
continue continue