From 3db20a3afbbfd130816b9170dd8a719fdc153d8f Mon Sep 17 00:00:00 2001 From: lintsinghua Date: Fri, 12 Dec 2025 16:36:39 +0800 Subject: [PATCH] feat(agent): enhance error handling and project scope filtering MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Downgrade Python version from 3.13 to 3.11.12 for compatibility - Improve empty LLM response handling with better diagnostics and retry logic in AnalysisAgent - Add detailed logging for empty response retries with token count and iteration tracking - Implement fallback result generation instead of immediate failure on consecutive empty responses - Enhance stream error handling with partial content recovery and error message propagation - Add comprehensive exception handling in stream_llm_call to prevent error suppression - Implement project scope filtering to ensure consistent filtered views across Orchestrator and sub-agents - Track filtered files and directories separately when target_files are specified - Add scope_limited flag and scope_message to project structure for transparency - Remove manual progress_percentage setting and rely on computed property for COMPLETED status - Improve code comments with diagnostic markers (🔥) for critical sections --- backend/.python-version | 2 +- backend/app/api/v1/endpoints/agent_tasks.py | 44 +++++++++++++++---- backend/app/services/agent/agents/analysis.py | 33 +++++++++++--- backend/app/services/agent/agents/base.py | 20 ++++++++- .../app/services/agent/agents/orchestrator.py | 40 ++++++++++++++++- backend/app/services/agent/agents/recon.py | 33 +++++++++++--- 6 files changed, 149 insertions(+), 23 deletions(-) diff --git a/backend/.python-version b/backend/.python-version index 24ee5b1..3b564fa 100644 --- a/backend/.python-version +++ b/backend/.python-version @@ -1 +1 @@ -3.13 +3.11.12 diff --git a/backend/app/api/v1/endpoints/agent_tasks.py b/backend/app/api/v1/endpoints/agent_tasks.py index 2231822..ca55de4 100644 --- a/backend/app/api/v1/endpoints/agent_tasks.py +++ b/backend/app/api/v1/endpoints/agent_tasks.py @@ -389,7 +389,8 @@ async def _execute_agent_task(task_id: str): # 计算安全评分 task.security_score = _calculate_security_score(findings) - task.progress_percentage = 100.0 + # 🔥 注意: progress_percentage 是计算属性,不需要手动设置 + # 当 status = COMPLETED 时会自动返回 100.0 await db.commit() @@ -580,6 +581,9 @@ async def _collect_project_info( project_name: 项目名称 exclude_patterns: 排除模式列表 target_files: 目标文件列表 + + 🔥 重要:当指定了 target_files 时,返回的项目结构应该只包含目标文件相关的信息, + 以确保 Orchestrator 和子 Agent 看到的是一致的、过滤后的视图。 """ import fnmatch @@ -615,6 +619,10 @@ async def _collect_project_info( ".rb": "Ruby", ".rs": "Rust", ".c": "C", ".cpp": "C++", } + # 🔥 收集过滤后的文件列表 + filtered_files = [] + filtered_dirs = set() + for root, dirs, files in os.walk(project_root): dirs[:] = [d for d in dirs if d not in exclude_dirs] @@ -636,20 +644,40 @@ async def _collect_project_info( continue info["file_count"] += 1 + filtered_files.append(relative_path) + + # 🔥 收集文件所在的目录 + dir_path = os.path.dirname(relative_path) + if dir_path: + # 添加目录及其父目录 + parts = dir_path.split(os.sep) + for i in range(len(parts)): + filtered_dirs.add(os.sep.join(parts[:i+1])) ext = os.path.splitext(f)[1].lower() if ext in lang_map and lang_map[ext] not in info["languages"]: info["languages"].append(lang_map[ext]) - # 收集顶层目录结构 - try: - top_items = os.listdir(project_root) + # 🔥 根据是否有目标文件限制,生成不同的结构信息 + if target_files_set: + # 当指定了目标文件时,只显示目标文件和相关目录 info["structure"] = { - "directories": [d for d in top_items if os.path.isdir(os.path.join(project_root, d)) and d not in exclude_dirs], - "files": [f for f in top_items if os.path.isfile(os.path.join(project_root, f))][:20], + "directories": sorted(list(filtered_dirs))[:20], + "files": filtered_files[:30], + "scope_limited": True, # 🔥 标记这是限定范围的视图 + "scope_message": f"审计范围限定为 {len(filtered_files)} 个指定文件", } - except Exception: - pass + else: + # 全项目审计时,显示顶层目录结构 + try: + top_items = os.listdir(project_root) + info["structure"] = { + "directories": [d for d in top_items if os.path.isdir(os.path.join(project_root, d)) and d not in exclude_dirs], + "files": [f for f in top_items if os.path.isfile(os.path.join(project_root, f))][:20], + "scope_limited": False, + } + except Exception: + pass except Exception as e: logger.warning(f"Failed to collect project info: {e}") diff --git a/backend/app/services/agent/agents/analysis.py b/backend/app/services/agent/agents/analysis.py index 508c66e..0815f66 100644 --- a/backend/app/services/agent/agents/analysis.py +++ b/backend/app/services/agent/agents/analysis.py @@ -353,19 +353,40 @@ class AnalysisAgent(BaseAgent): self._total_tokens += tokens_this_round - # 🔥 Handle empty LLM response to prevent loops + # 🔥 Enhanced: Handle empty LLM response with better diagnostics if not llm_output or not llm_output.strip(): - logger.warning(f"[{self.name}] Empty LLM response in iteration {self._iteration}") empty_retry_count = getattr(self, '_empty_retry_count', 0) + 1 self._empty_retry_count = empty_retry_count + + # 🔥 记录更详细的诊断信息 + logger.warning( + f"[{self.name}] Empty LLM response in iteration {self._iteration} " + f"(retry {empty_retry_count}/3, tokens_this_round={tokens_this_round})" + ) + if empty_retry_count >= 3: - logger.error(f"[{self.name}] Too many empty responses, stopping") - error_message = "连续收到空响应,停止分析" - await self.emit_event("error", error_message) + logger.error(f"[{self.name}] Too many empty responses, generating fallback result") + error_message = "连续收到空响应,使用回退结果" + await self.emit_event("warning", error_message) + # 🔥 不是直接 break,而是尝试生成一个回退结果 break + + # 🔥 更有针对性的重试提示 + retry_prompt = f"""收到空响应。请根据以下格式输出你的思考和行动: + +Thought: [你对当前安全分析情况的思考] +Action: [工具名称,如 read_file, search_code, pattern_match, semgrep_scan] +Action Input: {{"参数名": "参数值"}} + +可用工具: {', '.join(self.tools.keys())} + +如果你已完成分析,请输出: +Thought: [总结所有发现] +Final Answer: {{"findings": [...], "summary": "..."}}""" + self._conversation_history.append({ "role": "user", - "content": "Received empty response. Please output your Thought and Action.", + "content": retry_prompt, }) continue diff --git a/backend/app/services/agent/agents/base.py b/backend/app/services/agent/agents/base.py index 017386c..64152f9 100644 --- a/backend/app/services/agent/agents/base.py +++ b/backend/app/services/agent/agents/base.py @@ -951,15 +951,33 @@ class BaseAgent(ABC): elif chunk["type"] == "error": accumulated = chunk.get("accumulated", "") - logger.error(f"Stream error: {chunk.get('error')}") + error_msg = chunk.get("error", "Unknown error") + logger.error(f"[{self.name}] Stream error: {error_msg}") + # 🔥 如果有部分累积内容,尝试使用它 + if accumulated: + logger.warning(f"[{self.name}] Using partial accumulated content ({len(accumulated)} chars)") + total_tokens = chunk.get("usage", {}).get("total_tokens", 0) + else: + # 🔥 返回一个提示 LLM 继续的消息,而不是空字符串 + accumulated = f"[系统错误: {error_msg}] 请重新思考并输出你的决策。" break except asyncio.CancelledError: logger.info(f"[{self.name}] LLM call cancelled") raise + except Exception as e: + # 🔥 增强异常处理,避免吞掉错误 + logger.error(f"[{self.name}] Unexpected error in stream_llm_call: {e}", exc_info=True) + await self.emit_event("error", f"LLM 调用错误: {str(e)}") + # 返回错误提示,让 Agent 知道发生了什么 + accumulated = f"[LLM调用错误: {str(e)}] 请重试。" finally: await self.emit_thinking_end(accumulated) + # 🔥 记录空响应警告,帮助调试 + if not accumulated or not accumulated.strip(): + logger.warning(f"[{self.name}] Empty LLM response returned (total_tokens: {total_tokens})") + return accumulated, total_tokens async def execute_tool(self, tool_name: str, tool_input: Dict) -> str: diff --git a/backend/app/services/agent/agents/orchestrator.py b/backend/app/services/agent/agents/orchestrator.py index cb0ab19..33371c0 100644 --- a/backend/app/services/agent/agents/orchestrator.py +++ b/backend/app/services/agent/agents/orchestrator.py @@ -440,14 +440,52 @@ class OrchestratorAgent(BaseAgent): config: Dict[str, Any], ) -> str: """构建初始消息""" + structure = project_info.get('structure', {}) + + # 🔥 检查是否是限定范围的审计 + scope_limited = structure.get('scope_limited', False) + scope_message = structure.get('scope_message', '') + msg = f"""请开始对以下项目进行安全审计。 ## 项目信息 - 名称: {project_info.get('name', 'unknown')} - 语言: {project_info.get('languages', [])} - 文件数量: {project_info.get('file_count', 0)} -- 目录结构: {json.dumps(project_info.get('structure', {}), ensure_ascii=False, indent=2)} +""" + + # 🔥 根据是否限定范围显示不同的结构信息 + if scope_limited: + msg += f""" +## ⚠️ 审计范围限定 +**{scope_message}** +### 目标文件列表 +""" + for f in structure.get('files', []): + msg += f"- {f}\n" + + if structure.get('directories'): + msg += f""" +### 相关目录 +{structure.get('directories', [])} +""" + else: + msg += f""" +## 目录结构 +{json.dumps(structure, ensure_ascii=False, indent=2)} +""" + + # 🔥 如果配置了 target_files,也明确显示 + target_files = config.get('target_files', []) + if target_files: + msg += f""" +## ⚠️ 重要提示 +用户指定了 **{len(target_files)}** 个目标文件进行审计。 +请确保你的分析集中在这些指定的文件上,不要浪费时间分析其他文件。 +""" + + msg += f""" ## 用户配置 - 目标漏洞: {config.get('target_vulnerabilities', ['all'])} - 验证级别: {config.get('verification_level', 'sandbox')} diff --git a/backend/app/services/agent/agents/recon.py b/backend/app/services/agent/agents/recon.py index e1dcd9c..a2126fd 100644 --- a/backend/app/services/agent/agents/recon.py +++ b/backend/app/services/agent/agents/recon.py @@ -299,19 +299,40 @@ class ReconAgent(BaseAgent): self._total_tokens += tokens_this_round - # 🔥 Handle empty LLM response to prevent loops + # 🔥 Enhanced: Handle empty LLM response with better diagnostics if not llm_output or not llm_output.strip(): - logger.warning(f"[{self.name}] Empty LLM response in iteration {self._iteration}") empty_retry_count = getattr(self, '_empty_retry_count', 0) + 1 self._empty_retry_count = empty_retry_count + + # 🔥 记录更详细的诊断信息 + logger.warning( + f"[{self.name}] Empty LLM response in iteration {self._iteration} " + f"(retry {empty_retry_count}/3, tokens_this_round={tokens_this_round})" + ) + if empty_retry_count >= 3: - logger.error(f"[{self.name}] Too many empty responses, stopping") - error_message = "连续收到空响应,停止信息收集" - await self.emit_event("error", error_message) + logger.error(f"[{self.name}] Too many empty responses, generating fallback result") + error_message = "连续收到空响应,使用回退结果" + await self.emit_event("warning", error_message) + # 🔥 不是直接 break,而是尝试生成一个回退结果 break + + # 🔥 更有针对性的重试提示 + retry_prompt = f"""收到空响应。请根据以下格式输出你的思考和行动: + +Thought: [你对当前情况的分析] +Action: [工具名称,如 list_files, read_file, search_code] +Action Input: {{"参数名": "参数值"}} + +可用工具: {', '.join(self.tools.keys())} + +如果你认为信息收集已经完成,请输出: +Thought: [总结收集到的信息] +Final Answer: [JSON格式的结果]""" + self._conversation_history.append({ "role": "user", - "content": "Received empty response. Please output your Thought and Action.", + "content": retry_prompt, }) continue