From 3db20a3afbbfd130816b9170dd8a719fdc153d8f Mon Sep 17 00:00:00 2001
From: lintsinghua <lintsinghua@users.noreply.github.com>
Date: Fri, 12 Dec 2025 16:36:39 +0800
Subject: [PATCH] feat(agent): enhance error handling and project scope
 filtering
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Downgrade Python version from 3.13 to 3.11.12 for compatibility
- Improve empty LLM response handling with better diagnostics and retry logic in AnalysisAgent
- Add detailed logging for empty response retries with token count and iteration tracking
- After three consecutive empty responses, emit a warning (instead of an error) and exit the loop so a fallback result can be used, rather than reporting an immediate failure
- Enhance stream error handling with partial content recovery and error message propagation
- Add catch-all exception handling in stream_llm_call that logs the error, emits an error event, and returns an error message to the agent
- Implement project scope filtering to ensure consistent filtered views across Orchestrator and sub-agents
- Track filtered files and directories separately when target_files are specified
- Add scope_limited flag and scope_message to project structure for transparency
- Remove manual progress_percentage setting and rely on computed property for COMPLETED status
- Improve code comments with diagnostic markers (🔥) for critical sections
---
 backend/.python-version                       |  2 +-
 backend/app/api/v1/endpoints/agent_tasks.py   | 44 +++++++++++++++----
 backend/app/services/agent/agents/analysis.py | 33 +++++++++++---
 backend/app/services/agent/agents/base.py     | 20 ++++++++-
 .../app/services/agent/agents/orchestrator.py | 40 ++++++++++++++++-
 backend/app/services/agent/agents/recon.py    | 33 +++++++++++---
 6 files changed, 149 insertions(+), 23 deletions(-)

diff --git a/backend/.python-version b/backend/.python-version
index 24ee5b1..3b564fa 100644
--- a/backend/.python-version
+++ b/backend/.python-version
@@ -1 +1 @@
-3.13
+3.11.12
diff --git a/backend/app/api/v1/endpoints/agent_tasks.py b/backend/app/api/v1/endpoints/agent_tasks.py
index 2231822..ca55de4 100644
--- a/backend/app/api/v1/endpoints/agent_tasks.py
+++ b/backend/app/api/v1/endpoints/agent_tasks.py
@@ -389,7 +389,8 @@ async def _execute_agent_task(task_id: str):
                 
                 # 计算安全评分
                 task.security_score = _calculate_security_score(findings)
-                task.progress_percentage = 100.0
+                # 🔥 注意: progress_percentage 是计算属性，不需要手动设置
+                # 当 status = COMPLETED 时会自动返回 100.0
                 
                 await db.commit()
                 
@@ -580,6 +581,9 @@ async def _collect_project_info(
         project_name: 项目名称
         exclude_patterns: 排除模式列表
         target_files: 目标文件列表
+    
+    🔥 重要：当指定了 target_files 时，返回的项目结构应该只包含目标文件相关的信息，
+    以确保 Orchestrator 和子 Agent 看到的是一致的、过滤后的视图。
     """
     import fnmatch
     
@@ -615,6 +619,10 @@ async def _collect_project_info(
             ".rb": "Ruby", ".rs": "Rust", ".c": "C", ".cpp": "C++",
         }
         
+        # 🔥 收集过滤后的文件列表
+        filtered_files = []
+        filtered_dirs = set()
+        
         for root, dirs, files in os.walk(project_root):
             dirs[:] = [d for d in dirs if d not in exclude_dirs]
             
@@ -636,20 +644,40 @@ async def _collect_project_info(
                     continue
                 
                 info["file_count"] += 1
+                filtered_files.append(relative_path)
+                
+                # 🔥 收集文件所在的目录
+                dir_path = os.path.dirname(relative_path)
+                if dir_path:
+                    # 添加目录及其父目录
+                    parts = dir_path.split(os.sep)
+                    for i in range(len(parts)):
+                        filtered_dirs.add(os.sep.join(parts[:i+1]))
                 
                 ext = os.path.splitext(f)[1].lower()
                 if ext in lang_map and lang_map[ext] not in info["languages"]:
                     info["languages"].append(lang_map[ext])
         
-        # 收集顶层目录结构
-        try:
-            top_items = os.listdir(project_root)
+        # 🔥 根据是否有目标文件限制，生成不同的结构信息
+        if target_files_set:
+            # 当指定了目标文件时，只显示目标文件和相关目录
             info["structure"] = {
-                "directories": [d for d in top_items if os.path.isdir(os.path.join(project_root, d)) and d not in exclude_dirs],
-                "files": [f for f in top_items if os.path.isfile(os.path.join(project_root, f))][:20],
+                "directories": sorted(list(filtered_dirs))[:20],
+                "files": filtered_files[:30],
+                "scope_limited": True,  # 🔥 标记这是限定范围的视图
+                "scope_message": f"审计范围限定为 {len(filtered_files)} 个指定文件",
             }
-        except Exception:
-            pass
+        else:
+            # 全项目审计时，显示顶层目录结构
+            try:
+                top_items = os.listdir(project_root)
+                info["structure"] = {
+                    "directories": [d for d in top_items if os.path.isdir(os.path.join(project_root, d)) and d not in exclude_dirs],
+                    "files": [f for f in top_items if os.path.isfile(os.path.join(project_root, f))][:20],
+                    "scope_limited": False,
+                }
+            except Exception:
+                pass
             
     except Exception as e:
         logger.warning(f"Failed to collect project info: {e}")
diff --git a/backend/app/services/agent/agents/analysis.py b/backend/app/services/agent/agents/analysis.py
index 508c66e..0815f66 100644
--- a/backend/app/services/agent/agents/analysis.py
+++ b/backend/app/services/agent/agents/analysis.py
@@ -353,19 +353,40 @@ class AnalysisAgent(BaseAgent):
                 
                 self._total_tokens += tokens_this_round
 
-                # 🔥 Handle empty LLM response to prevent loops
+                # 🔥 Enhanced: Handle empty LLM response with better diagnostics
                 if not llm_output or not llm_output.strip():
-                    logger.warning(f"[{self.name}] Empty LLM response in iteration {self._iteration}")
                     empty_retry_count = getattr(self, '_empty_retry_count', 0) + 1
                     self._empty_retry_count = empty_retry_count
+                    
+                    # 🔥 记录更详细的诊断信息
+                    logger.warning(
+                        f"[{self.name}] Empty LLM response in iteration {self._iteration} "
+                        f"(retry {empty_retry_count}/3, tokens_this_round={tokens_this_round})"
+                    )
+                    
                     if empty_retry_count >= 3:
-                        logger.error(f"[{self.name}] Too many empty responses, stopping")
-                        error_message = "连续收到空响应，停止分析"
-                        await self.emit_event("error", error_message)
+                        logger.error(f"[{self.name}] Too many empty responses, generating fallback result")
+                        error_message = "连续收到空响应，使用回退结果"
+                        await self.emit_event("warning", error_message)
+                        # 🔥 NOTE: this still breaks out of the loop; any fallback result must be built by the code that follows the loop
                         break
+                    
+                    # 🔥 更有针对性的重试提示
+                    retry_prompt = f"""收到空响应。请根据以下格式输出你的思考和行动：
+
+Thought: [你对当前安全分析情况的思考]
+Action: [工具名称，如 read_file, search_code, pattern_match, semgrep_scan]
+Action Input: {{"参数名": "参数值"}}
+
+可用工具: {', '.join(self.tools.keys())}
+
+如果你已完成分析，请输出：
+Thought: [总结所有发现]
+Final Answer: {{"findings": [...], "summary": "..."}}"""
+                    
                     self._conversation_history.append({
                         "role": "user",
-                        "content": "Received empty response. Please output your Thought and Action.",
+                        "content": retry_prompt,
                     })
                     continue
                 
diff --git a/backend/app/services/agent/agents/base.py b/backend/app/services/agent/agents/base.py
index 017386c..64152f9 100644
--- a/backend/app/services/agent/agents/base.py
+++ b/backend/app/services/agent/agents/base.py
@@ -951,15 +951,33 @@ class BaseAgent(ABC):
                     
                 elif chunk["type"] == "error":
                     accumulated = chunk.get("accumulated", "")
-                    logger.error(f"Stream error: {chunk.get('error')}")
+                    error_msg = chunk.get("error", "Unknown error")
+                    logger.error(f"[{self.name}] Stream error: {error_msg}")
+                    # 🔥 如果有部分累积内容，尝试使用它
+                    if accumulated:
+                        logger.warning(f"[{self.name}] Using partial accumulated content ({len(accumulated)} chars)")
+                        total_tokens = chunk.get("usage", {}).get("total_tokens", 0)
+                    else:
+                        # 🔥 返回一个提示 LLM 继续的消息，而不是空字符串
+                        accumulated = f"[系统错误: {error_msg}] 请重新思考并输出你的决策。"
                     break
                     
         except asyncio.CancelledError:
             logger.info(f"[{self.name}] LLM call cancelled")
             raise
+        except Exception as e:
+            # 🔥 Catch-all: log with traceback and emit an error event; the exception is not re-raised
+            logger.error(f"[{self.name}] Unexpected error in stream_llm_call: {e}", exc_info=True)
+            await self.emit_event("error", f"LLM 调用错误: {str(e)}")
+            # 返回错误提示，让 Agent 知道发生了什么
+            accumulated = f"[LLM调用错误: {str(e)}] 请重试。"
         finally:
             await self.emit_thinking_end(accumulated)
         
+        # 🔥 记录空响应警告，帮助调试
+        if not accumulated or not accumulated.strip():
+            logger.warning(f"[{self.name}] Empty LLM response returned (total_tokens: {total_tokens})")
+        
         return accumulated, total_tokens
     
     async def execute_tool(self, tool_name: str, tool_input: Dict) -> str:
diff --git a/backend/app/services/agent/agents/orchestrator.py b/backend/app/services/agent/agents/orchestrator.py
index cb0ab19..33371c0 100644
--- a/backend/app/services/agent/agents/orchestrator.py
+++ b/backend/app/services/agent/agents/orchestrator.py
@@ -440,14 +440,52 @@ class OrchestratorAgent(BaseAgent):
         config: Dict[str, Any],
     ) -> str:
         """构建初始消息"""
+        structure = project_info.get('structure', {})
+        
+        # 🔥 检查是否是限定范围的审计
+        scope_limited = structure.get('scope_limited', False)
+        scope_message = structure.get('scope_message', '')
+        
         msg = f"""请开始对以下项目进行安全审计。
 
 ## 项目信息
 - 名称: {project_info.get('name', 'unknown')}
 - 语言: {project_info.get('languages', [])}
 - 文件数量: {project_info.get('file_count', 0)}
-- 目录结构: {json.dumps(project_info.get('structure', {}), ensure_ascii=False, indent=2)}
+"""
+        
+        # 🔥 根据是否限定范围显示不同的结构信息
+        if scope_limited:
+            msg += f"""
+## ⚠️ 审计范围限定
+**{scope_message}**
 
+### 目标文件列表
+"""
+            for f in structure.get('files', []):
+                msg += f"- {f}\n"
+            
+            if structure.get('directories'):
+                msg += f"""
+### 相关目录
+{structure.get('directories', [])}
+"""
+        else:
+            msg += f"""
+## 目录结构
+{json.dumps(structure, ensure_ascii=False, indent=2)}
+"""
+        
+        # 🔥 如果配置了 target_files，也明确显示
+        target_files = config.get('target_files', [])
+        if target_files:
+            msg += f"""
+## ⚠️ 重要提示
+用户指定了 **{len(target_files)}** 个目标文件进行审计。
+请确保你的分析集中在这些指定的文件上，不要浪费时间分析其他文件。
+"""
+        
+        msg += f"""
 ## 用户配置
 - 目标漏洞: {config.get('target_vulnerabilities', ['all'])}
 - 验证级别: {config.get('verification_level', 'sandbox')}
diff --git a/backend/app/services/agent/agents/recon.py b/backend/app/services/agent/agents/recon.py
index e1dcd9c..a2126fd 100644
--- a/backend/app/services/agent/agents/recon.py
+++ b/backend/app/services/agent/agents/recon.py
@@ -299,19 +299,40 @@ class ReconAgent(BaseAgent):
                 
                 self._total_tokens += tokens_this_round
                 
-                # 🔥 Handle empty LLM response to prevent loops
+                # 🔥 Enhanced: Handle empty LLM response with better diagnostics
                 if not llm_output or not llm_output.strip():
-                    logger.warning(f"[{self.name}] Empty LLM response in iteration {self._iteration}")
                     empty_retry_count = getattr(self, '_empty_retry_count', 0) + 1
                     self._empty_retry_count = empty_retry_count
+                    
+                    # 🔥 记录更详细的诊断信息
+                    logger.warning(
+                        f"[{self.name}] Empty LLM response in iteration {self._iteration} "
+                        f"(retry {empty_retry_count}/3, tokens_this_round={tokens_this_round})"
+                    )
+                    
                     if empty_retry_count >= 3:
-                        logger.error(f"[{self.name}] Too many empty responses, stopping")
-                        error_message = "连续收到空响应，停止信息收集"
-                        await self.emit_event("error", error_message)
+                        logger.error(f"[{self.name}] Too many empty responses, generating fallback result")
+                        error_message = "连续收到空响应，使用回退结果"
+                        await self.emit_event("warning", error_message)
+                        # 🔥 NOTE: this still breaks out of the loop; any fallback result must be built by the code that follows the loop
                         break
+                    
+                    # 🔥 更有针对性的重试提示
+                    retry_prompt = f"""收到空响应。请根据以下格式输出你的思考和行动：
+
+Thought: [你对当前情况的分析]
+Action: [工具名称，如 list_files, read_file, search_code]
+Action Input: {{"参数名": "参数值"}}
+
+可用工具: {', '.join(self.tools.keys())}
+
+如果你认为信息收集已经完成，请输出：
+Thought: [总结收集到的信息]
+Final Answer: [JSON格式的结果]"""
+                    
                     self._conversation_history.append({
                         "role": "user",
-                        "content": "Received empty response. Please output your Thought and Action.",
+                        "content": retry_prompt,
                     })
                     continue