diff --git a/backend/app/services/agent/agents/analysis.py b/backend/app/services/agent/agents/analysis.py index 64a5b96..3a748d7 100644 --- a/backend/app/services/agent/agents/analysis.py +++ b/backend/app/services/agent/agents/analysis.py @@ -574,6 +574,17 @@ Final Answer: {{"findings": [...], "summary": "..."}}""" # 重置空响应计数器 self._empty_retry_count = 0 + # 🔥 检查是否是 API 错误(从 BaseAgent.stream_llm_call 返回) + if llm_output.startswith("[API_ERROR:"): + # 提取错误类型和消息 + match = re.match(r"\[API_ERROR:(\w+)\]\s*(.*)", llm_output) + if match: + error_type = match.group(1) + error_message = match.group(2) + logger.error(f"[{self.name}] Fatal API error: {error_type} - {error_message}") + await self.emit_event("error", f"LLM API 错误 ({error_type}): {error_message}") + break + # 解析 LLM 响应 step = self._parse_llm_response(llm_output) self._steps.append(step) diff --git a/backend/app/services/agent/agents/base.py b/backend/app/services/agent/agents/base.py index bdc5188..a2b717f 100644 --- a/backend/app/services/agent/agents/base.py +++ b/backend/app/services/agent/agents/base.py @@ -1033,7 +1033,7 @@ class BaseAgent(ABC): # 使用特殊前缀标记 API 错误,让调用方能够识别 # 格式:[API_ERROR:error_type] user_message - if error_type in ("rate_limit", "quota_exceeded", "authentication", "connection"): + if error_type in ("rate_limit", "quota_exceeded", "authentication", "connection", "server_error"): accumulated = f"[API_ERROR:{error_type}] {user_message}" elif not accumulated: accumulated = f"[系统错误: {error_msg}] 请重新思考并输出你的决策。" diff --git a/backend/app/services/agent/agents/orchestrator.py b/backend/app/services/agent/agents/orchestrator.py index 02677db..c1d878f 100644 --- a/backend/app/services/agent/agents/orchestrator.py +++ b/backend/app/services/agent/agents/orchestrator.py @@ -331,6 +331,19 @@ Action Input: {{"参数": "值"}} await asyncio.sleep(5) # 等待 5 秒后重试 continue + elif error_type == "server_error": + # 服务器错误 (5xx) - 重试 + api_retry_count = getattr(self, '_api_retry_count', 0) + 1 + self._api_retry_count = api_retry_count + if api_retry_count >= 3: + logger.error(f"[{self.name}] Too many server errors, stopping") + await self.emit_event("error", f"API 服务端错误重试次数过多: {error_message}") + break + logger.warning(f"[{self.name}] Server error, retrying ({api_retry_count}/3)") + await self.emit_event("warning", f"API 服务端繁忙或异常,重试中 ({api_retry_count}/3)") + await asyncio.sleep(10) # 等待 10 秒后重试(服务端错误通常需要更久恢复) + continue + # 重置 API 重试计数器(成功获取响应后) self._api_retry_count = 0 diff --git a/backend/app/services/agent/agents/recon.py b/backend/app/services/agent/agents/recon.py index 01a7c0e..83061e9 100644 --- a/backend/app/services/agent/agents/recon.py +++ b/backend/app/services/agent/agents/recon.py @@ -509,6 +509,17 @@ Final Answer: [JSON格式的结果]""" # 重置空响应计数器 self._empty_retry_count = 0 + # 🔥 检查是否是 API 错误(从 BaseAgent.stream_llm_call 返回) + if llm_output.startswith("[API_ERROR:"): + # 提取错误类型和消息 + match = re.match(r"\[API_ERROR:(\w+)\]\s*(.*)", llm_output) + if match: + error_type = match.group(1) + error_message = match.group(2) + logger.error(f"[{self.name}] Fatal API error: {error_type} - {error_message}") + await self.emit_event("error", f"LLM API 错误 ({error_type}): {error_message}") + break + # 解析 LLM 响应 step = self._parse_llm_response(llm_output) self._steps.append(step) diff --git a/backend/app/services/agent/agents/verification.py b/backend/app/services/agent/agents/verification.py index 16da14f..3df5ab8 100644 --- a/backend/app/services/agent/agents/verification.py +++ b/backend/app/services/agent/agents/verification.py @@ -718,6 +718,17 @@ class VerificationAgent(BaseAgent): self._total_tokens += tokens_this_round + # 🔥 检查是否是 API 错误(从 BaseAgent.stream_llm_call 返回) + if llm_output.startswith("[API_ERROR:"): + # 提取错误类型和消息 + match = re.match(r"\[API_ERROR:(\w+)\]\s*(.*)", llm_output) + if match: + error_type = match.group(1) + error_message = match.group(2) + logger.error(f"[{self.name}] Fatal API error: {error_type} - {error_message}") + await self.emit_event("error", f"LLM API 错误 ({error_type}): {error_message}") + break + # 🔥 Handle empty LLM response to prevent loops if not llm_output or not llm_output.strip(): logger.warning(f"[{self.name}] Empty LLM response in iteration {self._iteration}") diff --git a/backend/app/services/llm/adapters/litellm_adapter.py b/backend/app/services/llm/adapters/litellm_adapter.py index 3a78549..a1c30ca 100644 --- a/backend/app/services/llm/adapters/litellm_adapter.py +++ b/backend/app/services/llm/adapters/litellm_adapter.py @@ -251,6 +251,9 @@ class LiteLLMAdapter(BaseLLMAdapter): except litellm.exceptions.APIConnectionError as e: api_response = self._extract_api_response(e) raise LLMError(f"无法连接到 API 服务", self.config.provider, api_response=api_response) + except (litellm.exceptions.ServiceUnavailableError, litellm.exceptions.InternalServerError) as e: + api_response = self._extract_api_response(e) + raise LLMError(f"API 服务暂时不可用 ({type(e).__name__})", self.config.provider, 503, api_response=api_response) except litellm.exceptions.APIError as e: api_response = self._extract_api_response(e) raise LLMError(f"API 错误", self.config.provider, getattr(e, 'status_code', None), api_response=api_response) @@ -469,6 +472,17 @@ class LiteLLMAdapter(BaseLLMAdapter): "accumulated": accumulated_content, "usage": None, } + except (litellm.exceptions.ServiceUnavailableError, litellm.exceptions.InternalServerError) as e: + # 服务不可用 - 服务器端 5xx 错误 + logger.error(f"Stream server error ({type(e).__name__}): {e}") + yield { + "type": "error", + "error_type": "server_error", + "error": str(e), + "user_message": f"API 服务暂时不可用 ({type(e).__name__})", + "accumulated": accumulated_content, + "usage": None, + } except Exception as e: # 其他错误 - 检查是否是包装的速率限制错误 diff --git a/frontend/src/pages/AgentAudit/index.tsx b/frontend/src/pages/AgentAudit/index.tsx index ff107f6..6b74c19 100644 --- a/frontend/src/pages/AgentAudit/index.tsx +++ b/frontend/src/pages/AgentAudit/index.tsx @@ -146,23 +146,33 @@ function AgentAuditPageContent() { }, [loadAgentTree]); // 🔥 NEW: 加载历史事件并转换为日志项 - const loadHistoricalEvents = useCallback(async () => { + const loadHistoricalEvents = useCallback(async (isSync = false) => { if (!taskId) return 0; - // 🔥 防止重复加载历史事件 - if (hasLoadedHistoricalEventsRef.current) { + // 🔥 只有在非同步模式下才检查是否已加载过(首屏加载) + if (!isSync && hasLoadedHistoricalEventsRef.current) { console.log('[AgentAudit] Historical events already loaded, skipping'); return 0; } - hasLoadedHistoricalEventsRef.current = true; + + // 如果是首屏加载,标记为已尝试加载 + if (!isSync) { + hasLoadedHistoricalEventsRef.current = true; + } try { - console.log(`[AgentAudit] Fetching historical events for task ${taskId}...`); - const events = await getAgentEvents(taskId, { limit: 500 }); + const currentSeq = lastEventSequenceRef.current; + console.log(`[AgentAudit] Fetching historical events for task ${taskId} (after_sequence=${currentSeq})...`); + + // 🔥 传递当前已知的最后序列号,实现增量加载 + const events = await getAgentEvents(taskId, { + after_sequence: currentSeq, + limit: 500 + }); console.log(`[AgentAudit] Received ${events.length} events from API`); if (events.length === 0) { - console.log('[AgentAudit] No historical events found'); + console.log('[AgentAudit] No new historical events found'); return 0; } @@ -629,8 +639,43 @@ function AgentAuditPageContent() { disconnectStreamRef.current = disconnectStream; }, [disconnectStream]); + // 🔥 Centralized Sync State Function + const syncState = useCallback(async () => { + if (!taskId) return; + + console.log('[AgentAudit] Synchronizing state...'); + try { + // 1. 同步任务基本信息、发现项和树结构 + await Promise.all([loadTask(), loadFindings(), loadAgentTree()]); + + // 2. 增量同步日志完成状态或缺失日志 + await loadHistoricalEvents(true); + + // 3. 如果连接断开且任务仍在运行,尝试重新连接流 + if (!isConnected && isRunning) { + console.log('[AgentAudit] Stream disconnected while task running, attempting reconnect...'); + hasConnectedRef.current = false; // 允许重新连接 + connectStream(); + } + } catch (err) { + console.error('[AgentAudit] Failed to sync state:', err); + } + }, [taskId, loadTask, loadFindings, loadAgentTree, loadHistoricalEvents, isConnected, isRunning, connectStream]); + // ============ Effects ============ + // 🔥 Visibility Change Listener - 同步状态的主要触发器 + useEffect(() => { + const handleVisibilityChange = () => { + if (document.visibilityState === 'visible') { + syncState(); + } + }; + + document.addEventListener('visibilitychange', handleVisibilityChange); + return () => document.removeEventListener('visibilitychange', handleVisibilityChange); + }, [syncState]); + // Status animation useEffect(() => { if (!isRunning) return;