Fix: Improve agent stream stability by preventing unnecessary reconnections and correctly draining buffered events.

lintsinghua 2025-12-13 20:21:30 +08:00
parent 507fe393d1
commit d449e2ba78
5 changed files with 106 additions and 36 deletions
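For orientation, the fix has two sides: the backend drains only the events that were already buffered when the SSE stream attaches, and the frontend keeps a single connection alive instead of reconnecting whenever its sequence watermark moves. A minimal TypeScript sketch of the client-side consumption loop follows; the URL handling, parsing, and event shape are illustrative assumptions, not the project's actual API.

// Sketch only: a fetch-based SSE consumer that filters by sequence.
// The URL, AbortSignal wiring, and event shape are assumptions for illustration.
interface StreamEvent {
  event_type: string;
  sequence: number;
  [key: string]: unknown;
}

async function consumeStream(
  url: string,
  afterSequence: number,
  onEvent: (event: StreamEvent) => void,
  signal: AbortSignal,
): Promise<void> {
  const response = await fetch(url, { signal });
  if (!response.body) return;

  const reader = response.body.getReader();
  const decoder = new TextDecoder();
  let buffer = "";

  while (true) {
    const { done, value } = await reader.read();
    if (done) break;
    buffer += decoder.decode(value, { stream: true });

    // SSE messages are separated by a blank line; keep any trailing partial message.
    const messages = buffer.split("\n\n");
    buffer = messages.pop() ?? "";

    for (const message of messages) {
      const dataLine = message.split("\n").find((line) => line.startsWith("data:"));
      if (!dataLine) continue;
      const event = JSON.parse(dataLine.slice(5)) as StreamEvent;
      // Drop anything the client has already seen (the afterSequence watermark).
      if (event.sequence <= afterSequence) continue;
      onEvent(event);
    }
  }
}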

View File

@@ -942,7 +942,9 @@ class BaseAgent(ABC):
             logger.info(f"[{self.name}] Cancelled before LLM call")
             return "", 0
+        logger.info(f"[{self.name}] 🚀 Starting stream_llm_call, emitting thinking_start...")
         await self.emit_thinking_start()
+        logger.info(f"[{self.name}] ✅ thinking_start emitted, starting LLM stream...")
         try:
             async for chunk in self.llm_service.chat_completion_stream(

View File

@@ -312,6 +312,13 @@ class EventManager:
         if task_id in self._event_queues:
             try:
                 self._event_queues[task_id].put_nowait(event_data)
+                # 🔥 DEBUG: log important events being added to the queue
+                if event_type in ["thinking_start", "thinking_end", "dispatch", "task_complete", "task_error"]:
+                    logger.info(f"[EventQueue] Added {event_type} to queue for task {task_id}, queue size: {self._event_queues[task_id].qsize()}")
+                elif event_type == "thinking_token":
+                    # Log once every 10 tokens
+                    if sequence % 10 == 0:
+                        logger.debug(f"[EventQueue] Added thinking_token #{sequence} to queue, size: {self._event_queues[task_id].qsize()}")
             except asyncio.QueueFull:
                 logger.warning(f"Event queue full for task {task_id}, dropping event: {event_type}")
@@ -438,16 +445,22 @@ class EventManager:
         # Get the existing queue (created by AgentRunner during initialization)
         queue = self._event_queues.get(task_id)
         if not queue:
             # If the queue does not exist, create a new one (fallback logic)
             queue = self.create_queue(task_id)
             logger.warning(f"Queue not found for task {task_id}, created new one")
-        # 🔥 First drain the events already buffered in the queue (these were produced before the SSE connection)
+        # 🔥 CRITICAL FIX: record the current queue size and consume only those pre-existing events
+        # Previous bug: `while not queue.empty()` looped forever because the LLM keeps adding events
+        initial_queue_size = queue.qsize()
+        logger.info(f"[StreamEvents] Task {task_id}: Draining {initial_queue_size} buffered events...")
+        # 🔥 Drain the buffered events first (consume only as many as existed at connect time)
         buffered_count = 0
         skipped_count = 0
-        while not queue.empty():
+        max_drain = initial_queue_size  # Consume only this many events to avoid an infinite loop
+        for _ in range(max_drain):
             try:
                 buffered_event = queue.get_nowait()
@@ -460,38 +473,48 @@ class EventManager:
                 buffered_count += 1
                 yield buffered_event
-                # 🔥 Add a delay to every buffered event so they are not emitted all at once
+                # 🔥 Add a small delay to buffered events, much shorter than before (avoid slowing things down)
                 event_type = buffered_event.get("event_type")
                 if event_type == "thinking_token":
-                    await asyncio.sleep(0.015)  # 15ms for tokens
-                else:
-                    await asyncio.sleep(0.005)  # 5ms for other events
+                    await asyncio.sleep(0.005)  # 5ms for tokens (reduced from 15ms)
+                # Other events get no delay and are sent immediately
                 # Check whether this is a terminal event
                 if event_type in ["task_complete", "task_error", "task_cancel"]:
-                    logger.debug(f"Task {task_id} already completed, sent {buffered_count} buffered events (skipped {skipped_count})")
+                    logger.info(f"[StreamEvents] Task {task_id} already completed, sent {buffered_count} buffered events (skipped {skipped_count})")
                     return
             except asyncio.QueueEmpty:
                 break
         if buffered_count > 0 or skipped_count > 0:
-            logger.debug(f"Drained queue for task {task_id}: sent {buffered_count}, skipped {skipped_count} (after_sequence={after_sequence})")
+            logger.info(f"[StreamEvents] Task {task_id}: Drained {buffered_count} buffered events, skipped {skipped_count}")
+        # 🔥 DEBUG: log entering the real-time loop
+        logger.info(f"[StreamEvents] Task {task_id}: Entering real-time loop, queue size: {queue.qsize()}")
         # Then push new events in real time
         try:
             while True:
                 try:
+                    logger.debug(f"[StreamEvents] Task {task_id}: Waiting for next event from queue...")
                     event = await asyncio.wait_for(queue.get(), timeout=30)
+                    logger.debug(f"[StreamEvents] Task {task_id}: Got event from queue: {event.get('event_type')}")
                     # 🔥 Filter out events with sequence <= after_sequence
                     event_sequence = event.get("sequence", 0)
                     if event_sequence <= after_sequence:
+                        logger.debug(f"[StreamEvents] Task {task_id}: Skipping event seq={event_sequence} (after_sequence={after_sequence})")
                         continue
+                    # 🔥 DEBUG: log important events being sent
+                    event_type = event.get("event_type")
+                    if event_type in ["thinking_start", "thinking_end", "dispatch", "task_complete", "task_error"]:
+                        logger.info(f"[StreamEvents] Yielding {event_type} (seq={event_sequence}) for task {task_id}")
                     yield event
                     # 🔥 Add a tiny delay to thinking_token events to preserve the streaming effect
-                    if event.get("event_type") == "thinking_token":
+                    if event_type == "thinking_token":
                         await asyncio.sleep(0.01)  # 10ms
                     # Check whether this is a terminal event
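The key change in this file is bounding the drain loop: snapshotting the queue size at connect time and consuming exactly that many events terminates even while a producer keeps appending, which a `while not queue.empty()` loop does not. The same idea, sketched in TypeScript against an in-memory buffer (the real change is in the Python EventManager; all names below are illustrative):

// Sketch of the bounded-drain idea in TypeScript; the buffer, the live
// iterator, and the event shape are illustrative, not the project's classes.
type AgentEvent = { event_type: string; sequence: number };

async function* streamEvents(
  buffered: AgentEvent[],            // events queued before the client attached
  live: AsyncIterable<AgentEvent>,   // events produced after attaching
  afterSequence: number,
): AsyncGenerator<AgentEvent> {
  // Snapshot the size first: drain only what existed at connect time,
  // so a producer that keeps appending cannot trap us in the drain phase.
  const maxDrain = buffered.length;
  for (let i = 0; i < maxDrain; i++) {
    const event = buffered[i];
    if (event.sequence <= afterSequence) continue; // already seen by this client
    yield event;
    if (["task_complete", "task_error", "task_cancel"].includes(event.event_type)) {
      return; // the task already finished; there is nothing live to stream
    }
  }
  // Then switch to real-time events, applying the same sequence filter.
  for await (const event of live) {
    if (event.sequence <= afterSequence) continue;
    yield event;
  }
}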

View File

@@ -93,6 +93,10 @@ export function useAgentStream(
   const handlerRef = useRef<AgentStreamHandler | null>(null);
   const thinkingBufferRef = useRef<string[]>([]);
+  // 🔥 Store afterSequence in a ref so changing dependencies of connect do not trigger reconnects
+  const afterSequenceRef = useRef(afterSequence);
+  afterSequenceRef.current = afterSequence;
   // Connect
   const connect = useCallback(() => {
     if (!taskId) return;
@@ -114,11 +118,15 @@
     setError(null);
     thinkingBufferRef.current = [];
+    // 🔥 Read the latest afterSequence value from the ref
+    const currentAfterSequence = afterSequenceRef.current;
+    console.log(`[useAgentStream] Creating handler with afterSequence=${currentAfterSequence}`);
     // Create a new handler
     handlerRef.current = new AgentStreamHandler(taskId, {
       includeThinking,
       includeToolCalls,
-      afterSequence,
+      afterSequence: currentAfterSequence,
       onEvent: (event) => {
         // Pass to custom callback first (important for capturing metadata like agent_name)
@@ -215,7 +223,7 @@
     handlerRef.current.connect();
     setIsConnected(true);
-  }, [taskId, includeThinking, includeToolCalls, afterSequence, maxEvents]); // 🔥 Removed the callbackOptions dependency
+  }, [taskId, includeThinking, includeToolCalls, maxEvents]); // 🔥 Removed the afterSequence dependency; the ref is used instead
   // Disconnect
   const disconnect = useCallback(() => {
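The hook change relies on a common React pattern: mirror a frequently changing value into a ref so that a useCallback reading it keeps a stable identity, which in turn keeps effects that depend on the callback from re-running. A minimal sketch, with hypothetical names standing in for the real handler:

import { useCallback, useRef } from "react";

// `openStream` stands in for constructing and connecting AgentStreamHandler;
// it is a hypothetical helper used only for this sketch.
declare function openStream(taskId: string, afterSequence: number): void;

export function useStableConnect(taskId: string, afterSequence: number) {
  // Mirror the latest prop into a ref on every render.
  const afterSequenceRef = useRef(afterSequence);
  afterSequenceRef.current = afterSequence;

  // `connect` depends only on taskId, so its identity stays stable while
  // afterSequence keeps changing; the ref still yields the latest value
  // at the moment connect() actually runs.
  const connect = useCallback(() => {
    if (!taskId) return;
    openStream(taskId, afterSequenceRef.current);
  }, [taskId]);

  return connect;
}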

View File

@@ -423,14 +423,16 @@ function AgentAuditPageContent() {
       if (!currentId) {
         // Pre-generate an ID so we can track this log entry
         const newLogId = `thinking-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
-        dispatch({ type: 'ADD_LOG', payload: {
+        dispatch({
+          type: 'ADD_LOG', payload: {
             id: newLogId,
             type: 'thinking',
             title: 'Thinking...',
             content: cleanContent,
             isStreaming: true,
             agentName: getCurrentAgentName() || undefined,
-        }});
+          }
+        });
         setCurrentThinkingId(newLogId);
       } else {
         updateLog(currentId, { content: cleanContent });
@@ -589,14 +591,19 @@
     if (hasConnectedRef.current) return;
     hasConnectedRef.current = true;
-    console.log(`[AgentAudit] Connecting to stream with afterSequence=${afterSequence}`);
+    console.log(`[AgentAudit] Connecting to stream (afterSequence will be passed via streamOptions)`);
     connectStream();
     dispatch({ type: 'ADD_LOG', payload: { type: 'info', title: 'Connected to audit stream' } });
     return () => {
+      console.log('[AgentAudit] Cleanup: disconnecting stream');
       disconnectStream();
     };
-  }, [taskId, task?.status, historicalEventsLoaded, connectStream, disconnectStream, dispatch, afterSequence]);
+  // 🔥 CRITICAL FIX: remove the afterSequence dependency!
+  // afterSequence is passed via streamOptions, so there is no need to trigger a reconnect here.
+  // If it were included, updating afterSequence in loadHistoricalEvents would cause a disconnect/reconnect.
+  // eslint-disable-next-line react-hooks/exhaustive-deps
+  }, [taskId, task?.status, historicalEventsLoaded, connectStream, disconnectStream, dispatch]);
   // Polling
   useEffect(() => {
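The page-level change works for the same reason: a useEffect re-runs (cleanup first, then setup) whenever any dependency changes, so keeping afterSequence in the dependency array forced a disconnect and reconnect every time loading history bumped it. A hedged sketch of the shape of that effect, with illustrative names:

import { useEffect } from "react";

// Hypothetical stand-ins for the real stream controls in this sketch.
declare function connectStream(): void;
declare function disconnectStream(): void;

export function useAuditStream(taskId: string | null, historyLoaded: boolean) {
  useEffect(() => {
    if (!taskId || !historyLoaded) return;
    connectStream();
    return () => {
      // This cleanup runs before every re-run and on unmount. With
      // afterSequence in the dependency list, it fired on every sequence
      // update, tearing the stream down and reconnecting it.
      disconnectStream();
    };
    // afterSequence is intentionally omitted from the dependencies: it reaches
    // the handler through a ref / stream options rather than through this effect.
  }, [taskId, historyLoaded]);
}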

View File

@@ -30,6 +30,7 @@ export type StreamEventType =
   | 'phase_end'
   | 'phase_complete'
   // Finding-related
+  | 'finding' // Backward compatibility
   | 'finding_new'
   | 'finding_verified'
   // Status-related
@@ -133,8 +134,11 @@
   *
   */
  connect(): void {
+    // 🔥 Reset the disconnecting flag to allow a new connection
+    this.isDisconnecting = false;
    // 🔥 If already connected, do not connect again
-    if (this.isConnected || this.isDisconnecting) {
+    if (this.isConnected) {
      return;
    }
@@ -196,12 +200,14 @@
      while (true) {
        // 🔥 Check whether we are disconnecting
        if (this.isDisconnecting) {
+          console.log('[AgentStream] Disconnecting, breaking loop');
          break;
        }
        const { done, value } = await this.reader.read();
        if (done) {
+          console.log('[AgentStream] Reader done, stream ended');
          break;
        }
@@ -211,6 +217,12 @@
        const events = this.parseSSE(buffer);
        buffer = events.remaining;
+        // 🔥 DEBUG: log received events
+        if (events.parsed.length > 0) {
+          const eventTypes = events.parsed.map(e => e.type);
+          console.log(`[AgentStream] Received ${events.parsed.length} events:`, eventTypes);
+        }
        // 🔥 Process events one by one, with a tiny delay so React can render them individually
        for (const event of events.parsed) {
          this.handleEvent(event);
@@ -448,21 +460,39 @@
    this.isDisconnecting = true;
    this.isConnected = false;
-    // 🔥 Cancel the fetch request
+    // 🔥 Cancel the fetch request (wrap in try/catch to handle AbortError)
    if (this.abortController) {
+      try {
        this.abortController.abort();
+      } catch {
+        // Ignore abort errors
+      }
      this.abortController = null;
    }
-    // 🔥 Clean up the reader
+    // 🔥 Clean up the reader (handle the promise rejection from cancel())
    if (this.reader) {
-      try {
-        this.reader.cancel();
-        this.reader.releaseLock();
-      } catch {
-        // Ignore cleanup errors
-      }
+      const reader = this.reader;
      this.reader = null;
+      // reader.cancel() returns a Promise that may reject with AbortError
+      // We need to catch this to prevent an unhandled promise rejection
+      Promise.resolve().then(() => {
+        try {
+          // Cancel and release in a controlled way
+          reader.cancel().catch(() => {
+            // Silently ignore cancel errors (expected during abort)
+          }).finally(() => {
+            try {
+              reader.releaseLock();
+            } catch {
+              // Silently ignore releaseLock errors
+            }
+          });
+        } catch {
+          // Silently ignore any synchronous errors
+        }
+      });
    }
    // Clean up the EventSource (if used)
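One note on the disconnect path above: ReadableStreamDefaultReader.cancel() returns a promise that can reject (typically with an AbortError once the underlying fetch is aborted), and an unattended rejection surfaces as an unhandled-rejection error. A minimal sketch of a defensive teardown along the same lines, with illustrative parameter names:

// Sketch: defensive teardown of an aborted fetch stream.
function teardownStream(
  abortController: AbortController | null,
  reader: ReadableStreamDefaultReader<Uint8Array> | null,
): void {
  try {
    abortController?.abort();
  } catch {
    // Ignore abort errors.
  }

  if (reader) {
    // cancel() returns a promise; attach a catch so a rejection
    // (e.g. an AbortError) never becomes an unhandled rejection.
    reader
      .cancel()
      .catch(() => {
        // Expected while aborting; ignore.
      })
      .finally(() => {
        try {
          reader.releaseLock();
        } catch {
          // The lock may already be released; ignore.
        }
      });
  }
}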