feat: 增加文件上传大小限制至500MB并优化大文件处理

将ZIP文件上传大小限制从100MB提高到500MB；在agent工具中添加失败调用追踪和自动跳过机制；优化大文件读取性能，支持流式处理指定行范围。
2025-12-15 09:21:37 +08:00 · 2025-12-15 09:21:37 +08:00 · cdf360dcf7
parent 2df1b39e08
commit cdf360dcf7
9 changed files with 164 additions and 29 deletions
--- a/backend/app/api/v1/endpoints/projects.py
+++ b/backend/app/api/v1/endpoints/projects.py
@ -584,8 +584,8 @@ async def upload_project_zip(
        
        # 检查文件大小
        file_size = os.path.getsize(temp_file_path)
-        if file_size > 100 * 1024 * 1024:  # 100MB limit
-            raise HTTPException(status_code=400, detail="文件大小不能超过100MB")
+        if file_size > 500 * 1024 * 1024:  # 500MB limit
+            raise HTTPException(status_code=400, detail="文件大小不能超过500MB")
        
        # 保存到持久化存储
        meta = await save_project_zip(id, temp_file_path, file.filename)
--- a/backend/app/api/v1/endpoints/scan.py
+++ b/backend/app/api/v1/endpoints/scan.py
@ -255,9 +255,9 @@ async def scan_zip(
    
    # Check file size
    file_size = os.path.getsize(file_path)
-    if file_size > 100 * 1024 * 1024:  # 100MB limit
+    if file_size > 500 * 1024 * 1024:  # 500MB limit
        os.remove(file_path)
-        raise HTTPException(status_code=400, detail="文件大小不能超过100MB")
+        raise HTTPException(status_code=400, detail="文件大小不能超过500MB")
    
    # 保存ZIP文件到持久化存储
    await save_project_zip(project_id, file_path, file.filename)
--- a/backend/app/services/agent/agents/analysis.py
+++ b/backend/app/services/agent/agents/analysis.py
@ -535,11 +535,45 @@ Final Answer: {{"findings": [...], "summary": "..."}}"""
                    # 🔥 发射 LLM 动作决策事件
                    await self.emit_llm_action(step.action, step.action_input or {})
                    
+                    # 🔥 循环检测：追踪工具调用失败历史
+                    tool_call_key = f"{step.action}:{json.dumps(step.action_input or {}, sort_keys=True)}"
+                    if not hasattr(self, '_failed_tool_calls'):
+                        self._failed_tool_calls = {}
+                    
                    observation = await self.execute_tool(
                        step.action,
                        step.action_input or {}
                    )
                    
+                    # 🔥 检测工具调用失败并追踪
+                    is_tool_error = (
+                        "失败" in observation or 
+                        "错误" in observation or 
+                        "不存在" in observation or
+                        "文件过大" in observation or
+                        "Error" in observation
+                    )
+                    
+                    if is_tool_error:
+                        self._failed_tool_calls[tool_call_key] = self._failed_tool_calls.get(tool_call_key, 0) + 1
+                        fail_count = self._failed_tool_calls[tool_call_key]
+                        
+                        # 🔥 如果同一调用连续失败3次，添加强制跳过提示
+                        if fail_count >= 3:
+                            logger.warning(f"[{self.name}] Tool call failed {fail_count} times: {tool_call_key}")
+                            observation += f"\n\n⚠️ **系统提示**: 此工具调用已连续失败 {fail_count} 次。请：\n"
+                            observation += "1. 尝试使用不同的参数（如指定较小的行范围）\n"
+                            observation += "2. 使用 search_code 工具定位关键代码片段\n"
+                            observation += "3. 跳过此文件，继续分析其他文件\n"
+                            observation += "4. 如果已有足够发现，直接输出 Final Answer"
+                            
+                            # 重置计数器但保留记录
+                            self._failed_tool_calls[tool_call_key] = 0
+                    else:
+                        # 成功调用，重置失败计数
+                        if tool_call_key in self._failed_tool_calls:
+                            del self._failed_tool_calls[tool_call_key]
+                    
                    # 🔥 工具执行后检查取消状态
                    if self.is_cancelled:
                        logger.info(f"[{self.name}] Cancelled after tool execution")
--- a/backend/app/services/agent/agents/recon.py
+++ b/backend/app/services/agent/agents/recon.py
@ -301,11 +301,45 @@ Final Answer: [JSON格式的结果]"""
                    # 🔥 发射 LLM 动作决策事件
                    await self.emit_llm_action(step.action, step.action_input or {})
                    
+                    # 🔥 循环检测：追踪工具调用失败历史
+                    tool_call_key = f"{step.action}:{json.dumps(step.action_input or {}, sort_keys=True)}"
+                    if not hasattr(self, '_failed_tool_calls'):
+                        self._failed_tool_calls = {}
+                    
                    observation = await self.execute_tool(
                        step.action,
                        step.action_input or {}
                    )
                    
+                    # 🔥 检测工具调用失败并追踪
+                    is_tool_error = (
+                        "失败" in observation or 
+                        "错误" in observation or 
+                        "不存在" in observation or
+                        "文件过大" in observation or
+                        "Error" in observation
+                    )
+                    
+                    if is_tool_error:
+                        self._failed_tool_calls[tool_call_key] = self._failed_tool_calls.get(tool_call_key, 0) + 1
+                        fail_count = self._failed_tool_calls[tool_call_key]
+                        
+                        # 🔥 如果同一调用连续失败3次，添加强制跳过提示
+                        if fail_count >= 3:
+                            logger.warning(f"[{self.name}] Tool call failed {fail_count} times: {tool_call_key}")
+                            observation += f"\n\n⚠️ **系统提示**: 此工具调用已连续失败 {fail_count} 次。请：\n"
+                            observation += "1. 尝试使用不同的参数（如指定较小的行范围）\n"
+                            observation += "2. 使用 search_code 工具定位关键代码片段\n"
+                            observation += "3. 跳过此文件，继续分析其他文件\n"
+                            observation += "4. 如果已有足够信息，直接输出 Final Answer"
+                            
+                            # 重置计数器但保留记录
+                            self._failed_tool_calls[tool_call_key] = 0
+                    else:
+                        # 成功调用，重置失败计数
+                        if tool_call_key in self._failed_tool_calls:
+                            del self._failed_tool_calls[tool_call_key]
+                    
                    # 🔥 工具执行后检查取消状态
                    if self.is_cancelled:
                        logger.info(f"[{self.name}] Cancelled after tool execution")
--- a/backend/app/services/agent/agents/verification.py
+++ b/backend/app/services/agent/agents/verification.py
@ -584,11 +584,45 @@ class VerificationAgent(BaseAgent):
                    # 🔥 发射 LLM 动作决策事件
                    await self.emit_llm_action(step.action, step.action_input or {})
                    
+                    # 🔥 循环检测：追踪工具调用失败历史
+                    tool_call_key = f"{step.action}:{json.dumps(step.action_input or {}, sort_keys=True)}"
+                    if not hasattr(self, '_failed_tool_calls'):
+                        self._failed_tool_calls = {}
+                    
                    observation = await self.execute_tool(
                        step.action,
                        step.action_input or {}
                    )
                    
+                    # 🔥 检测工具调用失败并追踪
+                    is_tool_error = (
+                        "失败" in observation or 
+                        "错误" in observation or 
+                        "不存在" in observation or
+                        "文件过大" in observation or
+                        "Error" in observation
+                    )
+                    
+                    if is_tool_error:
+                        self._failed_tool_calls[tool_call_key] = self._failed_tool_calls.get(tool_call_key, 0) + 1
+                        fail_count = self._failed_tool_calls[tool_call_key]
+                        
+                        # 🔥 如果同一调用连续失败3次，添加强制跳过提示
+                        if fail_count >= 3:
+                            logger.warning(f"[{self.name}] Tool call failed {fail_count} times: {tool_call_key}")
+                            observation += f"\n\n⚠️ **系统提示**: 此工具调用已连续失败 {fail_count} 次。请：\n"
+                            observation += "1. 尝试使用不同的参数（如指定较小的行范围）\n"
+                            observation += "2. 使用 search_code 工具定位关键代码片段\n"
+                            observation += "3. 跳过此发现的验证，继续验证其他发现\n"
+                            observation += "4. 如果已有足够验证结果，直接输出 Final Answer"
+                            
+                            # 重置计数器
+                            self._failed_tool_calls[tool_call_key] = 0
+                    else:
+                        # 成功调用，重置失败计数
+                        if tool_call_key in self._failed_tool_calls:
+                            del self._failed_tool_calls[tool_call_key]
+                    
                    step.observation = observation
                    
                    # 🔥 发射 LLM 观察事件
--- a/backend/app/services/agent/tools/file_tool.py
+++ b/backend/app/services/agent/tools/file_tool.py
@ -125,31 +125,64 @@ class FileReadTool(AgentTool):
            
            # 检查文件大小
            file_size = os.path.getsize(full_path)
-            if file_size > 1024 * 1024:  # 1MB
+            is_large_file = file_size > 1024 * 1024  # 1MB
+            
+            # 🔥 修复：如果指定了行范围，允许读取大文件的部分内容
+            if is_large_file and start_line is None and end_line is None:
                return ToolResult(
                    success=False,
-                    error=f"文件过大 ({file_size / 1024:.1f}KB)，请指定行范围",
+                    error=f"文件过大 ({file_size / 1024:.1f}KB)，请指定 start_line 和 end_line 读取部分内容",
                )
            
-            # 读取文件
-            with open(full_path, 'r', encoding='utf-8', errors='ignore') as f:
-                lines = f.readlines()
-            
-            total_lines = len(lines)
-            
-            # 处理行范围
-            if start_line is not None:
-                start_idx = max(0, start_line - 1)
+            # 🔥 对于大文件，使用流式读取指定行范围
+            if is_large_file and (start_line is not None or end_line is not None):
+                # 流式读取，避免一次性加载整个文件
+                selected_lines = []
+                total_lines = 0
+                
+                # 计算实际的起始和结束行
+                start_idx = max(0, (start_line or 1) - 1)
+                end_idx = end_line if end_line else start_idx + max_lines
+                
+                with open(full_path, 'r', encoding='utf-8', errors='ignore') as f:
+                    for i, line in enumerate(f):
+                        total_lines = i + 1
+                        if i >= start_idx and i < end_idx:
+                            selected_lines.append(line)
+                        elif i >= end_idx:
+                            # 继续计数以获取总行数，但限制读取量
+                            if i < end_idx + 1000:  # 最多再读1000行来估算总行数
+                                continue
+                            else:
+                                # 估算剩余行数
+                                remaining_bytes = file_size - f.tell()
+                                avg_line_size = f.tell() / (i + 1)
+                                estimated_remaining_lines = int(remaining_bytes / avg_line_size) if avg_line_size > 0 else 0
+                                total_lines = i + 1 + estimated_remaining_lines
+                                break
+                
+                # 更新实际的结束索引
+                end_idx = min(end_idx, start_idx + len(selected_lines))
            else:
-                start_idx = 0
-            
-            if end_line is not None:
-                end_idx = min(total_lines, end_line)
-            else:
-                end_idx = min(total_lines, start_idx + max_lines)
-            
-            # 截取指定行
-            selected_lines = lines[start_idx:end_idx]
+                # 正常读取小文件
+                with open(full_path, 'r', encoding='utf-8', errors='ignore') as f:
+                    lines = f.readlines()
+                
+                total_lines = len(lines)
+                
+                # 处理行范围
+                if start_line is not None:
+                    start_idx = max(0, start_line - 1)
+                else:
+                    start_idx = 0
+                
+                if end_line is not None:
+                    end_idx = min(total_lines, end_line)
+                else:
+                    end_idx = min(total_lines, start_idx + max_lines)
+                
+                # 截取指定行
+                selected_lines = lines[start_idx:end_idx]
            
            # 添加行号
            numbered_lines = []
--- a/frontend/src/features/projects/services/repoZipScan.ts
+++ b/frontend/src/features/projects/services/repoZipScan.ts
@ -65,10 +65,10 @@ export function validateZipFile(file: File): { valid: boolean; error?: string }
    return { valid: false, error: '请上传ZIP格式的文件' };
  }

-  // 检查文件大小 (限制为100MB)
-  const maxSize = 100 * 1024 * 1024;
+  // 检查文件大小 (限制为500MB)
+  const maxSize = 500 * 1024 * 1024;
  if (file.size > maxSize) {
-    return { valid: false, error: '文件大小不能超过100MB' };
+    return { valid: false, error: '文件大小不能超过500MB' };
  }

  return { valid: true };
--- a/frontend/src/pages/AgentAudit/components/LogEntry.tsx
+++ b/frontend/src/pages/AgentAudit/components/LogEntry.tsx
@ -217,7 +217,7 @@ export const LogEntry = memo(function LogEntry({ item, isExpanded, onToggle }: L
          {isThinking && item.content && (
            <div className="mt-2.5 relative">
              <div className="absolute left-0 top-0 bottom-0 w-px bg-gradient-to-b from-purple-500/50 via-purple-500/20 to-transparent" />
-              <div className="pl-3 text-sm text-purple-200/90 leading-relaxed whitespace-pre-wrap break-words max-h-48 overflow-y-auto custom-scrollbar">
+              <div className="pl-3 text-sm text-purple-200/90 leading-relaxed whitespace-pre-wrap break-words">
                {item.content}
              </div>
            </div>
--- a/frontend/src/pages/Projects.tsx
+++ b/frontend/src/pages/Projects.tsx
@ -636,7 +636,7 @@ export default function Projects() {
                      <Upload className="w-10 h-10 text-gray-500 mx-auto mb-3 group-hover:text-primary transition-colors" />
                      <h3 className="text-base font-bold text-gray-300 uppercase mb-1">上传 ZIP 归档</h3>
                      <p className="text-[10px] font-mono text-gray-500 mb-3">
-                        最大: 100MB // 格式: .ZIP
+                        最大: 500MB // 格式: .ZIP
                      </p>
                      <input
                        ref={fileInputRef}