feat: 将文件上传大小限制提高至500MB并优化大文件处理

将ZIP文件上传大小限制从100MB提高到500MB
在 Agent 的工具调用流程中添加失败调用追踪,并在同一调用连续失败 3 次后提示跳过
优化大文件读取性能,支持按指定行范围流式读取
This commit is contained in:
lintsinghua 2025-12-15 09:21:37 +08:00
parent 2df1b39e08
commit cdf360dcf7
9 changed files with 164 additions and 29 deletions

View File

@ -584,8 +584,8 @@ async def upload_project_zip(
# 检查文件大小
file_size = os.path.getsize(temp_file_path)
if file_size > 100 * 1024 * 1024: # 100MB limit
raise HTTPException(status_code=400, detail="文件大小不能超过100MB")
if file_size > 500 * 1024 * 1024: # 500MB limit
raise HTTPException(status_code=400, detail="文件大小不能超过500MB")
# 保存到持久化存储
meta = await save_project_zip(id, temp_file_path, file.filename)

View File

@ -255,9 +255,9 @@ async def scan_zip(
# Check file size
file_size = os.path.getsize(file_path)
if file_size > 100 * 1024 * 1024: # 100MB limit
if file_size > 500 * 1024 * 1024: # 500MB limit
os.remove(file_path)
raise HTTPException(status_code=400, detail="文件大小不能超过100MB")
raise HTTPException(status_code=400, detail="文件大小不能超过500MB")
# 保存ZIP文件到持久化存储
await save_project_zip(project_id, file_path, file.filename)

View File

@ -535,11 +535,45 @@ Final Answer: {{"findings": [...], "summary": "..."}}"""
# 🔥 发射 LLM 动作决策事件
await self.emit_llm_action(step.action, step.action_input or {})
# 🔥 循环检测:追踪工具调用失败历史
tool_call_key = f"{step.action}:{json.dumps(step.action_input or {}, sort_keys=True)}"
if not hasattr(self, '_failed_tool_calls'):
self._failed_tool_calls = {}
observation = await self.execute_tool(
step.action,
step.action_input or {}
)
# 🔥 检测工具调用失败并追踪
is_tool_error = (
"失败" in observation or
"错误" in observation or
"不存在" in observation or
"文件过大" in observation or
"Error" in observation
)
if is_tool_error:
self._failed_tool_calls[tool_call_key] = self._failed_tool_calls.get(tool_call_key, 0) + 1
fail_count = self._failed_tool_calls[tool_call_key]
# 🔥 如果同一调用连续失败3次添加强制跳过提示
if fail_count >= 3:
logger.warning(f"[{self.name}] Tool call failed {fail_count} times: {tool_call_key}")
observation += f"\n\n⚠️ **系统提示**: 此工具调用已连续失败 {fail_count} 次。请:\n"
observation += "1. 尝试使用不同的参数(如指定较小的行范围)\n"
observation += "2. 使用 search_code 工具定位关键代码片段\n"
observation += "3. 跳过此文件,继续分析其他文件\n"
observation += "4. 如果已有足够发现,直接输出 Final Answer"
# 重置计数器但保留记录
self._failed_tool_calls[tool_call_key] = 0
else:
# 成功调用,重置失败计数
if tool_call_key in self._failed_tool_calls:
del self._failed_tool_calls[tool_call_key]
# 🔥 工具执行后检查取消状态
if self.is_cancelled:
logger.info(f"[{self.name}] Cancelled after tool execution")

View File

@ -301,11 +301,45 @@ Final Answer: [JSON格式的结果]"""
# 🔥 发射 LLM 动作决策事件
await self.emit_llm_action(step.action, step.action_input or {})
# 🔥 循环检测:追踪工具调用失败历史
tool_call_key = f"{step.action}:{json.dumps(step.action_input or {}, sort_keys=True)}"
if not hasattr(self, '_failed_tool_calls'):
self._failed_tool_calls = {}
observation = await self.execute_tool(
step.action,
step.action_input or {}
)
# 🔥 检测工具调用失败并追踪
is_tool_error = (
"失败" in observation or
"错误" in observation or
"不存在" in observation or
"文件过大" in observation or
"Error" in observation
)
if is_tool_error:
self._failed_tool_calls[tool_call_key] = self._failed_tool_calls.get(tool_call_key, 0) + 1
fail_count = self._failed_tool_calls[tool_call_key]
# 🔥 如果同一调用连续失败3次添加强制跳过提示
if fail_count >= 3:
logger.warning(f"[{self.name}] Tool call failed {fail_count} times: {tool_call_key}")
observation += f"\n\n⚠️ **系统提示**: 此工具调用已连续失败 {fail_count} 次。请:\n"
observation += "1. 尝试使用不同的参数(如指定较小的行范围)\n"
observation += "2. 使用 search_code 工具定位关键代码片段\n"
observation += "3. 跳过此文件,继续分析其他文件\n"
observation += "4. 如果已有足够信息,直接输出 Final Answer"
# 重置计数器但保留记录
self._failed_tool_calls[tool_call_key] = 0
else:
# 成功调用,重置失败计数
if tool_call_key in self._failed_tool_calls:
del self._failed_tool_calls[tool_call_key]
# 🔥 工具执行后检查取消状态
if self.is_cancelled:
logger.info(f"[{self.name}] Cancelled after tool execution")

View File

@ -584,11 +584,45 @@ class VerificationAgent(BaseAgent):
# 🔥 发射 LLM 动作决策事件
await self.emit_llm_action(step.action, step.action_input or {})
# 🔥 循环检测:追踪工具调用失败历史
tool_call_key = f"{step.action}:{json.dumps(step.action_input or {}, sort_keys=True)}"
if not hasattr(self, '_failed_tool_calls'):
self._failed_tool_calls = {}
observation = await self.execute_tool(
step.action,
step.action_input or {}
)
# 🔥 检测工具调用失败并追踪
is_tool_error = (
"失败" in observation or
"错误" in observation or
"不存在" in observation or
"文件过大" in observation or
"Error" in observation
)
if is_tool_error:
self._failed_tool_calls[tool_call_key] = self._failed_tool_calls.get(tool_call_key, 0) + 1
fail_count = self._failed_tool_calls[tool_call_key]
# 🔥 如果同一调用连续失败3次添加强制跳过提示
if fail_count >= 3:
logger.warning(f"[{self.name}] Tool call failed {fail_count} times: {tool_call_key}")
observation += f"\n\n⚠️ **系统提示**: 此工具调用已连续失败 {fail_count} 次。请:\n"
observation += "1. 尝试使用不同的参数(如指定较小的行范围)\n"
observation += "2. 使用 search_code 工具定位关键代码片段\n"
observation += "3. 跳过此发现的验证,继续验证其他发现\n"
observation += "4. 如果已有足够验证结果,直接输出 Final Answer"
# 重置计数器
self._failed_tool_calls[tool_call_key] = 0
else:
# 成功调用,重置失败计数
if tool_call_key in self._failed_tool_calls:
del self._failed_tool_calls[tool_call_key]
step.observation = observation
# 🔥 发射 LLM 观察事件

View File

@ -125,13 +125,46 @@ class FileReadTool(AgentTool):
# 检查文件大小
file_size = os.path.getsize(full_path)
if file_size > 1024 * 1024: # 1MB
is_large_file = file_size > 1024 * 1024 # 1MB
# 🔥 修复:如果指定了行范围,允许读取大文件的部分内容
if is_large_file and start_line is None and end_line is None:
return ToolResult(
success=False,
error=f"文件过大 ({file_size / 1024:.1f}KB),请指定行范围",
error=f"文件过大 ({file_size / 1024:.1f}KB),请指定 start_line 和 end_line 读取部分内容",
)
# 读取文件
# 🔥 对于大文件,使用流式读取指定行范围
if is_large_file and (start_line is not None or end_line is not None):
# 流式读取,避免一次性加载整个文件
selected_lines = []
total_lines = 0
# 计算实际的起始和结束行
start_idx = max(0, (start_line or 1) - 1)
end_idx = end_line if end_line else start_idx + max_lines
with open(full_path, 'r', encoding='utf-8', errors='ignore') as f:
for i, line in enumerate(f):
total_lines = i + 1
if i >= start_idx and i < end_idx:
selected_lines.append(line)
elif i >= end_idx:
# 继续计数以获取总行数,但限制读取量
if i < end_idx + 1000: # 最多再读1000行来估算总行数
continue
else:
# 估算剩余行数
remaining_bytes = file_size - f.tell()
avg_line_size = f.tell() / (i + 1)
estimated_remaining_lines = int(remaining_bytes / avg_line_size) if avg_line_size > 0 else 0
total_lines = i + 1 + estimated_remaining_lines
break
# 更新实际的结束索引
end_idx = min(end_idx, start_idx + len(selected_lines))
else:
# 正常读取小文件
with open(full_path, 'r', encoding='utf-8', errors='ignore') as f:
lines = f.readlines()

View File

@ -65,10 +65,10 @@ export function validateZipFile(file: File): { valid: boolean; error?: string }
return { valid: false, error: '请上传ZIP格式的文件' };
}
// 检查文件大小 (限制为100MB)
const maxSize = 100 * 1024 * 1024;
// 检查文件大小 (限制为500MB)
const maxSize = 500 * 1024 * 1024;
if (file.size > maxSize) {
return { valid: false, error: '文件大小不能超过100MB' };
return { valid: false, error: '文件大小不能超过500MB' };
}
return { valid: true };

View File

@ -217,7 +217,7 @@ export const LogEntry = memo(function LogEntry({ item, isExpanded, onToggle }: L
{isThinking && item.content && (
<div className="mt-2.5 relative">
<div className="absolute left-0 top-0 bottom-0 w-px bg-gradient-to-b from-purple-500/50 via-purple-500/20 to-transparent" />
<div className="pl-3 text-sm text-purple-200/90 leading-relaxed whitespace-pre-wrap break-words max-h-48 overflow-y-auto custom-scrollbar">
<div className="pl-3 text-sm text-purple-200/90 leading-relaxed whitespace-pre-wrap break-words">
{item.content}
</div>
</div>

View File

@ -636,7 +636,7 @@ export default function Projects() {
<Upload className="w-10 h-10 text-gray-500 mx-auto mb-3 group-hover:text-primary transition-colors" />
<h3 className="text-base font-bold text-gray-300 uppercase mb-1"> ZIP </h3>
<p className="text-[10px] font-mono text-gray-500 mb-3">
最大: 100MB // 格式: .ZIP
最大: 500MB // 格式: .ZIP
</p>
<input
ref={fileInputRef}