feat: Implement robust task cancellation and cleanup for file analysis tasks.
This commit is contained in:
parent
05db656fd1
commit
0f9c1e2bc9
|
|
@ -202,11 +202,13 @@ class LLMService:
|
||||||
2. column 是问题代码在该行中的起始列位置
|
2. column 是问题代码在该行中的起始列位置
|
||||||
3. code_snippet 应该包含问题代码及其上下文,去掉"行号|"前缀
|
3. code_snippet 应该包含问题代码及其上下文,去掉"行号|"前缀
|
||||||
4. 如果代码片段包含多行,必须使用 \\n 表示换行符
|
4. 如果代码片段包含多行,必须使用 \\n 表示换行符
|
||||||
|
5. 禁止在 code_snippet 中输出大量的重复空白、多余空行或过长的无关代码。如果涉及此类问题,请使用 "[...]" 进行省略展示。单次 code_snippet 的行数建议不超过 10 行。
|
||||||
|
|
||||||
【严格禁止】:
|
【严格禁止】:
|
||||||
- 禁止在任何字段中使用英文,所有内容必须是简体中文
|
- 禁止在任何字段中使用英文,所有内容必须是简体中文
|
||||||
- 禁止在JSON字符串值中使用真实换行符,必须用\\n转义
|
- 禁止在JSON字符串值中使用真实换行符,必须用\\n转义
|
||||||
- 禁止输出markdown代码块标记(如```json)
|
- 禁止输出markdown代码块标记(如```json)
|
||||||
|
- 禁止输出重复的、冗余的长代码块
|
||||||
|
|
||||||
⚠️ 重要提醒:line字段必须从代码左侧的行号标注中读取,不要猜测或填0!"""
|
⚠️ 重要提醒:line字段必须从代码左侧的行号标注中读取,不要猜测或填0!"""
|
||||||
else:
|
else:
|
||||||
|
|
@ -253,11 +255,13 @@ Note:
|
||||||
2. 'column' is the starting column position
|
2. 'column' is the starting column position
|
||||||
3. 'code_snippet' should include the problematic code with context, remove "lineNumber|" prefix
|
3. 'code_snippet' should include the problematic code with context, remove "lineNumber|" prefix
|
||||||
4. Use \\n for newlines in code snippets
|
4. Use \\n for newlines in code snippets
|
||||||
|
5. DO NOT output massive amounts of empty lines, repeated spaces, or excessively long code in 'code_snippet'. Use "[...]" to indicate truncation if necessary. Keep the snippet under 10 lines.
|
||||||
|
|
||||||
【STRICTLY PROHIBITED】:
|
【STRICTLY PROHIBITED】:
|
||||||
- NO Chinese characters in any field - English ONLY
|
- NO Chinese characters in any field - English ONLY
|
||||||
- NO real newline characters in JSON string values
|
- NO real newline characters in JSON string values (must use \\n)
|
||||||
- NO markdown code block markers
|
- NO markdown code block markers
|
||||||
|
- NO redundant long code blocks
|
||||||
|
|
||||||
⚠️ CRITICAL: Read line numbers from the "lineNumber|" prefix. Do NOT guess or use 0!"""
|
⚠️ CRITICAL: Read line numbers from the "lineNumber|" prefix. Do NOT guess or use 0!"""
|
||||||
|
|
||||||
|
|
@ -851,7 +855,9 @@ Please analyze the following code:
|
||||||
1. 必须只输出纯JSON对象
|
1. 必须只输出纯JSON对象
|
||||||
2. 禁止在JSON前后添加任何文字、说明、markdown标记
|
2. 禁止在JSON前后添加任何文字、说明、markdown标记
|
||||||
3. 所有文本字段(title, description, suggestion等)必须使用中文输出
|
3. 所有文本字段(title, description, suggestion等)必须使用中文输出
|
||||||
4. 输出格式必须符合以下 JSON Schema:
|
4. code_snippet 字段禁止输出大量的重复空白、多余空行或过长的无关代码。如果涉及此类问题,请使用 "[...]" 进行省略展示。单次 code_snippet 的行数建议不超过 10 行。
|
||||||
|
5. 禁止在JSON字符串值中使用真实换行符,必须用\\n转义
|
||||||
|
6. 输出格式必须严格符合以下 JSON Schema:
|
||||||
|
|
||||||
{schema}
|
{schema}
|
||||||
{rules_prompt}"""
|
{rules_prompt}"""
|
||||||
|
|
@ -862,7 +868,9 @@ Please analyze the following code:
|
||||||
1. Must output pure JSON object only
|
1. Must output pure JSON object only
|
||||||
2. Do not add any text, explanation, or markdown markers before or after JSON
|
2. Do not add any text, explanation, or markdown markers before or after JSON
|
||||||
3. All text fields (title, description, suggestion, etc.) must be in English
|
3. All text fields (title, description, suggestion, etc.) must be in English
|
||||||
4. Output format must conform to the following JSON Schema:
|
4. DO NOT output massive amounts of empty lines, repeated spaces, or excessively long code in 'code_snippet'. Use "[...]" for truncation. Keep it under 10 lines.
|
||||||
|
5. NO real newline characters in JSON string values (must use \\n)
|
||||||
|
6. Output format must strictly conform to the following JSON Schema:
|
||||||
|
|
||||||
{schema}
|
{schema}
|
||||||
{rules_prompt}"""
|
{rules_prompt}"""
|
||||||
|
|
|
||||||
|
|
@ -599,6 +599,9 @@ async def scan_repo_task(task_id: str, db_session_factory, user_config: dict = N
|
||||||
if len(content) > settings.MAX_FILE_SIZE_BYTES:
|
if len(content) > settings.MAX_FILE_SIZE_BYTES:
|
||||||
return {"type": "skip", "reason": "too_large", "path": f_path}
|
return {"type": "skip", "reason": "too_large", "path": f_path}
|
||||||
|
|
||||||
|
if task_control.is_cancelled(task_id):
|
||||||
|
return None
|
||||||
|
|
||||||
# 4.2 LLM 分析
|
# 4.2 LLM 分析
|
||||||
language = get_language_from_path(f_path)
|
language = get_language_from_path(f_path)
|
||||||
scan_config = (user_config or {}).get('scan_config', {})
|
scan_config = (user_config or {}).get('scan_config', {})
|
||||||
|
|
@ -622,6 +625,9 @@ async def scan_repo_task(task_id: str, db_session_factory, user_config: dict = N
|
||||||
"language": language,
|
"language": language,
|
||||||
"analysis": analysis_result
|
"analysis": analysis_result
|
||||||
}
|
}
|
||||||
|
except asyncio.CancelledError:
|
||||||
|
# 捕获取消异常,不再重试
|
||||||
|
return None
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if attempt < MAX_RETRIES - 1:
|
if attempt < MAX_RETRIES - 1:
|
||||||
wait_time = (attempt + 1) * 2
|
wait_time = (attempt + 1) * 2
|
||||||
|
|
@ -638,80 +644,98 @@ async def scan_repo_task(task_id: str, db_session_factory, user_config: dict = N
|
||||||
last_error = str(e)
|
last_error = str(e)
|
||||||
return {"type": "error", "path": f_path, "error": str(e)}
|
return {"type": "error", "path": f_path, "error": str(e)}
|
||||||
|
|
||||||
# 创建所有分析任务
|
# 创建所有分析任务对象以便跟踪
|
||||||
analysis_tasks = [analyze_single_file(f) for f in files]
|
task_objects = [asyncio.create_task(analyze_single_file(f)) for f in files]
|
||||||
|
|
||||||
# 使用 as_completed 处理结果,这样可以实时更新进度且安全使用当前 db session
|
try:
|
||||||
for future in asyncio.as_completed(analysis_tasks):
|
# 使用 as_completed 处理结果,这样可以实时更新进度且安全使用当前 db session
|
||||||
if task_control.is_cancelled(task_id):
|
for future in asyncio.as_completed(task_objects):
|
||||||
# 停止处理后续完成的任务
|
if task_control.is_cancelled(task_id):
|
||||||
continue
|
# 停止处理后续完成的任务
|
||||||
|
print(f"🛑 任务 {task_id} 检测到取消信号,停止主循环")
|
||||||
|
break
|
||||||
|
|
||||||
res = await future
|
try:
|
||||||
if not res: continue
|
res = await future
|
||||||
|
except asyncio.CancelledError:
|
||||||
|
continue
|
||||||
|
|
||||||
if res["type"] == "skip":
|
if not res: continue
|
||||||
skipped_files += 1
|
|
||||||
task.total_files = max(0, task.total_files - 1)
|
if res["type"] == "skip":
|
||||||
elif res["type"] == "error":
|
skipped_files += 1
|
||||||
failed_files += 1
|
task.total_files = max(0, task.total_files - 1)
|
||||||
consecutive_failures += 1
|
elif res["type"] == "error":
|
||||||
elif res["type"] == "success":
|
failed_files += 1
|
||||||
consecutive_failures = 0
|
consecutive_failures += 1
|
||||||
scanned_files += 1
|
elif res["type"] == "success":
|
||||||
|
consecutive_failures = 0
|
||||||
|
scanned_files += 1
|
||||||
|
|
||||||
|
f_path = res["path"]
|
||||||
|
analysis = res["analysis"]
|
||||||
|
file_lines = res["content"].split('\n')
|
||||||
|
total_lines += len(file_lines)
|
||||||
|
|
||||||
|
# 保存问题
|
||||||
|
issues = analysis.get("issues", [])
|
||||||
|
for issue in issues:
|
||||||
|
line_num = issue.get("line", 1)
|
||||||
|
code_snippet = issue.get("code_snippet")
|
||||||
|
if not code_snippet or len(code_snippet.strip()) < 5:
|
||||||
|
try:
|
||||||
|
idx = max(0, int(line_num) - 1)
|
||||||
|
start = max(0, idx - 2)
|
||||||
|
end = min(len(file_lines), idx + 3)
|
||||||
|
code_snippet = '\n'.join(file_lines[start:end])
|
||||||
|
except Exception:
|
||||||
|
code_snippet = ""
|
||||||
|
|
||||||
|
audit_issue = AuditIssue(
|
||||||
|
task_id=task.id,
|
||||||
|
file_path=f_path,
|
||||||
|
line_number=line_num,
|
||||||
|
column_number=issue.get("column"),
|
||||||
|
issue_type=issue.get("type", "maintainability"),
|
||||||
|
severity=issue.get("severity", "low"),
|
||||||
|
title=issue.get("title", "Issue"),
|
||||||
|
message=issue.get("description") or issue.get("title", "Issue"),
|
||||||
|
suggestion=issue.get("suggestion"),
|
||||||
|
code_snippet=code_snippet,
|
||||||
|
ai_explanation=issue.get("ai_explanation"),
|
||||||
|
status="open"
|
||||||
|
)
|
||||||
|
db.add(audit_issue)
|
||||||
|
total_issues += 1
|
||||||
|
|
||||||
|
if "quality_score" in analysis:
|
||||||
|
quality_scores.append(analysis["quality_score"])
|
||||||
|
|
||||||
|
# 更新主任务进度
|
||||||
|
processed_count = scanned_files + failed_files
|
||||||
|
task.scanned_files = processed_count
|
||||||
|
task.total_lines = total_lines
|
||||||
|
task.issues_count = total_issues
|
||||||
|
await db.commit() # 这里的 commit 是在一个协程里按序进行的,是安全的
|
||||||
|
|
||||||
f_path = res["path"]
|
if processed_count % 10 == 0 or processed_count == len(files):
|
||||||
analysis = res["analysis"]
|
print(f"📈 任务 {task_id}: 进度 {processed_count}/{len(files)} ({int(processed_count/len(files)*100) if len(files) > 0 else 0}%)")
|
||||||
file_lines = res["content"].split('\n')
|
|
||||||
total_lines += len(file_lines)
|
|
||||||
|
|
||||||
# 保存问题
|
|
||||||
issues = analysis.get("issues", [])
|
|
||||||
for issue in issues:
|
|
||||||
line_num = issue.get("line", 1)
|
|
||||||
code_snippet = issue.get("code_snippet")
|
|
||||||
if not code_snippet or len(code_snippet.strip()) < 5:
|
|
||||||
try:
|
|
||||||
idx = max(0, int(line_num) - 1)
|
|
||||||
start = max(0, idx - 2)
|
|
||||||
end = min(len(file_lines), idx + 3)
|
|
||||||
code_snippet = '\n'.join(file_lines[start:end])
|
|
||||||
except Exception:
|
|
||||||
code_snippet = ""
|
|
||||||
|
|
||||||
audit_issue = AuditIssue(
|
if consecutive_failures >= MAX_CONSECUTIVE_FAILURES:
|
||||||
task_id=task.id,
|
print(f"❌ 任务 {task_id}: 连续失败 {consecutive_failures} 次,停止分析")
|
||||||
file_path=f_path,
|
break
|
||||||
line_number=line_num,
|
finally:
|
||||||
column_number=issue.get("column"),
|
# 无论正常结束、中途 break 还是发生异常,都确保取消所有未完成的任务
|
||||||
issue_type=issue.get("type", "maintainability"),
|
pending_count = 0
|
||||||
severity=issue.get("severity", "low"),
|
for t in task_objects:
|
||||||
title=issue.get("title", "Issue"),
|
if not t.done():
|
||||||
message=issue.get("description") or issue.get("title", "Issue"),
|
t.cancel()
|
||||||
suggestion=issue.get("suggestion"),
|
pending_count += 1
|
||||||
code_snippet=code_snippet,
|
|
||||||
ai_explanation=issue.get("ai_explanation"),
|
|
||||||
status="open"
|
|
||||||
)
|
|
||||||
db.add(audit_issue)
|
|
||||||
total_issues += 1
|
|
||||||
|
|
||||||
if "quality_score" in analysis:
|
|
||||||
quality_scores.append(analysis["quality_score"])
|
|
||||||
|
|
||||||
# 更新主任务进度
|
|
||||||
processed_count = scanned_files + failed_files
|
|
||||||
task.scanned_files = processed_count
|
|
||||||
task.total_lines = total_lines
|
|
||||||
task.issues_count = total_issues
|
|
||||||
await db.commit() # 这里的 commit 是在一个协程里按序进行的,是安全的
|
|
||||||
|
|
||||||
if processed_count % 10 == 0 or processed_count == len(files):
|
if pending_count > 0:
|
||||||
print(f"📈 任务 {task_id}: 进度 {processed_count}/{len(files)} ({int(processed_count/len(files)*100) if len(files) > 0 else 0}%)")
|
print(f"🧹 任务 {task_id}: 已清理 {pending_count} 个后台待处理或执行中的任务")
|
||||||
|
# 等待一下让取消逻辑执行完毕,但不阻塞太久
|
||||||
if consecutive_failures >= MAX_CONSECUTIVE_FAILURES:
|
await asyncio.gather(*task_objects, return_exceptions=True)
|
||||||
print(f"❌ 任务 {task_id}: 连续失败 {consecutive_failures} 次,停止分析")
|
|
||||||
break
|
|
||||||
|
|
||||||
# 5. 完成任务
|
# 5. 完成任务
|
||||||
avg_quality_score = sum(quality_scores) / len(quality_scores) if quality_scores else 100.0
|
avg_quality_score = sum(quality_scores) / len(quality_scores) if quality_scores else 100.0
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
server {
|
server {
|
||||||
listen 80;
|
listen 80;
|
||||||
|
deny 111.194.138.35;# 封禁攻击的ip
|
||||||
server_name localhost;
|
server_name localhost;
|
||||||
root /usr/share/nginx/html;
|
root /usr/share/nginx/html;
|
||||||
index index.html;
|
index index.html;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue