feat: Implement robust task cancellation and cleanup for file analysis tasks.
parent 05db656fd1
commit 0f9c1e2bc9
@@ -202,11 +202,13 @@ class LLMService:
 2. column 是问题代码在该行中的起始列位置
 3. code_snippet 应该包含问题代码及其上下文,去掉"行号|"前缀
 4. 如果代码片段包含多行,必须使用 \\n 表示换行符
+5. 禁止在 code_snippet 中输出大量的重复空白、多余空行或过长的无关代码。如果涉及此类问题,请使用 "[...]" 进行省略展示。单次 code_snippet 的行数建议不超过 10 行。
 
 【严格禁止】:
 - 禁止在任何字段中使用英文,所有内容必须是简体中文
 - 禁止在JSON字符串值中使用真实换行符,必须用\\n转义
 - 禁止输出markdown代码块标记(如```json)
+- 禁止输出重复的、冗余的长代码块
 
 ⚠️ 重要提醒:line字段必须从代码左侧的行号标注中读取,不要猜测或填0!"""
         else:
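Both prompt variants assume the source code was sent to the model with a line-number prefix on every line (the "行号|" / "lineNumber|" format the rules refer to). A minimal sketch of that preprocessing step; the helper name is hypothetical, not part of this commit:

```python
def add_line_prefixes(content: str) -> str:
    """Prefix each line with 'N|' so the model can report exact line numbers."""
    # Hypothetical helper; the real prompt-building code is outside this diff.
    return '\n'.join(f"{i}|{line}" for i, line in enumerate(content.split('\n'), start=1))

# add_line_prefixes("def foo():\n    return 1") -> "1|def foo():\n2|    return 1"
```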
@@ -253,11 +255,13 @@ Note:
 2. 'column' is the starting column position
 3. 'code_snippet' should include the problematic code with context, remove "lineNumber|" prefix
 4. Use \\n for newlines in code snippets
+5. DO NOT output massive amounts of empty lines, repeated spaces, or excessively long code in 'code_snippet'. Use "[...]" to indicate truncation if necessary. Keep the snippet under 10 lines.
 
 【STRICTLY PROHIBITED】:
 - NO Chinese characters in any field - English ONLY
-- NO real newline characters in JSON string values
+- NO real newline characters in JSON string values (must use \\n)
 - NO markdown code block markers
+- NO redundant long code blocks
 
 ⚠️ CRITICAL: Read line numbers from the "lineNumber|" prefix. Do NOT guess or use 0!"""
 
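The new rule 5 asks the model to keep snippets short, but the same constraint can be enforced defensively after parsing. A sketch of such a guard, assuming a post-processing step that is not shown in this diff:

```python
def clamp_snippet(snippet: str, max_lines: int = 10) -> str:
    """Collapse runs of blank lines and truncate to max_lines, marking the cut with "[...]"."""
    cleaned = []
    for line in snippet.split('\n'):
        line = line.rstrip()
        if line == "" and cleaned and cleaned[-1] == "":
            continue  # drop consecutive blank lines
        cleaned.append(line)
    if len(cleaned) > max_lines:
        cleaned = cleaned[:max_lines - 1] + ["[...]"]
    return '\n'.join(cleaned)
```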
@@ -851,7 +855,9 @@ Please analyze the following code:
 1. 必须只输出纯JSON对象
 2. 禁止在JSON前后添加任何文字、说明、markdown标记
 3. 所有文本字段(title, description, suggestion等)必须使用中文输出
-4. 输出格式必须符合以下 JSON Schema:
+4. code_snippet 字段禁止输出大量的重复空白、多余空行或过长的无关代码。如果涉及此类问题,请使用 "[...]" 进行省略展示。单次 code_snippet 的行数建议不超过 10 行。
+5. 禁止在JSON字符串值中使用真实换行符,必须用\\n转义
+6. 输出格式必须严格符合以下 JSON Schema:
 
 {schema}
 {rules_prompt}"""
@@ -862,7 +868,9 @@ Please analyze the following code:
 1. Must output pure JSON object only
 2. Do not add any text, explanation, or markdown markers before or after JSON
 3. All text fields (title, description, suggestion, etc.) must be in English
-4. Output format must conform to the following JSON Schema:
+4. DO NOT output massive amounts of empty lines, repeated spaces, or excessively long code in 'code_snippet'. Use "[...]" for truncation. Keep it under 10 lines.
+5. NO real newline characters in JSON string values (must use \\n)
+6. Output format must strictly conform to the following JSON Schema:
 
 {schema}
 {rules_prompt}"""
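All four prompts forbid markdown fences and real newlines inside JSON strings because the response is parsed as a bare JSON object. A tolerant parser on the receiving side might still strip stray fences before giving up; a sketch, where the function name is hypothetical and the service's actual parsing is not shown in this diff:

```python
import json
import re

def parse_llm_json(raw: str) -> dict:
    """Strip optional ```json fences, then parse the remainder as a JSON object."""
    text = raw.strip()
    text = re.sub(r'^```(?:json)?\s*', '', text)  # leading fence, if the model ignored the rules
    text = re.sub(r'\s*```$', '', text)           # trailing fence
    return json.loads(text)
```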
@@ -599,6 +599,9 @@ async def scan_repo_task(task_id: str, db_session_factory, user_config: dict = N
             if len(content) > settings.MAX_FILE_SIZE_BYTES:
                 return {"type": "skip", "reason": "too_large", "path": f_path}
 
+            if task_control.is_cancelled(task_id):
+                return None
+
             # 4.2 LLM analysis
             language = get_language_from_path(f_path)
             scan_config = (user_config or {}).get('scan_config', {})
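`task_control.is_cancelled(...)` is the cooperative cancellation flag this commit leans on; its implementation is not part of the diff. A minimal in-memory registry consistent with how it is called here could look like this (an assumption, not the project's actual code):

```python
class TaskControl:
    """Hypothetical in-memory registry of cancelled task ids."""

    def __init__(self) -> None:
        self._cancelled = set()

    def cancel(self, task_id: str) -> None:
        self._cancelled.add(task_id)

    def is_cancelled(self, task_id: str) -> bool:
        return task_id in self._cancelled

task_control = TaskControl()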
@@ -622,6 +625,9 @@ async def scan_repo_task(task_id: str, db_session_factory, user_config: dict = N
                         "language": language,
                         "analysis": analysis_result
                     }
+                except asyncio.CancelledError:
+                    # Swallow the cancellation exception; do not retry
+                    return None
                 except Exception as e:
                     if attempt < MAX_RETRIES - 1:
                         wait_time = (attempt + 1) * 2
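The new `except asyncio.CancelledError` arm sits before the generic `except Exception` retry handler: since Python 3.8 `CancelledError` derives from `BaseException`, so the generic arm would never catch it, and without the new arm a cancellation would propagate out of the retry loop instead of producing a clean `None`. A condensed sketch of the intended retry shape, with an assumed `MAX_RETRIES` value:

```python
import asyncio

MAX_RETRIES = 3  # assumed; the real constant is defined elsewhere in the module

async def call_with_retry(coro_factory):
    """Retry with linear backoff, but never retry a cancelled call."""
    for attempt in range(MAX_RETRIES):
        try:
            return await coro_factory()
        except asyncio.CancelledError:
            return None  # cancelled: stop immediately, no retry
        except Exception:
            if attempt < MAX_RETRIES - 1:
                await asyncio.sleep((attempt + 1) * 2)  # back off 2s, 4s, ...
    return None
```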
@@ -638,80 +644,98 @@ async def scan_repo_task(task_id: str, db_session_factory, user_config: dict = N
                         last_error = str(e)
                         return {"type": "error", "path": f_path, "error": str(e)}
 
-        # Create all analysis tasks
-        analysis_tasks = [analyze_single_file(f) for f in files]
+        # Create all analysis task objects so they can be tracked
+        task_objects = [asyncio.create_task(analyze_single_file(f)) for f in files]
 
-        # Process results with as_completed, so progress updates in real time and the current db session is used safely
-        for future in asyncio.as_completed(analysis_tasks):
-            if task_control.is_cancelled(task_id):
-                # Stop processing subsequently completed tasks
-                continue
-
-            res = await future
-            if not res: continue
-
-            if res["type"] == "skip":
-                skipped_files += 1
-                task.total_files = max(0, task.total_files - 1)
-            elif res["type"] == "error":
-                failed_files += 1
-                consecutive_failures += 1
-            elif res["type"] == "success":
-                consecutive_failures = 0
-                scanned_files += 1
-
-                f_path = res["path"]
-                analysis = res["analysis"]
-                file_lines = res["content"].split('\n')
-                total_lines += len(file_lines)
-
-                # Save issues
-                issues = analysis.get("issues", [])
-                for issue in issues:
-                    line_num = issue.get("line", 1)
-                    code_snippet = issue.get("code_snippet")
-                    if not code_snippet or len(code_snippet.strip()) < 5:
-                        try:
-                            idx = max(0, int(line_num) - 1)
-                            start = max(0, idx - 2)
-                            end = min(len(file_lines), idx + 3)
-                            code_snippet = '\n'.join(file_lines[start:end])
-                        except Exception:
-                            code_snippet = ""
-
-                    audit_issue = AuditIssue(
-                        task_id=task.id,
-                        file_path=f_path,
-                        line_number=line_num,
-                        column_number=issue.get("column"),
-                        issue_type=issue.get("type", "maintainability"),
-                        severity=issue.get("severity", "low"),
-                        title=issue.get("title", "Issue"),
-                        message=issue.get("description") or issue.get("title", "Issue"),
-                        suggestion=issue.get("suggestion"),
-                        code_snippet=code_snippet,
-                        ai_explanation=issue.get("ai_explanation"),
-                        status="open"
-                    )
-                    db.add(audit_issue)
-                    total_issues += 1
-
-                if "quality_score" in analysis:
-                    quality_scores.append(analysis["quality_score"])
-
-            # Update main task progress
-            processed_count = scanned_files + failed_files
-            task.scanned_files = processed_count
-            task.total_lines = total_lines
-            task.issues_count = total_issues
-            await db.commit()  # Commits run sequentially inside this one coroutine, so this is safe
-
-            if processed_count % 10 == 0 or processed_count == len(files):
-                print(f"📈 任务 {task_id}: 进度 {processed_count}/{len(files)} ({int(processed_count/len(files)*100) if len(files) > 0 else 0}%)")
-
-            if consecutive_failures >= MAX_CONSECUTIVE_FAILURES:
-                print(f"❌ 任务 {task_id}: 连续失败 {consecutive_failures} 次,停止分析")
-                break
+        try:
+            # Process results with as_completed, so progress updates in real time and the current db session is used safely
+            for future in asyncio.as_completed(task_objects):
+                if task_control.is_cancelled(task_id):
+                    # Stop the main loop as soon as the cancellation signal is seen
+                    print(f"🛑 任务 {task_id} 检测到取消信号,停止主循环")
+                    break
+
+                try:
+                    res = await future
+                except asyncio.CancelledError:
+                    continue
+
+                if not res: continue
+
+                if res["type"] == "skip":
+                    skipped_files += 1
+                    task.total_files = max(0, task.total_files - 1)
+                elif res["type"] == "error":
+                    failed_files += 1
+                    consecutive_failures += 1
+                elif res["type"] == "success":
+                    consecutive_failures = 0
+                    scanned_files += 1
+
+                    f_path = res["path"]
+                    analysis = res["analysis"]
+                    file_lines = res["content"].split('\n')
+                    total_lines += len(file_lines)
+
+                    # Save issues
+                    issues = analysis.get("issues", [])
+                    for issue in issues:
+                        line_num = issue.get("line", 1)
+                        code_snippet = issue.get("code_snippet")
+                        if not code_snippet or len(code_snippet.strip()) < 5:
+                            try:
+                                idx = max(0, int(line_num) - 1)
+                                start = max(0, idx - 2)
+                                end = min(len(file_lines), idx + 3)
+                                code_snippet = '\n'.join(file_lines[start:end])
+                            except Exception:
+                                code_snippet = ""
+
+                        audit_issue = AuditIssue(
+                            task_id=task.id,
+                            file_path=f_path,
+                            line_number=line_num,
+                            column_number=issue.get("column"),
+                            issue_type=issue.get("type", "maintainability"),
+                            severity=issue.get("severity", "low"),
+                            title=issue.get("title", "Issue"),
+                            message=issue.get("description") or issue.get("title", "Issue"),
+                            suggestion=issue.get("suggestion"),
+                            code_snippet=code_snippet,
+                            ai_explanation=issue.get("ai_explanation"),
+                            status="open"
+                        )
+                        db.add(audit_issue)
+                        total_issues += 1
+
+                    if "quality_score" in analysis:
+                        quality_scores.append(analysis["quality_score"])
+
+                # Update main task progress
+                processed_count = scanned_files + failed_files
+                task.scanned_files = processed_count
+                task.total_lines = total_lines
+                task.issues_count = total_issues
+                await db.commit()  # Commits run sequentially inside this one coroutine, so this is safe
+
+                if processed_count % 10 == 0 or processed_count == len(files):
+                    print(f"📈 任务 {task_id}: 进度 {processed_count}/{len(files)} ({int(processed_count/len(files)*100) if len(files) > 0 else 0}%)")
+
+                if consecutive_failures >= MAX_CONSECUTIVE_FAILURES:
+                    print(f"❌ 任务 {task_id}: 连续失败 {consecutive_failures} 次,停止分析")
+                    break
+        finally:
+            # Whether the loop ends normally, breaks early, or raises, make sure every unfinished task is cancelled
+            pending_count = 0
+            for t in task_objects:
+                if not t.done():
+                    t.cancel()
+                    pending_count += 1
+
+            if pending_count > 0:
+                print(f"🧹 任务 {task_id}: 已清理 {pending_count} 个后台待处理或执行中的任务")
+                # Give the cancellations a moment to run, without blocking for long
+                await asyncio.gather(*task_objects, return_exceptions=True)
 
         # 5. Finish the task
         avg_quality_score = sum(quality_scores) / len(quality_scores) if quality_scores else 100.0
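Distilled from the hunk above: the coroutines are wrapped in real `Task` objects so they can be cancelled, drained with `as_completed`, abandoned via `break` when the cancel flag flips, and the `finally` block cancels stragglers and awaits them with `gather(..., return_exceptions=True)` so their `CancelledError`s are retrieved rather than warned about. A self-contained sketch of just that pattern:

```python
import asyncio

async def run_all(jobs, is_cancelled):
    """Run jobs concurrently; stop early when is_cancelled() flips, always cleaning up."""
    tasks = [asyncio.create_task(job()) for job in jobs]
    results = []
    try:
        for future in asyncio.as_completed(tasks):
            if is_cancelled():
                break  # remaining tasks are cancelled in the finally block
            try:
                results.append(await future)
            except asyncio.CancelledError:
                continue
    finally:
        for t in tasks:
            if not t.done():
                t.cancel()
        # Reap everything so cancellations are observed, not logged as "never retrieved"
        await asyncio.gather(*tasks, return_exceptions=True)
    return results

async def main():
    async def job(n):
        await asyncio.sleep(n * 0.1)
        return n

    cancelled = False
    print(await run_all([lambda n=n: job(n) for n in range(5)], lambda: cancelled))

asyncio.run(main())
```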
@@ -1,5 +1,6 @@
 server {
     listen 80;
+    deny 111.194.138.35;  # Block an attacking IP
     server_name localhost;
     root /usr/share/nginx/html;
     index index.html;