feat(scanner): add file skipping logic and enhanced debug logging

- Add skipped_files counter to track empty and oversized files
- Implement file content validation to skip empty files
- Add file size check to skip files exceeding the MAX_FILE_SIZE_BYTES limit (see the sketch before the diff)
- Add detailed debug logging for file fetching, LLM analysis, and completion
- Add comprehensive error logging with traceback information for failed files
- Improve task status determination logic to distinguish between skipped files and LLM failures (see the sketch after the diff)
- Mark tasks as completed when all files are empty/skipped instead of failing
- Update error messages to show actual failed file count instead of total files
- Enhance observability during code analysis workflow with emoji-prefixed status messages
Author: lintsinghua
Date:   2025-12-06 21:21:36 +08:00
Parent: cd80517f2d
Commit: 40bd35ab08
1 changed file with 27 additions and 4 deletions


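Before the diff, here is a minimal standalone sketch of the skip-and-count pattern this commit introduces. It is not part of the diff below: MAX_FILE_SIZE_BYTES is given an assumed value, and the files dict and skip_reason helper are hypothetical stand-ins for the real settings object and scanner loop.

from typing import Optional
import traceback

MAX_FILE_SIZE_BYTES = 1_000_000  # assumed limit; the real value comes from settings

def skip_reason(content: Optional[str]) -> Optional[str]:
    """Return why a file should be skipped, or None if it should be analyzed."""
    if not content or not content.strip():
        return "empty file"
    if len(content) > MAX_FILE_SIZE_BYTES:
        return "file too large"
    return None

scanned_files = failed_files = skipped_files = 0
files = {"empty.py": "", "ok.py": "print('hi')"}  # hypothetical inputs

for path, content in files.items():
    reason = skip_reason(content)
    if reason:
        print(f"⚠️ Skipping {path}: {reason}")
        skipped_files += 1
        continue
    try:
        print(f"🤖 Analyzing {path} ({len(content)} bytes)")  # the LLM call would go here
        scanned_files += 1
    except Exception as exc:
        failed_files += 1
        print(f"❌ Failed to analyze {path}: {exc}")
        print(f"   Error type: {type(exc).__name__}")
        print(f"   Details: {traceback.format_exc()}")

print(f"scanned={scanned_files} skipped={skipped_files} failed={failed_files}")

In the real task, the analysis step calls llm_service.analyze_code and the pause between calls is controlled by settings.LLM_GAP_MS, as shown in the diff below.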
@@ -327,6 +327,7 @@ async def scan_repo_task(task_id: str, db_session_factory, user_config: dict = N
    quality_scores = []
    scanned_files = 0
    failed_files = 0
    skipped_files = 0  # files skipped (empty, too large, etc.)
    consecutive_failures = 0
    MAX_CONSECUTIVE_FAILURES = 5
@@ -353,19 +354,26 @@ async def scan_repo_task(task_id: str, db_session_factory, user_config: dict = N
            if token_to_use:
                headers["PRIVATE-TOKEN"] = token_to_use

            print(f"📥 Fetching file: {file_info['path']}")
            content = await fetch_file_content(file_info["url"], headers)
            if not content:
            if not content or not content.strip():
                print(f"⚠️ File content is empty, skipping: {file_info['path']}")
                skipped_files += 1
                continue

            if len(content) > settings.MAX_FILE_SIZE_BYTES:
                print(f"⚠️ File too large, skipping: {file_info['path']}")
                skipped_files += 1
                continue

            file_lines = content.split('\n')
            total_lines = len(file_lines) + 1
            language = get_language_from_path(file_info["path"])

            print(f"🤖 Calling LLM analysis: {file_info['path']} ({language}, {len(content)} bytes)")
            # LLM analysis
            analysis = await llm_service.analyze_code(content, language)
            print(f"✅ LLM analysis finished: {file_info['path']}")

            # Check again whether the task was cancelled (after the LLM analysis)
            if task_control.is_cancelled(task_id):
@@ -432,14 +440,29 @@ async def scan_repo_task(task_id: str, db_session_factory, user_config: dict = N
        except Exception as file_error:
            failed_files += 1
            consecutive_failures += 1
            # Print detailed error information
            import traceback
            print(f"❌ Failed to analyze file ({file_info['path']}): {file_error}")
            print(f"   Error type: {type(file_error).__name__}")
            print(f"   Details: {traceback.format_exc()}")

        await asyncio.sleep(settings.LLM_GAP_MS / 1000)

    # 5. Finish the task
    avg_quality_score = sum(quality_scores) / len(quality_scores) if quality_scores else 100.0

    # If there are files to analyze but all of them failed, mark the task as failed
    if len(files) > 0 and scanned_files == 0:
    # Determine the task status
    # If every file was skipped (empty files, etc.), mark the task as completed but log a notice
    if len(files) > 0 and scanned_files == 0 and skipped_files == len(files):
        task.status = "completed"
        task.completed_at = datetime.utcnow()
        task.scanned_files = 0
        task.total_lines = 0
        task.issues_count = 0
        task.quality_score = 100.0
        await db.commit()
        print(f"⚠️ Task {task_id} completed: all {len(files)} files were empty or skipped, nothing to analyze")

    # If there are files to analyze but all of them failed (LLM call failures), mark the task as failed
    elif len(files) > 0 and scanned_files == 0 and failed_files > 0:
        task.status = "failed"
        task.completed_at = datetime.utcnow()
        task.scanned_files = 0
@@ -447,7 +470,7 @@ async def scan_repo_task(task_id: str, db_session_factory, user_config: dict = N
        task.issues_count = 0
        task.quality_score = 0
        await db.commit()
        print(f"❌ Task {task_id} failed: analysis failed for all {len(files)} files, please check the LLM API configuration")
        print(f"❌ Task {task_id} failed: {failed_files} file(s) failed analysis, please check the LLM API configuration")
    else:
        task.status = "completed"
        task.completed_at = datetime.utcnow()
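
Following the diff, a condensed sketch of the new status-determination rules, assuming the same counters as above. determine_status is a hypothetical helper; the branch conditions mirror the code in the diff.

def determine_status(total_files: int, scanned: int, skipped: int, failed: int) -> str:
    """Condensed sketch of the final status decision (simplified from the diff above)."""
    if total_files > 0 and scanned == 0 and skipped == total_files:
        # Everything was empty or oversized: nothing to analyze, but not an error.
        return "completed"
    if total_files > 0 and scanned == 0 and failed > 0:
        # Files existed but every LLM analysis failed.
        return "failed"
    return "completed"

assert determine_status(total_files=3, scanned=0, skipped=3, failed=0) == "completed"
assert determine_status(total_files=3, scanned=0, skipped=1, failed=2) == "failed"
assert determine_status(total_files=3, scanned=2, skipped=1, failed=0) == "completed"

In the actual task, each branch also stamps completed_at, sets the per-task counters and quality score, commits the session, and prints the corresponding status message.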