fix: Enhance robustness of issue and quality score processing in the scanner service by adding defensive checks and error handling.

This commit is contained in:
vinland100 2026-01-30 15:16:51 +08:00
parent 0735834931
commit 18613d533f
2 changed files with 93 additions and 33 deletions

View File

@@ -333,16 +333,13 @@ Please analyze the following code:
# 尝试从响应中提取JSON
result = self._parse_json(content)
# 验证和清理结果
result = self._validate_analysis_result(result)
# 记录解析后的问题数量
issues_count = len(result.get("issues", []))
logger.info(f"📊 LLM 分析结果: 发现 {issues_count} 个问题, 质量评分: {result.get('quality_score', 'N/A')}")
# 检查解析结果是否有效(不是默认响应)
if result == self._get_default_response():
error_msg = f"无法解析LLM响应为有效的分析结果 - Provider: {self.config.provider.value}"
logger.error(error_msg)
raise Exception(error_msg)
return result
except Exception as e:
@@ -763,6 +760,54 @@ Please analyze the following code:
raise ValueError(f"json-repair returned unexpected type: {type(repaired)}")
def _validate_analysis_result(self, result: Dict[str, Any]) -> Dict[str, Any]:
    """Validate and sanitize a parsed LLM analysis result.

    The result is cleaned in place (and also returned) so that downstream
    code can rely on the following:

    * ``issues`` is a list containing only dict entries; non-dict entries
      are dropped with a warning.
    * ``quality_score`` is a finite ``int``/``float``. Numeric strings are
      converted (``"85.5"`` becomes ``85``); missing, boolean, non-numeric,
      NaN and infinite values fall back to ``80``.
    * ``summary`` is a dict; when absent or malformed it is rebuilt from
      the per-severity counts of the valid issues.
    * ``metrics`` is a dict; when absent or malformed it is taken from
      ``self._get_default_response()``.

    Args:
        result: The object produced by parsing the LLM response.

    Returns:
        The sanitized ``result`` dict, or ``self._get_default_response()``
        when ``result`` is not a dict at all.
    """
    import math  # local import: the file's import header is not visible here

    if not isinstance(result, dict):
        logger.warning(f"分析结果不是字典类型: {type(result)}")
        return self._get_default_response()

    # Make sure ``issues`` is a list and every element is a dict.
    raw_issues = result.get("issues", [])
    if not isinstance(raw_issues, list):
        logger.warning(f"issues 字段不是列表类型: {type(raw_issues)}")
        raw_issues = []

    valid_issues = []
    for i, issue in enumerate(raw_issues):
        if isinstance(issue, dict):
            valid_issues.append(issue)
        else:
            logger.warning(f"忽略无效的问题格式 (index {i}): {type(issue)}")
    result["issues"] = valid_issues

    # Make sure ``quality_score`` is a usable (finite) number.
    default_score = 80
    score = result.get("quality_score")
    if isinstance(score, bool) or not isinstance(score, (int, float)):
        # bool is a subclass of int, but True/False is not a meaningful score.
        try:
            # Go through float() so strings such as "85.5" are accepted.
            # int() raises ValueError for NaN and OverflowError for +/-inf.
            score = default_score if score is None else int(float(str(score)))
        except (ValueError, TypeError, OverflowError):
            score = default_score
    elif isinstance(score, float) and not math.isfinite(score):
        # A NaN/inf float would poison any average computed by callers.
        score = default_score
    result["quality_score"] = score

    # Make sure ``summary`` exists; rebuild it from the valid issues if not.
    if not isinstance(result.get("summary"), dict):
        severity_counts = dict.fromkeys(("critical", "high", "medium", "low"), 0)
        for issue in valid_issues:
            severity = issue.get("severity")
            # The str check keeps unhashable severities away from the dict lookup.
            if isinstance(severity, str) and severity in severity_counts:
                severity_counts[severity] += 1
        result["summary"] = {
            "total_issues": len(valid_issues),
            "critical_issues": severity_counts["critical"],
            "high_issues": severity_counts["high"],
            "medium_issues": severity_counts["medium"],
            "low_issues": severity_counts["low"],
        }

    # Make sure ``metrics`` exists.
    if not isinstance(result.get("metrics"), dict):
        result["metrics"] = self._get_default_response()["metrics"]

    return result
def _get_default_response(self) -> Dict[str, Any]:
"""返回默认响应"""
return {
@@ -915,6 +960,8 @@ Please analyze the following code:
raise Exception("LLM返回空响应")
result = self._parse_json(content)
# 验证和清理结果
result = self._validate_analysis_result(result)
return result
except Exception as e:

View File

@@ -680,6 +680,12 @@ async def scan_repo_task(task_id: str, db_session_factory, user_config: dict = N
# 保存问题
issues = analysis.get("issues", [])
for issue in issues:
try:
# 防御性检查:确保 issue 是字典
if not isinstance(issue, dict):
print(f"⚠️ 警告: 任务 {task_id} 中文件 {f_path} 的分析结果包含无效的问题格式: {issue}")
continue
line_num = issue.get("line", 1)
code_snippet = issue.get("code_snippet")
if not code_snippet or len(code_snippet.strip()) < 5:
@@ -707,9 +713,16 @@ async def scan_repo_task(task_id: str, db_session_factory, user_config: dict = N
)
db.add(audit_issue)
total_issues += 1
except Exception as e:
print(f"⚠️ 处理单个问题时出错 (文件 {f_path}): {e}")
continue
if "quality_score" in analysis:
quality_scores.append(analysis["quality_score"])
try:
quality_score = float(analysis["quality_score"])
quality_scores.append(quality_score)
except (ValueError, TypeError):
pass
# 更新主任务进度
processed_count = scanned_files + failed_files