feat: Implement API retry logic for GitHub, Gitea, and GitLab on 401/403 errors and enhance scan task error reporting.

This commit is contained in:
vinland100 2026-01-06 10:53:49 +08:00
parent 9ec07a6594
commit c307609eaa
3 changed files with 175 additions and 34 deletions

View File

@ -296,10 +296,11 @@ async def _execute_agent_task(task_id: str):
gitea_token = settings.GITEA_TOKEN
# 解密SSH私钥
user_other_config = user_config.get('otherConfig', {}) if user_config else {}
ssh_private_key = None
if 'sshPrivateKey' in other_config:
if 'sshPrivateKey' in user_other_config:
try:
encrypted_key = other_config['sshPrivateKey']
encrypted_key = user_other_config['sshPrivateKey']
ssh_private_key = decrypt_sensitive_data(encrypted_key)
logger.info("成功解密SSH私钥")
except Exception as e:
@ -2564,6 +2565,39 @@ async def _get_project_root(
break
else:
last_error = result.stderr
# 🔥 如果带认证克隆失败(401),尝试不带认证再试一次(针对公开库)
if ("401" in last_error or "Authentication failed" in last_error or "fatal: could not read Username" in last_error) and auth_url != repo_url:
logger.info(f"⚠️ 带认证克隆失败 (401),尝试公开 URL 进行匿名克隆: {branch}")
await emit(f"⚠️ 认证失败,尝试匿名访问分支 {branch}...", "warning")
try:
retry_task = asyncio.create_task(asyncio.to_thread(
subprocess.run,
["git", "clone", "--depth", "1", "--branch", branch, repo_url, base_path],
capture_output=True,
text=True,
timeout=120,
))
while not retry_task.done():
check_cancelled()
try:
result = await asyncio.wait_for(asyncio.shield(retry_task), timeout=1.0)
break
except asyncio.TimeoutError:
continue
if retry_task.done():
result = retry_task.result()
if result.returncode == 0:
logger.info(f"✅ 匿名 Git 克隆成功 (分支: {branch})")
await emit(f"✅ 仓库获取成功 (匿名克隆, 分支: {branch})")
download_success = True
break
else:
last_error = result.stderr
except Exception as e:
logger.warning(f"匿名克隆尝试失败: {e}")
logger.warning(f"克隆失败 (分支 {branch}): {last_error[:200]}")
await emit(f"⚠️ 分支 {branch} 克隆失败...", "warning")
except subprocess.TimeoutExpired:
@ -2639,6 +2673,37 @@ async def _get_project_root(
download_success = True
else:
last_error = result.stderr
# 🔥 如果带认证克隆失败(401),尝试不带认证再试一次(针对公开库)
if ("401" in last_error or "Authentication failed" in last_error or "fatal: could not read Username" in last_error) and auth_url != repo_url:
logger.info(f"⚠️ 带认证克隆失败 (401),尝试公开 URL 进行匿名克隆 (默认分支)")
await emit(f"⚠️ 认证失败,尝试匿名访问默认分支...", "warning")
try:
retry_task = asyncio.create_task(asyncio.to_thread(
subprocess.run,
["git", "clone", "--depth", "1", repo_url, base_path],
capture_output=True,
text=True,
timeout=120,
))
while not retry_task.done():
check_cancelled()
try:
result = await asyncio.wait_for(asyncio.shield(retry_task), timeout=1.0)
break
except asyncio.TimeoutError:
continue
if retry_task.done():
result = retry_task.result()
if result.returncode == 0:
logger.info(f"✅ 匿名 Git 克隆成功 (默认分支)")
await emit(f"✅ 仓库获取成功 (匿名克隆, 默认分支)")
download_success = True
else:
last_error = result.stderr
except Exception as e:
logger.warning(f"匿名克隆尝试失败: {e}")
except subprocess.TimeoutExpired:
last_error = "克隆超时"
except asyncio.CancelledError:

View File

@ -358,8 +358,21 @@ Please analyze the following code:
except Exception as e:
logger.error(f"LLM Analysis failed: {e}", exc_info=True)
logger.error(f"Provider: {self.config.provider.value}, Model: {self.config.model}")
# 重新抛出异常,让调用者处理
raise
# 转换为更具描述性的错误消息
error_str = str(e)
if "401" in error_str:
error_msg = f"LLM 认证失败 (401): 请检查 {self.config.provider.value} API Key 是否正确"
elif "404" in error_str:
error_msg = f"LLM 模型不存在 (404): 请检查模型名称 '{self.config.model}' 是否正确"
elif "429" in error_str:
error_msg = f"LLM 额度不足或频率限制 (429): 请检查账户余额或稍后重试"
elif "timeout" in error_str.lower():
error_msg = f"LLM 请求超时: 请检查网络连接或增加超时时间"
else:
error_msg = f"LLM 分析失败: {error_str[:200]}"
raise Exception(error_msg)
async def chat_completion(
self,

View File

@ -106,21 +106,38 @@ async def github_api(url: str, token: str = None) -> Any:
"""调用GitHub API"""
headers = {"Accept": "application/vnd.github+json"}
t = token or settings.GITHUB_TOKEN
if t:
headers["Authorization"] = f"Bearer {t}"
async with httpx.AsyncClient(timeout=30) as client:
# First try with token if available
if t:
headers["Authorization"] = f"Bearer {t}"
try:
response = await client.get(url, headers=headers)
if response.status_code == 200:
return response.json()
if response.status_code != 401:
if response.status_code == 403:
raise Exception("GitHub API 403请配置 GITHUB_TOKEN 或确认仓库权限/频率限制")
if response.status_code != 200:
raise Exception(f"GitHub API {response.status_code}: {url}")
# If 401, fall through to retry without token
print(f"[API] GitHub API 401 (Unauthorized) with token, retrying without token for: {url}")
except Exception as e:
if "GitHub API 401" not in str(e) and "401" not in str(e):
raise
data = response.json()
if not isinstance(data, (list, dict)):
print(f"[API] 警告: GitHub API 返回了非预期的格式: {type(data)}")
return data
# Try without token
if "Authorization" in headers:
del headers["Authorization"]
try:
response = await client.get(url, headers=headers)
if response.status_code == 200:
return response.json()
if response.status_code == 403:
raise Exception("GitHub API 403请配置 GITHUB_TOKEN 或确认仓库权限/频率限制")
if response.status_code == 401:
raise Exception("GitHub API 401请配置 GITHUB_TOKEN 或确认仓库权限")
raise Exception(f"GitHub API {response.status_code}: {url}")
except Exception as e:
print(f"[API] GitHub API 调用失败: {url}, 错误: {e}")
raise
@ -131,23 +148,38 @@ async def gitea_api(url: str, token: str = None) -> Any:
"""调用Gitea API"""
headers = {"Content-Type": "application/json"}
t = token or settings.GITEA_TOKEN
if t:
headers["Authorization"] = f"token {t}"
async with httpx.AsyncClient(timeout=30) as client:
# First try with token if available
if t:
headers["Authorization"] = f"token {t}"
try:
response = await client.get(url, headers=headers)
if response.status_code == 200:
return response.json()
if response.status_code != 401:
if response.status_code == 403:
raise Exception("Gitea API 403请确认仓库权限/频率限制")
raise Exception(f"Gitea API {response.status_code}: {url}")
# If 401, fall through to retry without token
print(f"[API] Gitea API 401 (Unauthorized) with token, retrying without token for: {url}")
except Exception as e:
if "Gitea API 401" not in str(e) and "401" not in str(e):
raise
# Try without token
if "Authorization" in headers:
del headers["Authorization"]
try:
response = await client.get(url, headers=headers)
if response.status_code == 200:
return response.json()
if response.status_code == 401:
raise Exception("Gitea API 401请配置 GITEA_TOKEN 或确认仓库权限")
if response.status_code == 403:
raise Exception("Gitea API 403请确认仓库权限/频率限制")
if response.status_code != 200:
raise Exception(f"Gitea API {response.status_code}: {url}")
data = response.json()
if not isinstance(data, (list, dict)):
print(f"[API] 警告: Gitea API 返回了非预期的格式: {type(data)}")
return data
except Exception as e:
print(f"[API] Gitea API 调用失败: {url}, 错误: {e}")
raise
@ -157,23 +189,38 @@ async def gitlab_api(url: str, token: str = None) -> Any:
"""调用GitLab API"""
headers = {"Content-Type": "application/json"}
t = token or settings.GITLAB_TOKEN
if t:
headers["PRIVATE-TOKEN"] = t
async with httpx.AsyncClient(timeout=30) as client:
# First try with token if available
if t:
headers["PRIVATE-TOKEN"] = t
try:
response = await client.get(url, headers=headers)
if response.status_code == 200:
return response.json()
if response.status_code != 401:
if response.status_code == 403:
raise Exception("GitLab API 403请确认仓库权限/频率限制")
raise Exception(f"GitLab API {response.status_code}: {url}")
# If 401, fall through to retry without token
print(f"[API] GitLab API 401 (Unauthorized) with token, retrying without token for: {url}")
except Exception as e:
if "GitLab API 401" not in str(e) and "401" not in str(e):
raise
# Try without token
if "PRIVATE-TOKEN" in headers:
del headers["PRIVATE-TOKEN"]
try:
response = await client.get(url, headers=headers)
if response.status_code == 200:
return response.json()
if response.status_code == 401:
raise Exception("GitLab API 401请配置 GITLAB_TOKEN 或确认仓库权限")
if response.status_code == 403:
raise Exception("GitLab API 403请确认仓库权限/频率限制")
if response.status_code != 200:
raise Exception(f"GitLab API {response.status_code}: {url}")
data = response.json()
if not isinstance(data, (list, dict)):
print(f"[API] 警告: GitLab API 返回了非预期的格式: {type(data)}")
return data
except Exception as e:
print(f"[API] GitLab API 调用失败: {url}, 错误: {e}")
raise
@ -186,6 +233,14 @@ async def fetch_file_content(url: str, headers: Dict[str, str] = None) -> Option
response = await client.get(url, headers=headers or {})
if response.status_code == 200:
return response.text
# 如果带 Token 请求失败(401/403),尝试不带 Token 请求(针对公开仓库)
if response.status_code in (401, 403) and headers:
print(f"[API] 获取文件内容返回 {response.status_code},尝试不带 Token 重试: {url}")
response = await client.get(url)
if response.status_code == 200:
return response.text
except Exception as e:
print(f"获取文件内容失败: {url}, 错误: {e}")
return None
@ -313,9 +368,9 @@ async def get_gitea_files(repo_url: str, branch: str, token: str = None, exclude
base_url = repo_info['base_url']
owner, repo = repo_info['owner'], repo_info['repo']
# Gitea tree API: GET /repos/{owner}/{repo}/git/trees/{sha}?recursive=1
# Gitea tree API: GET /repos/{owner}/{repo}/git/trees/{sha}?recursive=true
# 可以直接使用分支名作为sha
tree_url = f"{base_url}/repos/{owner}/{repo}/git/trees/{quote(branch)}?recursive=1"
tree_url = f"{base_url}/repos/{quote(owner)}/{quote(repo)}/git/trees/{quote(branch)}?recursive=true"
tree_data = await gitea_api(tree_url, token)
files = []
@ -392,6 +447,7 @@ async def scan_repo_task(task_id: str, db_session_factory, user_config: dict = N
# 获取SSH私钥(如果配置了)
user_other_config = user_config.get('otherConfig', {}) if user_config else {}
ssh_private_key = None
if 'sshPrivateKey' in user_other_config:
from app.core.encryption import decrypt_sensitive_data
@ -499,6 +555,7 @@ async def scan_repo_task(task_id: str, db_session_factory, user_config: dict = N
skipped_files = 0 # 跳过的文件(空文件、太大等)
consecutive_failures = 0
MAX_CONSECUTIVE_FAILURES = 5
last_error = None
for file_info in files:
# 检查是否取消
@ -521,6 +578,8 @@ async def scan_repo_task(task_id: str, db_session_factory, user_config: dict = N
if is_ssh_url:
# SSH方式已经包含了文件内容
content = file_info.get('content', '')
if not content:
print(f"⚠️ SSH文件内容为空: {file_info['path']}")
print(f"📥 正在处理SSH文件: {file_info['path']}")
else:
headers = {}
@ -668,8 +727,12 @@ async def scan_repo_task(task_id: str, db_session_factory, user_config: dict = N
task.total_lines = total_lines
task.issues_count = 0
task.quality_score = 0
# 尝试从最后一个错误中获取更详细的系统提示
error_msg = f"{failed_files} 个文件分析失败,请检查 LLM API 配置。最近一个错误: {str(last_error) if 'last_error' in locals() else '未知错误'}"
task.error_message = error_msg
await db.commit()
print(f"❌ 任务 {task_id} 失败: {failed_files} 个文件分析失败,请检查 LLM API 配置")
print(f"❌ 任务 {task_id} 失败: {error_msg}")
else:
task.status = "completed"
task.completed_at = datetime.now(timezone.utc)