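feat: Retry GitHub, Gitea, and GitLab API calls without a token on 401, retry file-content fetches without a token on 401/403, fall back to anonymous git clone when authenticated clone fails, and enhance LLM and scan task error reporting.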
This commit is contained in:
parent
9ec07a6594
commit
c307609eaa
|
|
@ -296,10 +296,11 @@ async def _execute_agent_task(task_id: str):
|
||||||
gitea_token = settings.GITEA_TOKEN
|
gitea_token = settings.GITEA_TOKEN
|
||||||
|
|
||||||
# 解密SSH私钥
|
# 解密SSH私钥
|
||||||
|
user_other_config = user_config.get('otherConfig', {}) if user_config else {}
|
||||||
ssh_private_key = None
|
ssh_private_key = None
|
||||||
if 'sshPrivateKey' in other_config:
|
if 'sshPrivateKey' in user_other_config:
|
||||||
try:
|
try:
|
||||||
encrypted_key = other_config['sshPrivateKey']
|
encrypted_key = user_other_config['sshPrivateKey']
|
||||||
ssh_private_key = decrypt_sensitive_data(encrypted_key)
|
ssh_private_key = decrypt_sensitive_data(encrypted_key)
|
||||||
logger.info("成功解密SSH私钥")
|
logger.info("成功解密SSH私钥")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
@ -2564,6 +2565,39 @@ async def _get_project_root(
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
last_error = result.stderr
|
last_error = result.stderr
|
||||||
|
# 🔥 如果带认证克隆失败(401),尝试不带认证再试一次(针对公开库)
|
||||||
|
if ("401" in last_error or "Authentication failed" in last_error or "fatal: could not read Username" in last_error) and auth_url != repo_url:
|
||||||
|
logger.info(f"⚠️ 带认证克隆失败 (401),尝试公开 URL 进行匿名克隆: {branch}")
|
||||||
|
await emit(f"⚠️ 认证失败,尝试匿名访问分支 {branch}...", "warning")
|
||||||
|
try:
|
||||||
|
retry_task = asyncio.create_task(asyncio.to_thread(
|
||||||
|
subprocess.run,
|
||||||
|
["git", "clone", "--depth", "1", "--branch", branch, repo_url, base_path],
|
||||||
|
capture_output=True,
|
||||||
|
text=True,
|
||||||
|
timeout=120,
|
||||||
|
))
|
||||||
|
while not retry_task.done():
|
||||||
|
check_cancelled()
|
||||||
|
try:
|
||||||
|
result = await asyncio.wait_for(asyncio.shield(retry_task), timeout=1.0)
|
||||||
|
break
|
||||||
|
except asyncio.TimeoutError:
|
||||||
|
continue
|
||||||
|
|
||||||
|
if retry_task.done():
|
||||||
|
result = retry_task.result()
|
||||||
|
|
||||||
|
if result.returncode == 0:
|
||||||
|
logger.info(f"✅ 匿名 Git 克隆成功 (分支: {branch})")
|
||||||
|
await emit(f"✅ 仓库获取成功 (匿名克隆, 分支: {branch})")
|
||||||
|
download_success = True
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
last_error = result.stderr
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"匿名克隆尝试失败: {e}")
|
||||||
|
|
||||||
logger.warning(f"克隆失败 (分支 {branch}): {last_error[:200]}")
|
logger.warning(f"克隆失败 (分支 {branch}): {last_error[:200]}")
|
||||||
await emit(f"⚠️ 分支 {branch} 克隆失败...", "warning")
|
await emit(f"⚠️ 分支 {branch} 克隆失败...", "warning")
|
||||||
except subprocess.TimeoutExpired:
|
except subprocess.TimeoutExpired:
|
||||||
|
|
@ -2639,6 +2673,37 @@ async def _get_project_root(
|
||||||
download_success = True
|
download_success = True
|
||||||
else:
|
else:
|
||||||
last_error = result.stderr
|
last_error = result.stderr
|
||||||
|
# 🔥 如果带认证克隆失败(401),尝试不带认证再试一次(针对公开库)
|
||||||
|
if ("401" in last_error or "Authentication failed" in last_error or "fatal: could not read Username" in last_error) and auth_url != repo_url:
|
||||||
|
logger.info(f"⚠️ 带认证克隆失败 (401),尝试公开 URL 进行匿名克隆 (默认分支)")
|
||||||
|
await emit(f"⚠️ 认证失败,尝试匿名访问默认分支...", "warning")
|
||||||
|
try:
|
||||||
|
retry_task = asyncio.create_task(asyncio.to_thread(
|
||||||
|
subprocess.run,
|
||||||
|
["git", "clone", "--depth", "1", repo_url, base_path],
|
||||||
|
capture_output=True,
|
||||||
|
text=True,
|
||||||
|
timeout=120,
|
||||||
|
))
|
||||||
|
while not retry_task.done():
|
||||||
|
check_cancelled()
|
||||||
|
try:
|
||||||
|
result = await asyncio.wait_for(asyncio.shield(retry_task), timeout=1.0)
|
||||||
|
break
|
||||||
|
except asyncio.TimeoutError:
|
||||||
|
continue
|
||||||
|
|
||||||
|
if retry_task.done():
|
||||||
|
result = retry_task.result()
|
||||||
|
|
||||||
|
if result.returncode == 0:
|
||||||
|
logger.info(f"✅ 匿名 Git 克隆成功 (默认分支)")
|
||||||
|
await emit(f"✅ 仓库获取成功 (匿名克隆, 默认分支)")
|
||||||
|
download_success = True
|
||||||
|
else:
|
||||||
|
last_error = result.stderr
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"匿名克隆尝试失败: {e}")
|
||||||
except subprocess.TimeoutExpired:
|
except subprocess.TimeoutExpired:
|
||||||
last_error = "克隆超时"
|
last_error = "克隆超时"
|
||||||
except asyncio.CancelledError:
|
except asyncio.CancelledError:
|
||||||
|
|
|
||||||
|
|
@ -358,8 +358,21 @@ Please analyze the following code:
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"LLM Analysis failed: {e}", exc_info=True)
|
logger.error(f"LLM Analysis failed: {e}", exc_info=True)
|
||||||
logger.error(f"Provider: {self.config.provider.value}, Model: {self.config.model}")
|
logger.error(f"Provider: {self.config.provider.value}, Model: {self.config.model}")
|
||||||
# 重新抛出异常,让调用者处理
|
|
||||||
raise
|
# 转换为更具描述性的错误消息
|
||||||
|
error_str = str(e)
|
||||||
|
if "401" in error_str:
|
||||||
|
error_msg = f"LLM 认证失败 (401): 请检查 {self.config.provider.value} API Key 是否正确"
|
||||||
|
elif "404" in error_str:
|
||||||
|
error_msg = f"LLM 模型不存在 (404): 请检查模型名称 '{self.config.model}' 是否正确"
|
||||||
|
elif "429" in error_str:
|
||||||
|
error_msg = f"LLM 额度不足或频率限制 (429): 请检查账户余额或稍后重试"
|
||||||
|
elif "timeout" in error_str.lower():
|
||||||
|
error_msg = f"LLM 请求超时: 请检查网络连接或增加超时时间"
|
||||||
|
else:
|
||||||
|
error_msg = f"LLM 分析失败: {error_str[:200]}"
|
||||||
|
|
||||||
|
raise Exception(error_msg)
|
||||||
|
|
||||||
async def chat_completion(
|
async def chat_completion(
|
||||||
self,
|
self,
|
||||||
|
|
|
||||||
|
|
@ -106,21 +106,38 @@ async def github_api(url: str, token: str = None) -> Any:
|
||||||
"""调用GitHub API"""
|
"""调用GitHub API"""
|
||||||
headers = {"Accept": "application/vnd.github+json"}
|
headers = {"Accept": "application/vnd.github+json"}
|
||||||
t = token or settings.GITHUB_TOKEN
|
t = token or settings.GITHUB_TOKEN
|
||||||
if t:
|
|
||||||
headers["Authorization"] = f"Bearer {t}"
|
|
||||||
|
|
||||||
async with httpx.AsyncClient(timeout=30) as client:
|
async with httpx.AsyncClient(timeout=30) as client:
|
||||||
|
# First try with token if available
|
||||||
|
if t:
|
||||||
|
headers["Authorization"] = f"Bearer {t}"
|
||||||
|
try:
|
||||||
|
response = await client.get(url, headers=headers)
|
||||||
|
if response.status_code == 200:
|
||||||
|
return response.json()
|
||||||
|
if response.status_code != 401:
|
||||||
|
if response.status_code == 403:
|
||||||
|
raise Exception("GitHub API 403:请配置 GITHUB_TOKEN 或确认仓库权限/频率限制")
|
||||||
|
raise Exception(f"GitHub API {response.status_code}: {url}")
|
||||||
|
# If 401, fall through to retry without token
|
||||||
|
print(f"[API] GitHub API 401 (Unauthorized) with token, retrying without token for: {url}")
|
||||||
|
except Exception as e:
|
||||||
|
if "GitHub API 401" not in str(e) and "401" not in str(e):
|
||||||
|
raise
|
||||||
|
|
||||||
|
# Try without token
|
||||||
|
if "Authorization" in headers:
|
||||||
|
del headers["Authorization"]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
response = await client.get(url, headers=headers)
|
response = await client.get(url, headers=headers)
|
||||||
|
if response.status_code == 200:
|
||||||
|
return response.json()
|
||||||
if response.status_code == 403:
|
if response.status_code == 403:
|
||||||
raise Exception("GitHub API 403:请配置 GITHUB_TOKEN 或确认仓库权限/频率限制")
|
raise Exception("GitHub API 403:请配置 GITHUB_TOKEN 或确认仓库权限/频率限制")
|
||||||
if response.status_code != 200:
|
if response.status_code == 401:
|
||||||
raise Exception(f"GitHub API {response.status_code}: {url}")
|
raise Exception("GitHub API 401:请配置 GITHUB_TOKEN 或确认仓库权限")
|
||||||
|
raise Exception(f"GitHub API {response.status_code}: {url}")
|
||||||
data = response.json()
|
|
||||||
if not isinstance(data, (list, dict)):
|
|
||||||
print(f"[API] 警告: GitHub API 返回了非预期的格式: {type(data)}")
|
|
||||||
return data
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"[API] GitHub API 调用失败: {url}, 错误: {e}")
|
print(f"[API] GitHub API 调用失败: {url}, 错误: {e}")
|
||||||
raise
|
raise
|
||||||
|
|
@ -131,23 +148,38 @@ async def gitea_api(url: str, token: str = None) -> Any:
|
||||||
"""调用Gitea API"""
|
"""调用Gitea API"""
|
||||||
headers = {"Content-Type": "application/json"}
|
headers = {"Content-Type": "application/json"}
|
||||||
t = token or settings.GITEA_TOKEN
|
t = token or settings.GITEA_TOKEN
|
||||||
if t:
|
|
||||||
headers["Authorization"] = f"token {t}"
|
|
||||||
|
|
||||||
async with httpx.AsyncClient(timeout=30) as client:
|
async with httpx.AsyncClient(timeout=30) as client:
|
||||||
|
# First try with token if available
|
||||||
|
if t:
|
||||||
|
headers["Authorization"] = f"token {t}"
|
||||||
|
try:
|
||||||
|
response = await client.get(url, headers=headers)
|
||||||
|
if response.status_code == 200:
|
||||||
|
return response.json()
|
||||||
|
if response.status_code != 401:
|
||||||
|
if response.status_code == 403:
|
||||||
|
raise Exception("Gitea API 403:请确认仓库权限/频率限制")
|
||||||
|
raise Exception(f"Gitea API {response.status_code}: {url}")
|
||||||
|
# If 401, fall through to retry without token
|
||||||
|
print(f"[API] Gitea API 401 (Unauthorized) with token, retrying without token for: {url}")
|
||||||
|
except Exception as e:
|
||||||
|
if "Gitea API 401" not in str(e) and "401" not in str(e):
|
||||||
|
raise
|
||||||
|
|
||||||
|
# Try without token
|
||||||
|
if "Authorization" in headers:
|
||||||
|
del headers["Authorization"]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
response = await client.get(url, headers=headers)
|
response = await client.get(url, headers=headers)
|
||||||
|
if response.status_code == 200:
|
||||||
|
return response.json()
|
||||||
if response.status_code == 401:
|
if response.status_code == 401:
|
||||||
raise Exception("Gitea API 401:请配置 GITEA_TOKEN 或确认仓库权限")
|
raise Exception("Gitea API 401:请配置 GITEA_TOKEN 或确认仓库权限")
|
||||||
if response.status_code == 403:
|
if response.status_code == 403:
|
||||||
raise Exception("Gitea API 403:请确认仓库权限/频率限制")
|
raise Exception("Gitea API 403:请确认仓库权限/频率限制")
|
||||||
if response.status_code != 200:
|
raise Exception(f"Gitea API {response.status_code}: {url}")
|
||||||
raise Exception(f"Gitea API {response.status_code}: {url}")
|
|
||||||
|
|
||||||
data = response.json()
|
|
||||||
if not isinstance(data, (list, dict)):
|
|
||||||
print(f"[API] 警告: Gitea API 返回了非预期的格式: {type(data)}")
|
|
||||||
return data
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"[API] Gitea API 调用失败: {url}, 错误: {e}")
|
print(f"[API] Gitea API 调用失败: {url}, 错误: {e}")
|
||||||
raise
|
raise
|
||||||
|
|
@ -157,23 +189,38 @@ async def gitlab_api(url: str, token: str = None) -> Any:
|
||||||
"""调用GitLab API"""
|
"""调用GitLab API"""
|
||||||
headers = {"Content-Type": "application/json"}
|
headers = {"Content-Type": "application/json"}
|
||||||
t = token or settings.GITLAB_TOKEN
|
t = token or settings.GITLAB_TOKEN
|
||||||
if t:
|
|
||||||
headers["PRIVATE-TOKEN"] = t
|
|
||||||
|
|
||||||
async with httpx.AsyncClient(timeout=30) as client:
|
async with httpx.AsyncClient(timeout=30) as client:
|
||||||
|
# First try with token if available
|
||||||
|
if t:
|
||||||
|
headers["PRIVATE-TOKEN"] = t
|
||||||
|
try:
|
||||||
|
response = await client.get(url, headers=headers)
|
||||||
|
if response.status_code == 200:
|
||||||
|
return response.json()
|
||||||
|
if response.status_code != 401:
|
||||||
|
if response.status_code == 403:
|
||||||
|
raise Exception("GitLab API 403:请确认仓库权限/频率限制")
|
||||||
|
raise Exception(f"GitLab API {response.status_code}: {url}")
|
||||||
|
# If 401, fall through to retry without token
|
||||||
|
print(f"[API] GitLab API 401 (Unauthorized) with token, retrying without token for: {url}")
|
||||||
|
except Exception as e:
|
||||||
|
if "GitLab API 401" not in str(e) and "401" not in str(e):
|
||||||
|
raise
|
||||||
|
|
||||||
|
# Try without token
|
||||||
|
if "PRIVATE-TOKEN" in headers:
|
||||||
|
del headers["PRIVATE-TOKEN"]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
response = await client.get(url, headers=headers)
|
response = await client.get(url, headers=headers)
|
||||||
|
if response.status_code == 200:
|
||||||
|
return response.json()
|
||||||
if response.status_code == 401:
|
if response.status_code == 401:
|
||||||
raise Exception("GitLab API 401:请配置 GITLAB_TOKEN 或确认仓库权限")
|
raise Exception("GitLab API 401:请配置 GITLAB_TOKEN 或确认仓库权限")
|
||||||
if response.status_code == 403:
|
if response.status_code == 403:
|
||||||
raise Exception("GitLab API 403:请确认仓库权限/频率限制")
|
raise Exception("GitLab API 403:请确认仓库权限/频率限制")
|
||||||
if response.status_code != 200:
|
raise Exception(f"GitLab API {response.status_code}: {url}")
|
||||||
raise Exception(f"GitLab API {response.status_code}: {url}")
|
|
||||||
|
|
||||||
data = response.json()
|
|
||||||
if not isinstance(data, (list, dict)):
|
|
||||||
print(f"[API] 警告: GitLab API 返回了非预期的格式: {type(data)}")
|
|
||||||
return data
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"[API] GitLab API 调用失败: {url}, 错误: {e}")
|
print(f"[API] GitLab API 调用失败: {url}, 错误: {e}")
|
||||||
raise
|
raise
|
||||||
|
|
@ -186,6 +233,14 @@ async def fetch_file_content(url: str, headers: Dict[str, str] = None) -> Option
|
||||||
response = await client.get(url, headers=headers or {})
|
response = await client.get(url, headers=headers or {})
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
return response.text
|
return response.text
|
||||||
|
|
||||||
|
# 如果带 Token 请求失败(401/403),尝试不带 Token 请求(针对公开仓库)
|
||||||
|
if response.status_code in (401, 403) and headers:
|
||||||
|
print(f"[API] 获取文件内容返回 {response.status_code},尝试不带 Token 重试: {url}")
|
||||||
|
response = await client.get(url)
|
||||||
|
if response.status_code == 200:
|
||||||
|
return response.text
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"获取文件内容失败: {url}, 错误: {e}")
|
print(f"获取文件内容失败: {url}, 错误: {e}")
|
||||||
return None
|
return None
|
||||||
|
|
@ -313,9 +368,9 @@ async def get_gitea_files(repo_url: str, branch: str, token: str = None, exclude
|
||||||
base_url = repo_info['base_url']
|
base_url = repo_info['base_url']
|
||||||
owner, repo = repo_info['owner'], repo_info['repo']
|
owner, repo = repo_info['owner'], repo_info['repo']
|
||||||
|
|
||||||
# Gitea tree API: GET /repos/{owner}/{repo}/git/trees/{sha}?recursive=1
|
# Gitea tree API: GET /repos/{owner}/{repo}/git/trees/{sha}?recursive=true
|
||||||
# 可以直接使用分支名作为sha
|
# 可以直接使用分支名作为sha
|
||||||
tree_url = f"{base_url}/repos/{owner}/{repo}/git/trees/{quote(branch)}?recursive=1"
|
tree_url = f"{base_url}/repos/{quote(owner)}/{quote(repo)}/git/trees/{quote(branch)}?recursive=true"
|
||||||
tree_data = await gitea_api(tree_url, token)
|
tree_data = await gitea_api(tree_url, token)
|
||||||
|
|
||||||
files = []
|
files = []
|
||||||
|
|
@ -392,6 +447,7 @@ async def scan_repo_task(task_id: str, db_session_factory, user_config: dict = N
|
||||||
|
|
||||||
|
|
||||||
# 获取SSH私钥(如果配置了)
|
# 获取SSH私钥(如果配置了)
|
||||||
|
user_other_config = user_config.get('otherConfig', {}) if user_config else {}
|
||||||
ssh_private_key = None
|
ssh_private_key = None
|
||||||
if 'sshPrivateKey' in user_other_config:
|
if 'sshPrivateKey' in user_other_config:
|
||||||
from app.core.encryption import decrypt_sensitive_data
|
from app.core.encryption import decrypt_sensitive_data
|
||||||
|
|
@ -499,6 +555,7 @@ async def scan_repo_task(task_id: str, db_session_factory, user_config: dict = N
|
||||||
skipped_files = 0 # 跳过的文件(空文件、太大等)
|
skipped_files = 0 # 跳过的文件(空文件、太大等)
|
||||||
consecutive_failures = 0
|
consecutive_failures = 0
|
||||||
MAX_CONSECUTIVE_FAILURES = 5
|
MAX_CONSECUTIVE_FAILURES = 5
|
||||||
|
last_error = None
|
||||||
|
|
||||||
for file_info in files:
|
for file_info in files:
|
||||||
# 检查是否取消
|
# 检查是否取消
|
||||||
|
|
@ -521,6 +578,8 @@ async def scan_repo_task(task_id: str, db_session_factory, user_config: dict = N
|
||||||
if is_ssh_url:
|
if is_ssh_url:
|
||||||
# SSH方式已经包含了文件内容
|
# SSH方式已经包含了文件内容
|
||||||
content = file_info.get('content', '')
|
content = file_info.get('content', '')
|
||||||
|
if not content:
|
||||||
|
print(f"⚠️ SSH文件内容为空: {file_info['path']}")
|
||||||
print(f"📥 正在处理SSH文件: {file_info['path']}")
|
print(f"📥 正在处理SSH文件: {file_info['path']}")
|
||||||
else:
|
else:
|
||||||
headers = {}
|
headers = {}
|
||||||
|
|
@ -668,8 +727,12 @@ async def scan_repo_task(task_id: str, db_session_factory, user_config: dict = N
|
||||||
task.total_lines = total_lines
|
task.total_lines = total_lines
|
||||||
task.issues_count = 0
|
task.issues_count = 0
|
||||||
task.quality_score = 0
|
task.quality_score = 0
|
||||||
|
|
||||||
|
# 尝试从最后一个错误中获取更详细的系统提示
|
||||||
|
error_msg = f"{failed_files} 个文件分析失败,请检查 LLM API 配置。最近一个错误: {str(last_error) if 'last_error' in locals() else '未知错误'}"
|
||||||
|
task.error_message = error_msg
|
||||||
await db.commit()
|
await db.commit()
|
||||||
print(f"❌ 任务 {task_id} 失败: {failed_files} 个文件分析失败,请检查 LLM API 配置")
|
print(f"❌ 任务 {task_id} 失败: {error_msg}")
|
||||||
else:
|
else:
|
||||||
task.status = "completed"
|
task.status = "completed"
|
||||||
task.completed_at = datetime.now(timezone.utc)
|
task.completed_at = datetime.now(timezone.utc)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue