From 0fb10f6f762cd399d55e18b14fbe2d7300a2eb29 Mon Sep 17 00:00:00 2001 From: vinland100 Date: Tue, 16 Dec 2025 16:36:08 +0800 Subject: [PATCH] feat: add Gitea repository support --- backend/app/core/config.py | 3 + backend/app/models/project.py | 2 +- backend/app/services/scanner.py | 146 +++++++++++++++--- backend/env.example | 5 + .../src/components/system/SystemConfig.tsx | 26 +++- frontend/src/pages/Projects.tsx | 2 + frontend/src/shared/constants/projectTypes.ts | 32 ++-- frontend/src/shared/types/index.ts | 2 +- frontend/src/shared/utils/projectUtils.ts | 1 + 9 files changed, 177 insertions(+), 42 deletions(-) diff --git a/backend/app/core/config.py b/backend/app/core/config.py index 43d6385..536efda 100644 --- a/backend/app/core/config.py +++ b/backend/app/core/config.py @@ -65,6 +65,9 @@ class Settings(BaseSettings): # GitLab配置 GITLAB_TOKEN: Optional[str] = None + # Gitea配置 + GITEA_TOKEN: Optional[str] = None + # 扫描配置 MAX_ANALYZE_FILES: int = 0 # 最大分析文件数,0表示无限制 MAX_FILE_SIZE_BYTES: int = 200 * 1024 # 最大文件大小 200KB diff --git a/backend/app/models/project.py b/backend/app/models/project.py index ac89beb..5debcbf 100644 --- a/backend/app/models/project.py +++ b/backend/app/models/project.py @@ -16,7 +16,7 @@ class Project(Base): # 仓库相关字段 (仅 source_type='repository' 时使用) repository_url = Column(String, nullable=True) - repository_type = Column(String, default="other") # github, gitlab, other + repository_type = Column(String, default="other") # github, gitlab, gitea, other default_branch = Column(String, default="main") programming_languages = Column(Text, default="[]") # Stored as JSON string diff --git a/backend/app/services/scanner.py b/backend/app/services/scanner.py index 9937e28..fea8968 100644 --- a/backend/app/services/scanner.py +++ b/backend/app/services/scanner.py @@ -1,5 +1,5 @@ """ -仓库扫描服务 - 支持GitHub和GitLab仓库扫描 +仓库扫描服务 - 支持GitHub, GitLab 和 Gitea 仓库扫描 """ import asyncio @@ -98,6 +98,25 @@ async def github_api(url: str, token: str = None) -> Any: return response.json() + +async def gitea_api(url: str, token: str = None) -> Any: + """调用Gitea API""" + headers = {"Content-Type": "application/json"} + t = token or settings.GITEA_TOKEN + if t: + headers["Authorization"] = f"token {t}" + + async with httpx.AsyncClient(timeout=30) as client: + response = await client.get(url, headers=headers) + if response.status_code == 401: + raise Exception("Gitea API 401:请配置 GITEA_TOKEN 或确认仓库权限") + if response.status_code == 403: + raise Exception("Gitea API 403:请确认仓库权限/频率限制") + if response.status_code != 200: + raise Exception(f"Gitea API {response.status_code}: {url}") + return response.json() + + async def gitlab_api(url: str, token: str = None) -> Any: """调用GitLab API""" headers = {"Content-Type": "application/json"} @@ -130,7 +149,9 @@ async def fetch_file_content(url: str, headers: Dict[str, str] = None) -> Option async def get_github_branches(repo_url: str, token: str = None) -> List[str]: """获取GitHub仓库分支列表""" - match = repo_url.rstrip('/').rstrip('.git') + match = repo_url.rstrip('/') + if match.endswith('.git'): + match = match[:-4] if 'github.com/' in match: parts = match.split('github.com/')[-1].split('/') if len(parts) >= 2: @@ -146,6 +167,30 @@ async def get_github_branches(repo_url: str, token: str = None) -> List[str]: return [b["name"] for b in branches_data] + + + +async def get_gitea_branches(repo_url: str, token: str = None) -> List[str]: + """获取Gitea仓库分支列表""" + parsed = urlparse(repo_url) + base = f"{parsed.scheme}://{parsed.netloc}" + + # 提取Owner和Repo: path通常是 /owner/repo.git 或 /owner/repo + path = parsed.path.strip('/') + if path.endswith('.git'): + path = path[:-4] + parts = path.split('/') + if len(parts) < 2: + raise Exception("Gitea 仓库 URL 格式错误") + + owner, repo = parts[0], parts[1] + + branches_url = f"{base}/api/v1/repos/{owner}/{repo}/branches" + branches_data = await gitea_api(branches_url, token) + + return [b["name"] for b in branches_data] + + async def get_gitlab_branches(repo_url: str, token: str = None) -> List[str]: """获取GitLab仓库分支列表""" parsed = urlparse(repo_url) @@ -158,7 +203,9 @@ async def get_gitlab_branches(repo_url: str, token: str = None) -> List[str]: elif parsed.username and not parsed.password: extracted_token = parsed.username - path = parsed.path.strip('/').rstrip('.git') + path = parsed.path.strip('/') + if path.endswith('.git'): + path = path[:-4] if not path: raise Exception("GitLab 仓库 URL 格式错误") @@ -172,7 +219,9 @@ async def get_gitlab_branches(repo_url: str, token: str = None) -> List[str]: async def get_github_files(repo_url: str, branch: str, token: str = None, exclude_patterns: List[str] = None) -> List[Dict[str, str]]: """获取GitHub仓库文件列表""" # 解析仓库URL - match = repo_url.rstrip('/').rstrip('.git') + match = repo_url.rstrip('/') + if match.endswith('.git'): + match = match[:-4] if 'github.com/' in match: parts = match.split('github.com/')[-1].split('/') if len(parts) >= 2: @@ -213,7 +262,9 @@ async def get_gitlab_files(repo_url: str, branch: str, token: str = None, exclud extracted_token = parsed.username # 解析项目路径 - path = parsed.path.strip('/').rstrip('.git') + path = parsed.path.strip('/') + if path.endswith('.git'): + path = path[:-4] if not path: raise Exception("GitLab 仓库 URL 格式错误") @@ -235,6 +286,47 @@ async def get_gitlab_files(repo_url: str, branch: str, token: str = None, exclud return files + +async def get_gitea_files(repo_url: str, branch: str, token: str = None, exclude_patterns: List[str] = None) -> List[Dict[str, str]]: + """获取Gitea仓库文件列表""" + parsed = urlparse(repo_url) + base = f"{parsed.scheme}://{parsed.netloc}" + + path = parsed.path.strip('/') + if path.endswith('.git'): + path = path[:-4] + parts = path.split('/') + if len(parts) < 2: + raise Exception("Gitea 仓库 URL 格式错误") + + owner, repo = parts[0], parts[1] + + # Gitea tree API: GET /repos/{owner}/{repo}/git/trees/{sha}?recursive=1 + # 可以直接使用分支名作为sha + tree_url = f"{base}/api/v1/repos/{owner}/{repo}/git/trees/{quote(branch)}?recursive=1" + tree_data = await gitea_api(tree_url, token) + + files = [] + for item in tree_data.get("tree", []): + # Gitea API returns 'type': 'blob' for files + if item.get("type") == "blob" and is_text_file(item["path"]) and not should_exclude(item["path"], exclude_patterns): + # Gitea raw file URL: {base}/{owner}/{repo}/raw/branch/{branch}/{path} + # 或者 API: /repos/{owner}/{repo}/contents/{filepath}?ref={branch} (get content, base64) + # 这里使用 raw URL 可能会更方便,但要注意私有仓库可能需要token访问raw + # Gitea raw URL usually works with token in header or query param. + # Standard Gitea: GET /repos/{owner}/{repo}/raw/{filepath}?ref={branch} (API) returns raw content? + # Actually Gitea raw url: {base}/{owner}/{repo}/raw/branch/{branch}/{path} or /raw/tag or /raw/commit + + # 使用API raw endpoint: GET /repos/{owner}/{repo}/raw/{filepath}?ref={branch} ==> 实际是 /repos/{owner}/{repo}/raw/{path} (ref通过query param?) + # 查阅文档,Gitea API v1 /repos/{owner}/{repo}/raw/{filepath} 接受 ref query param + # URL: {base}/api/v1/repos/{owner}/{repo}/raw/{quote(item['path'])}?ref={branch} + files.append({ + "path": item["path"], + "url": f"{base}/api/v1/repos/{owner}/{repo}/raw/{quote(item['path'])}?ref={quote(branch)}", + "token": token # 传递token以便fetch_file_content使用 + }) + + return files async def scan_repo_task(task_id: str, db_session_factory, user_config: dict = None): """ 后台仓库扫描任务 @@ -293,24 +385,23 @@ async def scan_repo_task(task_id: str, db_session_factory, user_config: dict = N user_other_config = (user_config or {}).get('otherConfig', {}) github_token = user_other_config.get('githubToken') or settings.GITHUB_TOKEN gitlab_token = user_other_config.get('gitlabToken') or settings.GITLAB_TOKEN + gitea_token = user_other_config.get('giteaToken') or settings.GITEA_TOKEN + + files: List[Dict[str, str]] = [] extracted_gitlab_token = None - - # 构建分支尝试顺序(分支降级机制) - branches_to_try = [branch] - if project.default_branch and project.default_branch != branch: - branches_to_try.append(project.default_branch) - for common_branch in ["main", "master"]: - if common_branch not in branches_to_try: - branches_to_try.append(common_branch) - - actual_branch = branch # 实际使用的分支 last_error = None + actual_branch = branch + + # 构造尝试的分支列表 + branches_to_try = [branch] + if branch not in ["main", "master"]: + branches_to_try.extend(["main", "master"]) + branches_to_try = list(dict.fromkeys(branches_to_try)) for try_branch in branches_to_try: try: - print(f"🔄 尝试获取分支 {try_branch} 的文件列表...") if repo_type == "github": files = await get_github_files(repo_url, try_branch, github_token, task_exclude_patterns) elif repo_type == "gitlab": @@ -318,8 +409,10 @@ async def scan_repo_task(task_id: str, db_session_factory, user_config: dict = N # GitLab文件可能带有token if files and 'token' in files[0]: extracted_gitlab_token = files[0].get('token') + elif repo_type == "gitea": + files = await get_gitea_files(repo_url, try_branch, gitea_token, task_exclude_patterns) else: - raise Exception("不支持的仓库类型,仅支持 GitHub 和 GitLab 仓库") + raise Exception("不支持的仓库类型,仅支持 GitHub, GitLab 和 Gitea 仓库") if files: actual_branch = try_branch @@ -386,10 +479,21 @@ async def scan_repo_task(task_id: str, db_session_factory, user_config: dict = N try: # 获取文件内容 headers = {} - # 使用提取的 GitLab token 或用户配置的 token - token_to_use = extracted_gitlab_token or gitlab_token - if token_to_use: - headers["PRIVATE-TOKEN"] = token_to_use + # 使用提取的 token 或用户配置的 token + + if repo_type == "gitlab": + token_to_use = extracted_token or gitlab_token + if token_to_use: + headers["PRIVATE-TOKEN"] = token_to_use + elif repo_type == "gitea": + token_to_use = extracted_token or gitea_token + if token_to_use: + headers["Authorization"] = f"token {token_to_use}" + elif repo_type == "github": + # GitHub raw URL 也是直接下载,通常public不需要token,private需要 + # GitHub raw user content url: raw.githubusercontent.com + if github_token: + headers["Authorization"] = f"Bearer {github_token}" print(f"📥 正在获取文件: {file_info['path']}") content = await fetch_file_content(file_info["url"], headers) diff --git a/backend/env.example b/backend/env.example index 2fd0d25..37eeb24 100644 --- a/backend/env.example +++ b/backend/env.example @@ -183,6 +183,11 @@ GITHUB_TOKEN= # 权限要求: read_repository GITLAB_TOKEN= +# Gitea Access Token +# 获取地址: https://[your-gitea-instance]/user/settings/applications +# 权限要求: read_repository +GITEA_TOKEN= + # ============================================= # 扫描配置 # ============================================= diff --git a/frontend/src/components/system/SystemConfig.tsx b/frontend/src/components/system/SystemConfig.tsx index 81c1345..3411f85 100644 --- a/frontend/src/components/system/SystemConfig.tsx +++ b/frontend/src/components/system/SystemConfig.tsx @@ -41,7 +41,7 @@ const DEFAULT_MODELS: Record = { interface SystemConfigData { llmProvider: string; llmApiKey: string; llmModel: string; llmBaseUrl: string; llmTimeout: number; llmTemperature: number; llmMaxTokens: number; - githubToken: string; gitlabToken: string; + githubToken: string; gitlabToken: string; giteaToken: string; maxAnalyzeFiles: number; llmConcurrency: number; llmGapMs: number; outputLanguage: string; } @@ -78,6 +78,7 @@ export function SystemConfig() { llmMaxTokens: llmConfig.llmMaxTokens || 4096, githubToken: otherConfig.githubToken || '', gitlabToken: otherConfig.gitlabToken || '', + giteaToken: otherConfig.giteaToken || '', maxAnalyzeFiles: otherConfig.maxAnalyzeFiles ?? 0, llmConcurrency: otherConfig.llmConcurrency || 3, llmGapMs: otherConfig.llmGapMs || 2000, @@ -97,7 +98,7 @@ export function SystemConfig() { setConfig({ llmProvider: 'openai', llmApiKey: '', llmModel: '', llmBaseUrl: '', llmTimeout: 150000, llmTemperature: 0.1, llmMaxTokens: 4096, - githubToken: '', gitlabToken: '', + githubToken: '', gitlabToken: '', giteaToken: '', maxAnalyzeFiles: 0, llmConcurrency: 3, llmGapMs: 2000, outputLanguage: 'zh-CN', }); } @@ -106,7 +107,7 @@ export function SystemConfig() { setConfig({ llmProvider: 'openai', llmApiKey: '', llmModel: '', llmBaseUrl: '', llmTimeout: 150000, llmTemperature: 0.1, llmMaxTokens: 4096, - githubToken: '', gitlabToken: '', + githubToken: '', gitlabToken: '', giteaToken: '', maxAnalyzeFiles: 0, llmConcurrency: 3, llmGapMs: 2000, outputLanguage: 'zh-CN', }); } finally { @@ -125,7 +126,7 @@ export function SystemConfig() { llmMaxTokens: config.llmMaxTokens, }, otherConfig: { - githubToken: config.githubToken, gitlabToken: config.gitlabToken, + githubToken: config.githubToken, gitlabToken: config.gitlabToken, giteaToken: config.giteaToken, maxAnalyzeFiles: config.maxAnalyzeFiles, llmConcurrency: config.llmConcurrency, llmGapMs: config.llmGapMs, outputLanguage: config.outputLanguage, }, @@ -144,6 +145,7 @@ export function SystemConfig() { llmMaxTokens: llmConfig.llmMaxTokens || 4096, githubToken: otherConfig.githubToken || '', gitlabToken: otherConfig.gitlabToken || '', + giteaToken: otherConfig.giteaToken || '', maxAnalyzeFiles: otherConfig.maxAnalyzeFiles ?? 0, llmConcurrency: otherConfig.llmConcurrency || 3, llmGapMs: otherConfig.llmGapMs || 2000, @@ -526,6 +528,22 @@ export function SystemConfig() {

+
+ + updateConfig('giteaToken', e.target.value)} + placeholder="sha1_xxxxxxxxxxxx" + className="h-10 cyber-input" + /> +

+ 用于访问 Gitea 私有仓库。获取:{' '} + + [your-gitea-instance]/user/settings/applications + +

+

diff --git a/frontend/src/pages/Projects.tsx b/frontend/src/pages/Projects.tsx index 080b509..f01b039 100644 --- a/frontend/src/pages/Projects.tsx +++ b/frontend/src/pages/Projects.tsx @@ -275,6 +275,7 @@ export default function Projects() { switch (type) { case 'github': return ; case 'gitlab': return ; + case 'gitea': return ; default: return ; } }; @@ -486,6 +487,7 @@ export default function Projects() { GITHUB GITLAB + GITEA OTHER diff --git a/frontend/src/shared/constants/projectTypes.ts b/frontend/src/shared/constants/projectTypes.ts index c74fe74..ecb904f 100644 --- a/frontend/src/shared/constants/projectTypes.ts +++ b/frontend/src/shared/constants/projectTypes.ts @@ -10,17 +10,17 @@ export const PROJECT_SOURCE_TYPES: Array<{ label: string; description: string; }> = [ - { - value: 'repository', - label: '远程仓库', - description: '从 GitHub/GitLab 等远程仓库拉取代码' - }, - { - value: 'zip', - label: 'ZIP上传', - description: '上传本地ZIP压缩包进行扫描' - } -]; + { + value: 'repository', + label: '远程仓库', + description: '从 GitHub/GitLab 等远程仓库拉取代码' + }, + { + value: 'zip', + label: 'ZIP上传', + description: '上传本地ZIP压缩包进行扫描' + } + ]; // 仓库平台选项 export const REPOSITORY_PLATFORMS: Array<{ @@ -28,10 +28,11 @@ export const REPOSITORY_PLATFORMS: Array<{ label: string; icon?: string; }> = [ - { value: 'github', label: 'GitHub' }, - { value: 'gitlab', label: 'GitLab' }, - { value: 'other', label: '其他' } -]; + { value: 'github', label: 'GitHub' }, + { value: 'gitlab', label: 'GitLab' }, + { value: 'gitea', label: 'Gitea' }, + { value: 'other', label: '其他' } + ]; // 项目来源类型的颜色配置 export const SOURCE_TYPE_COLORS: Record = { github: { bg: 'bg-gray-800', text: 'text-white' }, gitlab: { bg: 'bg-orange-500', text: 'text-white' }, + gitea: { bg: 'bg-green-600', text: 'text-white' }, other: { bg: 'bg-gray-500', text: 'text-white' } }; diff --git a/frontend/src/shared/types/index.ts b/frontend/src/shared/types/index.ts index 666222e..db57b01 100644 --- a/frontend/src/shared/types/index.ts +++ b/frontend/src/shared/types/index.ts @@ -24,7 +24,7 @@ export interface Profile { export type ProjectSourceType = 'repository' | 'zip'; // 仓库平台类型 -export type RepositoryPlatform = 'github' | 'gitlab' | 'other'; +export type RepositoryPlatform = 'github' | 'gitlab' | 'gitea' | 'other'; // 项目相关类型 export interface Project { diff --git a/frontend/src/shared/utils/projectUtils.ts b/frontend/src/shared/utils/projectUtils.ts index 1ade33d..07bc437 100644 --- a/frontend/src/shared/utils/projectUtils.ts +++ b/frontend/src/shared/utils/projectUtils.ts @@ -48,6 +48,7 @@ export function getRepositoryPlatformLabel(platform?: string): string { const labels: Record = { github: 'GitHub', gitlab: 'GitLab', + gitea: 'Gitea', other: '其他' }; return labels[platform || 'other'] || '其他';