From 0fb10f6f762cd399d55e18b14fbe2d7300a2eb29 Mon Sep 17 00:00:00 2001
From: vinland100
Date: Tue, 16 Dec 2025 16:36:08 +0800
Subject: [PATCH] feat: add Gitea repository support
---
backend/app/core/config.py | 3 +
backend/app/models/project.py | 2 +-
backend/app/services/scanner.py | 146 +++++++++++++++---
backend/env.example | 5 +
.../src/components/system/SystemConfig.tsx | 26 +++-
frontend/src/pages/Projects.tsx | 2 +
frontend/src/shared/constants/projectTypes.ts | 32 ++--
frontend/src/shared/types/index.ts | 2 +-
frontend/src/shared/utils/projectUtils.ts | 1 +
9 files changed, 177 insertions(+), 42 deletions(-)
diff --git a/backend/app/core/config.py b/backend/app/core/config.py
index 43d6385..536efda 100644
--- a/backend/app/core/config.py
+++ b/backend/app/core/config.py
@@ -65,6 +65,9 @@ class Settings(BaseSettings):
# GitLab配置
GITLAB_TOKEN: Optional[str] = None
+ # Gitea配置
+ GITEA_TOKEN: Optional[str] = None
+
# 扫描配置
MAX_ANALYZE_FILES: int = 0 # 最大分析文件数,0表示无限制
MAX_FILE_SIZE_BYTES: int = 200 * 1024 # 最大文件大小 200KB
diff --git a/backend/app/models/project.py b/backend/app/models/project.py
index ac89beb..5debcbf 100644
--- a/backend/app/models/project.py
+++ b/backend/app/models/project.py
@@ -16,7 +16,7 @@ class Project(Base):
# 仓库相关字段 (仅 source_type='repository' 时使用)
repository_url = Column(String, nullable=True)
- repository_type = Column(String, default="other") # github, gitlab, other
+ repository_type = Column(String, default="other") # github, gitlab, gitea, other
default_branch = Column(String, default="main")
programming_languages = Column(Text, default="[]") # Stored as JSON string
diff --git a/backend/app/services/scanner.py b/backend/app/services/scanner.py
index 9937e28..fea8968 100644
--- a/backend/app/services/scanner.py
+++ b/backend/app/services/scanner.py
@@ -1,5 +1,5 @@
"""
-仓库扫描服务 - 支持GitHub和GitLab仓库扫描
+仓库扫描服务 - 支持GitHub, GitLab 和 Gitea 仓库扫描
"""
import asyncio
@@ -98,6 +98,25 @@ async def github_api(url: str, token: str = None) -> Any:
return response.json()
+
+async def gitea_api(url: str, token: str = None) -> Any:
+    """Issue a GET request against a Gitea API URL and return the decoded JSON.
+
+    Args:
+        url: Fully qualified Gitea API endpoint.
+        token: Access token; when omitted, ``settings.GITEA_TOKEN`` is used.
+
+    Returns:
+        The JSON-decoded response body.
+
+    Raises:
+        Exception: On any non-200 response. 401 and 403 carry dedicated
+            messages; every other status reports the status code and URL.
+    """
+    auth_token = token or settings.GITEA_TOKEN
+    request_headers = {"Content-Type": "application/json"}
+    if auth_token:
+        # Gitea expects the "token <value>" authorization scheme.
+        request_headers["Authorization"] = f"token {auth_token}"
+
+    async with httpx.AsyncClient(timeout=30) as client:
+        response = await client.get(url, headers=request_headers)
+        status = response.status_code
+        if status == 200:
+            return response.json()
+
+        known_errors = {
+            401: "Gitea API 401:请配置 GITEA_TOKEN 或确认仓库权限",
+            403: "Gitea API 403:请确认仓库权限/频率限制",
+        }
+        raise Exception(known_errors.get(status, f"Gitea API {status}: {url}"))
+
+
async def gitlab_api(url: str, token: str = None) -> Any:
"""调用GitLab API"""
headers = {"Content-Type": "application/json"}
@@ -130,7 +149,9 @@ async def fetch_file_content(url: str, headers: Dict[str, str] = None) -> Option
async def get_github_branches(repo_url: str, token: str = None) -> List[str]:
"""获取GitHub仓库分支列表"""
- match = repo_url.rstrip('/').rstrip('.git')
+ match = repo_url.rstrip('/')
+ if match.endswith('.git'):
+ match = match[:-4]
if 'github.com/' in match:
parts = match.split('github.com/')[-1].split('/')
if len(parts) >= 2:
@@ -146,6 +167,30 @@ async def get_github_branches(repo_url: str, token: str = None) -> List[str]:
return [b["name"] for b in branches_data]
+
+
+
+async def get_gitea_branches(repo_url: str, token: str = None) -> List[str]:
+    """Return the names of all branches of a Gitea repository.
+
+    Args:
+        repo_url: Repository URL, e.g. ``https://host/owner/repo`` or
+            ``https://host/owner/repo.git``.
+        token: Access token forwarded to ``gitea_api``.
+
+    Returns:
+        Branch names in the order the API reports them, without duplicates.
+
+    Raises:
+        Exception: If the URL path lacks an owner or repository segment, or
+            if ``gitea_api`` raises.
+    """
+    parsed = urlparse(repo_url)
+    base = f"{parsed.scheme}://{parsed.netloc}"
+
+    # The path is normally /owner/repo.git or /owner/repo.
+    path = parsed.path.strip('/')
+    if path.endswith('.git'):
+        path = path[:-4]
+    parts = path.split('/')
+    if len(parts) < 2:
+        raise Exception("Gitea 仓库 URL 格式错误")
+
+    owner, repo = parts[0], parts[1]
+    branches_url = f"{base}/api/v1/repos/{owner}/{repo}/branches"
+
+    # The branch listing is paginated, so one request only yields the first
+    # page. Keep fetching until a page contributes no new names; this also
+    # terminates against servers that ignore the ``page`` parameter and
+    # return the same list every time.
+    names: List[str] = []
+    seen = set()
+    page = 1
+    while True:
+        branches_data = await gitea_api(f"{branches_url}?page={page}", token)
+        fresh = [b["name"] for b in branches_data if b["name"] not in seen]
+        if not fresh:
+            break
+        names.extend(fresh)
+        seen.update(fresh)
+        page += 1
+
+    return names
+
+
async def get_gitlab_branches(repo_url: str, token: str = None) -> List[str]:
"""获取GitLab仓库分支列表"""
parsed = urlparse(repo_url)
@@ -158,7 +203,9 @@ async def get_gitlab_branches(repo_url: str, token: str = None) -> List[str]:
elif parsed.username and not parsed.password:
extracted_token = parsed.username
- path = parsed.path.strip('/').rstrip('.git')
+ path = parsed.path.strip('/')
+ if path.endswith('.git'):
+ path = path[:-4]
if not path:
raise Exception("GitLab 仓库 URL 格式错误")
@@ -172,7 +219,9 @@ async def get_gitlab_branches(repo_url: str, token: str = None) -> List[str]:
async def get_github_files(repo_url: str, branch: str, token: str = None, exclude_patterns: List[str] = None) -> List[Dict[str, str]]:
"""获取GitHub仓库文件列表"""
# 解析仓库URL
- match = repo_url.rstrip('/').rstrip('.git')
+ match = repo_url.rstrip('/')
+ if match.endswith('.git'):
+ match = match[:-4]
if 'github.com/' in match:
parts = match.split('github.com/')[-1].split('/')
if len(parts) >= 2:
@@ -213,7 +262,9 @@ async def get_gitlab_files(repo_url: str, branch: str, token: str = None, exclud
extracted_token = parsed.username
# 解析项目路径
- path = parsed.path.strip('/').rstrip('.git')
+ path = parsed.path.strip('/')
+ if path.endswith('.git'):
+ path = path[:-4]
if not path:
raise Exception("GitLab 仓库 URL 格式错误")
@@ -235,6 +286,47 @@ async def get_gitlab_files(repo_url: str, branch: str, token: str = None, exclud
return files
+
+async def get_gitea_files(repo_url: str, branch: str, token: str = None, exclude_patterns: List[str] = None) -> List[Dict[str, str]]:
+    """Return the scannable text files of one branch of a Gitea repository.
+
+    Args:
+        repo_url: Repository URL, e.g. ``https://host/owner/repo(.git)``.
+        branch: Branch name, used as the tree reference.
+        token: Access token forwarded to ``gitea_api`` and attached to every
+            returned entry so the downloader can authenticate.
+        exclude_patterns: Patterns handed to ``should_exclude``.
+
+    Returns:
+        One dict per kept file with ``path`` (repository-relative path),
+        ``url`` (API raw-content URL pinned to ``branch``) and ``token``.
+
+    Raises:
+        Exception: If the URL path lacks an owner or repository segment, or
+            if ``gitea_api`` raises.
+    """
+    parsed = urlparse(repo_url)
+    base = f"{parsed.scheme}://{parsed.netloc}"
+
+    path = parsed.path.strip('/')
+    if path.endswith('.git'):
+        path = path[:-4]
+    parts = path.split('/')
+    if len(parts) < 2:
+        raise Exception("Gitea 仓库 URL 格式错误")
+
+    owner, repo = parts[0], parts[1]
+    repo_api = f"{base}/api/v1/repos/{owner}/{repo}"
+
+    # The branch occupies a single path segment of the tree endpoint, so '/'
+    # has to be escaped as well (quote() keeps it by default); otherwise a
+    # branch such as "feature/x" no longer matches the route.
+    tree_url = f"{repo_api}/git/trees/{quote(branch, safe='')}?recursive=1"
+    ref = quote(branch)
+
+    files = []
+    seen_paths = set()
+    page = 1
+    while True:
+        tree_data = await gitea_api(f"{tree_url}&page={page}", token)
+        new_entries = [
+            item for item in (tree_data.get("tree") or [])
+            if item["path"] not in seen_paths
+        ]
+        for item in new_entries:
+            item_path = item["path"]
+            seen_paths.add(item_path)
+            # Tree entries of type "blob" are files.
+            if item.get("type") == "blob" and is_text_file(item_path) and not should_exclude(item_path, exclude_patterns):
+                files.append({
+                    "path": item_path,
+                    "url": f"{repo_api}/raw/{quote(item_path)}?ref={ref}",
+                    "token": token,
+                })
+        # Large trees are delivered in pages and flagged as truncated. Stop
+        # when the server reports the last page, or when a page adds nothing
+        # new (guards against servers that ignore ``page``).
+        if not tree_data.get("truncated") or not new_entries:
+            break
+        page += 1
+
+    return files
async def scan_repo_task(task_id: str, db_session_factory, user_config: dict = None):
"""
后台仓库扫描任务
@@ -293,24 +385,23 @@ async def scan_repo_task(task_id: str, db_session_factory, user_config: dict = N
user_other_config = (user_config or {}).get('otherConfig', {})
github_token = user_other_config.get('githubToken') or settings.GITHUB_TOKEN
gitlab_token = user_other_config.get('gitlabToken') or settings.GITLAB_TOKEN
+ gitea_token = user_other_config.get('giteaToken') or settings.GITEA_TOKEN
+
+
files: List[Dict[str, str]] = []
extracted_gitlab_token = None
-
- # 构建分支尝试顺序(分支降级机制)
- branches_to_try = [branch]
- if project.default_branch and project.default_branch != branch:
- branches_to_try.append(project.default_branch)
- for common_branch in ["main", "master"]:
- if common_branch not in branches_to_try:
- branches_to_try.append(common_branch)
-
- actual_branch = branch # 实际使用的分支
last_error = None
+ actual_branch = branch
+
+ # Branch fallback order: the requested branch first, then the project's
+ # default branch, then the common names "main" and "master".
+ branches_to_try = [branch]
+ if project.default_branch:
+     branches_to_try.append(project.default_branch)
+ branches_to_try.extend(["main", "master"])
+ # dict.fromkeys drops duplicates while preserving the order above.
+ branches_to_try = list(dict.fromkeys(branches_to_try))
for try_branch in branches_to_try:
try:
- print(f"🔄 尝试获取分支 {try_branch} 的文件列表...")
if repo_type == "github":
files = await get_github_files(repo_url, try_branch, github_token, task_exclude_patterns)
elif repo_type == "gitlab":
@@ -318,8 +409,10 @@ async def scan_repo_task(task_id: str, db_session_factory, user_config: dict = N
# GitLab文件可能带有token
if files and 'token' in files[0]:
extracted_gitlab_token = files[0].get('token')
+ elif repo_type == "gitea":
+ files = await get_gitea_files(repo_url, try_branch, gitea_token, task_exclude_patterns)
else:
- raise Exception("不支持的仓库类型,仅支持 GitHub 和 GitLab 仓库")
+ raise Exception("不支持的仓库类型,仅支持 GitHub, GitLab 和 Gitea 仓库")
if files:
actual_branch = try_branch
@@ -386,10 +479,21 @@ async def scan_repo_task(task_id: str, db_session_factory, user_config: dict = N
try:
# 获取文件内容
headers = {}
- # 使用提取的 GitLab token 或用户配置的 token
- token_to_use = extracted_gitlab_token or gitlab_token
- if token_to_use:
- headers["PRIVATE-TOKEN"] = token_to_use
+ # Choose the authentication header that matches the hosting platform.
+
+ if repo_type == "gitlab":
+     # Prefer the token carried by the GitLab file list over the configured one.
+     token_to_use = extracted_gitlab_token or gitlab_token
+     if token_to_use:
+         headers["PRIVATE-TOKEN"] = token_to_use
+ elif repo_type == "gitea":
+     # get_gitea_files attaches the token it was called with to every entry.
+     token_to_use = file_info.get("token") or gitea_token
+     if token_to_use:
+         headers["Authorization"] = f"token {token_to_use}"
+ elif repo_type == "github":
+     # Raw downloads of public repositories work without a token;
+     # private repositories need one.
+     if github_token:
+         headers["Authorization"] = f"Bearer {github_token}"
print(f"📥 正在获取文件: {file_info['path']}")
content = await fetch_file_content(file_info["url"], headers)
diff --git a/backend/env.example b/backend/env.example
index 2fd0d25..37eeb24 100644
--- a/backend/env.example
+++ b/backend/env.example
@@ -183,6 +183,11 @@ GITHUB_TOKEN=
# 权限要求: read_repository
GITLAB_TOKEN=
+# Gitea Access Token
+# 获取地址: https://[your-gitea-instance]/user/settings/applications
+# 权限要求: read:repository
+GITEA_TOKEN=
+
# =============================================
# 扫描配置
# =============================================
diff --git a/frontend/src/components/system/SystemConfig.tsx b/frontend/src/components/system/SystemConfig.tsx
index 81c1345..3411f85 100644
--- a/frontend/src/components/system/SystemConfig.tsx
+++ b/frontend/src/components/system/SystemConfig.tsx
@@ -41,7 +41,7 @@ const DEFAULT_MODELS: Record = {
interface SystemConfigData {
llmProvider: string; llmApiKey: string; llmModel: string; llmBaseUrl: string;
llmTimeout: number; llmTemperature: number; llmMaxTokens: number;
- githubToken: string; gitlabToken: string;
+ githubToken: string; gitlabToken: string; giteaToken: string;
maxAnalyzeFiles: number; llmConcurrency: number; llmGapMs: number; outputLanguage: string;
}
@@ -78,6 +78,7 @@ export function SystemConfig() {
llmMaxTokens: llmConfig.llmMaxTokens || 4096,
githubToken: otherConfig.githubToken || '',
gitlabToken: otherConfig.gitlabToken || '',
+ giteaToken: otherConfig.giteaToken || '',
maxAnalyzeFiles: otherConfig.maxAnalyzeFiles ?? 0,
llmConcurrency: otherConfig.llmConcurrency || 3,
llmGapMs: otherConfig.llmGapMs || 2000,
@@ -97,7 +98,7 @@ export function SystemConfig() {
setConfig({
llmProvider: 'openai', llmApiKey: '', llmModel: '', llmBaseUrl: '',
llmTimeout: 150000, llmTemperature: 0.1, llmMaxTokens: 4096,
- githubToken: '', gitlabToken: '',
+ githubToken: '', gitlabToken: '', giteaToken: '',
maxAnalyzeFiles: 0, llmConcurrency: 3, llmGapMs: 2000, outputLanguage: 'zh-CN',
});
}
@@ -106,7 +107,7 @@ export function SystemConfig() {
setConfig({
llmProvider: 'openai', llmApiKey: '', llmModel: '', llmBaseUrl: '',
llmTimeout: 150000, llmTemperature: 0.1, llmMaxTokens: 4096,
- githubToken: '', gitlabToken: '',
+ githubToken: '', gitlabToken: '', giteaToken: '',
maxAnalyzeFiles: 0, llmConcurrency: 3, llmGapMs: 2000, outputLanguage: 'zh-CN',
});
} finally {
@@ -125,7 +126,7 @@ export function SystemConfig() {
llmMaxTokens: config.llmMaxTokens,
},
otherConfig: {
- githubToken: config.githubToken, gitlabToken: config.gitlabToken,
+ githubToken: config.githubToken, gitlabToken: config.gitlabToken, giteaToken: config.giteaToken,
maxAnalyzeFiles: config.maxAnalyzeFiles, llmConcurrency: config.llmConcurrency,
llmGapMs: config.llmGapMs, outputLanguage: config.outputLanguage,
},
@@ -144,6 +145,7 @@ export function SystemConfig() {
llmMaxTokens: llmConfig.llmMaxTokens || 4096,
githubToken: otherConfig.githubToken || '',
gitlabToken: otherConfig.gitlabToken || '',
+ giteaToken: otherConfig.giteaToken || '',
maxAnalyzeFiles: otherConfig.maxAnalyzeFiles ?? 0,
llmConcurrency: otherConfig.llmConcurrency || 3,
llmGapMs: otherConfig.llmGapMs || 2000,
@@ -526,6 +528,22 @@ export function SystemConfig() {
+
+
+
updateConfig('giteaToken', e.target.value)}
+ placeholder="sha1_xxxxxxxxxxxx"
+ className="h-10 cyber-input"
+ />
+
+ 用于访问 Gitea 私有仓库。获取:{' '}
+
+ [your-gitea-instance]/user/settings/applications
+
+
+
diff --git a/frontend/src/pages/Projects.tsx b/frontend/src/pages/Projects.tsx
index 080b509..f01b039 100644
--- a/frontend/src/pages/Projects.tsx
+++ b/frontend/src/pages/Projects.tsx
@@ -275,6 +275,7 @@ export default function Projects() {
switch (type) {
case 'github': return ;
case 'gitlab': return ;
+ case 'gitea': return ;
default: return ;
}
};
@@ -486,6 +487,7 @@ export default function Projects() {
GITHUB
GITLAB
+ GITEA
OTHER
diff --git a/frontend/src/shared/constants/projectTypes.ts b/frontend/src/shared/constants/projectTypes.ts
index c74fe74..ecb904f 100644
--- a/frontend/src/shared/constants/projectTypes.ts
+++ b/frontend/src/shared/constants/projectTypes.ts
@@ -10,17 +10,17 @@ export const PROJECT_SOURCE_TYPES: Array<{
label: string;
description: string;
}> = [
- {
- value: 'repository',
- label: '远程仓库',
- description: '从 GitHub/GitLab 等远程仓库拉取代码'
- },
- {
- value: 'zip',
- label: 'ZIP上传',
- description: '上传本地ZIP压缩包进行扫描'
- }
-];
+ {
+ value: 'repository',
+ label: '远程仓库',
+ description: '从 GitHub/GitLab 等远程仓库拉取代码'
+ },
+ {
+ value: 'zip',
+ label: 'ZIP上传',
+ description: '上传本地ZIP压缩包进行扫描'
+ }
+ ];
// 仓库平台选项
export const REPOSITORY_PLATFORMS: Array<{
@@ -28,10 +28,11 @@ export const REPOSITORY_PLATFORMS: Array<{
label: string;
icon?: string;
}> = [
- { value: 'github', label: 'GitHub' },
- { value: 'gitlab', label: 'GitLab' },
- { value: 'other', label: '其他' }
-];
+ { value: 'github', label: 'GitHub' },
+ { value: 'gitlab', label: 'GitLab' },
+ { value: 'gitea', label: 'Gitea' },
+ { value: 'other', label: '其他' }
+ ];
// 项目来源类型的颜色配置
export const SOURCE_TYPE_COLORS: Record = {
github: { bg: 'bg-gray-800', text: 'text-white' },
gitlab: { bg: 'bg-orange-500', text: 'text-white' },
+ gitea: { bg: 'bg-green-600', text: 'text-white' },
other: { bg: 'bg-gray-500', text: 'text-white' }
};
diff --git a/frontend/src/shared/types/index.ts b/frontend/src/shared/types/index.ts
index 666222e..db57b01 100644
--- a/frontend/src/shared/types/index.ts
+++ b/frontend/src/shared/types/index.ts
@@ -24,7 +24,7 @@ export interface Profile {
export type ProjectSourceType = 'repository' | 'zip';
// 仓库平台类型
-export type RepositoryPlatform = 'github' | 'gitlab' | 'other';
+export type RepositoryPlatform = 'github' | 'gitlab' | 'gitea' | 'other';
// 项目相关类型
export interface Project {
diff --git a/frontend/src/shared/utils/projectUtils.ts b/frontend/src/shared/utils/projectUtils.ts
index 1ade33d..07bc437 100644
--- a/frontend/src/shared/utils/projectUtils.ts
+++ b/frontend/src/shared/utils/projectUtils.ts
@@ -48,6 +48,7 @@ export function getRepositoryPlatformLabel(platform?: string): string {
const labels: Record = {
github: 'GitHub',
gitlab: 'GitLab',
+ gitea: 'Gitea',
other: '其他'
};
return labels[platform || 'other'] || '其他';