feat: add Gitea repository support

This commit is contained in:
vinland100 2025-12-16 16:36:08 +08:00
parent a336802e26
commit 0fb10f6f76
9 changed files with 177 additions and 42 deletions

View File

@ -65,6 +65,9 @@ class Settings(BaseSettings):
# GitLab配置 # GitLab配置
GITLAB_TOKEN: Optional[str] = None GITLAB_TOKEN: Optional[str] = None
# Gitea配置
GITEA_TOKEN: Optional[str] = None
# 扫描配置 # 扫描配置
MAX_ANALYZE_FILES: int = 0 # 最大分析文件数0表示无限制 MAX_ANALYZE_FILES: int = 0 # 最大分析文件数0表示无限制
MAX_FILE_SIZE_BYTES: int = 200 * 1024 # 最大文件大小 200KB MAX_FILE_SIZE_BYTES: int = 200 * 1024 # 最大文件大小 200KB

View File

@ -16,7 +16,7 @@ class Project(Base):
# 仓库相关字段 (仅 source_type='repository' 时使用) # 仓库相关字段 (仅 source_type='repository' 时使用)
repository_url = Column(String, nullable=True) repository_url = Column(String, nullable=True)
repository_type = Column(String, default="other") # github, gitlab, other repository_type = Column(String, default="other") # github, gitlab, gitea, other
default_branch = Column(String, default="main") default_branch = Column(String, default="main")
programming_languages = Column(Text, default="[]") # Stored as JSON string programming_languages = Column(Text, default="[]") # Stored as JSON string

View File

@ -1,5 +1,5 @@
""" """
仓库扫描服务 - 支持GitHub和GitLab仓库扫描 仓库扫描服务 - 支持GitHub, GitLab 和 Gitea 仓库扫描
""" """
import asyncio import asyncio
@ -98,6 +98,25 @@ async def github_api(url: str, token: str = None) -> Any:
return response.json() return response.json()
async def gitea_api(url: str, token: str = None) -> Any:
    """Send a GET request to a Gitea API endpoint and return the decoded JSON body.

    Args:
        url: Full URL of the Gitea API endpoint to call.
        token: Access token for this call. When falsy, ``settings.GITEA_TOKEN``
            is used instead; when both are falsy the request is sent without
            an ``Authorization`` header.

    Returns:
        Whatever ``response.json()`` yields for a 200 response.

    Raises:
        Exception: For any status other than 200. 401 and 403 get dedicated
            messages; every other status reports the code and the URL.
    """
    access_token = token or settings.GITEA_TOKEN
    request_headers = {"Content-Type": "application/json"}
    if access_token:
        # This function authenticates with the "token <value>" header scheme.
        request_headers["Authorization"] = f"token {access_token}"

    async with httpx.AsyncClient(timeout=30) as http:
        resp = await http.get(url, headers=request_headers)
        status = resp.status_code
        if status == 200:
            return resp.json()
        if status == 401:
            raise Exception("Gitea API 401请配置 GITEA_TOKEN 或确认仓库权限")
        if status == 403:
            raise Exception("Gitea API 403请确认仓库权限/频率限制")
        raise Exception(f"Gitea API {status}: {url}")
async def gitlab_api(url: str, token: str = None) -> Any: async def gitlab_api(url: str, token: str = None) -> Any:
"""调用GitLab API""" """调用GitLab API"""
headers = {"Content-Type": "application/json"} headers = {"Content-Type": "application/json"}
@ -130,7 +149,9 @@ async def fetch_file_content(url: str, headers: Dict[str, str] = None) -> Option
async def get_github_branches(repo_url: str, token: str = None) -> List[str]: async def get_github_branches(repo_url: str, token: str = None) -> List[str]:
"""获取GitHub仓库分支列表""" """获取GitHub仓库分支列表"""
match = repo_url.rstrip('/').rstrip('.git') match = repo_url.rstrip('/')
if match.endswith('.git'):
match = match[:-4]
if 'github.com/' in match: if 'github.com/' in match:
parts = match.split('github.com/')[-1].split('/') parts = match.split('github.com/')[-1].split('/')
if len(parts) >= 2: if len(parts) >= 2:
@ -146,6 +167,30 @@ async def get_github_branches(repo_url: str, token: str = None) -> List[str]:
return [b["name"] for b in branches_data] return [b["name"] for b in branches_data]
async def get_gitea_branches(repo_url: str, token: str = None) -> List[str]:
    """Return the names of all branches of a Gitea repository.

    The owner and repository are taken from the first two path segments of
    ``repo_url`` (``/owner/repo`` or ``/owner/repo.git``); the API base is the
    URL's scheme and network location.

    The branches endpoint is paginated, so pages are requested one after
    another until a page contributes no new branch name. Reading only the
    first page would silently drop branches on repositories that have more
    branches than the server's page size.

    Args:
        repo_url: Web or clone URL of the repository.
        token: Access token forwarded to ``gitea_api``.

    Returns:
        Branch names in the order the server returned them, without duplicates.

    Raises:
        Exception: If the URL path does not contain a non-empty owner and
            repository segment, or if ``gitea_api`` reports a failure.
    """
    parsed = urlparse(repo_url)
    base = f"{parsed.scheme}://{parsed.netloc}"

    # The path is normally /owner/repo.git or /owner/repo.
    path = parsed.path.strip('/')
    if path.endswith('.git'):
        path = path[:-4]
    parts = path.split('/')
    # Reject empty segments too (e.g. "/owner//repo") instead of calling the
    # API with a malformed URL.
    if len(parts) < 2 or not parts[0] or not parts[1]:
        raise Exception("Gitea 仓库 URL 格式错误")
    owner, repo = parts[0], parts[1]

    branches_url = f"{base}/api/v1/repos/{owner}/{repo}/branches"
    names: List[str] = []
    seen = set()
    page = 1
    while True:
        batch = await gitea_api(f"{branches_url}?page={page}", token)
        fresh = []
        for entry in batch or []:
            name = entry["name"]
            if name not in seen:
                seen.add(name)
                fresh.append(name)
        # Stop on an empty page. Also stop when a page repeats names already
        # collected, which guards against a server that ignores the `page`
        # parameter and would otherwise loop forever.
        if not fresh:
            break
        names.extend(fresh)
        page += 1
    return names
async def get_gitlab_branches(repo_url: str, token: str = None) -> List[str]: async def get_gitlab_branches(repo_url: str, token: str = None) -> List[str]:
"""获取GitLab仓库分支列表""" """获取GitLab仓库分支列表"""
parsed = urlparse(repo_url) parsed = urlparse(repo_url)
@ -158,7 +203,9 @@ async def get_gitlab_branches(repo_url: str, token: str = None) -> List[str]:
elif parsed.username and not parsed.password: elif parsed.username and not parsed.password:
extracted_token = parsed.username extracted_token = parsed.username
path = parsed.path.strip('/').rstrip('.git') path = parsed.path.strip('/')
if path.endswith('.git'):
path = path[:-4]
if not path: if not path:
raise Exception("GitLab 仓库 URL 格式错误") raise Exception("GitLab 仓库 URL 格式错误")
@ -172,7 +219,9 @@ async def get_gitlab_branches(repo_url: str, token: str = None) -> List[str]:
async def get_github_files(repo_url: str, branch: str, token: str = None, exclude_patterns: List[str] = None) -> List[Dict[str, str]]: async def get_github_files(repo_url: str, branch: str, token: str = None, exclude_patterns: List[str] = None) -> List[Dict[str, str]]:
"""获取GitHub仓库文件列表""" """获取GitHub仓库文件列表"""
# 解析仓库URL # 解析仓库URL
match = repo_url.rstrip('/').rstrip('.git') match = repo_url.rstrip('/')
if match.endswith('.git'):
match = match[:-4]
if 'github.com/' in match: if 'github.com/' in match:
parts = match.split('github.com/')[-1].split('/') parts = match.split('github.com/')[-1].split('/')
if len(parts) >= 2: if len(parts) >= 2:
@ -213,7 +262,9 @@ async def get_gitlab_files(repo_url: str, branch: str, token: str = None, exclud
extracted_token = parsed.username extracted_token = parsed.username
# 解析项目路径 # 解析项目路径
path = parsed.path.strip('/').rstrip('.git') path = parsed.path.strip('/')
if path.endswith('.git'):
path = path[:-4]
if not path: if not path:
raise Exception("GitLab 仓库 URL 格式错误") raise Exception("GitLab 仓库 URL 格式错误")
@ -235,6 +286,47 @@ async def get_gitlab_files(repo_url: str, branch: str, token: str = None, exclud
return files return files
async def get_gitea_files(repo_url: str, branch: str, token: str = None, exclude_patterns: List[str] = None) -> List[Dict[str, str]]:
    """List the scannable text files of one branch of a Gitea repository.

    The owner and repository are taken from the first two path segments of
    ``repo_url`` (``/owner/repo`` or ``/owner/repo.git``). The recursive
    git-tree listing is fetched with the branch name in the ``{sha}`` position
    and filtered down to blobs that ``is_text_file`` accepts and
    ``should_exclude`` does not reject.

    The tree endpoint is paginated: a response with a truthy ``truncated``
    field means more entries follow on the next page. All pages are read, so
    large repositories are not silently scanned only in part.

    Args:
        repo_url: Web or clone URL of the repository.
        branch: Branch name used for the tree lookup and as the ``ref`` of
            each file URL.
        token: Access token forwarded to ``gitea_api`` and copied into every
            returned entry.
        exclude_patterns: Patterns handed to ``should_exclude``.

    Returns:
        One dict per selected file with the keys ``path`` (repository-relative
        path), ``url`` (API raw-content URL for that path and branch) and
        ``token`` (the ``token`` argument, unchanged).

    Raises:
        Exception: If the URL path has fewer than two segments, or if
            ``gitea_api`` reports a failure.
    """
    parsed = urlparse(repo_url)
    base = f"{parsed.scheme}://{parsed.netloc}"

    path = parsed.path.strip('/')
    if path.endswith('.git'):
        path = path[:-4]
    parts = path.split('/')
    if len(parts) < 2:
        raise Exception("Gitea 仓库 URL 格式错误")
    owner, repo = parts[0], parts[1]

    # NOTE(review): quote() leaves "/" unescaped by default, so a branch such
    # as "feature/x" adds an extra path segment here - confirm the server
    # routes that as intended.
    ref = quote(branch)
    tree_url = f"{base}/api/v1/repos/{owner}/{repo}/git/trees/{ref}?recursive=1"

    files = []
    seen_paths = set()
    page = 1
    while True:
        tree_data = await gitea_api(f"{tree_url}&page={page}", token)
        added = 0
        # `or []` also covers a response whose "tree" value is null.
        for item in tree_data.get("tree") or []:
            item_path = item.get("path")
            if not item_path or item_path in seen_paths:
                continue
            seen_paths.add(item_path)
            added += 1
            # Only entries of type "blob" are files.
            if item.get("type") == "blob" and is_text_file(item_path) and not should_exclude(item_path, exclude_patterns):
                files.append({
                    "path": item_path,
                    # Raw content is fetched through the API endpoint
                    # /repos/{owner}/{repo}/raw/{filepath}?ref={branch}.
                    "url": f"{base}/api/v1/repos/{owner}/{repo}/raw/{quote(item_path)}?ref={ref}",
                    "token": token  # kept so the content download can reuse it
                })
        # Continue only while the server reports more entries. A page with
        # nothing new ends the loop so a server that ignores `page` cannot
        # make it spin forever.
        if not tree_data.get("truncated") or added == 0:
            break
        page += 1

    return files
async def scan_repo_task(task_id: str, db_session_factory, user_config: dict = None): async def scan_repo_task(task_id: str, db_session_factory, user_config: dict = None):
""" """
后台仓库扫描任务 后台仓库扫描任务
@ -293,24 +385,23 @@ async def scan_repo_task(task_id: str, db_session_factory, user_config: dict = N
user_other_config = (user_config or {}).get('otherConfig', {}) user_other_config = (user_config or {}).get('otherConfig', {})
github_token = user_other_config.get('githubToken') or settings.GITHUB_TOKEN github_token = user_other_config.get('githubToken') or settings.GITHUB_TOKEN
gitlab_token = user_other_config.get('gitlabToken') or settings.GITLAB_TOKEN gitlab_token = user_other_config.get('gitlabToken') or settings.GITLAB_TOKEN
gitea_token = user_other_config.get('giteaToken') or settings.GITEA_TOKEN
files: List[Dict[str, str]] = [] files: List[Dict[str, str]] = []
extracted_gitlab_token = None extracted_gitlab_token = None
# 构建分支尝试顺序(分支降级机制)
branches_to_try = [branch]
if project.default_branch and project.default_branch != branch:
branches_to_try.append(project.default_branch)
for common_branch in ["main", "master"]:
if common_branch not in branches_to_try:
branches_to_try.append(common_branch)
actual_branch = branch # 实际使用的分支
last_error = None last_error = None
actual_branch = branch
# 构造尝试的分支列表
branches_to_try = [branch]
if branch not in ["main", "master"]:
branches_to_try.extend(["main", "master"])
branches_to_try = list(dict.fromkeys(branches_to_try))
for try_branch in branches_to_try: for try_branch in branches_to_try:
try: try:
print(f"🔄 尝试获取分支 {try_branch} 的文件列表...")
if repo_type == "github": if repo_type == "github":
files = await get_github_files(repo_url, try_branch, github_token, task_exclude_patterns) files = await get_github_files(repo_url, try_branch, github_token, task_exclude_patterns)
elif repo_type == "gitlab": elif repo_type == "gitlab":
@ -318,8 +409,10 @@ async def scan_repo_task(task_id: str, db_session_factory, user_config: dict = N
# GitLab文件可能带有token # GitLab文件可能带有token
if files and 'token' in files[0]: if files and 'token' in files[0]:
extracted_gitlab_token = files[0].get('token') extracted_gitlab_token = files[0].get('token')
elif repo_type == "gitea":
files = await get_gitea_files(repo_url, try_branch, gitea_token, task_exclude_patterns)
else: else:
raise Exception("不支持的仓库类型,仅支持 GitHub 和 GitLab 仓库") raise Exception("不支持的仓库类型,仅支持 GitHub, GitLab 和 Gitea 仓库")
if files: if files:
actual_branch = try_branch actual_branch = try_branch
@ -386,10 +479,21 @@ async def scan_repo_task(task_id: str, db_session_factory, user_config: dict = N
try: try:
# 获取文件内容 # 获取文件内容
headers = {} headers = {}
# 使用提取的 GitLab token 或用户配置的 token # 使用提取的 token 或用户配置的 token
token_to_use = extracted_gitlab_token or gitlab_token
if token_to_use: if repo_type == "gitlab":
headers["PRIVATE-TOKEN"] = token_to_use token_to_use = extracted_token or gitlab_token
if token_to_use:
headers["PRIVATE-TOKEN"] = token_to_use
elif repo_type == "gitea":
token_to_use = extracted_token or gitea_token
if token_to_use:
headers["Authorization"] = f"token {token_to_use}"
elif repo_type == "github":
# GitHub raw URL 也是直接下载，通常 public 不需要 token，private 需要
# GitHub raw user content url: raw.githubusercontent.com
if github_token:
headers["Authorization"] = f"Bearer {github_token}"
print(f"📥 正在获取文件: {file_info['path']}") print(f"📥 正在获取文件: {file_info['path']}")
content = await fetch_file_content(file_info["url"], headers) content = await fetch_file_content(file_info["url"], headers)

View File

@ -183,6 +183,11 @@ GITHUB_TOKEN=
# 权限要求: read_repository # 权限要求: read_repository
GITLAB_TOKEN= GITLAB_TOKEN=
# Gitea Access Token
# 获取地址: https://[your-gitea-instance]/user/settings/applications
# 权限要求: read_repository
GITEA_TOKEN=
# ============================================= # =============================================
# 扫描配置 # 扫描配置
# ============================================= # =============================================

View File

@ -41,7 +41,7 @@ const DEFAULT_MODELS: Record<string, string> = {
interface SystemConfigData { interface SystemConfigData {
llmProvider: string; llmApiKey: string; llmModel: string; llmBaseUrl: string; llmProvider: string; llmApiKey: string; llmModel: string; llmBaseUrl: string;
llmTimeout: number; llmTemperature: number; llmMaxTokens: number; llmTimeout: number; llmTemperature: number; llmMaxTokens: number;
githubToken: string; gitlabToken: string; githubToken: string; gitlabToken: string; giteaToken: string;
maxAnalyzeFiles: number; llmConcurrency: number; llmGapMs: number; outputLanguage: string; maxAnalyzeFiles: number; llmConcurrency: number; llmGapMs: number; outputLanguage: string;
} }
@ -78,6 +78,7 @@ export function SystemConfig() {
llmMaxTokens: llmConfig.llmMaxTokens || 4096, llmMaxTokens: llmConfig.llmMaxTokens || 4096,
githubToken: otherConfig.githubToken || '', githubToken: otherConfig.githubToken || '',
gitlabToken: otherConfig.gitlabToken || '', gitlabToken: otherConfig.gitlabToken || '',
giteaToken: otherConfig.giteaToken || '',
maxAnalyzeFiles: otherConfig.maxAnalyzeFiles ?? 0, maxAnalyzeFiles: otherConfig.maxAnalyzeFiles ?? 0,
llmConcurrency: otherConfig.llmConcurrency || 3, llmConcurrency: otherConfig.llmConcurrency || 3,
llmGapMs: otherConfig.llmGapMs || 2000, llmGapMs: otherConfig.llmGapMs || 2000,
@ -97,7 +98,7 @@ export function SystemConfig() {
setConfig({ setConfig({
llmProvider: 'openai', llmApiKey: '', llmModel: '', llmBaseUrl: '', llmProvider: 'openai', llmApiKey: '', llmModel: '', llmBaseUrl: '',
llmTimeout: 150000, llmTemperature: 0.1, llmMaxTokens: 4096, llmTimeout: 150000, llmTemperature: 0.1, llmMaxTokens: 4096,
githubToken: '', gitlabToken: '', githubToken: '', gitlabToken: '', giteaToken: '',
maxAnalyzeFiles: 0, llmConcurrency: 3, llmGapMs: 2000, outputLanguage: 'zh-CN', maxAnalyzeFiles: 0, llmConcurrency: 3, llmGapMs: 2000, outputLanguage: 'zh-CN',
}); });
} }
@ -106,7 +107,7 @@ export function SystemConfig() {
setConfig({ setConfig({
llmProvider: 'openai', llmApiKey: '', llmModel: '', llmBaseUrl: '', llmProvider: 'openai', llmApiKey: '', llmModel: '', llmBaseUrl: '',
llmTimeout: 150000, llmTemperature: 0.1, llmMaxTokens: 4096, llmTimeout: 150000, llmTemperature: 0.1, llmMaxTokens: 4096,
githubToken: '', gitlabToken: '', githubToken: '', gitlabToken: '', giteaToken: '',
maxAnalyzeFiles: 0, llmConcurrency: 3, llmGapMs: 2000, outputLanguage: 'zh-CN', maxAnalyzeFiles: 0, llmConcurrency: 3, llmGapMs: 2000, outputLanguage: 'zh-CN',
}); });
} finally { } finally {
@ -125,7 +126,7 @@ export function SystemConfig() {
llmMaxTokens: config.llmMaxTokens, llmMaxTokens: config.llmMaxTokens,
}, },
otherConfig: { otherConfig: {
githubToken: config.githubToken, gitlabToken: config.gitlabToken, githubToken: config.githubToken, gitlabToken: config.gitlabToken, giteaToken: config.giteaToken,
maxAnalyzeFiles: config.maxAnalyzeFiles, llmConcurrency: config.llmConcurrency, maxAnalyzeFiles: config.maxAnalyzeFiles, llmConcurrency: config.llmConcurrency,
llmGapMs: config.llmGapMs, outputLanguage: config.outputLanguage, llmGapMs: config.llmGapMs, outputLanguage: config.outputLanguage,
}, },
@ -144,6 +145,7 @@ export function SystemConfig() {
llmMaxTokens: llmConfig.llmMaxTokens || 4096, llmMaxTokens: llmConfig.llmMaxTokens || 4096,
githubToken: otherConfig.githubToken || '', githubToken: otherConfig.githubToken || '',
gitlabToken: otherConfig.gitlabToken || '', gitlabToken: otherConfig.gitlabToken || '',
giteaToken: otherConfig.giteaToken || '',
maxAnalyzeFiles: otherConfig.maxAnalyzeFiles ?? 0, maxAnalyzeFiles: otherConfig.maxAnalyzeFiles ?? 0,
llmConcurrency: otherConfig.llmConcurrency || 3, llmConcurrency: otherConfig.llmConcurrency || 3,
llmGapMs: otherConfig.llmGapMs || 2000, llmGapMs: otherConfig.llmGapMs || 2000,
@ -526,6 +528,22 @@ export function SystemConfig() {
</a> </a>
</p> </p>
</div> </div>
<div className="space-y-2">
<Label className="text-xs font-bold text-gray-500 uppercase">Gitea Token ()</Label>
<Input
type="password"
value={config.giteaToken}
onChange={(e) => updateConfig('giteaToken', e.target.value)}
placeholder="sha1_xxxxxxxxxxxx"
className="h-10 cyber-input"
/>
<p className="text-xs text-gray-600">
访 Gitea :{' '}
<span className="text-primary">
[your-gitea-instance]/user/settings/applications
</span>
</p>
</div>
<div className="bg-gray-900/50 border border-gray-800 p-4 rounded-lg text-xs"> <div className="bg-gray-900/50 border border-gray-800 p-4 rounded-lg text-xs">
<p className="font-bold text-gray-400 flex items-center gap-2 mb-2"> <p className="font-bold text-gray-400 flex items-center gap-2 mb-2">
<Info className="w-4 h-4 text-sky-400" /> <Info className="w-4 h-4 text-sky-400" />

View File

@ -275,6 +275,7 @@ export default function Projects() {
switch (type) { switch (type) {
case 'github': return <Github className="w-5 h-5" />; case 'github': return <Github className="w-5 h-5" />;
case 'gitlab': return <GitBranch className="w-5 h-5 text-orange-500" />; case 'gitlab': return <GitBranch className="w-5 h-5 text-orange-500" />;
case 'gitea': return <GitBranch className="w-5 h-5 text-green-600" />;
default: return <Folder className="w-5 h-5 text-gray-600" />; default: return <Folder className="w-5 h-5 text-gray-600" />;
} }
}; };
@ -486,6 +487,7 @@ export default function Projects() {
<SelectContent className="bg-[#0c0c12] border-gray-700"> <SelectContent className="bg-[#0c0c12] border-gray-700">
<SelectItem value="github">GITHUB</SelectItem> <SelectItem value="github">GITHUB</SelectItem>
<SelectItem value="gitlab">GITLAB</SelectItem> <SelectItem value="gitlab">GITLAB</SelectItem>
<SelectItem value="gitea">GITEA</SelectItem>
<SelectItem value="other">OTHER</SelectItem> <SelectItem value="other">OTHER</SelectItem>
</SelectContent> </SelectContent>
</Select> </Select>

View File

@ -10,17 +10,17 @@ export const PROJECT_SOURCE_TYPES: Array<{
label: string; label: string;
description: string; description: string;
}> = [ }> = [
{ {
value: 'repository', value: 'repository',
label: '远程仓库', label: '远程仓库',
description: '从 GitHub/GitLab 等远程仓库拉取代码' description: '从 GitHub/GitLab 等远程仓库拉取代码'
}, },
{ {
value: 'zip', value: 'zip',
label: 'ZIP上传', label: 'ZIP上传',
description: '上传本地ZIP压缩包进行扫描' description: '上传本地ZIP压缩包进行扫描'
} }
]; ];
// 仓库平台选项 // 仓库平台选项
export const REPOSITORY_PLATFORMS: Array<{ export const REPOSITORY_PLATFORMS: Array<{
@ -28,10 +28,11 @@ export const REPOSITORY_PLATFORMS: Array<{
label: string; label: string;
icon?: string; icon?: string;
}> = [ }> = [
{ value: 'github', label: 'GitHub' }, { value: 'github', label: 'GitHub' },
{ value: 'gitlab', label: 'GitLab' }, { value: 'gitlab', label: 'GitLab' },
{ value: 'other', label: '其他' } { value: 'gitea', label: 'Gitea' },
]; { value: 'other', label: '其他' }
];
// 项目来源类型的颜色配置 // 项目来源类型的颜色配置
export const SOURCE_TYPE_COLORS: Record<ProjectSourceType, { export const SOURCE_TYPE_COLORS: Record<ProjectSourceType, {
@ -58,5 +59,6 @@ export const PLATFORM_COLORS: Record<RepositoryPlatform, {
}> = { }> = {
github: { bg: 'bg-gray-800', text: 'text-white' }, github: { bg: 'bg-gray-800', text: 'text-white' },
gitlab: { bg: 'bg-orange-500', text: 'text-white' }, gitlab: { bg: 'bg-orange-500', text: 'text-white' },
gitea: { bg: 'bg-green-600', text: 'text-white' },
other: { bg: 'bg-gray-500', text: 'text-white' } other: { bg: 'bg-gray-500', text: 'text-white' }
}; };

View File

@ -24,7 +24,7 @@ export interface Profile {
export type ProjectSourceType = 'repository' | 'zip'; export type ProjectSourceType = 'repository' | 'zip';
// 仓库平台类型 // 仓库平台类型
export type RepositoryPlatform = 'github' | 'gitlab' | 'other'; export type RepositoryPlatform = 'github' | 'gitlab' | 'gitea' | 'other';
// 项目相关类型 // 项目相关类型
export interface Project { export interface Project {

View File

@ -48,6 +48,7 @@ export function getRepositoryPlatformLabel(platform?: string): string {
const labels: Record<string, string> = { const labels: Record<string, string> = {
github: 'GitHub', github: 'GitHub',
gitlab: 'GitLab', gitlab: 'GitLab',
gitea: 'Gitea',
other: '其他' other: '其他'
}; };
return labels[platform || 'other'] || '其他'; return labels[platform || 'other'] || '其他';