diff --git a/backend/app/core/config.py b/backend/app/core/config.py
index 43d6385..536efda 100644
--- a/backend/app/core/config.py
+++ b/backend/app/core/config.py
@@ -65,6 +65,9 @@ class Settings(BaseSettings):
# GitLab配置
GITLAB_TOKEN: Optional[str] = None
+ # Gitea配置
+ GITEA_TOKEN: Optional[str] = None
+
# 扫描配置
MAX_ANALYZE_FILES: int = 0 # 最大分析文件数,0表示无限制
MAX_FILE_SIZE_BYTES: int = 200 * 1024 # 最大文件大小 200KB
diff --git a/backend/app/models/project.py b/backend/app/models/project.py
index ac89beb..5debcbf 100644
--- a/backend/app/models/project.py
+++ b/backend/app/models/project.py
@@ -16,7 +16,7 @@ class Project(Base):
# 仓库相关字段 (仅 source_type='repository' 时使用)
repository_url = Column(String, nullable=True)
- repository_type = Column(String, default="other") # github, gitlab, other
+ repository_type = Column(String, default="other") # github, gitlab, gitea, other
default_branch = Column(String, default="main")
programming_languages = Column(Text, default="[]") # Stored as JSON string
diff --git a/backend/app/services/scanner.py b/backend/app/services/scanner.py
index 9937e28..fea8968 100644
--- a/backend/app/services/scanner.py
+++ b/backend/app/services/scanner.py
@@ -1,5 +1,5 @@
"""
-仓库扫描服务 - 支持GitHub和GitLab仓库扫描
+仓库扫描服务 - 支持GitHub, GitLab 和 Gitea 仓库扫描
"""
import asyncio
@@ -98,6 +98,25 @@ async def github_api(url: str, token: str = None) -> Any:
return response.json()
+
+async def gitea_api(url: str, token: str = None) -> Any:
+    """Send a GET request to a Gitea API endpoint and return the JSON body.
+
+    Args:
+        url: Full URL of the Gitea API endpoint to request.
+        token: Access token; ``settings.GITEA_TOKEN`` is used when this is falsy.
+
+    Returns:
+        The decoded JSON payload of a 200 response.
+
+    Raises:
+        Exception: On a 401, a 403, or any other non-200 status code.
+    """
+    access_token = token or settings.GITEA_TOKEN
+    request_headers = {"Content-Type": "application/json"}
+    if access_token:
+        # The token is sent with the "token <value>" authorization scheme.
+        request_headers["Authorization"] = f"token {access_token}"
+
+    async with httpx.AsyncClient(timeout=30) as client:
+        response = await client.get(url, headers=request_headers)
+        status = response.status_code
+        if status == 200:
+            return response.json()
+        if status == 401:
+            raise Exception("Gitea API 401:请配置 GITEA_TOKEN 或确认仓库权限")
+        if status == 403:
+            raise Exception("Gitea API 403:请确认仓库权限/频率限制")
+        raise Exception(f"Gitea API {status}: {url}")
+
+
async def gitlab_api(url: str, token: str = None) -> Any:
"""调用GitLab API"""
headers = {"Content-Type": "application/json"}
@@ -130,7 +149,9 @@ async def fetch_file_content(url: str, headers: Dict[str, str] = None) -> Option
async def get_github_branches(repo_url: str, token: str = None) -> List[str]:
"""获取GitHub仓库分支列表"""
- match = repo_url.rstrip('/').rstrip('.git')
+ match = repo_url.rstrip('/')
+ if match.endswith('.git'):
+ match = match[:-4]
if 'github.com/' in match:
parts = match.split('github.com/')[-1].split('/')
if len(parts) >= 2:
@@ -146,6 +167,30 @@ async def get_github_branches(repo_url: str, token: str = None) -> List[str]:
return [b["name"] for b in branches_data]
+
+
+
+async def get_gitea_branches(repo_url: str, token: str = None) -> List[str]:
+    """Return the branch names of a Gitea repository.
+
+    Args:
+        repo_url: Repository URL whose path starts with ``/owner/repo``
+            (an optional ``.git`` suffix is removed).
+        token: Access token forwarded to ``gitea_api``.
+
+    Returns:
+        The ``name`` field of every entry returned by the branches endpoint.
+
+    Raises:
+        Exception: If the URL path has fewer than two segments; errors
+            raised by ``gitea_api`` propagate unchanged.
+    """
+    url_parts = urlparse(repo_url)
+    api_root = f"{url_parts.scheme}://{url_parts.netloc}"
+
+    # The path is expected to look like /owner/repo.git or /owner/repo.
+    repo_path = url_parts.path.strip('/')
+    if repo_path.endswith('.git'):
+        repo_path = repo_path[:-4]
+    segments = repo_path.split('/')
+    if len(segments) < 2:
+        raise Exception("Gitea 仓库 URL 格式错误")
+    owner, repo = segments[:2]
+
+    branch_entries = await gitea_api(f"{api_root}/api/v1/repos/{owner}/{repo}/branches", token)
+    return [entry["name"] for entry in branch_entries]
+
+
async def get_gitlab_branches(repo_url: str, token: str = None) -> List[str]:
"""获取GitLab仓库分支列表"""
parsed = urlparse(repo_url)
@@ -158,7 +203,9 @@ async def get_gitlab_branches(repo_url: str, token: str = None) -> List[str]:
elif parsed.username and not parsed.password:
extracted_token = parsed.username
- path = parsed.path.strip('/').rstrip('.git')
+ path = parsed.path.strip('/')
+ if path.endswith('.git'):
+ path = path[:-4]
if not path:
raise Exception("GitLab 仓库 URL 格式错误")
@@ -172,7 +219,9 @@ async def get_gitlab_branches(repo_url: str, token: str = None) -> List[str]:
async def get_github_files(repo_url: str, branch: str, token: str = None, exclude_patterns: List[str] = None) -> List[Dict[str, str]]:
"""获取GitHub仓库文件列表"""
# 解析仓库URL
- match = repo_url.rstrip('/').rstrip('.git')
+ match = repo_url.rstrip('/')
+ if match.endswith('.git'):
+ match = match[:-4]
if 'github.com/' in match:
parts = match.split('github.com/')[-1].split('/')
if len(parts) >= 2:
@@ -213,7 +262,9 @@ async def get_gitlab_files(repo_url: str, branch: str, token: str = None, exclud
extracted_token = parsed.username
# 解析项目路径
- path = parsed.path.strip('/').rstrip('.git')
+ path = parsed.path.strip('/')
+ if path.endswith('.git'):
+ path = path[:-4]
if not path:
raise Exception("GitLab 仓库 URL 格式错误")
@@ -235,6 +286,47 @@ async def get_gitlab_files(repo_url: str, branch: str, token: str = None, exclud
return files
+
+async def get_gitea_files(repo_url: str, branch: str, token: str = None, exclude_patterns: List[str] = None) -> List[Dict[str, str]]:
+    """List the scannable text files of one branch of a Gitea repository.
+
+    Args:
+        repo_url: Repository URL whose path starts with ``/owner/repo``
+            (an optional ``.git`` suffix is removed).
+        branch: Branch name, used as the tree reference and as the ``ref``
+            of every file URL.
+        token: Access token forwarded to ``gitea_api`` and attached to
+            every returned entry.
+        exclude_patterns: Patterns handed to ``should_exclude``.
+
+    Returns:
+        One dict per accepted file with the keys ``"path"``, ``"url"``
+        (the API v1 raw-content endpoint for that file) and ``"token"``.
+
+    Raises:
+        Exception: If the URL path has fewer than two segments; errors
+            raised by ``gitea_api`` propagate unchanged.
+    """
+    parsed = urlparse(repo_url)
+    base = f"{parsed.scheme}://{parsed.netloc}"
+
+    path = parsed.path.strip('/')
+    if path.endswith('.git'):
+        path = path[:-4]
+    parts = path.split('/')
+    if len(parts) < 2:
+        raise Exception("Gitea 仓库 URL 格式错误")
+
+    owner, repo = parts[0], parts[1]
+    repo_api = f"{base}/api/v1/repos/{owner}/{repo}"
+
+    # Tree API: GET /repos/{owner}/{repo}/git/trees/{sha}?recursive=1, with the
+    # branch name used as {sha}. The branch occupies a single path segment, so
+    # "/" must be escaped as well (safe=""); otherwise a branch such as
+    # "feature/x" would be split into two segments and miss the route.
+    tree_ref = quote(branch, safe="")
+    tree_data = await gitea_api(f"{repo_api}/git/trees/{tree_ref}?recursive=1", token)
+
+    files = []
+    for item in tree_data.get("tree", []):
+        # Only entries of type "blob" are treated as files.
+        if item.get("type") != "blob":
+            continue
+        file_path = item["path"]
+        if not is_text_file(file_path) or should_exclude(file_path, exclude_patterns):
+            continue
+        files.append({
+            "path": file_path,
+            # Raw-content endpoint: /repos/{owner}/{repo}/raw/{filepath}?ref={branch}
+            "url": f"{repo_api}/raw/{quote(file_path)}?ref={quote(branch)}",
+            # Carried along so the caller can authenticate the content download.
+            "token": token,
+        })
+
+    return files
async def scan_repo_task(task_id: str, db_session_factory, user_config: dict = None):
"""
后台仓库扫描任务
@@ -293,24 +385,23 @@ async def scan_repo_task(task_id: str, db_session_factory, user_config: dict = N
user_other_config = (user_config or {}).get('otherConfig', {})
github_token = user_other_config.get('githubToken') or settings.GITHUB_TOKEN
gitlab_token = user_other_config.get('gitlabToken') or settings.GITLAB_TOKEN
+ gitea_token = user_other_config.get('giteaToken') or settings.GITEA_TOKEN
+
+
files: List[Dict[str, str]] = []
extracted_gitlab_token = None
-
- # 构建分支尝试顺序(分支降级机制)
- branches_to_try = [branch]
- if project.default_branch and project.default_branch != branch:
- branches_to_try.append(project.default_branch)
- for common_branch in ["main", "master"]:
- if common_branch not in branches_to_try:
- branches_to_try.append(common_branch)
-
- actual_branch = branch # 实际使用的分支
last_error = None
+ actual_branch = branch  # branch that is actually used for the scan
+
+ # Branch fallback order: the requested branch first, then the project's
+ # default branch, then the common defaults. Candidates already in the
+ # list (or empty) are skipped, so "main" still falls back to "master".
+ branches_to_try = [branch]
+ for candidate in (project.default_branch, "main", "master"):
+     if candidate and candidate not in branches_to_try:
+         branches_to_try.append(candidate)
for try_branch in branches_to_try:
try:
- print(f"🔄 尝试获取分支 {try_branch} 的文件列表...")
if repo_type == "github":
files = await get_github_files(repo_url, try_branch, github_token, task_exclude_patterns)
elif repo_type == "gitlab":
@@ -318,8 +409,10 @@ async def scan_repo_task(task_id: str, db_session_factory, user_config: dict = N
# GitLab文件可能带有token
if files and 'token' in files[0]:
extracted_gitlab_token = files[0].get('token')
+ elif repo_type == "gitea":
+ files = await get_gitea_files(repo_url, try_branch, gitea_token, task_exclude_patterns)
else:
- raise Exception("不支持的仓库类型,仅支持 GitHub 和 GitLab 仓库")
+ raise Exception("不支持的仓库类型,仅支持 GitHub, GitLab 和 Gitea 仓库")
if files:
actual_branch = try_branch
@@ -386,10 +479,21 @@ async def scan_repo_task(task_id: str, db_session_factory, user_config: dict = N
try:
# 获取文件内容
headers = {}
- # 使用提取的 GitLab token 或用户配置的 token
- token_to_use = extracted_gitlab_token or gitlab_token
- if token_to_use:
- headers["PRIVATE-TOKEN"] = token_to_use
+ # 使用提取的 token 或用户配置的 token
+
+ if repo_type == "gitlab":
+     # Prefer the token extracted while listing the GitLab files,
+     # then fall back to the configured one.
+     token_to_use = extracted_gitlab_token or gitlab_token
+     if token_to_use:
+         headers["PRIVATE-TOKEN"] = token_to_use
+ elif repo_type == "gitea":
+     # Gitea file entries carry the token they were listed with.
+     token_to_use = file_info.get("token") or gitea_token
+     if token_to_use:
+         headers["Authorization"] = f"token {token_to_use}"
+ elif repo_type == "github":
+     # GitHub raw URLs (raw.githubusercontent.com) are direct downloads:
+     # public repositories usually need no token, private ones do.
+     if github_token:
+         headers["Authorization"] = f"Bearer {github_token}"
print(f"📥 正在获取文件: {file_info['path']}")
content = await fetch_file_content(file_info["url"], headers)
diff --git a/backend/env.example b/backend/env.example
index 2fd0d25..37eeb24 100644
--- a/backend/env.example
+++ b/backend/env.example
@@ -183,6 +183,11 @@ GITHUB_TOKEN=
# 权限要求: read_repository
GITLAB_TOKEN=
+# Gitea Access Token
+# 获取地址: https://[your-gitea-instance]/user/settings/applications
+# 权限要求: read:repository
+GITEA_TOKEN=
+
# =============================================
# 扫描配置
# =============================================
diff --git a/frontend/src/components/system/SystemConfig.tsx b/frontend/src/components/system/SystemConfig.tsx
index 81c1345..3411f85 100644
--- a/frontend/src/components/system/SystemConfig.tsx
+++ b/frontend/src/components/system/SystemConfig.tsx
@@ -41,7 +41,7 @@ const DEFAULT_MODELS: Record
+ 用于访问 Gitea 私有仓库。获取:{' '} + + [your-gitea-instance]/user/settings/applications + +
+