Merge branch 'v3.0.0' of github.com:lintsinghua/DeepAudit into feature/git_ssh
# Conflicts: # backend/app/services/scanner.py # backend/uv.lock # frontend/src/pages/Projects.tsx
This commit is contained in:
commit
9399c01d8c
54
README.md
54
README.md
|
|
@ -67,7 +67,7 @@
|
|||
<td width="50%" align="center">
|
||||
<strong>🗂️ 项目管理</strong><br/><br/>
|
||||
<img src="frontend/public/images/README-show/项目管理.png" alt="项目管理" width="95%"><br/>
|
||||
<em>GitHub/GitLab 导入,多项目协同管理</em>
|
||||
<em>GitHub/GitLab/Gitea 导入,多项目协同管理</em>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
|
@ -197,6 +197,37 @@ docker pull ghcr.nju.edu.cn/lintsinghua/deepaudit-sandbox:latest
|
|||
|
||||
> 💡 镜像源由 [南京大学开源镜像站](https://mirrors.nju.edu.cn/) 提供支持
|
||||
|
||||
<details>
|
||||
<summary>💡 配置 Docker 镜像加速(可选,进一步提升拉取速度)(点击展开)</summary>
|
||||
|
||||
如果拉取镜像仍然较慢,可以配置 Docker 镜像加速器。编辑 Docker 配置文件并添加以下镜像源:
|
||||
|
||||
**Linux / macOS**:编辑 `/etc/docker/daemon.json`
|
||||
|
||||
**Windows**:右键 Docker Desktop 图标 → Settings → Docker Engine
|
||||
|
||||
```json
|
||||
{
|
||||
"registry-mirrors": [
|
||||
"https://docker.1ms.run",
|
||||
"https://dockerproxy.com",
|
||||
"https://hub.rat.dev"
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
保存后重启 Docker 服务:
|
||||
|
||||
```bash
|
||||
# Linux
|
||||
sudo systemctl restart docker
|
||||
|
||||
# macOS / Windows
|
||||
# 重启 Docker Desktop 应用
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
> 🎉 **启动成功!** 访问 http://localhost:3000 开始体验。
|
||||
|
||||
---
|
||||
|
|
@ -231,10 +262,22 @@ docker compose up -d
|
|||
- PostgreSQL 15+
|
||||
- Docker (用于沙箱)
|
||||
|
||||
### 1. 后端启动
|
||||
|
||||
### 1. 手动启动数据库
|
||||
|
||||
```bash
|
||||
docker compose up -d redis db
|
||||
```
|
||||
|
||||
### 2. 后端启动
|
||||
|
||||
|
||||
|
||||
```bash
|
||||
cd backend
|
||||
# 配置环境
|
||||
cp env.example .env
|
||||
|
||||
# 使用 uv 管理环境(推荐)
|
||||
uv sync
|
||||
source .venv/bin/activate
|
||||
|
|
@ -243,10 +286,13 @@ source .venv/bin/activate
|
|||
uvicorn app.main:app --reload
|
||||
```
|
||||
|
||||
### 2. 前端启动
|
||||
### 3. 前端启动
|
||||
|
||||
```bash
|
||||
cd frontend
|
||||
# 配置环境
|
||||
cp .env.example .env
|
||||
|
||||
pnpm install
|
||||
pnpm dev
|
||||
```
|
||||
|
|
@ -347,7 +393,7 @@ DeepSeek-Coder · Codestral<br/>
|
|||
| 🤖 **Agent 深度审计** | Multi-Agent 协作,自主编排审计策略 | Agent |
|
||||
| 🧠 **RAG 知识增强** | 代码语义理解,CWE/CVE 知识库检索 | Agent |
|
||||
| 🔒 **沙箱 PoC 验证** | Docker 隔离执行,验证漏洞有效性 | Agent |
|
||||
| 🗂️ **项目管理** | GitHub/GitLab 导入,ZIP 上传,10+ 语言支持 | 通用 |
|
||||
| 🗂️ **项目管理** | GitHub/GitLab/Gitea 导入,ZIP 上传,10+ 语言支持 | 通用 |
|
||||
| ⚡ **即时分析** | 代码片段秒级分析,粘贴即用 | 通用 |
|
||||
| 🔍 **五维检测** | Bug · 安全 · 性能 · 风格 · 可维护性 | 通用 |
|
||||
| 💡 **What-Why-How** | 精准定位 + 原因解释 + 修复建议 | 通用 |
|
||||
|
|
|
|||
31
README_EN.md
31
README_EN.md
|
|
@ -173,6 +173,37 @@ Using pre-built Docker images, no need to clone code, start with one command:
|
|||
curl -fsSL https://raw.githubusercontent.com/lintsinghua/DeepAudit/v3.0.0/docker-compose.prod.yml | docker compose -f - up -d
|
||||
```
|
||||
|
||||
<details>
|
||||
<summary>💡 Configure Docker Registry Mirrors (Optional, for faster image pulling) (Click to expand)</summary>
|
||||
|
||||
If pulling images is still slow, you can configure Docker registry mirrors. Edit the Docker configuration file and add the following mirror sources:
|
||||
|
||||
**Linux / macOS**: Edit `/etc/docker/daemon.json`
|
||||
|
||||
**Windows**: Right-click Docker Desktop icon → Settings → Docker Engine
|
||||
|
||||
```json
|
||||
{
|
||||
"registry-mirrors": [
|
||||
"https://docker.1ms.run",
|
||||
"https://dockerproxy.com",
|
||||
"https://hub.rat.dev"
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
Restart Docker service after saving:
|
||||
|
||||
```bash
|
||||
# Linux
|
||||
sudo systemctl restart docker
|
||||
|
||||
# macOS / Windows
|
||||
# Restart Docker Desktop application
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
> **Success!** Visit http://localhost:3000 to start exploring.
|
||||
|
||||
---
|
||||
|
|
|
|||
|
|
@ -1 +1 @@
|
|||
3.13
|
||||
3.12
|
||||
|
|
|
|||
|
|
@ -447,7 +447,7 @@ async def _execute_agent_task(task_id: str):
|
|||
# 这确保即使 runner.cancel() 失败,Agent 也能通过 checking 全局标志感知取消
|
||||
def check_global_cancel():
|
||||
return is_task_cancelled(task_id)
|
||||
|
||||
|
||||
orchestrator.set_cancel_callback(check_global_cancel)
|
||||
# 同时也为子 Agent 设置(虽然 Orchestrator 会传播)
|
||||
recon_agent.set_cancel_callback(check_global_cancel)
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ from app.models.user import User
|
|||
from app.models.audit import AuditTask, AuditIssue
|
||||
from app.models.user_config import UserConfig
|
||||
import zipfile
|
||||
from app.services.scanner import scan_repo_task, get_github_files, get_gitlab_files, get_github_branches, get_gitlab_branches, should_exclude, is_text_file
|
||||
from app.services.scanner import scan_repo_task, get_github_files, get_gitlab_files, get_github_branches, get_gitlab_branches, get_gitea_branches, should_exclude, is_text_file
|
||||
from app.services.zip_storage import (
|
||||
save_project_zip, load_project_zip, get_project_zip_meta,
|
||||
delete_project_zip, has_project_zip
|
||||
|
|
@ -687,9 +687,10 @@ async def get_project_branches(
|
|||
config = config.scalar_one_or_none()
|
||||
|
||||
github_token = settings.GITHUB_TOKEN
|
||||
gitea_token = settings.GITEA_TOKEN
|
||||
gitlab_token = settings.GITLAB_TOKEN
|
||||
|
||||
SENSITIVE_OTHER_FIELDS = ['githubToken', 'gitlabToken']
|
||||
|
||||
SENSITIVE_OTHER_FIELDS = ['githubToken', 'gitlabToken', 'giteaToken']
|
||||
|
||||
if config and config.other_config:
|
||||
import json
|
||||
|
|
@ -701,12 +702,13 @@ async def get_project_branches(
|
|||
github_token = decrypted_val
|
||||
elif field == 'gitlabToken':
|
||||
gitlab_token = decrypted_val
|
||||
elif field == 'giteaToken':
|
||||
gitea_token = decrypted_val
|
||||
|
||||
repo_type = project.repository_type or "other"
|
||||
|
||||
# 详细日志
|
||||
print(f"[Branch] 项目: {project.name}, 类型: {repo_type}, URL: {project.repository_url}")
|
||||
print(f"[Branch] GitHub Token: {'已配置' if github_token else '未配置'}, GitLab Token: {'已配置' if gitlab_token else '未配置'}")
|
||||
|
||||
try:
|
||||
if repo_type == "github":
|
||||
|
|
@ -717,6 +719,10 @@ async def get_project_branches(
|
|||
if not gitlab_token:
|
||||
print("[Branch] 警告: GitLab Token 未配置,可能无法访问私有仓库")
|
||||
branches = await get_gitlab_branches(project.repository_url, gitlab_token)
|
||||
elif repo_type == "gitea":
|
||||
if not gitea_token:
|
||||
print("[Branch] 警告: Gitea Token 未配置,可能无法访问私有仓库")
|
||||
branches = await get_gitea_branches(project.repository_url, gitea_token)
|
||||
else:
|
||||
# 对于其他类型,返回默认分支
|
||||
print(f"[Branch] 仓库类型 '{repo_type}' 不支持获取分支,返回默认分支")
|
||||
|
|
|
|||
|
|
@ -65,6 +65,9 @@ class Settings(BaseSettings):
|
|||
# GitLab配置
|
||||
GITLAB_TOKEN: Optional[str] = None
|
||||
|
||||
# Gitea配置
|
||||
GITEA_TOKEN: Optional[str] = None
|
||||
|
||||
# 扫描配置
|
||||
MAX_ANALYZE_FILES: int = 0 # 最大分析文件数,0表示无限制
|
||||
MAX_FILE_SIZE_BYTES: int = 200 * 1024 # 最大文件大小 200KB
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ class Project(Base):
|
|||
|
||||
# 仓库相关字段 (仅 source_type='repository' 时使用)
|
||||
repository_url = Column(String, nullable=True)
|
||||
repository_type = Column(String, default="other") # github, gitlab, other
|
||||
repository_type = Column(String, default="other") # github, gitlab, gitea, other
|
||||
default_branch = Column(String, default="main")
|
||||
|
||||
programming_languages = Column(Text, default="[]") # Stored as JSON string
|
||||
|
|
|
|||
|
|
@ -487,11 +487,11 @@ class BaseAgent(ABC):
|
|||
|
||||
# 🔥 外部取消检查回调
|
||||
self._cancel_callback = None
|
||||
|
||||
|
||||
def set_cancel_callback(self, callback) -> None:
|
||||
"""设置外部取消检查回调"""
|
||||
self._cancel_callback = callback
|
||||
|
||||
|
||||
@property
|
||||
def is_cancelled(self) -> bool:
|
||||
"""检查是否已取消(包含内部标志和外部回调)"""
|
||||
|
|
@ -971,11 +971,11 @@ class BaseAgent(ABC):
|
|||
)
|
||||
# 兼容不同版本的 python async generator
|
||||
iterator = stream.__aiter__()
|
||||
|
||||
|
||||
import time
|
||||
first_token_received = False
|
||||
last_activity = time.time()
|
||||
|
||||
|
||||
while True:
|
||||
# 检查取消
|
||||
if self.is_cancelled:
|
||||
|
|
@ -988,7 +988,7 @@ class BaseAgent(ABC):
|
|||
timeout = 30.0 if not first_token_received else 60.0
|
||||
|
||||
chunk = await asyncio.wait_for(iterator.__anext__(), timeout=timeout)
|
||||
|
||||
|
||||
last_activity = time.time()
|
||||
|
||||
if chunk["type"] == "token":
|
||||
|
|
@ -1006,21 +1006,21 @@ class BaseAgent(ABC):
|
|||
# 实际上 service.py 中 chat_completion_stream 保证了 accumulated 存在
|
||||
# 这里我们信任 service 层的 accumulated
|
||||
pass
|
||||
|
||||
|
||||
# Double check if accumulated is empty but we have token
|
||||
if not accumulated and token:
|
||||
accumulated += token # Fallback
|
||||
|
||||
|
||||
await self.emit_thinking_token(token, accumulated)
|
||||
# 🔥 CRITICAL: 让出控制权给事件循环,让 SSE 有机会发送事件
|
||||
await asyncio.sleep(0)
|
||||
|
||||
|
||||
elif chunk["type"] == "done":
|
||||
accumulated = chunk["content"]
|
||||
if chunk.get("usage"):
|
||||
total_tokens = chunk["usage"].get("total_tokens", 0)
|
||||
break
|
||||
|
||||
|
||||
elif chunk["type"] == "error":
|
||||
accumulated = chunk.get("accumulated", "")
|
||||
error_msg = chunk.get("error", "Unknown error")
|
||||
|
|
@ -1030,7 +1030,7 @@ class BaseAgent(ABC):
|
|||
else:
|
||||
accumulated = f"[系统错误: {error_msg}] 请重新思考并输出你的决策。"
|
||||
break
|
||||
|
||||
|
||||
except StopAsyncIteration:
|
||||
break
|
||||
except asyncio.TimeoutError:
|
||||
|
|
|
|||
|
|
@ -687,7 +687,7 @@ Action Input: {{"参数": "值"}}
|
|||
|
||||
# Use asyncio.wait to poll without cancelling the task
|
||||
done, pending = await asyncio.wait(
|
||||
[run_task],
|
||||
[run_task],
|
||||
timeout=0.5,
|
||||
return_when=asyncio.FIRST_COMPLETED
|
||||
)
|
||||
|
|
|
|||
|
|
@ -767,7 +767,9 @@ class GoTestTool(BaseLanguageTestTool):
|
|||
param_code = ""
|
||||
if params:
|
||||
args = ["program"] + list(params.values())
|
||||
param_code = f" os.Args = []string{{{', '.join([f'\"{a}\"' for a in args])}}}\n"
|
||||
args_str = ', '.join([f'"{a}"' for a in args])
|
||||
param_code = " os.Args = []string{{{}}}\n".format(args_str)
|
||||
# param_code = f" os.Args = []string{{{', '.join([f'\"{a}\"' for a in args])}}}\n"
|
||||
for key, value in params.items():
|
||||
param_code += f' os.Setenv("{key.upper()}", "{value}")\n'
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
"""
|
||||
仓库扫描服务 - 支持GitHub和GitLab仓库扫描
|
||||
仓库扫描服务 - 支持GitHub, GitLab 和 Gitea 仓库扫描
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
|
|
@ -9,6 +9,7 @@ from datetime import datetime, timezone
|
|||
from urllib.parse import urlparse, quote
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.utils.repo_utils import parse_repository_url
|
||||
from app.models.audit import AuditTask, AuditIssue
|
||||
from app.models.project import Project
|
||||
from app.services.llm.service import LLMService
|
||||
|
|
@ -117,6 +118,25 @@ async def github_api(url: str, token: str = None) -> Any:
|
|||
return response.json()
|
||||
|
||||
|
||||
|
||||
async def gitea_api(url: str, token: str = None) -> Any:
    """Issue a GET request against a Gitea API endpoint and return the decoded JSON body.

    Args:
        url: Full URL of the Gitea API endpoint to request.
        token: Access token to authenticate with. When falsy, the value of
            ``settings.GITEA_TOKEN`` is used instead; if that is also falsy the
            request is sent without an ``Authorization`` header.

    Returns:
        The JSON-decoded response body.

    Raises:
        Exception: For any non-200 response. 401 and 403 get dedicated
            messages; every other status reports the status code and URL.
    """
    request_headers = {"Content-Type": "application/json"}
    access_token = token or settings.GITEA_TOKEN
    if access_token:
        # The token is sent using the "token <value>" Authorization scheme.
        request_headers["Authorization"] = f"token {access_token}"

    async with httpx.AsyncClient(timeout=30) as client:
        response = await client.get(url, headers=request_headers)
        status = response.status_code
        if status == 200:
            return response.json()
        if status == 401:
            raise Exception("Gitea API 401:请配置 GITEA_TOKEN 或确认仓库权限")
        if status == 403:
            raise Exception("Gitea API 403:请确认仓库权限/频率限制")
        raise Exception(f"Gitea API {response.status_code}: {url}")
|
||||
|
||||
|
||||
async def gitlab_api(url: str, token: str = None) -> Any:
|
||||
"""调用GitLab API"""
|
||||
headers = {"Content-Type": "application/json"}
|
||||
|
|
@ -149,15 +169,8 @@ async def fetch_file_content(url: str, headers: Dict[str, str] = None) -> Option
|
|||
|
||||
async def get_github_branches(repo_url: str, token: str = None) -> List[str]:
|
||||
"""获取GitHub仓库分支列表"""
|
||||
match = repo_url.rstrip('/').rstrip('.git')
|
||||
if 'github.com/' in match:
|
||||
parts = match.split('github.com/')[-1].split('/')
|
||||
if len(parts) >= 2:
|
||||
owner, repo = parts[0], parts[1]
|
||||
else:
|
||||
raise Exception("GitHub 仓库 URL 格式错误")
|
||||
else:
|
||||
raise Exception("GitHub 仓库 URL 格式错误")
|
||||
repo_info = parse_repository_url(repo_url, "github")
|
||||
owner, repo = repo_info['owner'], repo_info['repo']
|
||||
|
||||
branches_url = f"https://api.github.com/repos/{owner}/{repo}/branches?per_page=100"
|
||||
branches_data = await github_api(branches_url, token)
|
||||
|
|
@ -165,10 +178,24 @@ async def get_github_branches(repo_url: str, token: str = None) -> List[str]:
|
|||
return [b["name"] for b in branches_data]
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
async def get_gitea_branches(repo_url: str, token: str = None) -> List[str]:
    """Return the branch names of a Gitea repository.

    Args:
        repo_url: URL of the Gitea repository; parsed with
            ``parse_repository_url(repo_url, "gitea")``.
        token: Optional access token forwarded to ``gitea_api``.

    Returns:
        The ``name`` field of every entry returned by the branches endpoint.
    """
    parsed = parse_repository_url(repo_url, "gitea")
    # base_url already includes the API prefix ({server}/api/v1).
    api_root = parsed['base_url']
    owner, repo = parsed['owner'], parsed['repo']

    endpoint = f"{api_root}/repos/{owner}/{repo}/branches"
    payload = await gitea_api(endpoint, token)

    names = []
    for branch in payload:
        names.append(branch["name"])
    return names
|
||||
|
||||
|
||||
async def get_gitlab_branches(repo_url: str, token: str = None) -> List[str]:
|
||||
"""获取GitLab仓库分支列表"""
|
||||
parsed = urlparse(repo_url)
|
||||
base = f"{parsed.scheme}://{parsed.netloc}"
|
||||
|
||||
extracted_token = token
|
||||
if parsed.username:
|
||||
|
|
@ -177,12 +204,11 @@ async def get_gitlab_branches(repo_url: str, token: str = None) -> List[str]:
|
|||
elif parsed.username and not parsed.password:
|
||||
extracted_token = parsed.username
|
||||
|
||||
path = parsed.path.strip('/').rstrip('.git')
|
||||
if not path:
|
||||
raise Exception("GitLab 仓库 URL 格式错误")
|
||||
repo_info = parse_repository_url(repo_url, "gitlab")
|
||||
base_url = repo_info['base_url']
|
||||
project_path = quote(repo_info['project_path'], safe='')
|
||||
|
||||
project_path = quote(path, safe='')
|
||||
branches_url = f"{base}/api/v4/projects/{project_path}/repository/branches?per_page=100"
|
||||
branches_url = f"{base_url}/projects/{project_path}/repository/branches?per_page=100"
|
||||
branches_data = await gitlab_api(branches_url, extracted_token)
|
||||
|
||||
return [b["name"] for b in branches_data]
|
||||
|
|
@ -191,15 +217,8 @@ async def get_gitlab_branches(repo_url: str, token: str = None) -> List[str]:
|
|||
async def get_github_files(repo_url: str, branch: str, token: str = None, exclude_patterns: List[str] = None) -> List[Dict[str, str]]:
|
||||
"""获取GitHub仓库文件列表"""
|
||||
# 解析仓库URL
|
||||
match = repo_url.rstrip('/').rstrip('.git')
|
||||
if 'github.com/' in match:
|
||||
parts = match.split('github.com/')[-1].split('/')
|
||||
if len(parts) >= 2:
|
||||
owner, repo = parts[0], parts[1]
|
||||
else:
|
||||
raise Exception("GitHub 仓库 URL 格式错误")
|
||||
else:
|
||||
raise Exception("GitHub 仓库 URL 格式错误")
|
||||
repo_info = parse_repository_url(repo_url, "github")
|
||||
owner, repo = repo_info['owner'], repo_info['repo']
|
||||
|
||||
# 获取仓库文件树
|
||||
tree_url = f"https://api.github.com/repos/{owner}/{repo}/git/trees/{quote(branch)}?recursive=1"
|
||||
|
|
@ -221,7 +240,6 @@ async def get_github_files(repo_url: str, branch: str, token: str = None, exclud
|
|||
async def get_gitlab_files(repo_url: str, branch: str, token: str = None, exclude_patterns: List[str] = None) -> List[Dict[str, str]]:
|
||||
"""获取GitLab仓库文件列表"""
|
||||
parsed = urlparse(repo_url)
|
||||
base = f"{parsed.scheme}://{parsed.netloc}"
|
||||
|
||||
# 从URL中提取token(如果存在)
|
||||
extracted_token = token
|
||||
|
|
@ -232,14 +250,12 @@ async def get_gitlab_files(repo_url: str, branch: str, token: str = None, exclud
|
|||
extracted_token = parsed.username
|
||||
|
||||
# 解析项目路径
|
||||
path = parsed.path.strip('/').rstrip('.git')
|
||||
if not path:
|
||||
raise Exception("GitLab 仓库 URL 格式错误")
|
||||
|
||||
project_path = quote(path, safe='')
|
||||
repo_info = parse_repository_url(repo_url, "gitlab")
|
||||
base_url = repo_info['base_url'] # {base}/api/v4
|
||||
project_path = quote(repo_info['project_path'], safe='')
|
||||
|
||||
# 获取仓库文件树
|
||||
tree_url = f"{base}/api/v4/projects/{project_path}/repository/tree?ref={quote(branch)}&recursive=true&per_page=100"
|
||||
tree_url = f"{base_url}/projects/{project_path}/repository/tree?ref={quote(branch)}&recursive=true&per_page=100"
|
||||
tree_data = await gitlab_api(tree_url, extracted_token)
|
||||
|
||||
files = []
|
||||
|
|
@ -247,13 +263,37 @@ async def get_gitlab_files(repo_url: str, branch: str, token: str = None, exclud
|
|||
if item.get("type") == "blob" and is_text_file(item["path"]) and not should_exclude(item["path"], exclude_patterns):
|
||||
files.append({
|
||||
"path": item["path"],
|
||||
"url": f"{base}/api/v4/projects/{project_path}/repository/files/{quote(item['path'], safe='')}/raw?ref={quote(branch)}",
|
||||
"url": f"{base_url}/projects/{project_path}/repository/files/{quote(item['path'], safe='')}/raw?ref={quote(branch)}",
|
||||
"token": extracted_token
|
||||
})
|
||||
|
||||
return files
|
||||
|
||||
|
||||
|
||||
async def get_gitea_files(repo_url: str, branch: str, token: str = None, exclude_patterns: List[str] = None) -> List[Dict[str, str]]:
    """List the analyzable files of a Gitea repository branch.

    The recursive git tree for ``branch`` is requested from the Gitea API
    (the branch name is passed where the endpoint takes a tree reference).
    Entries are kept only when their type is ``"blob"``, ``is_text_file``
    accepts the path, and ``should_exclude`` does not reject it.

    Args:
        repo_url: URL of the Gitea repository.
        branch: Branch name used both for the tree lookup and as ``ref`` in
            each file's raw-content URL.
        token: Optional access token forwarded to ``gitea_api`` and stored on
            every returned entry so the content download can authenticate.
        exclude_patterns: Patterns handed to ``should_exclude``.

    Returns:
        A list of dicts with keys ``path``, ``url`` (API raw-content URL) and
        ``token``.
    """
    parsed = parse_repository_url(repo_url, "gitea")
    api_root = parsed['base_url']
    owner, repo = parsed['owner'], parsed['repo']

    # GET /repos/{owner}/{repo}/git/trees/{sha}?recursive=1 — branch name used as sha.
    listing_url = f"{api_root}/repos/{owner}/{repo}/git/trees/{quote(branch)}?recursive=1"
    tree_data = await gitea_api(listing_url, token)

    collected = []
    for entry in tree_data.get("tree", []):
        if entry.get("type") != "blob":
            continue
        if not is_text_file(entry["path"]):
            continue
        if should_exclude(entry["path"], exclude_patterns):
            continue
        # Raw endpoint: GET /repos/{owner}/{repo}/raw/{filepath}?ref={branch}
        raw_url = f"{api_root}/repos/{owner}/{repo}/raw/{quote(entry['path'])}?ref={quote(branch)}"
        collected.append({
            "path": entry["path"],
            "url": raw_url,
            "token": token,  # carried along so the file download can authenticate
        })

    return collected
|
||||
async def scan_repo_task(task_id: str, db_session_factory, user_config: dict = None):
|
||||
"""
|
||||
后台仓库扫描任务
|
||||
|
|
@ -312,6 +352,9 @@ async def scan_repo_task(task_id: str, db_session_factory, user_config: dict = N
|
|||
user_other_config = (user_config or {}).get('otherConfig', {})
|
||||
github_token = user_other_config.get('githubToken') or settings.GITHUB_TOKEN
|
||||
gitlab_token = user_other_config.get('gitlabToken') or settings.GITLAB_TOKEN
|
||||
gitea_token = user_other_config.get('giteaToken') or settings.GITEA_TOKEN
|
||||
|
||||
|
||||
|
||||
# 获取SSH私钥(如果配置了)
|
||||
ssh_private_key = None
|
||||
|
|
@ -365,8 +408,10 @@ async def scan_repo_task(task_id: str, db_session_factory, user_config: dict = N
|
|||
# GitLab文件可能带有token
|
||||
if files and 'token' in files[0]:
|
||||
extracted_gitlab_token = files[0].get('token')
|
||||
elif repo_type == "gitea":
|
||||
files = await get_gitea_files(repo_url, try_branch, gitea_token, task_exclude_patterns)
|
||||
else:
|
||||
raise Exception("不支持的仓库类型,仅支持 GitHub 和 GitLab 仓库")
|
||||
raise Exception("不支持的仓库类型,仅支持 GitHub, GitLab 和 Gitea 仓库")
|
||||
|
||||
if files:
|
||||
actual_branch = try_branch
|
||||
|
|
@ -437,18 +482,29 @@ async def scan_repo_task(task_id: str, db_session_factory, user_config: dict = N
|
|||
|
||||
try:
|
||||
# 获取文件内容
|
||||
|
||||
if is_ssh_url:
|
||||
# SSH方式已经包含了文件内容
|
||||
content = file_info.get('content', '')
|
||||
print(f"📥 正在处理SSH文件: {file_info['path']}")
|
||||
else:
|
||||
# API方式需要下载文件内容
|
||||
headers = {}
|
||||
# 使用提取的 GitLab token 或用户配置的 token
|
||||
token_to_use = extracted_gitlab_token or gitlab_token
|
||||
if token_to_use:
|
||||
headers["PRIVATE-TOKEN"] = token_to_use
|
||||
|
||||
# 使用提取的 token 或用户配置的 token
|
||||
|
||||
if repo_type == "gitlab":
|
||||
token_to_use = file_info.get('token') or gitlab_token
|
||||
if token_to_use:
|
||||
headers["PRIVATE-TOKEN"] = token_to_use
|
||||
elif repo_type == "gitea":
|
||||
token_to_use = file_info.get('token') or gitea_token
|
||||
if token_to_use:
|
||||
headers["Authorization"] = f"token {token_to_use}"
|
||||
elif repo_type == "github":
|
||||
# GitHub raw URL 也是直接下载,通常public不需要token,private需要
|
||||
# GitHub raw user content url: raw.githubusercontent.com
|
||||
if github_token:
|
||||
headers["Authorization"] = f"Bearer {github_token}"
|
||||
|
||||
print(f"📥 正在获取文件: {file_info['path']}")
|
||||
content = await fetch_file_content(file_info["url"], headers)
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,77 @@
|
|||
from urllib.parse import urlparse, urlunparse
|
||||
from typing import Dict, Optional
|
||||
|
||||
def parse_repository_url(repo_url: str, repo_type: str) -> Dict[str, str]:
    """
    Parse a repository URL into the components needed to call the platform's API.

    Args:
        repo_url: The repository URL. Surrounding whitespace, surrounding
            slashes in the path and a trailing ``.git`` are ignored.
        repo_type: The type of repository ('github', 'gitlab', 'gitea').

    Returns:
        A dictionary containing parsed components:
        - base_url: The API base URL. ``https://api.github.com`` for github.com
          hosts, otherwise ``{server_url}/api/v3`` (GitHub), ``/api/v4``
          (GitLab) or ``/api/v1`` (Gitea).
        - owner: The owner/namespace of the repository. For GitLab this is the
          whole path before the last segment, so subgroups are kept.
        - repo: The repository name.
        - project_path: The full URL path without slashes at the ends and
          without the ``.git`` suffix (used as the GitLab project identifier).
        - server_url: The base URL of the server (scheme + netloc).

    Raises:
        ValueError: If the URL is empty, does not use http/https, has no host,
            has fewer than two path segments, or ``repo_type`` is unsupported.
    """
    if not repo_url:
        raise ValueError(f"{repo_type} 仓库 URL 不能为空")

    # Basic sanitization
    repo_url = repo_url.strip()

    # Check scheme to prevent SSRF (only allow http and https)
    parsed = urlparse(repo_url)
    if parsed.scheme not in ('http', 'https'):
        raise ValueError(f"{repo_type} 仓库 URL 必须使用 http 或 https 协议")

    # A URL such as "https:///owner/repo" has no host; reject it instead of
    # building API URLs that start with a bare "https://".
    host = (parsed.hostname or "").lower()
    if not host:
        raise ValueError(f"{repo_type} 仓库 URL 格式错误")

    # Remove .git suffix if present (suffix check, not a character-set strip)
    path = parsed.path.strip('/')
    if path.endswith('.git'):
        path = path[:-4]

    path_parts = path.split('/')
    if len(path_parts) < 2:
        raise ValueError(f"{repo_type} 仓库 URL 格式错误")

    base = f"{parsed.scheme}://{parsed.netloc}"

    if repo_type == "github":
        # GitHub repositories are always addressed as /owner/repo, so take the
        # first two segments; extra segments (e.g. /tree/main) are ignored.
        owner, repo = path_parts[0], path_parts[1]
        # Host names are case-insensitive, and a substring test would also
        # match unrelated hosts such as "notgithub.com"; compare exactly.
        if host == 'github.com' or host.endswith('.github.com'):
            api_base = "https://api.github.com"
        else:
            # Any other host is assumed to be GitHub Enterprise.
            api_base = f"{base}/api/v3"

    elif repo_type == "gitlab":
        # GitLab supports subgroups, so the path may be group/subgroup/repo.
        # Callers identify the project by the full path (project_path).
        repo = path_parts[-1]
        owner = "/".join(path_parts[:-1])
        api_base = f"{base}/api/v4"

    elif repo_type == "gitea":
        # Gitea: /owner/repo
        owner, repo = path_parts[0], path_parts[1]
        api_base = f"{base}/api/v1"

    else:
        raise ValueError(f"不支持的仓库类型: {repo_type}")

    return {
        "base_url": api_base,
        "owner": owner,
        "repo": repo,
        "project_path": path,  # Useful for GitLab
        "server_url": base
    }
|
||||
|
|
@ -31,7 +31,7 @@ exit(0 if asyncio.run(check_db()) else 1)
|
|||
echo "✅ 数据库连接成功"
|
||||
break
|
||||
fi
|
||||
|
||||
|
||||
retry_count=$((retry_count + 1))
|
||||
echo " 重试 $retry_count/$max_retries..."
|
||||
sleep 2
|
||||
|
|
|
|||
|
|
@ -183,6 +183,11 @@ GITHUB_TOKEN=
|
|||
# 权限要求: read_repository
|
||||
GITLAB_TOKEN=
|
||||
|
||||
# Gitea Access Token
|
||||
# 获取地址: https://[your-gitea-instance]/user/settings/applications
|
||||
# 权限要求: read_repository
|
||||
GITEA_TOKEN=
|
||||
|
||||
# =============================================
|
||||
# 扫描配置
|
||||
# =============================================
|
||||
|
|
|
|||
7044
backend/uv.lock
7044
backend/uv.lock
File diff suppressed because it is too large
Load Diff
|
|
@ -3,7 +3,6 @@
|
|||
# =============================================
|
||||
# 使用南京大学镜像站加速拉取 GHCR 镜像
|
||||
# 部署命令: curl -fsSL https://raw.githubusercontent.com/lintsinghua/DeepAudit/main/docker-compose.prod.cn.yml | docker compose -f - up -d
|
||||
#
|
||||
# 镜像加速说明:
|
||||
# - 原始地址:ghcr.io
|
||||
# - 加速地址:ghcr.nju.edu.cn(南京大学开源镜像站)
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ services:
|
|||
ports:
|
||||
- "5432:5432"
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -U postgres"]
|
||||
test: [ "CMD-SHELL", "pg_isready -U postgres" ]
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
|
|
@ -43,7 +43,7 @@ services:
|
|||
volumes:
|
||||
# - ./backend/app:/app/app:ro # 挂载代码目录,修改后自动生效
|
||||
- backend_uploads:/app/uploads
|
||||
- /var/run/docker.sock:/var/run/docker.sock # 沙箱执行必须
|
||||
- /var/run/docker.sock:/var/run/docker.sock # 沙箱执行必须
|
||||
ports:
|
||||
- "8000:8000"
|
||||
env_file:
|
||||
|
|
@ -85,7 +85,7 @@ services:
|
|||
# - ./frontend/dist:/usr/share/nginx/html:ro # 挂载构建产物,本地 pnpm build 后自动生效
|
||||
- ./frontend/nginx.conf:/etc/nginx/conf.d/default.conf:ro # 挂载 nginx 配置
|
||||
ports:
|
||||
- "3000:80" # Nginx 监听 80 端口
|
||||
- "3000:80" # Nginx 监听 80 端口
|
||||
environment:
|
||||
# 禁用代理 - nginx 需要直连后端
|
||||
- HTTP_PROXY=
|
||||
|
|
@ -93,6 +93,7 @@ services:
|
|||
- http_proxy=
|
||||
- https_proxy=
|
||||
- NO_PROXY=*
|
||||
- VITE_API_BASE_URL=/api/v1
|
||||
depends_on:
|
||||
- backend
|
||||
networks:
|
||||
|
|
@ -111,7 +112,7 @@ services:
|
|||
volumes:
|
||||
- redis_data:/data
|
||||
healthcheck:
|
||||
test: ["CMD", "redis-cli", "ping"]
|
||||
test: [ "CMD", "redis-cli", "ping" ]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@
|
|||
# 后端 API 地址
|
||||
# - 本地开发: http://localhost:8000/api/v1
|
||||
# - Docker Compose 部署: /api
|
||||
VITE_API_BASE_URL=/api
|
||||
VITE_API_BASE_URL=/api/v1
|
||||
|
||||
# =============================================
|
||||
# Git 仓库集成配置(可选)
|
||||
|
|
|
|||
|
|
@ -33,8 +33,8 @@ RUN pnpm config set network-timeout 300000 && \
|
|||
# 复制源代码
|
||||
COPY . .
|
||||
|
||||
# 🔥 构建时使用相对路径 /api - Nginx 会处理代理
|
||||
ENV VITE_API_BASE_URL=/api/v1
|
||||
# 🔥 构建时使用占位符 - 实现 Build Once Run Anywhere
|
||||
ENV VITE_API_BASE_URL=__API_BASE_URL__
|
||||
|
||||
# 构建生产版本
|
||||
RUN pnpm build
|
||||
|
|
@ -50,6 +50,11 @@ COPY --from=builder /app/dist /usr/share/nginx/html
|
|||
# 复制 Nginx 配置 (包含 SSE 反向代理配置)
|
||||
COPY nginx.conf /etc/nginx/conf.d/default.conf
|
||||
|
||||
# 复制启动脚本
|
||||
COPY docker-entrypoint.sh /docker-entrypoint.sh
|
||||
RUN chmod +x /docker-entrypoint.sh
|
||||
|
||||
EXPOSE 80
|
||||
|
||||
ENTRYPOINT ["/docker-entrypoint.sh"]
|
||||
CMD ["nginx", "-g", "daemon off;"]
|
||||
|
|
|
|||
|
|
@ -1,10 +1,16 @@
|
|||
#!/bin/sh
|
||||
set -e
|
||||
|
||||
# 替换 API 地址占位符
|
||||
API_URL="${VITE_API_BASE_URL:-http://localhost:8000/api/v1}"
|
||||
# 默认为 /api/v1,这样即使用户不传参,也能配合默认的 nginx 代理工作
|
||||
API_URL="${VITE_API_BASE_URL:-/api/v1}"
|
||||
|
||||
echo "Injecting API URL: $API_URL"
|
||||
|
||||
# 在所有 JS 文件中替换占位符
|
||||
find /app/dist -name '*.js' -exec sed -i "s|__API_BASE_URL__|${API_URL}|g" {} \;
|
||||
# 注意:这里路径必须是 nginx 实际存放文件的路径
|
||||
ESCAPED_API_URL=$(echo "${API_URL}" | sed 's/[&/|]/\\&/g')
|
||||
find /usr/share/nginx/html -name '*.js' -exec sed -i "s|__API_BASE_URL__|${ESCAPED_API_URL}|g" {} \;
|
||||
|
||||
# 执行原始命令
|
||||
exec "$@"
|
||||
|
|
|
|||
|
|
@ -41,7 +41,7 @@ const DEFAULT_MODELS: Record<string, string> = {
|
|||
interface SystemConfigData {
|
||||
llmProvider: string; llmApiKey: string; llmModel: string; llmBaseUrl: string;
|
||||
llmTimeout: number; llmTemperature: number; llmMaxTokens: number;
|
||||
githubToken: string; gitlabToken: string;
|
||||
githubToken: string; gitlabToken: string; giteaToken: string;
|
||||
maxAnalyzeFiles: number; llmConcurrency: number; llmGapMs: number; outputLanguage: string;
|
||||
}
|
||||
|
||||
|
|
@ -79,6 +79,7 @@ export function SystemConfig() {
|
|||
llmMaxTokens: llmConfig.llmMaxTokens || 4096,
|
||||
githubToken: otherConfig.githubToken || '',
|
||||
gitlabToken: otherConfig.gitlabToken || '',
|
||||
giteaToken: otherConfig.giteaToken || '',
|
||||
maxAnalyzeFiles: otherConfig.maxAnalyzeFiles ?? 0,
|
||||
llmConcurrency: otherConfig.llmConcurrency || 3,
|
||||
llmGapMs: otherConfig.llmGapMs || 2000,
|
||||
|
|
@ -98,7 +99,7 @@ export function SystemConfig() {
|
|||
setConfig({
|
||||
llmProvider: 'openai', llmApiKey: '', llmModel: '', llmBaseUrl: '',
|
||||
llmTimeout: 150000, llmTemperature: 0.1, llmMaxTokens: 4096,
|
||||
githubToken: '', gitlabToken: '',
|
||||
githubToken: '', gitlabToken: '', giteaToken: '',
|
||||
maxAnalyzeFiles: 0, llmConcurrency: 3, llmGapMs: 2000, outputLanguage: 'zh-CN',
|
||||
});
|
||||
}
|
||||
|
|
@ -107,7 +108,7 @@ export function SystemConfig() {
|
|||
setConfig({
|
||||
llmProvider: 'openai', llmApiKey: '', llmModel: '', llmBaseUrl: '',
|
||||
llmTimeout: 150000, llmTemperature: 0.1, llmMaxTokens: 4096,
|
||||
githubToken: '', gitlabToken: '',
|
||||
githubToken: '', gitlabToken: '', giteaToken: '',
|
||||
maxAnalyzeFiles: 0, llmConcurrency: 3, llmGapMs: 2000, outputLanguage: 'zh-CN',
|
||||
});
|
||||
} finally {
|
||||
|
|
@ -126,7 +127,7 @@ export function SystemConfig() {
|
|||
llmMaxTokens: config.llmMaxTokens,
|
||||
},
|
||||
otherConfig: {
|
||||
githubToken: config.githubToken, gitlabToken: config.gitlabToken,
|
||||
githubToken: config.githubToken, gitlabToken: config.gitlabToken, giteaToken: config.giteaToken,
|
||||
maxAnalyzeFiles: config.maxAnalyzeFiles, llmConcurrency: config.llmConcurrency,
|
||||
llmGapMs: config.llmGapMs, outputLanguage: config.outputLanguage,
|
||||
},
|
||||
|
|
@ -145,6 +146,7 @@ export function SystemConfig() {
|
|||
llmMaxTokens: llmConfig.llmMaxTokens || 4096,
|
||||
githubToken: otherConfig.githubToken || '',
|
||||
gitlabToken: otherConfig.gitlabToken || '',
|
||||
giteaToken: otherConfig.giteaToken || '',
|
||||
maxAnalyzeFiles: otherConfig.maxAnalyzeFiles ?? 0,
|
||||
llmConcurrency: otherConfig.llmConcurrency || 3,
|
||||
llmGapMs: otherConfig.llmGapMs || 2000,
|
||||
|
|
@ -612,6 +614,22 @@ export function SystemConfig() {
|
|||
</a>
|
||||
</p>
|
||||
</div>
|
||||
<div className="space-y-2">
|
||||
<Label className="text-xs font-bold text-muted-foreground uppercase">Gitea Token (可选)</Label>
|
||||
<Input
|
||||
type="password"
|
||||
value={config.giteaToken}
|
||||
onChange={(e) => updateConfig('giteaToken', e.target.value)}
|
||||
placeholder="sha1_xxxxxxxxxxxx"
|
||||
className="h-10 cyber-input"
|
||||
/>
|
||||
<p className="text-xs text-muted-foreground">
|
||||
用于访问 Gitea 私有仓库。获取:{' '}
|
||||
<span className="text-primary">
|
||||
[your-gitea-instance]/user/settings/applications
|
||||
</span>
|
||||
</p>
|
||||
</div>
|
||||
<div className="bg-muted border border-border p-4 rounded-lg text-xs">
|
||||
<p className="font-bold text-muted-foreground flex items-center gap-2 mb-2">
|
||||
<Info className="w-4 h-4 text-sky-400" />
|
||||
|
|
|
|||
|
|
@ -276,6 +276,7 @@ export default function Projects() {
|
|||
switch (type) {
|
||||
case 'github': return <Github className="w-5 h-5" />;
|
||||
case 'gitlab': return <GitBranch className="w-5 h-5 text-orange-500" />;
|
||||
case 'gitea': return <GitBranch className="w-5 h-5 text-green-600" />;
|
||||
case 'other': return <Key className="w-5 h-5 text-cyan-500" />;
|
||||
default: return <Folder className="w-5 h-5 text-muted-foreground" />;
|
||||
}
|
||||
|
|
@ -486,9 +487,10 @@ export default function Projects() {
|
|||
<SelectValue />
|
||||
</SelectTrigger>
|
||||
<SelectContent className="cyber-dialog border-border">
|
||||
<SelectItem value="github">GitHub Token</SelectItem>
|
||||
<SelectItem value="gitlab">GitLab Token</SelectItem>
|
||||
<SelectItem value="other">SSH Key</SelectItem>
|
||||
<SelectItem value="github">GITHUB</SelectItem>
|
||||
<SelectItem value="gitlab">GITLAB</SelectItem>
|
||||
<SelectItem value="gitea">GITEA</SelectItem>
|
||||
<SelectItem value="other">OTHER</SelectItem>
|
||||
</SelectContent>
|
||||
</Select>
|
||||
</div>
|
||||
|
|
@ -1044,9 +1046,10 @@ export default function Projects() {
|
|||
<SelectValue />
|
||||
</SelectTrigger>
|
||||
<SelectContent className="cyber-dialog border-border">
|
||||
<SelectItem value="github">GitHub Token</SelectItem>
|
||||
<SelectItem value="gitlab">GitLab Token</SelectItem>
|
||||
<SelectItem value="other">SSH Key</SelectItem>
|
||||
<SelectItem value="github">GITHUB</SelectItem>
|
||||
<SelectItem value="gitlab">GITLAB</SelectItem>
|
||||
<SelectItem value="gitea">GITEA</SelectItem>
|
||||
<SelectItem value="other">OTHER</SelectItem>
|
||||
</SelectContent>
|
||||
</Select>
|
||||
</div>
|
||||
|
|
|
|||
|
|
@ -10,17 +10,17 @@ export const PROJECT_SOURCE_TYPES: Array<{
|
|||
label: string;
|
||||
description: string;
|
||||
}> = [
|
||||
{
|
||||
value: 'repository',
|
||||
label: '远程仓库',
|
||||
description: '从 GitHub/GitLab 等远程仓库拉取代码'
|
||||
},
|
||||
{
|
||||
value: 'zip',
|
||||
label: 'ZIP上传',
|
||||
description: '上传本地ZIP压缩包进行扫描'
|
||||
}
|
||||
];
|
||||
{
|
||||
value: 'repository',
|
||||
label: '远程仓库',
|
||||
description: '从 GitHub/GitLab 等远程仓库拉取代码'
|
||||
},
|
||||
{
|
||||
value: 'zip',
|
||||
label: 'ZIP上传',
|
||||
description: '上传本地ZIP压缩包进行扫描'
|
||||
}
|
||||
];
|
||||
|
||||
// 仓库平台选项
|
||||
export const REPOSITORY_PLATFORMS: Array<{
|
||||
|
|
@ -28,10 +28,11 @@ export const REPOSITORY_PLATFORMS: Array<{
|
|||
label: string;
|
||||
icon?: string;
|
||||
}> = [
|
||||
{ value: 'github', label: 'GitHub' },
|
||||
{ value: 'gitlab', label: 'GitLab' },
|
||||
{ value: 'other', label: '其他' }
|
||||
];
|
||||
{ value: 'github', label: 'GitHub' },
|
||||
{ value: 'gitlab', label: 'GitLab' },
|
||||
{ value: 'gitea', label: 'Gitea' },
|
||||
{ value: 'other', label: '其他' }
|
||||
];
|
||||
|
||||
// 项目来源类型的颜色配置
|
||||
export const SOURCE_TYPE_COLORS: Record<ProjectSourceType, {
|
||||
|
|
@ -58,5 +59,6 @@ export const PLATFORM_COLORS: Record<RepositoryPlatform, {
|
|||
}> = {
|
||||
github: { bg: 'bg-foreground', text: 'text-background' },
|
||||
gitlab: { bg: 'bg-orange-500', text: 'text-white' },
|
||||
gitea: { bg: 'bg-green-600', text: 'text-white' },
|
||||
other: { bg: 'bg-muted-foreground', text: 'text-background' }
|
||||
};
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ export interface Profile {
|
|||
export type ProjectSourceType = 'repository' | 'zip';
|
||||
|
||||
// 仓库平台类型
|
||||
export type RepositoryPlatform = 'github' | 'gitlab' | 'other';
|
||||
export type RepositoryPlatform = 'github' | 'gitlab' | 'gitea' | 'other';
|
||||
|
||||
// 项目相关类型
|
||||
export interface Project {
|
||||
|
|
|
|||
|
|
@ -48,6 +48,7 @@ export function getRepositoryPlatformLabel(platform?: string): string {
|
|||
const labels: Record<string, string> = {
|
||||
github: 'GitHub',
|
||||
gitlab: 'GitLab',
|
||||
gitea: 'Gitea',
|
||||
other: '其他'
|
||||
};
|
||||
return labels[platform || 'other'] || '其他';
|
||||
|
|
|
|||
Loading…
Reference in New Issue