78 lines
2.6 KiB
Python
78 lines
2.6 KiB
Python
from urllib.parse import urlparse, urlunparse
|
|
from typing import Dict, Optional
|
|
|
|
def parse_repository_url(repo_url: str, repo_type: str) -> Dict[str, str]:
    """
    Parse a repository URL into the components needed to reach its hosting API.

    Args:
        repo_url: The repository URL. Surrounding whitespace, leading/trailing
            slashes in the path and a trailing ``.git`` suffix are ignored.
        repo_type: The type of repository ('github', 'gitlab', 'gitea').

    Returns:
        A dictionary containing parsed components:
        - base_url: The API base URL. ``https://api.github.com`` for
          github.com, otherwise ``<server_url>/api/v3`` (GitHub),
          ``<server_url>/api/v4`` (GitLab) or ``<server_url>/api/v1`` (Gitea).
        - owner: The owner/namespace of the repository. For GitLab this is
          every path segment except the last, joined with '/'.
        - repo: The repository name.
        - project_path: The full cleaned URL path (useful for GitLab).
        - server_url: The base URL of the server (scheme + netloc).

    Raises:
        ValueError: If the URL is empty, does not use http/https, has no host,
            has fewer than two non-empty path segments, or if ``repo_type``
            is not supported.
    """
    if not repo_url:
        raise ValueError(f"{repo_type} 仓库 URL 不能为空")

    # Basic sanitization
    parsed = urlparse(repo_url.strip())

    # Check scheme to prevent SSRF (only allow http and https)
    if parsed.scheme not in ('http', 'https'):
        raise ValueError(f"{repo_type} 仓库 URL 必须使用 http 或 https 协议")

    # A URL such as "https:///owner/repo" parses with an empty host; without
    # this check it would yield a server_url of just "https://".
    hostname = parsed.hostname
    if not hostname:
        raise ValueError(f"{repo_type} 仓库 URL 格式错误")

    # Remove .git suffix if present
    path = parsed.path.strip('/')
    if path.endswith('.git'):
        path = path[:-4]

    path_parts = path.split('/')
    # Reject empty segments too ("/owner/.git", "/owner//repo"): they would
    # otherwise produce an empty owner or repo name.
    if len(path_parts) < 2 or not all(path_parts):
        raise ValueError(f"{repo_type} 仓库 URL 格式错误")

    base = f"{parsed.scheme}://{parsed.netloc}"

    if repo_type == "github":
        # GitHub paths always start with owner/repo; anything after that
        # (e.g. /tree/main, /pulls) is a view inside the repository.
        owner, repo = path_parts[0], path_parts[1]
        # Compare the host exactly. A substring test would misclassify
        # enterprise hosts such as "github.company.com" as github.com.
        # `hostname` is already lower-cased and excludes port and userinfo.
        if hostname in ('github.com', 'www.github.com'):
            api_base = "https://api.github.com"
        else:
            api_base = f"{base}/api/v3"  # Assumption for GitHub Enterprise

    elif repo_type == "gitlab":
        # GitLab supports subgroups, so the path may be group/subgroup/repo.
        # The last segment is the project; everything before it is the namespace.
        repo = path_parts[-1]
        owner = "/".join(path_parts[:-1])
        api_base = f"{base}/api/v4"

    elif repo_type == "gitea":
        # Gitea: /owner/repo
        owner, repo = path_parts[0], path_parts[1]
        api_base = f"{base}/api/v1"

    else:
        raise ValueError(f"不支持的仓库类型: {repo_type}")

    return {
        "base_url": api_base,
        "owner": owner,
        "repo": repo,
        "project_path": path,  # Useful for GitLab
        "server_url": base,
    }