# CodeReview/backend/app/core/config.py

from typing import Any, List, Optional, Union
from urllib.parse import quote

from pydantic import AnyHttpUrl, validator
from pydantic_settings import BaseSettings


class Settings(BaseSettings):
    """Application settings loaded from environment variables and ``.env``.

    Each attribute is one setting; the value assigned below is its default.
    Variable names are matched case-sensitively and unknown variables are
    ignored (see ``Config`` at the bottom of the class).
    """

    PROJECT_NAME: str = "DeepAudit"
    API_V1_STR: str = "/api/v1"

    # SECURITY
    # NOTE(review): the SECRET_KEY default is a placeholder, as its text says;
    # it should be overridden outside of local development.
    SECRET_KEY: str = "changethis_in_production_to_a_long_random_string"
    ALGORITHM: str = "HS256"
    ACCESS_TOKEN_EXPIRE_MINUTES: int = 60 * 24 * 8  # 8 days

    # CORS
    BACKEND_CORS_ORIGINS: List[AnyHttpUrl] = []

    @validator("BACKEND_CORS_ORIGINS", pre=True)
    def assemble_cors_origins(cls, v: Union[str, List[str]]) -> Union[List[str], str]:
        """Normalise the raw CORS origins value before URL validation.

        Args:
            v: Either a list of origins, a string starting with ``[``, or a
                comma-separated string of origins.

        Returns:
            A list of stripped, non-empty origins for a comma-separated
            string; otherwise ``v`` unchanged.

        Raises:
            ValueError: If ``v`` is neither a string nor a list.
        """
        if isinstance(v, str) and not v.startswith("["):
            # Drop blank entries so "" or a trailing comma does not yield an
            # empty origin, which could never validate as an HTTP URL.
            stripped = (part.strip() for part in v.split(","))
            return [origin for origin in stripped if origin]
        if isinstance(v, (list, str)):
            return v
        raise ValueError(v)

    # POSTGRES
    POSTGRES_SERVER: str = "db"
    POSTGRES_USER: str = "postgres"
    POSTGRES_PASSWORD: str = "postgres"
    POSTGRES_DB: str = "deepaudit"
    DATABASE_URL: str | None = None

    @validator("DATABASE_URL", pre=True)
    def assemble_db_connection(cls, v: str | None, values: dict[str, Any]) -> str:
        """Return the database URL, building it from ``POSTGRES_*`` if needed.

        Args:
            v: The raw ``DATABASE_URL`` value; used as-is when it is a string.
            values: The previously validated fields, from which the
                ``POSTGRES_*`` settings are read.

        Returns:
            ``v`` when it is a string, otherwise a ``postgresql+asyncpg://``
            URL assembled from the ``POSTGRES_*`` settings.
        """
        if isinstance(v, str):
            return v
        # Percent-encode the credentials: characters such as "@", ":" or "/"
        # in a user name or password would otherwise corrupt the URL.
        # safe="" also encodes "/" which quote() leaves alone by default.
        user = quote(str(values.get("POSTGRES_USER")), safe="")
        password = quote(str(values.get("POSTGRES_PASSWORD")), safe="")
        # The server is left untouched so a "host:port" value keeps working.
        server = values.get("POSTGRES_SERVER")
        database = values.get("POSTGRES_DB")
        return f"postgresql+asyncpg://{user}:{password}@{server}/{database}"

    # LLM configuration
    LLM_PROVIDER: str = "openai"  # gemini, openai, claude, qwen, deepseek, zhipu, moonshot, baidu, minimax, doubao, ollama
    LLM_API_KEY: Optional[str] = None
    LLM_MODEL: Optional[str] = None  # the provider's default model is used when unset
    LLM_BASE_URL: Optional[str] = None  # custom API endpoint (e.g. a relay/proxy)
    LLM_TIMEOUT: int = 150  # timeout in seconds
    LLM_TEMPERATURE: float = 0.1
    LLM_MAX_TOKENS: int = 4096

    # Per-provider API keys (kept for compatibility with per-provider configuration)
    OPENAI_API_KEY: Optional[str] = None
    OPENAI_BASE_URL: Optional[str] = None
    GEMINI_API_KEY: Optional[str] = None
    CLAUDE_API_KEY: Optional[str] = None
    QWEN_API_KEY: Optional[str] = None
    DEEPSEEK_API_KEY: Optional[str] = None
    ZHIPU_API_KEY: Optional[str] = None
    MOONSHOT_API_KEY: Optional[str] = None
    BAIDU_API_KEY: Optional[str] = None  # format: api_key:secret_key
    MINIMAX_API_KEY: Optional[str] = None
    DOUBAO_API_KEY: Optional[str] = None
    OLLAMA_BASE_URL: Optional[str] = "http://localhost:11434/v1"

    # GitHub configuration
    GITHUB_TOKEN: Optional[str] = None

    # GitLab configuration
    GITLAB_TOKEN: Optional[str] = None

    # Gitea configuration
    GITEA_TOKEN: Optional[str] = None

    # Scan configuration
    MAX_ANALYZE_FILES: int = 0  # maximum number of files to analyze; 0 means unlimited
    MAX_FILE_SIZE_BYTES: int = 200 * 1024  # maximum file size: 200 KB
    LLM_CONCURRENCY: int = 3  # number of concurrent LLM requests
    LLM_GAP_MS: int = 2000  # gap between LLM requests, in milliseconds

    # ZIP file storage configuration
    ZIP_STORAGE_PATH: str = "./uploads/zip_files"  # directory where ZIP files are stored

    # Output language - supports zh-CN (Chinese) and en-US (English)
    OUTPUT_LANGUAGE: str = "zh-CN"

    # ============ Agent module configuration ============

    # Embedding model configuration (independent of the LLM configuration)
    EMBEDDING_PROVIDER: str = "openai"  # openai, azure, ollama, cohere, huggingface, jina, qwen
    EMBEDDING_MODEL: str = "text-embedding-3-small"
    EMBEDDING_API_KEY: Optional[str] = None  # embedding-specific API key (LLM_API_KEY is used when left empty)
    EMBEDDING_BASE_URL: Optional[str] = None  # embedding-specific base URL (provider default when left empty)

    # Vector database configuration
    VECTOR_DB_PATH: str = "./data/vector_db"  # vector database persistence directory

    # SSH configuration
    SSH_CONFIG_PATH: str = "./data/ssh"  # SSH config directory (stores known_hosts, etc.)

    # Agent configuration
    AGENT_MAX_ITERATIONS: int = 50  # maximum number of agent iterations
    AGENT_TOKEN_BUDGET: int = 100000  # agent token budget
    AGENT_TIMEOUT_SECONDS: int = 1800  # agent timeout (30 minutes)

    # Sandbox configuration (required)
    SANDBOX_IMAGE: str = "deepaudit/sandbox:latest"  # sandbox Docker image
    SANDBOX_MEMORY_LIMIT: str = "512m"  # sandbox memory limit
    SANDBOX_CPU_LIMIT: float = 1.0  # sandbox CPU limit
    SANDBOX_TIMEOUT: int = 60  # sandbox command timeout, in seconds
    SANDBOX_NETWORK_MODE: str = "none"  # sandbox network mode (none, bridge)

    # RAG configuration
    RAG_CHUNK_SIZE: int = 1500  # code chunk size (tokens)
    RAG_CHUNK_OVERLAP: int = 50  # code chunk overlap (tokens)
    RAG_TOP_K: int = 10  # number of results returned by retrieval

    class Config:
        case_sensitive = True
        env_file = ".env"
        extra = "ignore"  # ignore extra environment variables (e.g. VITE_* frontend variables)


# Module-level settings instance, created once when this module is imported.
settings = Settings()
-												refactor: 重构项目结构，将前端和后端代码分离到独立目录

- 将前端代码移动到 frontend/ 目录
- 将后端代码移动到 backend/ 目录
- 更新 .gitignore 以包含 Python 和前端构建产物
- 修复 LLM JSON 解析问题，增强错误处理
- 修复前端配置默认值，改为从后端获取
- 删除 AdminDashboard 中的数据库信息和统计卡片
- 完善系统配置管理，支持从后端获取默认配置

											
										
										
											2025-11-26 21:11:12 +08:00
+								from typing import List, Union, Optional
 								from pydantic import AnyHttpUrl, validator
 								from pydantic_settings import BaseSettings
 								class Settings(BaseSettings):
-												chore: rebrand XCodeReviewer to DeepAudit across all files

- Update project name from XCodeReviewer to DeepAudit in CI/CD workflows
- Replace Docker image names and release artifact naming conventions
- Update GitHub repository references in documentation files
- Modify database names and container names in setup instructions
- Update contributor badge and issue tracker links
- Rename frontend logo file from logo_deepaudit.png
- Update environment configuration examples and documentation
- Rebrand all references in CONTRIBUTING.md, DISCLAIMER.md, and README.md
- Update backend configuration and deployment documentation
- Ensure consistent naming across frontend and backend configurations

											
										
										
											2025-12-08 21:35:09 +08:00
+								    PROJECT_NAME: str = "DeepAudit"
-												refactor: 重构项目结构，将前端和后端代码分离到独立目录

- 将前端代码移动到 frontend/ 目录
- 将后端代码移动到 backend/ 目录
- 更新 .gitignore 以包含 Python 和前端构建产物
- 修复 LLM JSON 解析问题，增强错误处理
- 修复前端配置默认值，改为从后端获取
- 删除 AdminDashboard 中的数据库信息和统计卡片
- 完善系统配置管理，支持从后端获取默认配置

											
										
										
											2025-11-26 21:11:12 +08:00
+								    API_V1_STR: str = "/api/v1"
 								    # SECURITY
 								    SECRET_KEY: str = "changethis_in_production_to_a_long_random_string"
 								    ALGORITHM: str = "HS256"
 								    ACCESS_TOKEN_EXPIRE_MINUTES: int = 60 * 24 * 8  # 8 days
 								    # CORS
 								    BACKEND_CORS_ORIGINS: List[AnyHttpUrl] = []
 								    @validator("BACKEND_CORS_ORIGINS", pre=True)
 								    def assemble_cors_origins(cls, v: Union[str, List[str]]) -> Union[List[str], str]:
 								        if isinstance(v, str) and not v.startswith("["):
 								            return [i.strip() for i in v.split(",")]
 								        elif isinstance(v, (list, str)):
 								            return v
 								        raise ValueError(v)
 								    # POSTGRES
 								    POSTGRES_SERVER: str = "db"
 								    POSTGRES_USER: str = "postgres"
 								    POSTGRES_PASSWORD: str = "postgres"
-												chore: rebrand XCodeReviewer to DeepAudit across all files

- Update project name from XCodeReviewer to DeepAudit in CI/CD workflows
- Replace Docker image names and release artifact naming conventions
- Update GitHub repository references in documentation files
- Modify database names and container names in setup instructions
- Update contributor badge and issue tracker links
- Rename frontend logo file from logo_deepaudit.png
- Update environment configuration examples and documentation
- Rebrand all references in CONTRIBUTING.md, DISCLAIMER.md, and README.md
- Update backend configuration and deployment documentation
- Ensure consistent naming across frontend and backend configurations

											
										
										
											2025-12-08 21:35:09 +08:00
+								    POSTGRES_DB: str = "deepaudit"
-												refactor: 重构项目结构，将前端和后端代码分离到独立目录

- 将前端代码移动到 frontend/ 目录
- 将后端代码移动到 backend/ 目录
- 更新 .gitignore 以包含 Python 和前端构建产物
- 修复 LLM JSON 解析问题，增强错误处理
- 修复前端配置默认值，改为从后端获取
- 删除 AdminDashboard 中的数据库信息和统计卡片
- 完善系统配置管理，支持从后端获取默认配置

											
										
										
											2025-11-26 21:11:12 +08:00
+								    DATABASE_URL: str | None = None
 								    @validator("DATABASE_URL", pre=True)
 								    def assemble_db_connection(cls, v: str | None, values: dict[str, any]) -> str:
 								        if isinstance(v, str):
 								            return v
 								        return str(f"postgresql+asyncpg://{values.get('POSTGRES_USER')}:{values.get('POSTGRES_PASSWORD')}@{values.get('POSTGRES_SERVER')}/{values.get('POSTGRES_DB')}")
 								    # LLM配置
 								    LLM_PROVIDER: str = "openai"  # gemini, openai, claude, qwen, deepseek, zhipu, moonshot, baidu, minimax, doubao, ollama
 								    LLM_API_KEY: Optional[str] = None
 								    LLM_MODEL: Optional[str] = None  # 不指定时使用provider的默认模型
 								    LLM_BASE_URL: Optional[str] = None  # 自定义API端点（如中转站）
 								    LLM_TIMEOUT: int = 150  # 超时时间（秒）
 								    LLM_TEMPERATURE: float = 0.1
 								    LLM_MAX_TOKENS: int = 4096
 								    # 各LLM提供商的API Key配置（兼容单独配置）
 								    OPENAI_API_KEY: Optional[str] = None
 								    OPENAI_BASE_URL: Optional[str] = None
 								    GEMINI_API_KEY: Optional[str] = None
 								    CLAUDE_API_KEY: Optional[str] = None
 								    QWEN_API_KEY: Optional[str] = None
 								    DEEPSEEK_API_KEY: Optional[str] = None
 								    ZHIPU_API_KEY: Optional[str] = None
 								    MOONSHOT_API_KEY: Optional[str] = None
 								    BAIDU_API_KEY: Optional[str] = None  # 格式: api_key:secret_key
 								    MINIMAX_API_KEY: Optional[str] = None
 								    DOUBAO_API_KEY: Optional[str] = None
 								    OLLAMA_BASE_URL: Optional[str] = "http://localhost:11434/v1"
 								    # GitHub配置
 								    GITHUB_TOKEN: Optional[str] = None
 								    # GitLab配置
 								    GITLAB_TOKEN: Optional[str] = None
-												feat: add Gitea repository support

											
										
										
											2025-12-16 16:36:08 +08:00
 								    # Gitea配置
 								    GITEA_TOKEN: Optional[str] = None
-												refactor: 重构项目结构，将前端和后端代码分离到独立目录

- 将前端代码移动到 frontend/ 目录
- 将后端代码移动到 backend/ 目录
- 更新 .gitignore 以包含 Python 和前端构建产物
- 修复 LLM JSON 解析问题，增强错误处理
- 修复前端配置默认值，改为从后端获取
- 删除 AdminDashboard 中的数据库信息和统计卡片
- 完善系统配置管理，支持从后端获取默认配置

											
										
										
											2025-11-26 21:11:12 +08:00
 								    # 扫描配置
-												feat: 将最大分析文件数默认值改为0表示无限制

修改前后端配置文件和文档，将 MAX_ANALYZE_FILES 默认值从50改为0表示无限制
同时更新相关逻辑判断条件，仅在 MAX_ANALYZE_FILES > 0 时进行文件数限制

											
										
										
											2025-12-16 13:04:09 +08:00
+								    MAX_ANALYZE_FILES: int = 0  # 最大分析文件数，0表示无限制
-												refactor: 重构项目结构，将前端和后端代码分离到独立目录

- 将前端代码移动到 frontend/ 目录
- 将后端代码移动到 backend/ 目录
- 更新 .gitignore 以包含 Python 和前端构建产物
- 修复 LLM JSON 解析问题，增强错误处理
- 修复前端配置默认值，改为从后端获取
- 删除 AdminDashboard 中的数据库信息和统计卡片
- 完善系统配置管理，支持从后端获取默认配置

											
										
										
											2025-11-26 21:11:12 +08:00
+								    MAX_FILE_SIZE_BYTES: int = 200 * 1024  # 最大文件大小 200KB
 								    LLM_CONCURRENCY: int = 3  # LLM并发数
 								    LLM_GAP_MS: int = 2000  # LLM请求间隔（毫秒）
-												feat(projects): add ZIP file upload support and source type tracking

- Add source_type field to projects model to distinguish between repository and ZIP sources
- Implement ZIP file storage service with save, load, delete, and metadata operations
- Add database migration to populate source_type for existing projects
- Create ZIP upload endpoint with file handling and metadata tracking
- Add ZIP download endpoint for project file retrieval
- Implement project ZIP info endpoint to check file status and metadata
- Update project creation to support both repository and ZIP source types
- Add project type constants and utility functions for source type handling
- Update database export/import to include source_type field
- Extend frontend components to support ZIP file uploads in project creation
- Add instant analysis page for direct ZIP file scanning without project creation
- Update .gitignore to exclude uploaded ZIP files and metadata
- Enhance project detail and task detail pages with ZIP file management UI

											
										
										
											2025-11-28 17:38:12 +08:00
+								    # ZIP文件存储配置
 								    ZIP_STORAGE_PATH: str = "./uploads/zip_files"  # ZIP文件存储目录
-												refactor: 重构项目结构，将前端和后端代码分离到独立目录

- 将前端代码移动到 frontend/ 目录
- 将后端代码移动到 backend/ 目录
- 更新 .gitignore 以包含 Python 和前端构建产物
- 修复 LLM JSON 解析问题，增强错误处理
- 修复前端配置默认值，改为从后端获取
- 删除 AdminDashboard 中的数据库信息和统计卡片
- 完善系统配置管理，支持从后端获取默认配置

											
										
										
											2025-11-26 21:11:12 +08:00
+								    # 输出语言配置 - 支持 zh-CN（中文）和 en-US（英文）
 								    OUTPUT_LANGUAGE: str = "zh-CN"
-												feat(agent): implement Agent audit module with LangGraph integration

- Introduce new Agent audit functionality for autonomous code security analysis and vulnerability verification.
- Add API endpoints for managing Agent tasks and configurations.
- Implement UI components for Agent mode selection and embedding model configuration.
- Enhance the overall architecture with a focus on RAG (Retrieval-Augmented Generation) for improved code semantic search.
- Create a sandbox environment for secure execution of vulnerability tests.
- Update documentation to include details on the new Agent audit features and usage instructions.

											
										
										
											2025-12-11 19:09:10 +08:00
 								    # ============ Agent 模块配置 ============
-												feat(agent): 增强 RAG 配置和工具集成

- 扩展嵌入模型配置选项，支持独立 API Key 和 Base URL
- 重构 RAG 初始化逻辑，支持用户自定义嵌入配置
- 新增语义搜索工具并集成到 Recon 和 Analysis Agent
- 完善系统提示，明确不同代码搜索工具的使用场景

											
										
										
											2025-12-16 13:57:27 +08:00
 								    # 嵌入模型配置（独立于 LLM 配置）
-												[feat] 添加 qwen 嵌入模型提供商

											
										
										
											2025-12-19 15:14:39 +08:00
+								    EMBEDDING_PROVIDER: str = "openai"  # openai, azure, ollama, cohere, huggingface, jina, qwen
-												feat(agent): implement Agent audit module with LangGraph integration

- Introduce new Agent audit functionality for autonomous code security analysis and vulnerability verification.
- Add API endpoints for managing Agent tasks and configurations.
- Implement UI components for Agent mode selection and embedding model configuration.
- Enhance the overall architecture with a focus on RAG (Retrieval-Augmented Generation) for improved code semantic search.
- Create a sandbox environment for secure execution of vulnerability tests.
- Update documentation to include details on the new Agent audit features and usage instructions.

											
										
										
											2025-12-11 19:09:10 +08:00
+								    EMBEDDING_MODEL: str = "text-embedding-3-small"
-												feat(agent): 增强 RAG 配置和工具集成

- 扩展嵌入模型配置选项，支持独立 API Key 和 Base URL
- 重构 RAG 初始化逻辑，支持用户自定义嵌入配置
- 新增语义搜索工具并集成到 Recon 和 Analysis Agent
- 完善系统提示，明确不同代码搜索工具的使用场景

											
										
										
											2025-12-16 13:57:27 +08:00
+								    EMBEDDING_API_KEY: Optional[str] = None  # 嵌入模型专用 API Key（留空则使用 LLM_API_KEY）
 								    EMBEDDING_BASE_URL: Optional[str] = None  # 嵌入模型专用 Base URL（留空使用提供商默认地址）
-												feat(agent): implement Agent audit module with LangGraph integration

- Introduce new Agent audit functionality for autonomous code security analysis and vulnerability verification.
- Add API endpoints for managing Agent tasks and configurations.
- Implement UI components for Agent mode selection and embedding model configuration.
- Enhance the overall architecture with a focus on RAG (Retrieval-Augmented Generation) for improved code semantic search.
- Create a sandbox environment for secure execution of vulnerability tests.
- Update documentation to include details on the new Agent audit features and usage instructions.

											
										
										
											2025-12-11 19:09:10 +08:00
 								    # 向量数据库配置
 								    VECTOR_DB_PATH: str = "./data/vector_db"  # 向量数据库持久化目录
-												✨ feat(SSH)：添加known_hosts持久化与清理功能

- 新增SSH配置目录设置，支持持久化存储known_hosts文件
- 实现known_hosts文件清理API端点，解决主机密钥变更导致的连接问题
- 优化SSH连接策略，使用StrictHostKeyChecking=accept-new自动接受新主机密钥
- 前端添加known_hosts清理按钮，提升SSH密钥管理体验
- 改进SSH测试逻辑，正确处理部署密钥的Anonymous响应

											
										
										
											2025-12-26 09:33:55 +08:00
 								    # SSH配置
 								    SSH_CONFIG_PATH: str = "./data/ssh"  # SSH配置目录（存储known_hosts等）
-												feat(agent): implement Agent audit module with LangGraph integration

- Introduce new Agent audit functionality for autonomous code security analysis and vulnerability verification.
- Add API endpoints for managing Agent tasks and configurations.
- Implement UI components for Agent mode selection and embedding model configuration.
- Enhance the overall architecture with a focus on RAG (Retrieval-Augmented Generation) for improved code semantic search.
- Create a sandbox environment for secure execution of vulnerability tests.
- Update documentation to include details on the new Agent audit features and usage instructions.

											
										
										
											2025-12-11 19:09:10 +08:00
 								    # Agent 配置
 								    AGENT_MAX_ITERATIONS: int = 50  # Agent 最大迭代次数
 								    AGENT_TOKEN_BUDGET: int = 100000  # Agent Token 预算
 								    AGENT_TIMEOUT_SECONDS: int = 1800  # Agent 超时时间（30分钟）
-												feat: Update deployment configurations, dependency management, and documentation, including a new sandbox build service.

											
										
										
											2025-12-15 15:18:55 +08:00
+								    # 沙箱配置（必须）
 								    SANDBOX_IMAGE: str = "deepaudit/sandbox:latest"  # 沙箱 Docker 镜像
-												feat(agent): implement Agent audit module with LangGraph integration

- Introduce new Agent audit functionality for autonomous code security analysis and vulnerability verification.
- Add API endpoints for managing Agent tasks and configurations.
- Implement UI components for Agent mode selection and embedding model configuration.
- Enhance the overall architecture with a focus on RAG (Retrieval-Augmented Generation) for improved code semantic search.
- Create a sandbox environment for secure execution of vulnerability tests.
- Update documentation to include details on the new Agent audit features and usage instructions.

											
										
										
											2025-12-11 19:09:10 +08:00
+								    SANDBOX_MEMORY_LIMIT: str = "512m"  # 沙箱内存限制
 								    SANDBOX_CPU_LIMIT: float = 1.0  # 沙箱 CPU 限制
 								    SANDBOX_TIMEOUT: int = 60  # 沙箱命令超时（秒）
 								    SANDBOX_NETWORK_MODE: str = "none"  # 沙箱网络模式 (none, bridge)
 								    # RAG 配置
 								    RAG_CHUNK_SIZE: int = 1500  # 代码块大小（Token）
 								    RAG_CHUNK_OVERLAP: int = 50  # 代码块重叠（Token）
 								    RAG_TOP_K: int = 10  # 检索返回数量
-												refactor: 重构项目结构，将前端和后端代码分离到独立目录

- 将前端代码移动到 frontend/ 目录
- 将后端代码移动到 backend/ 目录
- 更新 .gitignore 以包含 Python 和前端构建产物
- 修复 LLM JSON 解析问题，增强错误处理
- 修复前端配置默认值，改为从后端获取
- 删除 AdminDashboard 中的数据库信息和统计卡片
- 完善系统配置管理，支持从后端获取默认配置

											
										
										
											2025-11-26 21:11:12 +08:00
 								    class Config:
 								        case_sensitive = True
 								        env_file = ".env"
-												refactor(llm): consolidate LLM adapters with LiteLLM unified layer

- Replace individual adapter implementations (OpenAI, Claude, Gemini, DeepSeek, Qwen, Zhipu, Moonshot, Ollama) with unified LiteLLM adapter
- Keep native adapters for providers with special API formats (Baidu, MiniMax, Doubao)
- Update LLM factory to route requests through LiteLLM for supported providers
- Add test-llm endpoint to validate LLM connections with configurable timeout and token limits
- Add get-llm-providers endpoint to retrieve supported providers and their configurations
- Update config.py to ignore extra environment variables (VITE_* frontend variables)
- Refactor Baidu adapter to use new complete() method signature and improve error handling
- Update pyproject.toml dependencies to include litellm package
- Update env.example with new configuration options
- Simplify adapter initialization and reduce code duplication across multiple provider implementations

											
										
										
											2025-11-28 16:41:39 +08:00
+								        extra = "ignore"  # 忽略额外的环境变量（如 VITE_* 前端变量）
-												refactor: 重构项目结构，将前端和后端代码分离到独立目录

- 将前端代码移动到 frontend/ 目录
- 将后端代码移动到 backend/ 目录
- 更新 .gitignore 以包含 Python 和前端构建产物
- 修复 LLM JSON 解析问题，增强错误处理
- 修复前端配置默认值，改为从后端获取
- 删除 AdminDashboard 中的数据库信息和统计卡片
- 完善系统配置管理，支持从后端获取默认配置

											
										
										
											2025-11-26 21:11:12 +08:00
 								settings = Settings()