diff --git a/backend/app/core/ci_prompts.py b/backend/app/core/ci_prompts.py index e48d9a5..8257a09 100644 --- a/backend/app/core/ci_prompts.py +++ b/backend/app/core/ci_prompts.py @@ -10,121 +10,143 @@ from typing import Optional # Base Template # ----------------------------------------------------------------------------- # strict structure to ensure the LLM has all necessary context without hallucinations. +# ----------------------------------------------------------------------------- +# 基础模板 +# ----------------------------------------------------------------------------- +# 严格的结构,确保 LLM 拥有所有必要的上下文而不会产生幻觉。 PROMPT_TEMPLATE = """ -### ROLE +### 角色 {system_prompt} -### CONTEXT FROM REPOSITORY -The following code snippets were retrieved from the existing repository to provide context: +### 仓库上下文 +系统从现有仓库中检索了以下代码片段,以提供背景信息: {repo_context} -### PR DIFF / CHANGES -The following are the actual changes in this Pull Request (or specific commit): +### PR 差异 / 变更内容 (Diff) +以下是本次 Pull Request (或特定提交) 中的实际变更: {diff_content} -### CONVERSATION HISTORY +### 对话历史 {conversation_history} -### TASK +### 任务 {task_description} -### OUTPUT FORMAT +### 输出格式 {output_format} """ # ----------------------------------------------------------------------------- -# 1. PR Review Prompts +# 1. PR 评审提示词 # ----------------------------------------------------------------------------- REVIEW_SYSTEM_PROMPT = """ -You are DeepAudit Bot, an expert Senior Security Engineer and Code Reviewer. -Your goal is to identify security vulnerabilities, potential bugs, and code quality issues in the provided Pull Request changes. -You must ground your analysis in the provided Repository Context to understand how the changes impact the broader system. +你是 AI Code Review Bot,一位资深的安全工程师和代码审查专家。 +你的目标是识别提供的 Pull Request 变更中的安全漏洞、潜在 Bug 和代码质量问题。 +你必须基于提供的“仓库上下文”进行分析,以理解变更对整个系统的影响。 """ PR_REVIEW_TASK = """ -Analyze the "PR DIFF / CHANGES" above, considering the "CONTEXT FROM REPOSITORY". +分析上方的“PR 差异 / 变更内容 (Diff)”,并结合“仓库上下文”。 -1. **Security Analysis**: Identify any security risks (e.g., Injection, Auth bypass, Hardcoded secrets, etc.). -2. **Logic & Bugs**: Find edge cases or logic errors introduced in this change. -3. **Quality & Performance**: Point out maintainability issues or performance bottlenecks. -4. **Context check**: Use the repo context to verify if function calls or contract changes are valid. +1. **安全分析**:识别任何安全风险(例如:注入、权限绕过、硬编码密钥等)。 +2. **逻辑与 Bug**:寻找本次变更引入的边界情况或逻辑错误。 +3. **质量与性能**:指出可维护性问题或性能瓶颈。 +4. **上下文检查**:利用仓库上下文核实函数调用或代码变更是否有效且符合现有架构。 -Ignore minor formatting/linting issues unless they severely impact readability. +除非严重影响可读性,否则请忽略细微的格式或 Lint 问题。 """ PR_REVIEW_OUTPUT_FORMAT = """ -Output ONLY a Markdown response in the following format: +仅输出 Markdown 格式的响应,格式如下: -## 🔍 DeepAudit Review Summary - +## 🔍 DeepAudit 评审摘要 +<简要总结变更内容及整体风险等级> -## 🛡️ Key Issues Found -### [Severity: High/Medium/Low] -- **File**: `<filepath>` -- **Problem**: <Description> -- **Context**: <Why this is an issue based on repo context> -- **Suggestion**: -```<language> -<code fix> +## 🛡️ 发现评审意见 +### [严重程度: 高/中/低] <意见标题> +- **文件**: `<文件路径>` +- **意见**: <描述具体问题或改进建议> +- **上下文**: <基于仓库上下文说明为什么这值得关注> +- **改进建议**: +```<语言> +<修复建议代码> ``` -... (Repeat for other issues) +... (按需重复上述结构) -## 💡 Improvements -- <Bullet points for minor improvements> +## 💡 优化建议 +- <细微改进的列表> """ # ----------------------------------------------------------------------------- -# 2. Incremental (Sync) Review Prompts +# 2. 增量 (Sync) 评审提示词 # ----------------------------------------------------------------------------- PR_SYNC_TASK = """ -The user has pushed new commits to the existing Pull Request. -Focus ONLY on the changes in "PR DIFF / CHANGES" (which are the new commits). -Check if these new changes introduce any new issues or fail to address previous concerns (visible in history). +用户向现有的 Pull Request 推送了新的提交。 +请参考下方的“PR 差异 / 变更内容 (Diff)”中的 **全量差异 (Total Diff)** 以了解整个 PR 的背景, +但请**重点分析并评审**其中的 **本次提交差异 (Recent Sync Diff)**。 + +1. **安全分析**:识别本次新提交是否引入了任何安全风险。 +2. **逻辑与 Bug**:寻找本次新提交中的边界情况或逻辑错误。 +3. **回归检查**:核实本次新提交是否解决了之前提到的疑虑,或者是否破坏了已有逻辑。 +4. **上下文检查**:利用仓库上下文核实新代码是否有效。 + +请确保评审意见清晰指出哪些是针对本次新提交的反馈。 """ # ----------------------------------------------------------------------------- -# 3. Chat / Q&A Prompts +# 3. 聊天 / 问答提示词 # ----------------------------------------------------------------------------- CHAT_SYSTEM_PROMPT = """ -You are DeepAudit Bot, a helpful AI assistant integrated into the CI/CD workflow. -You are chatting with a developer in a Pull Request comment thread. -The user has mentioned you (@ai-bot) to ask a question or request clarification. -You have access to the relevant snippets of the codebase via RAG (Retrieval Augmented Generation). +你是 DeepAudit Bot,一个集成在 CI/CD 工作流中的得力 AI 助手。 +你正在 PR 评论区与开发者交流。 +用户提到了你 (@ai-bot) 以询问问题或请求澄清。 +你可以通过 RAG (检索增强生成) 访问代码库的相关片段,并能看到当前的 PR 差异。 """ BOT_CHAT_TASK = """ -Answer the user's question or respond to their comment found in "CONVERSATION HISTORY". -Use the "CONTEXT FROM REPOSITORY" to provide accurate, specific answers about the code. -If the context doesn't contain the answer, admit it or provide a best-effort answer based on general knowledge. +回答用户在“对话历史”中提出的问题或评论。 +利用“仓库上下文”和“PR 差异”来提供关于代码的准确、具体的回答。 +如果上下文中不包含答案,请如实告知,或基于通用知识提供最佳建议。 -Do NOT repeat the user's question. Go straight to the answer. +请不要重复用户的问题,直接开始回答。 """ BOT_CHAT_OUTPUT_FORMAT = """ -Markdown text. Be concise but technical. +Markdown 格式。简洁且具有技术专业性。 """ -def build_pr_review_prompt(diff: str, context: str, history: str = "None") -> str: +def build_pr_review_prompt(diff: str, context: str, history: str = "无") -> str: return PROMPT_TEMPLATE.format( system_prompt=REVIEW_SYSTEM_PROMPT, - repo_context=context if context else "No additional context retrieved.", + repo_context=context if context else "未检索到相关的仓库上下文。", diff_content=diff, conversation_history=history, task_description=PR_REVIEW_TASK, output_format=PR_REVIEW_OUTPUT_FORMAT ) -def build_chat_prompt(user_query: str, context: str, history: str) -> str: - # Note: user_query is conceptually part of the history/task +def build_pr_sync_prompt(total_diff: str, sync_diff: str, context: str, history: str) -> str: + combined_diff = f"--- [PR 全量差异 (Total Diff)] ---\n{total_diff}\n\n--- [本次提交差异 (Recent Sync Diff)] ---\n{sync_diff}" + return PROMPT_TEMPLATE.format( + system_prompt=REVIEW_SYSTEM_PROMPT, + repo_context=context if context else "未检索到相关的仓库上下文。", + diff_content=combined_diff, + conversation_history=history, + task_description=PR_SYNC_TASK, + output_format=PR_REVIEW_OUTPUT_FORMAT + ) + +def build_chat_prompt(user_query: str, context: str, history: str, diff: str = "暂无相关 Diff") -> str: + # 注意:user_query 在概念上是对话历史/任务的一部分 return PROMPT_TEMPLATE.format( system_prompt=CHAT_SYSTEM_PROMPT, - repo_context=context if context else "No additional context retrieved.", - diff_content="[Not applicable for general chat, unless user refers to recent changes]", + repo_context=context if context else "未检索到相关的仓库上下文。", + diff_content=diff, conversation_history=history, - task_description=BOT_CHAT_TASK + f"\n\nUSER QUESTION: {user_query}", + task_description=BOT_CHAT_TASK + f"\n\n用户问题: {user_query}", output_format=BOT_CHAT_OUTPUT_FORMAT ) diff --git a/backend/app/services/ci_service.py b/backend/app/services/ci_service.py index 7c9de5d..ee33c68 100644 --- a/backend/app/services/ci_service.py +++ b/backend/app/services/ci_service.py @@ -23,6 +23,7 @@ from app.models.project import Project from app.models.ci import PRReview from app.core.ci_prompts import ( build_pr_review_prompt, + build_pr_sync_prompt, build_chat_prompt, PR_SYNC_TASK ) @@ -93,13 +94,26 @@ class CIService: context_results = await retriever.retrieve(diff_text[:1000], top_k=5) repo_context = "\n".join([r.to_context_string() for r in context_results]) - # 5. Generate Review - history = "" - + # 5. 生成评审 if action == "synchronize": - prompt = build_pr_review_prompt(diff_text, repo_context, history) - prompt += f"\n\nNOTE: {PR_SYNC_TASK}" + # 增量同步模式:获取全部对话历史 + history = await self._get_conversation_history(repo, pr_number) + + # 获取本次同步的具体差异 (commit diff) + before_sha = payload.get("before") + after_sha = payload.get("after") + + sync_diff = "" + if before_sha and after_sha: + sync_diff = await self._get_commit_diff(repo, before_sha, after_sha) + + if not sync_diff: + sync_diff = "(无法获取本次提交的具体差异,请参考全量差异)" + + prompt = build_pr_sync_prompt(diff_text, sync_diff, repo_context, history) else: + # 新建 PR 模式:历史为空 + history = "" prompt = build_pr_review_prompt(diff_text, repo_context, history) # Call LLM @@ -229,12 +243,15 @@ class CIService: repo_context = "\n".join([r.to_context_string() for r in context_results]) - # 4. Build Prompt - # Fetch conversation history (simplified: just current comment) - history = f"User: {query}" - prompt = build_chat_prompt(query, repo_context, history) + # 4. 获取 PR 差异作为上下文 + diff_text = await self._get_pr_diff(repo, issue.get("number")) + + # 5. 构建提示词 + # 获取全部 PR 对话历史作为上下文 + history = await self._get_conversation_history(repo, issue.get("number")) + prompt = build_chat_prompt(query, repo_context, history, diff=diff_text) - # 5. Generate Answer + # 6. 生成回答 response = await self.llm_service.chat_completion_raw( messages=[{"role": "user", "content": prompt}], temperature=0.4 @@ -242,10 +259,10 @@ class CIService: answer = response["content"] - # 6. Reply - # Append context info footer + # 7. 回复 + # 附加上下文信息页脚 footer_parts = [f"`{r.file_path}`" for r in context_results] - footer = "\n\n---\n*Context used: " + (", ".join(footer_parts) if footer_parts else "None (General knowledge used)") + "*" + footer = "\n\n---\n*本次回答参考了以下文件上下文: " + (", ".join(footer_parts) if footer_parts else "无(使用了模型通用知识)") + "*" await self._post_gitea_comment(repo, issue.get("number"), answer + footer) # 6. Record (Optional, maybe just log) @@ -444,6 +461,25 @@ class CIService: logger.error(f"Failed to fetch PR diff: {e}") return "" + async def _get_commit_diff(self, repo: Dict, before: str, after: str) -> str: + """ + Fetch the diff between two commits from Gitea API + """ + api_url = f"{settings.GITEA_HOST_URL}/api/v1/repos/{repo['owner']['login']}/{repo['name']}/compare/{before}...{after}.diff" + headers = {"Authorization": f"token {settings.GITEA_BOT_TOKEN}"} + + try: + async with httpx.AsyncClient() as client: + resp = await client.get(api_url, headers=headers) + if resp.status_code == 200: + return resp.text + else: + logger.error(f"Failed to fetch commit diff: {resp.status_code} - {resp.text[:200]}") + return "" + except Exception as e: + logger.error(f"Failed to fetch commit diff: {e}") + return "" + async def _post_gitea_comment(self, repo: Dict, issue_number: int, body: str): if not settings.GITEA_HOST_URL or not settings.GITEA_BOT_TOKEN: logger.error("GITEA_HOST_URL or GITEA_BOT_TOKEN not configured") @@ -463,3 +499,32 @@ class CIService: logger.error(f"Gitea API Error: {resp.status_code} - {resp.text}") except Exception as e: logger.error(f"Failed to post Gitea comment: {e}") + + async def _get_conversation_history(self, repo: Dict, issue_number: int) -> str: + """ + Fetch the conversation history (comments) from Gitea API + """ + if not settings.GITEA_HOST_URL or not settings.GITEA_BOT_TOKEN: + return "无" + + api_url = f"{settings.GITEA_HOST_URL}/api/v1/repos/{repo['owner']['login']}/{repo['name']}/issues/{issue_number}/comments" + headers = {"Authorization": f"token {settings.GITEA_BOT_TOKEN}"} + + try: + async with httpx.AsyncClient() as client: + resp = await client.get(api_url, headers=headers) + if resp.status_code == 200: + comments = resp.json() + history_parts = [] + for c in comments: + user = c.get("user", {}).get("username") or c.get("user", {}).get("login") or "未知用户" + body = c.get("body", "") + history_parts.append(f"{user}: {body}") + + return "\n".join(history_parts) if history_parts else "无" + else: + logger.error(f"Failed to fetch conversation history: {resp.status_code} - {resp.text[:200]}") + return "无" + except Exception as e: + logger.error(f"Failed to fetch PR conversation history: {e}") + return "无"