feat: add PR synchronization prompt and incorporate commit diffs and conversation history into PR review and chat.

2026-01-04 14:34:29 +08:00 · 2026-01-04 14:34:29 +08:00 · d01d3850df
parent 23ac263d76
commit d01d3850df
2 changed files with 153 additions and 66 deletions
--- a/backend/app/core/ci_prompts.py
+++ b/backend/app/core/ci_prompts.py
@ -10,121 +10,143 @@ from typing import Optional
 # Base Template
 # -----------------------------------------------------------------------------
 # strict structure to ensure the LLM has all necessary context without hallucinations.
+# -----------------------------------------------------------------------------
+# 基础模板
+# -----------------------------------------------------------------------------
+# 严格的结构，确保 LLM 拥有所有必要的上下文而不会产生幻觉。
 PROMPT_TEMPLATE = """
-### ROLE
+### 角色
 {system_prompt}

-### CONTEXT FROM REPOSITORY
-The following code snippets were retrieved from the existing repository to provide context:
+### 仓库上下文
+系统从现有仓库中检索了以下代码片段，以提供背景信息：
 {repo_context}

-### PR DIFF / CHANGES
-The following are the actual changes in this Pull Request (or specific commit):
+### PR 差异 / 变更内容 (Diff)
+以下是本次 Pull Request (或特定提交) 中的实际变更：
 {diff_content}

-### CONVERSATION HISTORY
+### 对话历史
 {conversation_history}

-### TASK
+### 任务
 {task_description}

-### OUTPUT FORMAT
+### 输出格式
 {output_format}
 """

 # -----------------------------------------------------------------------------
-# 1. PR Review Prompts
+# 1. PR 评审提示词
 # -----------------------------------------------------------------------------

 REVIEW_SYSTEM_PROMPT = """
-You are DeepAudit Bot, an expert Senior Security Engineer and Code Reviewer.
-Your goal is to identify security vulnerabilities, potential bugs, and code quality issues in the provided Pull Request changes.
-You must ground your analysis in the provided Repository Context to understand how the changes impact the broader system.
+你是 AI Code Review Bot，一位资深的安全工程师和代码审查专家。
+你的目标是识别提供的 Pull Request 变更中的安全漏洞、潜在 Bug 和代码质量问题。
+你必须基于提供的“仓库上下文”进行分析，以理解变更对整个系统的影响。
 """

 PR_REVIEW_TASK = """
-Analyze the "PR DIFF / CHANGES" above, considering the "CONTEXT FROM REPOSITORY".
+分析上方的“PR 差异 / 变更内容 (Diff)”，并结合“仓库上下文”。

-1. **Security Analysis**: Identify any security risks (e.g., Injection, Auth bypass, Hardcoded secrets, etc.).
-2. **Logic & Bugs**: Find edge cases or logic errors introduced in this change.
-3. **Quality & Performance**: Point out maintainability issues or performance bottlenecks.
-4. **Context check**: Use the repo context to verify if function calls or contract changes are valid.
+1. **安全分析**：识别任何安全风险（例如：注入、权限绕过、硬编码密钥等）。
+2. **逻辑与 Bug**：寻找本次变更引入的边界情况或逻辑错误。
+3. **质量与性能**：指出可维护性问题或性能瓶颈。
+4. **上下文检查**：利用仓库上下文核实函数调用或代码变更是否有效且符合现有架构。

-Ignore minor formatting/linting issues unless they severely impact readability.
+除非严重影响可读性，否则请忽略细微的格式或 Lint 问题。
 """

 PR_REVIEW_OUTPUT_FORMAT = """
-Output ONLY a Markdown response in the following format:
+仅输出 Markdown 格式的响应，格式如下：

-## 🔍 DeepAudit Review Summary
-<Short summary of the changes and overall risk level>
+## 🔍 DeepAudit 评审摘要
+<简要总结变更内容及整体风险等级>

-## 🛡️ Key Issues Found
-### [Severity: High/Medium/Low] <Title of Issue>
- **File**: `<filepath>`
- **Problem**: <Description>
- **Context**: <Why this is an issue based on repo context>
- **Suggestion**:
-```<language>
-<code fix>
+## 🛡️ 发现评审意见
+### [严重程度: 高/中/低] <意见标题>
+- **文件**: `<文件路径>`
+- **意见**: <描述具体问题或改进建议>
+- **上下文**: <基于仓库上下文说明为什么这值得关注>
+- **改进建议**:
+```<语言>
+<修复建议代码>
 ```

-... (Repeat for other issues)
+... (按需重复上述结构)

-## 💡 Improvements
- <Bullet points for minor improvements>
+## 💡 优化建议
+- <细微改进的列表>
 """

 # -----------------------------------------------------------------------------
-# 2. Incremental (Sync) Review Prompts
+# 2. 增量 (Sync) 评审提示词
 # -----------------------------------------------------------------------------

 PR_SYNC_TASK = """
-The user has pushed new commits to the existing Pull Request.
-Focus ONLY on the changes in "PR DIFF / CHANGES" (which are the new commits).
-Check if these new changes introduce any new issues or fail to address previous concerns (visible in history).
+用户向现有的 Pull Request 推送了新的提交。
+请参考下方的“PR 差异 / 变更内容 (Diff)”中的 **全量差异 (Total Diff)** 以了解整个 PR 的背景，
+但请**重点分析并评审**其中的 **本次提交差异 (Recent Sync Diff)**。
+
+1. **安全分析**：识别本次新提交是否引入了任何安全风险。
+2. **逻辑与 Bug**：寻找本次新提交中的边界情况或逻辑错误。
+3. **回归检查**：核实本次新提交是否解决了之前提到的疑虑，或者是否破坏了已有逻辑。
+4. **上下文检查**：利用仓库上下文核实新代码是否有效。
+
+请确保评审意见清晰指出哪些是针对本次新提交的反馈。
 """

 # -----------------------------------------------------------------------------
-# 3. Chat / Q&A Prompts
+# 3. 聊天 / 问答提示词
 # -----------------------------------------------------------------------------

 CHAT_SYSTEM_PROMPT = """
-You are DeepAudit Bot, a helpful AI assistant integrated into the CI/CD workflow.
-You are chatting with a developer in a Pull Request comment thread.
-The user has mentioned you (@ai-bot) to ask a question or request clarification.
-You have access to the relevant snippets of the codebase via RAG (Retrieval Augmented Generation).
+你是 DeepAudit Bot，一个集成在 CI/CD 工作流中的得力 AI 助手。
+你正在 PR 评论区与开发者交流。
+用户提到了你 (@ai-bot) 以询问问题或请求澄清。
+你可以通过 RAG (检索增强生成) 访问代码库的相关片段，并能看到当前的 PR 差异。
 """

 BOT_CHAT_TASK = """
-Answer the user's question or respond to their comment found in "CONVERSATION HISTORY".
-Use the "CONTEXT FROM REPOSITORY" to provide accurate, specific answers about the code.
-If the context doesn't contain the answer, admit it or provide a best-effort answer based on general knowledge.
+回答用户在“对话历史”中提出的问题或评论。
+利用“仓库上下文”和“PR 差异”来提供关于代码的准确、具体的回答。
+如果上下文中不包含答案，请如实告知，或基于通用知识提供最佳建议。

-Do NOT repeat the user's question. Go straight to the answer.
+请不要重复用户的问题，直接开始回答。
 """

 BOT_CHAT_OUTPUT_FORMAT = """
-Markdown text. Be concise but technical.
+Markdown 格式。简洁且具有技术专业性。
 """

-def build_pr_review_prompt(diff: str, context: str, history: str = "None") -> str:
+def build_pr_review_prompt(diff: str, context: str, history: str = "无") -> str:
+    """Assemble the prompt for a full review of a Pull Request.
+
+    Args:
+        diff: Diff text placed in the "PR diff" section of the template.
+        context: Retrieved repository snippets; a placeholder is used when empty.
+        history: Prior conversation text; a placeholder is used when empty.
+
+    Returns:
+        PROMPT_TEMPLATE filled with the review system prompt, task and output format.
+    """
    return PROMPT_TEMPLATE.format(
        system_prompt=REVIEW_SYSTEM_PROMPT,
-        repo_context=context if context else "No additional context retrieved.",
+        repo_context=context if context else "未检索到相关的仓库上下文。",
        diff_content=diff,
-        conversation_history=history,
+        # Callers pass "" for a brand-new PR, which bypasses the parameter default;
+        # fall back explicitly so the history section is never left blank.
+        conversation_history=history if history else "无",
        task_description=PR_REVIEW_TASK,
        output_format=PR_REVIEW_OUTPUT_FORMAT
    )

-def build_chat_prompt(user_query: str, context: str, history: str) -> str:
-    # Note: user_query is conceptually part of the history/task
+def build_pr_sync_prompt(total_diff: str, sync_diff: str, context: str, history: str) -> str:
+    """Assemble the prompt for an incremental review after new commits are pushed.
+
+    Args:
+        total_diff: Diff of the whole Pull Request, given as background.
+        sync_diff: Diff of the newly pushed commits, which PR_SYNC_TASK asks the model to focus on.
+        context: Retrieved repository snippets; a placeholder is used when empty.
+        history: Prior conversation text; a placeholder is used when empty.
+
+    Returns:
+        PROMPT_TEMPLATE filled with both diffs under labelled headers.
+    """
+    # The two section labels are referenced by name in PR_SYNC_TASK, so keep them verbatim.
+    combined_diff = (
+        f"--- [PR 全量差异 (Total Diff)] ---\n{total_diff}\n\n"
+        f"--- [本次提交差异 (Recent Sync Diff)] ---\n{sync_diff}"
+    )
+    return PROMPT_TEMPLATE.format(
+        system_prompt=REVIEW_SYSTEM_PROMPT,
+        repo_context=context if context else "未检索到相关的仓库上下文。",
+        diff_content=combined_diff,
+        # Same placeholder build_pr_review_prompt uses as its default for "no history".
+        conversation_history=history if history else "无",
+        task_description=PR_SYNC_TASK,
+        output_format=PR_REVIEW_OUTPUT_FORMAT
+    )
+
+def build_chat_prompt(user_query: str, context: str, history: str, diff: str = "暂无相关 Diff") -> str:
+    """Assemble the prompt for answering a question in a PR comment thread.
+
+    Args:
+        user_query: The question text, appended to the task description.
+        context: Retrieved repository snippets; a placeholder is used when empty.
+        history: Conversation text placed in the history section.
+        diff: PR diff text; the default placeholder is used when omitted or empty.
+
+    Returns:
+        PROMPT_TEMPLATE filled with the chat system prompt, task and output format.
+    """
+    # NOTE: user_query is conceptually part of the conversation history/task.
    return PROMPT_TEMPLATE.format(
        system_prompt=CHAT_SYSTEM_PROMPT,
-        repo_context=context if context else "No additional context retrieved.",
-        diff_content="[Not applicable for general chat, unless user refers to recent changes]",
+        repo_context=context if context else "未检索到相关的仓库上下文。",
+        # The caller forwards whatever the diff fetch returned, which is "" on failure;
+        # an explicit "" would bypass the parameter default, so fall back here as well.
+        diff_content=diff if diff else "暂无相关 Diff",
        conversation_history=history,
-        task_description=BOT_CHAT_TASK + f"\n\nUSER QUESTION: {user_query}",
+        task_description=BOT_CHAT_TASK + f"\n\n用户问题: {user_query}",
        output_format=BOT_CHAT_OUTPUT_FORMAT
    )
--- a/backend/app/services/ci_service.py
+++ b/backend/app/services/ci_service.py
@ -23,6 +23,7 @@ from app.models.project import Project
 from app.models.ci import PRReview
 from app.core.ci_prompts import (
    build_pr_review_prompt, 
+    build_pr_sync_prompt,
    build_chat_prompt, 
    PR_SYNC_TASK
 )
@ -93,13 +94,26 @@ class CIService:
            context_results = await retriever.retrieve(diff_text[:1000], top_k=5)
            repo_context = "\n".join([r.to_context_string() for r in context_results])
            
-            # 5. Generate Review
-            history = "" 
-            
+            # 5. 生成评审
            if action == "synchronize":
-                 prompt = build_pr_review_prompt(diff_text, repo_context, history)
-                 prompt += f"\n\nNOTE: {PR_SYNC_TASK}"
+                 # 增量同步模式：获取全部对话历史
+                 history = await self._get_conversation_history(repo, pr_number)
+                 
+                 # 获取本次同步的具体差异 (commit diff)
+                 before_sha = payload.get("before")
+                 after_sha = payload.get("after")
+                 
+                 sync_diff = ""
+                 if before_sha and after_sha:
+                     sync_diff = await self._get_commit_diff(repo, before_sha, after_sha)
+                 
+                 if not sync_diff:
+                     sync_diff = "（无法获取本次提交的具体差异，请参考全量差异）"
+                     
+                 prompt = build_pr_sync_prompt(diff_text, sync_diff, repo_context, history)
            else:
+                 # 新建 PR 模式：历史为空
+                 history = "" 
                 prompt = build_pr_review_prompt(diff_text, repo_context, history)
                 
            # Call LLM
@ -229,12 +243,15 @@ class CIService:

        repo_context = "\n".join([r.to_context_string() for r in context_results])
        
-        # 4. Build Prompt
-        # Fetch conversation history (simplified: just current comment)
-        history = f"User: {query}"
-        prompt = build_chat_prompt(query, repo_context, history)
+        # 4. 获取 PR 差异作为上下文
+        diff_text = await self._get_pr_diff(repo, issue.get("number"))
+
+        # 5. 构建提示词
+        # 获取全部 PR 对话历史作为上下文
+        history = await self._get_conversation_history(repo, issue.get("number"))
+        prompt = build_chat_prompt(query, repo_context, history, diff=diff_text)
        
-        # 5. Generate Answer
+        # 6. 生成回答
        response = await self.llm_service.chat_completion_raw(
            messages=[{"role": "user", "content": prompt}],
            temperature=0.4
@ -242,10 +259,10 @@ class CIService:
        
        answer = response["content"]
        
-        # 6. Reply
-        # Append context info footer
+        # 7. 回复
+        # 附加上下文信息页脚
        footer_parts = [f"`{r.file_path}`" for r in context_results]
-        footer = "\n\n---\n*Context used: " + (", ".join(footer_parts) if footer_parts else "None (General knowledge used)") + "*"
+        footer = "\n\n---\n*本次回答参考了以下文件上下文: " + (", ".join(footer_parts) if footer_parts else "无（使用了模型通用知识）") + "*"
        await self._post_gitea_comment(repo, issue.get("number"), answer + footer)
        
        # 6. Record (Optional, maybe just log)
@ -444,6 +461,25 @@ class CIService:
            logger.error(f"Failed to fetch PR diff: {e}")
            return ""

+    async def _get_commit_diff(self, repo: Dict, before: str, after: str) -> str:
+        """Fetch the diff between two commits from the Gitea API.
+
+        Args:
+            repo: Repository payload; ``repo['owner']['login']`` and ``repo['name']`` are read.
+            before: Base commit reference (the caller passes the webhook's "before" value).
+            after: Head commit reference (the caller passes the webhook's "after" value).
+
+        Returns:
+            The response body on HTTP 200. Returns "" when Gitea is not configured,
+            on any other status code, or when the request raises; failures are logged.
+        """
+        # Same guard as the other Gitea helpers: without it the URL and the
+        # Authorization header would be built from unset settings.
+        if not settings.GITEA_HOST_URL or not settings.GITEA_BOT_TOKEN:
+            logger.error("GITEA_HOST_URL or GITEA_BOT_TOKEN not configured")
+            return ""
+
+        # NOTE(review): confirm the deployed Gitea version serves a raw diff for
+        # compare/{base}...{head}.diff under /api/v1 (and not only on the web route).
+        api_url = f"{settings.GITEA_HOST_URL}/api/v1/repos/{repo['owner']['login']}/{repo['name']}/compare/{before}...{after}.diff"
+        headers = {"Authorization": f"token {settings.GITEA_BOT_TOKEN}"}
+
+        try:
+            async with httpx.AsyncClient() as client:
+                resp = await client.get(api_url, headers=headers)
+                if resp.status_code != 200:
+                    logger.error(f"Failed to fetch commit diff: {resp.status_code} - {resp.text[:200]}")
+                    return ""
+                return resp.text
+        except Exception as e:
+            # Best effort: the caller substitutes a placeholder when no diff is available.
+            logger.error(f"Failed to fetch commit diff: {e}")
+            return ""
+
+
    async def _post_gitea_comment(self, repo: Dict, issue_number: int, body: str):
        if not settings.GITEA_HOST_URL or not settings.GITEA_BOT_TOKEN:
            logger.error("GITEA_HOST_URL or GITEA_BOT_TOKEN not configured")
@ -463,3 +499,32 @@ class CIService:
                     logger.error(f"Gitea API Error: {resp.status_code} - {resp.text}")
        except Exception as e:
            logger.error(f"Failed to post Gitea comment: {e}")
+
+    async def _get_conversation_history(self, repo: Dict, issue_number: int) -> str:
+        """Fetch the comments of an issue/PR from the Gitea API as plain text.
+
+        Args:
+            repo: Repository payload; ``repo['owner']['login']`` and ``repo['name']`` are read.
+            issue_number: Index of the issue or Pull Request whose comments are listed.
+
+        Returns:
+            One "<user>: <body>" line per comment, joined with newlines. Returns the
+            placeholder "无" when Gitea is not configured, there are no comments,
+            the status code is not 200, or the request raises; failures are logged.
+        """
+        if not settings.GITEA_HOST_URL or not settings.GITEA_BOT_TOKEN:
+            return "无"
+
+        api_url = f"{settings.GITEA_HOST_URL}/api/v1/repos/{repo['owner']['login']}/{repo['name']}/issues/{issue_number}/comments"
+        headers = {"Authorization": f"token {settings.GITEA_BOT_TOKEN}"}
+
+        try:
+            async with httpx.AsyncClient() as client:
+                resp = await client.get(api_url, headers=headers)
+                if resp.status_code != 200:
+                    logger.error(f"Failed to fetch conversation history: {resp.status_code} - {resp.text[:200]}")
+                    return "无"
+
+                history_parts = []
+                for c in resp.json():
+                    # `or {}` also covers a "user" key that is present but null; with
+                    # .get("user", {}) that case raised and discarded the whole history.
+                    author = c.get("user") or {}
+                    user = author.get("username") or author.get("login") or "未知用户"
+                    # A null body would otherwise be rendered as the literal text "None".
+                    body = c.get("body") or ""
+                    history_parts.append(f"{user}: {body}")
+
+                return "\n".join(history_parts) if history_parts else "无"
+        except Exception as e:
+            # Best effort: the prompt is still usable without history.
+            logger.error(f"Failed to fetch PR conversation history: {e}")
+            return "无"