feat: add PR synchronization prompt and incorporate commit diffs and conversation history into PR review and chat.
This commit is contained in:
parent
23ac263d76
commit
d01d3850df
|
|
@ -10,121 +10,143 @@ from typing import Optional
|
|||
# Base Template
|
||||
# -----------------------------------------------------------------------------
|
||||
# strict structure to ensure the LLM has all necessary context without hallucinations.
|
||||
# -----------------------------------------------------------------------------
|
||||
# 基础模板
|
||||
# -----------------------------------------------------------------------------
|
||||
# 严格的结构,确保 LLM 拥有所有必要的上下文而不会产生幻觉。
|
||||
PROMPT_TEMPLATE = """
|
||||
### ROLE
|
||||
### 角色
|
||||
{system_prompt}
|
||||
|
||||
### CONTEXT FROM REPOSITORY
|
||||
The following code snippets were retrieved from the existing repository to provide context:
|
||||
### 仓库上下文
|
||||
系统从现有仓库中检索了以下代码片段,以提供背景信息:
|
||||
{repo_context}
|
||||
|
||||
### PR DIFF / CHANGES
|
||||
The following are the actual changes in this Pull Request (or specific commit):
|
||||
### PR 差异 / 变更内容 (Diff)
|
||||
以下是本次 Pull Request (或特定提交) 中的实际变更:
|
||||
{diff_content}
|
||||
|
||||
### CONVERSATION HISTORY
|
||||
### 对话历史
|
||||
{conversation_history}
|
||||
|
||||
### TASK
|
||||
### 任务
|
||||
{task_description}
|
||||
|
||||
### OUTPUT FORMAT
|
||||
### 输出格式
|
||||
{output_format}
|
||||
"""
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# 1. PR Review Prompts
|
||||
# 1. PR 评审提示词
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
REVIEW_SYSTEM_PROMPT = """
|
||||
You are DeepAudit Bot, an expert Senior Security Engineer and Code Reviewer.
|
||||
Your goal is to identify security vulnerabilities, potential bugs, and code quality issues in the provided Pull Request changes.
|
||||
You must ground your analysis in the provided Repository Context to understand how the changes impact the broader system.
|
||||
你是 AI Code Review Bot,一位资深的安全工程师和代码审查专家。
|
||||
你的目标是识别提供的 Pull Request 变更中的安全漏洞、潜在 Bug 和代码质量问题。
|
||||
你必须基于提供的“仓库上下文”进行分析,以理解变更对整个系统的影响。
|
||||
"""
|
||||
|
||||
PR_REVIEW_TASK = """
|
||||
Analyze the "PR DIFF / CHANGES" above, considering the "CONTEXT FROM REPOSITORY".
|
||||
分析上方的“PR 差异 / 变更内容 (Diff)”,并结合“仓库上下文”。
|
||||
|
||||
1. **Security Analysis**: Identify any security risks (e.g., Injection, Auth bypass, Hardcoded secrets, etc.).
|
||||
2. **Logic & Bugs**: Find edge cases or logic errors introduced in this change.
|
||||
3. **Quality & Performance**: Point out maintainability issues or performance bottlenecks.
|
||||
4. **Context check**: Use the repo context to verify if function calls or contract changes are valid.
|
||||
1. **安全分析**:识别任何安全风险(例如:注入、权限绕过、硬编码密钥等)。
|
||||
2. **逻辑与 Bug**:寻找本次变更引入的边界情况或逻辑错误。
|
||||
3. **质量与性能**:指出可维护性问题或性能瓶颈。
|
||||
4. **上下文检查**:利用仓库上下文核实函数调用或代码变更是否有效且符合现有架构。
|
||||
|
||||
Ignore minor formatting/linting issues unless they severely impact readability.
|
||||
除非严重影响可读性,否则请忽略细微的格式或 Lint 问题。
|
||||
"""
|
||||
|
||||
PR_REVIEW_OUTPUT_FORMAT = """
|
||||
Output ONLY a Markdown response in the following format:
|
||||
仅输出 Markdown 格式的响应,格式如下:
|
||||
|
||||
## 🔍 DeepAudit Review Summary
|
||||
<Short summary of the changes and overall risk level>
|
||||
## 🔍 DeepAudit 评审摘要
|
||||
<简要总结变更内容及整体风险等级>
|
||||
|
||||
## 🛡️ Key Issues Found
|
||||
### [Severity: High/Medium/Low] <Title of Issue>
|
||||
- **File**: `<filepath>`
|
||||
- **Problem**: <Description>
|
||||
- **Context**: <Why this is an issue based on repo context>
|
||||
- **Suggestion**:
|
||||
```<language>
|
||||
<code fix>
|
||||
## 🛡️ 发现评审意见
|
||||
### [严重程度: 高/中/低] <意见标题>
|
||||
- **文件**: `<文件路径>`
|
||||
- **意见**: <描述具体问题或改进建议>
|
||||
- **上下文**: <基于仓库上下文说明为什么这值得关注>
|
||||
- **改进建议**:
|
||||
```<语言>
|
||||
<修复建议代码>
|
||||
```
|
||||
|
||||
... (Repeat for other issues)
|
||||
... (按需重复上述结构)
|
||||
|
||||
## 💡 Improvements
|
||||
- <Bullet points for minor improvements>
|
||||
## 💡 优化建议
|
||||
- <细微改进的列表>
|
||||
"""
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# 2. Incremental (Sync) Review Prompts
|
||||
# 2. 增量 (Sync) 评审提示词
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
PR_SYNC_TASK = """
|
||||
The user has pushed new commits to the existing Pull Request.
|
||||
Focus ONLY on the changes in "PR DIFF / CHANGES" (which are the new commits).
|
||||
Check if these new changes introduce any new issues or fail to address previous concerns (visible in history).
|
||||
用户向现有的 Pull Request 推送了新的提交。
|
||||
请参考下方的“PR 差异 / 变更内容 (Diff)”中的 **全量差异 (Total Diff)** 以了解整个 PR 的背景,
|
||||
但请**重点分析并评审**其中的 **本次提交差异 (Recent Sync Diff)**。
|
||||
|
||||
1. **安全分析**:识别本次新提交是否引入了任何安全风险。
|
||||
2. **逻辑与 Bug**:寻找本次新提交中的边界情况或逻辑错误。
|
||||
3. **回归检查**:核实本次新提交是否解决了之前提到的疑虑,或者是否破坏了已有逻辑。
|
||||
4. **上下文检查**:利用仓库上下文核实新代码是否有效。
|
||||
|
||||
请确保评审意见清晰指出哪些是针对本次新提交的反馈。
|
||||
"""
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# 3. Chat / Q&A Prompts
|
||||
# 3. 聊天 / 问答提示词
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
CHAT_SYSTEM_PROMPT = """
|
||||
You are DeepAudit Bot, a helpful AI assistant integrated into the CI/CD workflow.
|
||||
You are chatting with a developer in a Pull Request comment thread.
|
||||
The user has mentioned you (@ai-bot) to ask a question or request clarification.
|
||||
You have access to the relevant snippets of the codebase via RAG (Retrieval Augmented Generation).
|
||||
你是 DeepAudit Bot,一个集成在 CI/CD 工作流中的得力 AI 助手。
|
||||
你正在 PR 评论区与开发者交流。
|
||||
用户提到了你 (@ai-bot) 以询问问题或请求澄清。
|
||||
你可以通过 RAG (检索增强生成) 访问代码库的相关片段,并能看到当前的 PR 差异。
|
||||
"""
|
||||
|
||||
BOT_CHAT_TASK = """
|
||||
Answer the user's question or respond to their comment found in "CONVERSATION HISTORY".
|
||||
Use the "CONTEXT FROM REPOSITORY" to provide accurate, specific answers about the code.
|
||||
If the context doesn't contain the answer, admit it or provide a best-effort answer based on general knowledge.
|
||||
回答用户在“对话历史”中提出的问题或评论。
|
||||
利用“仓库上下文”和“PR 差异”来提供关于代码的准确、具体的回答。
|
||||
如果上下文中不包含答案,请如实告知,或基于通用知识提供最佳建议。
|
||||
|
||||
Do NOT repeat the user's question. Go straight to the answer.
|
||||
请不要重复用户的问题,直接开始回答。
|
||||
"""
|
||||
|
||||
BOT_CHAT_OUTPUT_FORMAT = """
|
||||
Markdown text. Be concise but technical.
|
||||
Markdown 格式。简洁且具有技术专业性。
|
||||
"""
|
||||
|
||||
def build_pr_review_prompt(diff: str, context: str, history: str = "无") -> str:
    """Build the prompt used for a full Pull Request review.

    Args:
        diff: Diff text of the Pull Request; inserted verbatim into the
            diff section of the template.
        context: Repository snippets retrieved for this diff. When empty,
            a placeholder sentence is inserted instead.
        history: Prior conversation on the Pull Request. When empty, the
            same placeholder as the default value is inserted.

    Returns:
        ``PROMPT_TEMPLATE`` filled in with the review system prompt, the
        review task description and the review output format.
    """
    return PROMPT_TEMPLATE.format(
        system_prompt=REVIEW_SYSTEM_PROMPT,
        repo_context=context if context else "未检索到相关的仓库上下文。",
        diff_content=diff,
        # Callers may pass "" explicitly, which bypasses the parameter
        # default; fall back to the placeholder so the history section of
        # the prompt is never left blank.
        conversation_history=history if history else "无",
        task_description=PR_REVIEW_TASK,
        output_format=PR_REVIEW_OUTPUT_FORMAT,
    )
|
||||
|
||||
def build_chat_prompt(user_query: str, context: str, history: str) -> str:
|
||||
# Note: user_query is conceptually part of the history/task
|
||||
def build_pr_sync_prompt(total_diff: str, sync_diff: str, context: str, history: str) -> str:
    """Build the prompt used when new commits are pushed to an existing PR.

    The diff section of the template receives two labelled parts: the
    total diff of the Pull Request followed by the diff of the most recent
    push, separated by a blank line.

    Args:
        total_diff: Diff text of the whole Pull Request.
        sync_diff: Diff text of the newly pushed commits.
        context: Retrieved repository snippets. When empty, a placeholder
            sentence is inserted instead.
        history: Conversation history text; inserted as given.

    Returns:
        ``PROMPT_TEMPLATE`` filled in with the review system prompt, the
        sync task description and the review output format.
    """
    labelled_sections = (
        f"--- [PR 全量差异 (Total Diff)] ---\n{total_diff}",
        f"--- [本次提交差异 (Recent Sync Diff)] ---\n{sync_diff}",
    )
    merged_diff = "\n\n".join(labelled_sections)

    if context:
        context_section = context
    else:
        context_section = "未检索到相关的仓库上下文。"

    return PROMPT_TEMPLATE.format(
        system_prompt=REVIEW_SYSTEM_PROMPT,
        repo_context=context_section,
        diff_content=merged_diff,
        conversation_history=history,
        task_description=PR_SYNC_TASK,
        output_format=PR_REVIEW_OUTPUT_FORMAT,
    )
|
||||
|
||||
def build_chat_prompt(user_query: str, context: str, history: str, diff: str = "暂无相关 Diff") -> str:
    """Build the prompt used to answer a question in a PR comment thread.

    Args:
        user_query: The question or comment addressed to the bot. It is
            appended to the chat task description, since it is
            conceptually part of the conversation history / task.
        context: Retrieved repository snippets. When empty, a placeholder
            sentence is inserted instead.
        history: Conversation history text; inserted as given.
        diff: Diff text of the Pull Request. When empty, the same
            placeholder as the default value is inserted.

    Returns:
        ``PROMPT_TEMPLATE`` filled in with the chat system prompt, the
        chat task description (plus the user question) and the chat
        output format.
    """
    return PROMPT_TEMPLATE.format(
        system_prompt=CHAT_SYSTEM_PROMPT,
        repo_context=context if context else "未检索到相关的仓库上下文。",
        # An explicitly passed "" (e.g. from a failed diff fetch) bypasses
        # the parameter default; fall back to the placeholder so the diff
        # section of the prompt is never left blank.
        diff_content=diff if diff else "暂无相关 Diff",
        conversation_history=history,
        task_description=BOT_CHAT_TASK + f"\n\n用户问题: {user_query}",
        output_format=BOT_CHAT_OUTPUT_FORMAT,
    )
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@ from app.models.project import Project
|
|||
from app.models.ci import PRReview
|
||||
from app.core.ci_prompts import (
|
||||
build_pr_review_prompt,
|
||||
build_pr_sync_prompt,
|
||||
build_chat_prompt,
|
||||
PR_SYNC_TASK
|
||||
)
|
||||
|
|
@ -93,13 +94,26 @@ class CIService:
|
|||
context_results = await retriever.retrieve(diff_text[:1000], top_k=5)
|
||||
repo_context = "\n".join([r.to_context_string() for r in context_results])
|
||||
|
||||
# 5. Generate Review
|
||||
history = ""
|
||||
|
||||
# 5. 生成评审
|
||||
if action == "synchronize":
|
||||
prompt = build_pr_review_prompt(diff_text, repo_context, history)
|
||||
prompt += f"\n\nNOTE: {PR_SYNC_TASK}"
|
||||
# 增量同步模式:获取全部对话历史
|
||||
history = await self._get_conversation_history(repo, pr_number)
|
||||
|
||||
# 获取本次同步的具体差异 (commit diff)
|
||||
before_sha = payload.get("before")
|
||||
after_sha = payload.get("after")
|
||||
|
||||
sync_diff = ""
|
||||
if before_sha and after_sha:
|
||||
sync_diff = await self._get_commit_diff(repo, before_sha, after_sha)
|
||||
|
||||
if not sync_diff:
|
||||
sync_diff = "(无法获取本次提交的具体差异,请参考全量差异)"
|
||||
|
||||
prompt = build_pr_sync_prompt(diff_text, sync_diff, repo_context, history)
|
||||
else:
|
||||
# 新建 PR 模式:历史为空
|
||||
history = ""
|
||||
prompt = build_pr_review_prompt(diff_text, repo_context, history)
|
||||
|
||||
# Call LLM
|
||||
|
|
@ -229,12 +243,15 @@ class CIService:
|
|||
|
||||
repo_context = "\n".join([r.to_context_string() for r in context_results])
|
||||
|
||||
# 4. Build Prompt
|
||||
# Fetch conversation history (simplified: just current comment)
|
||||
history = f"User: {query}"
|
||||
prompt = build_chat_prompt(query, repo_context, history)
|
||||
# 4. 获取 PR 差异作为上下文
|
||||
diff_text = await self._get_pr_diff(repo, issue.get("number"))
|
||||
|
||||
# 5. 构建提示词
|
||||
# 获取全部 PR 对话历史作为上下文
|
||||
history = await self._get_conversation_history(repo, issue.get("number"))
|
||||
prompt = build_chat_prompt(query, repo_context, history, diff=diff_text)
|
||||
|
||||
# 5. Generate Answer
|
||||
# 6. 生成回答
|
||||
response = await self.llm_service.chat_completion_raw(
|
||||
messages=[{"role": "user", "content": prompt}],
|
||||
temperature=0.4
|
||||
|
|
@ -242,10 +259,10 @@ class CIService:
|
|||
|
||||
answer = response["content"]
|
||||
|
||||
# 6. Reply
|
||||
# Append context info footer
|
||||
# 7. 回复
|
||||
# 附加上下文信息页脚
|
||||
footer_parts = [f"`{r.file_path}`" for r in context_results]
|
||||
footer = "\n\n---\n*Context used: " + (", ".join(footer_parts) if footer_parts else "None (General knowledge used)") + "*"
|
||||
footer = "\n\n---\n*本次回答参考了以下文件上下文: " + (", ".join(footer_parts) if footer_parts else "无(使用了模型通用知识)") + "*"
|
||||
await self._post_gitea_comment(repo, issue.get("number"), answer + footer)
|
||||
|
||||
# 6. Record (Optional, maybe just log)
|
||||
|
|
@ -444,6 +461,25 @@ class CIService:
|
|||
logger.error(f"Failed to fetch PR diff: {e}")
|
||||
return ""
|
||||
|
||||
async def _get_commit_diff(self, repo: Dict, before: str, after: str) -> str:
    """
    Fetch the diff between two commits from the Gitea API.

    Args:
        repo: Repository payload; ``repo['owner']['login']`` and
            ``repo['name']`` are read to build the request URL.
        before: Commit SHA on the left side of the comparison.
        after: Commit SHA on the right side of the comparison.

    Returns:
        The response body text when the API answers HTTP 200. An empty
        string when Gitea is not configured, when either SHA is missing,
        on any other status code, or when the request raises.
    """
    # Same configuration guard as the other Gitea helpers in this class:
    # without host/token the request cannot succeed, so fail fast.
    if not settings.GITEA_HOST_URL or not settings.GITEA_BOT_TOKEN:
        logger.error("GITEA_HOST_URL or GITEA_BOT_TOKEN not configured")
        return ""

    # Nothing to compare without both ends of the range.
    if not before or not after:
        return ""

    # NOTE(review): verify that the deployed Gitea version serves a raw
    # diff for the ".diff" suffix on this compare endpoint.
    api_url = f"{settings.GITEA_HOST_URL}/api/v1/repos/{repo['owner']['login']}/{repo['name']}/compare/{before}...{after}.diff"
    headers = {"Authorization": f"token {settings.GITEA_BOT_TOKEN}"}

    try:
        async with httpx.AsyncClient() as client:
            resp = await client.get(api_url, headers=headers)
            if resp.status_code == 200:
                return resp.text
            # Only the first 200 characters of the body are logged.
            logger.error(f"Failed to fetch commit diff: {resp.status_code} - {resp.text[:200]}")
            return ""
    except Exception as e:
        # Best-effort: the caller substitutes a placeholder for "".
        logger.error(f"Failed to fetch commit diff: {e}")
        return ""
|
||||
|
||||
async def _post_gitea_comment(self, repo: Dict, issue_number: int, body: str):
|
||||
if not settings.GITEA_HOST_URL or not settings.GITEA_BOT_TOKEN:
|
||||
logger.error("GITEA_HOST_URL or GITEA_BOT_TOKEN not configured")
|
||||
|
|
@ -463,3 +499,32 @@ class CIService:
|
|||
logger.error(f"Gitea API Error: {resp.status_code} - {resp.text}")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to post Gitea comment: {e}")
|
||||
|
||||
async def _get_conversation_history(self, repo: Dict, issue_number: int) -> str:
    """
    Fetch the conversation history (comments) from the Gitea API.

    Args:
        repo: Repository payload; ``repo['owner']['login']`` and
            ``repo['name']`` are read to build the request URL.
        issue_number: Issue / Pull Request number whose comments are
            requested.

    Returns:
        One ``"<user>: <body>"`` line per comment, joined with newlines.
        The placeholder "无" is returned when Gitea is not configured,
        when there are no comments, on a non-200 response, or when the
        request or parsing raises.
    """
    if not settings.GITEA_HOST_URL or not settings.GITEA_BOT_TOKEN:
        return "无"

    api_url = f"{settings.GITEA_HOST_URL}/api/v1/repos/{repo['owner']['login']}/{repo['name']}/issues/{issue_number}/comments"
    headers = {"Authorization": f"token {settings.GITEA_BOT_TOKEN}"}

    try:
        async with httpx.AsyncClient() as client:
            resp = await client.get(api_url, headers=headers)
            if resp.status_code != 200:
                # Only the first 200 characters of the body are logged.
                logger.error(f"Failed to fetch conversation history: {resp.status_code} - {resp.text[:200]}")
                return "无"

            history_parts = []
            for c in resp.json():
                # "user" may be present but null; `or {}` keeps one such
                # comment from raising and discarding the whole history.
                author = c.get("user") or {}
                user = author.get("username") or author.get("login") or "未知用户"
                # A null body would otherwise be rendered as "None".
                body = c.get("body") or ""
                history_parts.append(f"{user}: {body}")

            return "\n".join(history_parts) if history_parts else "无"
    except Exception as e:
        # Best-effort: history is optional context for the prompt.
        logger.error(f"Failed to fetch PR conversation history: {e}")
        return "无"
|
||||
|
|
|
|||
Loading…
Reference in New Issue