feat: Improve CI service context retrieval by prioritizing sync diff and adjusting prompt generation for different PR actions.

This commit is contained in:
vinland100 2026-01-04 17:13:01 +08:00
parent 70bf6ccac2
commit 783093f0fd
2 changed files with 43 additions and 43 deletions

View File

@@ -86,13 +86,13 @@ PR_REVIEW_OUTPUT_FORMAT = """
PR_SYNC_TASK = """ PR_SYNC_TASK = """
用户向现有的 Pull Request 推送了新的提交 用户向现有的 Pull Request 推送了新的提交
请参考下方的PR 差异 / 变更内容 (Diff)中的 **全量差异 (Total Diff)** 以了解整个 PR 的背景 请分析下方的PR 差异 / 变更内容 (Diff)中的 **本次提交差异 (Recent Sync Diff)**
但请**重点分析并评审**其中的 **本次提交差异 (Recent Sync Diff)**
1. **安全分析**识别本次新提交是否引入了任何安全风险 1. **新功能与变更总结**在摘要中明确总结本次新提交引入的所有新功能UI 变更或逻辑调整即使没有安全问题
2. **逻辑与 Bug**寻找本次新提交中的边界情况或逻辑错误 2. **安全分析**识别本次新提交是否引入了任何安全风险
3. **回归检查**核实本次新提交是否解决了之前提到的疑虑或者是否破坏了已有逻辑 3. **逻辑与 Bug**寻找本次新提交中的边界情况或逻辑错误
4. **上下文检查**利用仓库上下文核实新代码是否有效 4. **回归检查**核实本次新提交是否解决了之前提到的疑虑或者是否破坏了已有逻辑
5. **上下文检查**利用仓库上下文核实新代码是否有效
请确保评审意见清晰指出哪些是针对本次新提交的反馈 请确保评审意见清晰指出哪些是针对本次新提交的反馈
如果本次同步未引入新问题且解决了旧有问题请在评审意见中说明若无任何新问题该部分可以简单说明未发现新增问题 如果本次同步未引入新问题且解决了旧有问题请在评审意见中说明若无任何新问题该部分可以简单说明未发现新增问题
@@ -153,12 +153,12 @@ def build_pr_review_prompt(diff: str, context: str, history: str = "无") -> str
output_format=PR_REVIEW_OUTPUT_FORMAT output_format=PR_REVIEW_OUTPUT_FORMAT
) )
def build_pr_sync_prompt(total_diff: str, sync_diff: str, context: str, history: str) -> str: def build_pr_sync_prompt(sync_diff: str, context: str, history: str) -> str:
combined_diff = f"--- [PR 全量差异 (Total Diff)] ---\n{total_diff}\n\n--- [本次提交差异 (Recent Sync Diff)] ---\n{sync_diff}" diff_content = f"--- [本次提交差异 (Recent Sync Diff)] ---\n{sync_diff}"
return PROMPT_TEMPLATE.format( return PROMPT_TEMPLATE.format(
system_prompt=REVIEW_SYSTEM_PROMPT, system_prompt=REVIEW_SYSTEM_PROMPT,
repo_context=context if context else "未检索到相关的仓库上下文。", repo_context=context if context else "未检索到相关的仓库上下文。",
diff_content=combined_diff, diff_content=diff_content,
conversation_history=history, conversation_history=history,
task_description=PR_SYNC_TASK, task_description=PR_SYNC_TASK,
output_format=PR_SYNC_OUTPUT_FORMAT output_format=PR_SYNC_OUTPUT_FORMAT

View File

@@ -85,51 +85,51 @@ class CIService:
logger.warning("Empty diff or failed to fetch diff. Skipping review.") logger.warning("Empty diff or failed to fetch diff. Skipping review.")
return return
# Determine sync diff if needed
sync_diff = ""
history = ""
if action == "synchronized":
# 增量同步模式:获取全部对话历史
history = await self._get_conversation_history(repo, pr_number)
# 获取本次同步的具体差异 (commit diff)
before_sha = payload.get("before")
after_sha = payload.get("after") or commit_sha
if not before_sha:
logger.info(f"🔍 Webhook payload missing 'before' SHA, searching database for previous sync head...")
before_sha = await self._get_previous_review_sha(project.id, pr_number)
if not before_sha or not await self._is_sha_valid(repo_path, str(before_sha)):
logger.warning(f"⚠️ Baseline SHA {before_sha} is missing or invalid. Falling back to {after_sha}^")
before_sha = f"{after_sha}^"
if before_sha and after_sha and before_sha != after_sha:
logger.info(f"📂 Fetching sync diff: {before_sha} -> {after_sha}")
sync_diff = await self._get_commit_diff(repo_path, str(before_sha), str(after_sha))
if not sync_diff or (hasattr(sync_diff, "strip") and sync_diff.strip() == ""):
if str(before_sha) == str(after_sha):
sync_diff = "(推送的 HEAD 与上次评审点相同,无新增差异)"
else:
sync_diff = "(本次同步虽有 SHA 变动,但代码内容与上次评审点完全一致。)"
# Retrieve context relevant to the diff # Retrieve context relevant to the diff
retriever = CodeRetriever( retriever = CodeRetriever(
collection_name=f"ci_{project.id}", collection_name=f"ci_{project.id}",
persist_directory=str(CI_VECTOR_DB_DIR / project.id) persist_directory=str(CI_VECTOR_DB_DIR / project.id)
) )
context_results = await retriever.retrieve(diff_text[:1000], top_k=5) # 优先使用 sync_diff 作为检索关键词,若为空(如初次 PR)则使用全量 diff
# 增加检索字符长度到 2000 以获得更多上下文
rag_query = sync_diff if sync_diff and "---" in sync_diff else diff_text
context_results = await retriever.retrieve(rag_query[:2000], top_k=5)
repo_context = "\n".join([r.to_context_string() for r in context_results]) repo_context = "\n".join([r.to_context_string() for r in context_results])
# 5. 生成评审 # 5. 生成评审
if action == "synchronized": if action == "synchronized":
# 增量同步模式:获取全部对话历史 prompt = build_pr_sync_prompt(sync_diff, repo_context, history)
history = await self._get_conversation_history(repo, pr_number)
# 获取本次同步的具体差异 (commit diff)
# 优先级 1: Webhook payload 提供的 before 记录
before_sha = payload.get("before")
after_sha = payload.get("after") or commit_sha
# 优先级 2: 如果 payload 缺失,尝试从数据库获取上一次评审点
if not before_sha:
logger.info(f"🔍 Webhook payload missing 'before' SHA, searching database for previous sync head...")
before_sha = await self._get_previous_review_sha(project.id, pr_number)
# 校验 & 优先级 3: 如果还是没有或 SHA 无效(强推后),回退到当前提交的父节点
if not before_sha or not await self._is_sha_valid(repo_path, str(before_sha)):
logger.warning(f"⚠️ Baseline SHA {before_sha} is missing or invalid (likely history rewrite). Falling back to {after_sha}^")
before_sha = f"{after_sha}^"
sync_diff = ""
if before_sha and after_sha and before_sha != after_sha:
logger.info(f"📂 Fetching sync diff: {before_sha} -> {after_sha}")
sync_diff = await self._get_commit_diff(repo_path, str(before_sha), str(after_sha))
if not sync_diff or (hasattr(sync_diff, "strip") and sync_diff.strip() == ""):
# 最终兜底说明
if str(before_sha) == str(after_sha):
sync_diff = "(推送的 HEAD 与上次评审点相同,无新增差异)"
else:
sync_diff = "(本次同步虽有 SHA 变动,但代码内容与上次评审点完全一致。可能是进行了软重置后重新提交、修改了提交信息或进行不带内容的强推。)"
prompt = build_pr_sync_prompt(diff_text, sync_diff, repo_context, history)
else: else:
# 新建 PR 模式:历史为空
history = ""
prompt = build_pr_review_prompt(diff_text, repo_context, history) prompt = build_pr_review_prompt(diff_text, repo_context, history)
# Call LLM # Call LLM