From 783093f0fd0b981f87aa2fb5dc6d6180ac6d8ef3 Mon Sep 17 00:00:00 2001 From: vinland100 Date: Sun, 4 Jan 2026 17:13:01 +0800 Subject: [PATCH] feat: Improve CI service context retrieval by prioritizing sync diff and adjusting prompt generation for different PR actions. --- backend/app/core/ci_prompts.py | 18 ++++---- backend/app/services/ci_service.py | 68 +++++++++++++++--------------- 2 files changed, 43 insertions(+), 43 deletions(-) diff --git a/backend/app/core/ci_prompts.py b/backend/app/core/ci_prompts.py index 99e48fe..b602c2d 100644 --- a/backend/app/core/ci_prompts.py +++ b/backend/app/core/ci_prompts.py @@ -86,13 +86,13 @@ PR_REVIEW_OUTPUT_FORMAT = """ PR_SYNC_TASK = """ 用户向现有的 Pull Request 推送了新的提交。 -请参考下方的“PR 差异 / 变更内容 (Diff)”中的 **全量差异 (Total Diff)** 以了解整个 PR 的背景, -但请**重点分析并评审**其中的 **本次提交差异 (Recent Sync Diff)**。 +请分析下方的“PR 差异 / 变更内容 (Diff)”中的 **本次提交差异 (Recent Sync Diff)**。 -1. **安全分析**:识别本次新提交是否引入了任何安全风险。 -2. **逻辑与 Bug**:寻找本次新提交中的边界情况或逻辑错误。 -3. **回归检查**:核实本次新提交是否解决了之前提到的疑虑,或者是否破坏了已有逻辑。 -4. **上下文检查**:利用仓库上下文核实新代码是否有效。 +1. **新功能与变更总结**:在摘要中明确总结本次新提交引入的所有新功能、UI 变更或逻辑调整(即使没有安全问题)。 +2. **安全分析**:识别本次新提交是否引入了任何安全风险。 +3. **逻辑与 Bug**:寻找本次新提交中的边界情况或逻辑错误。 +4. **回归检查**:核实本次新提交是否解决了之前提到的疑虑,或者是否破坏了已有逻辑。 +5. **上下文检查**:利用仓库上下文核实新代码是否有效。 请确保评审意见清晰指出哪些是针对本次新提交的反馈。 如果本次同步未引入新问题且解决了旧有问题,请在“评审意见”中说明。若无任何新问题,该部分可以简单说明“未发现新增问题”。 @@ -153,12 +153,12 @@ def build_pr_review_prompt(diff: str, context: str, history: str = "无") -> str output_format=PR_REVIEW_OUTPUT_FORMAT ) -def build_pr_sync_prompt(total_diff: str, sync_diff: str, context: str, history: str) -> str: - combined_diff = f"--- [PR 全量差异 (Total Diff)] ---\n{total_diff}\n\n--- [本次提交差异 (Recent Sync Diff)] ---\n{sync_diff}" +def build_pr_sync_prompt(sync_diff: str, context: str, history: str) -> str: + diff_content = f"--- [本次提交差异 (Recent Sync Diff)] ---\n{sync_diff}" return PROMPT_TEMPLATE.format( system_prompt=REVIEW_SYSTEM_PROMPT, repo_context=context if context else "未检索到相关的仓库上下文。", - diff_content=combined_diff, + diff_content=diff_content, conversation_history=history, task_description=PR_SYNC_TASK, output_format=PR_SYNC_OUTPUT_FORMAT diff --git a/backend/app/services/ci_service.py b/backend/app/services/ci_service.py index ddfebb1..f53c080 100644 --- a/backend/app/services/ci_service.py +++ b/backend/app/services/ci_service.py @@ -85,51 +85,51 @@ class CIService: logger.warning("Empty diff or failed to fetch diff. Skipping review.") return + # Determine sync diff if needed + sync_diff = "" + history = "" + if action == "synchronized": + # 增量同步模式:获取全部对话历史 + history = await self._get_conversation_history(repo, pr_number) + + # 获取本次同步的具体差异 (commit diff) + before_sha = payload.get("before") + after_sha = payload.get("after") or commit_sha + + if not before_sha: + logger.info(f"🔍 Webhook payload missing 'before' SHA, searching database for previous sync head...") + before_sha = await self._get_previous_review_sha(project.id, pr_number) + + if not before_sha or not await self._is_sha_valid(repo_path, str(before_sha)): + logger.warning(f"⚠️ Baseline SHA {before_sha} is missing or invalid. Falling back to {after_sha}^") + before_sha = f"{after_sha}^" + + if before_sha and after_sha and before_sha != after_sha: + logger.info(f"📂 Fetching sync diff: {before_sha} -> {after_sha}") + sync_diff = await self._get_commit_diff(repo_path, str(before_sha), str(after_sha)) + + if not sync_diff or (hasattr(sync_diff, "strip") and sync_diff.strip() == ""): + if str(before_sha) == str(after_sha): + sync_diff = "(推送的 HEAD 与上次评审点相同,无新增差异)" + else: + sync_diff = "(本次同步虽有 SHA 变动,但代码内容与上次评审点完全一致。)" + # Retrieve context relevant to the diff retriever = CodeRetriever( collection_name=f"ci_{project.id}", persist_directory=str(CI_VECTOR_DB_DIR / project.id) ) - context_results = await retriever.retrieve(diff_text[:1000], top_k=5) + # 优先使用 sync_diff 作为检索关键词,若为空(如初次 PR)则使用全量 diff + # 增加检索字符长度到 2000 以获得更多上下文 + rag_query = sync_diff if sync_diff and "---" in sync_diff else diff_text + context_results = await retriever.retrieve(rag_query[:2000], top_k=5) repo_context = "\n".join([r.to_context_string() for r in context_results]) # 5. 生成评审 if action == "synchronized": - # 增量同步模式:获取全部对话历史 - history = await self._get_conversation_history(repo, pr_number) - - # 获取本次同步的具体差异 (commit diff) - # 优先级 1: Webhook payload 提供的 before 记录 - before_sha = payload.get("before") - after_sha = payload.get("after") or commit_sha - - # 优先级 2: 如果 payload 缺失,尝试从数据库获取上一次评审点 - if not before_sha: - logger.info(f"🔍 Webhook payload missing 'before' SHA, searching database for previous sync head...") - before_sha = await self._get_previous_review_sha(project.id, pr_number) - - # 校验 & 优先级 3: 如果还是没有或 SHA 无效(强推后),回退到当前提交的父节点 - if not before_sha or not await self._is_sha_valid(repo_path, str(before_sha)): - logger.warning(f"⚠️ Baseline SHA {before_sha} is missing or invalid (likely history rewrite). Falling back to {after_sha}^") - before_sha = f"{after_sha}^" - - sync_diff = "" - if before_sha and after_sha and before_sha != after_sha: - logger.info(f"📂 Fetching sync diff: {before_sha} -> {after_sha}") - sync_diff = await self._get_commit_diff(repo_path, str(before_sha), str(after_sha)) - - if not sync_diff or (hasattr(sync_diff, "strip") and sync_diff.strip() == ""): - # 最终兜底说明 - if str(before_sha) == str(after_sha): - sync_diff = "(推送的 HEAD 与上次评审点相同,无新增差异)" - else: - sync_diff = "(本次同步虽有 SHA 变动,但代码内容与上次评审点完全一致。可能是进行了软重置后重新提交、修改了提交信息或进行不带内容的强推。)" - - prompt = build_pr_sync_prompt(diff_text, sync_diff, repo_context, history) + prompt = build_pr_sync_prompt(sync_diff, repo_context, history) else: - # 新建 PR 模式:历史为空 - history = "" prompt = build_pr_review_prompt(diff_text, repo_context, history) # Call LLM