feat: Improve CI service context retrieval by prioritizing sync diff and adjusting prompt generation for different PR actions.
This commit is contained in:
parent
70bf6ccac2
commit
783093f0fd
|
|
@@ -86,13 +86,13 @@ PR_REVIEW_OUTPUT_FORMAT = """
|
||||||
|
|
||||||
PR_SYNC_TASK = """
|
PR_SYNC_TASK = """
|
||||||
用户向现有的 Pull Request 推送了新的提交。
|
用户向现有的 Pull Request 推送了新的提交。
|
||||||
请参考下方的“PR 差异 / 变更内容 (Diff)”中的 **全量差异 (Total Diff)** 以了解整个 PR 的背景,
|
请分析下方的“PR 差异 / 变更内容 (Diff)”中的 **本次提交差异 (Recent Sync Diff)**。
|
||||||
但请**重点分析并评审**其中的 **本次提交差异 (Recent Sync Diff)**。
|
|
||||||
|
|
||||||
1. **安全分析**:识别本次新提交是否引入了任何安全风险。
|
1. **新功能与变更总结**:在摘要中明确总结本次新提交引入的所有新功能、UI 变更或逻辑调整(即使没有安全问题)。
|
||||||
2. **逻辑与 Bug**:寻找本次新提交中的边界情况或逻辑错误。
|
2. **安全分析**:识别本次新提交是否引入了任何安全风险。
|
||||||
3. **回归检查**:核实本次新提交是否解决了之前提到的疑虑,或者是否破坏了已有逻辑。
|
3. **逻辑与 Bug**:寻找本次新提交中的边界情况或逻辑错误。
|
||||||
4. **上下文检查**:利用仓库上下文核实新代码是否有效。
|
4. **回归检查**:核实本次新提交是否解决了之前提到的疑虑,或者是否破坏了已有逻辑。
|
||||||
|
5. **上下文检查**:利用仓库上下文核实新代码是否有效。
|
||||||
|
|
||||||
请确保评审意见清晰指出哪些是针对本次新提交的反馈。
|
请确保评审意见清晰指出哪些是针对本次新提交的反馈。
|
||||||
如果本次同步未引入新问题且解决了旧有问题,请在“评审意见”中说明。若无任何新问题,该部分可以简单说明“未发现新增问题”。
|
如果本次同步未引入新问题且解决了旧有问题,请在“评审意见”中说明。若无任何新问题,该部分可以简单说明“未发现新增问题”。
|
||||||
|
|
@@ -153,12 +153,12 @@ def build_pr_review_prompt(diff: str, context: str, history: str = "无") -> str
|
||||||
output_format=PR_REVIEW_OUTPUT_FORMAT
|
output_format=PR_REVIEW_OUTPUT_FORMAT
|
||||||
)
|
)
|
||||||
|
|
||||||
def build_pr_sync_prompt(total_diff: str, sync_diff: str, context: str, history: str) -> str:
|
def build_pr_sync_prompt(sync_diff: str, context: str, history: str) -> str:
|
||||||
combined_diff = f"--- [PR 全量差异 (Total Diff)] ---\n{total_diff}\n\n--- [本次提交差异 (Recent Sync Diff)] ---\n{sync_diff}"
|
diff_content = f"--- [本次提交差异 (Recent Sync Diff)] ---\n{sync_diff}"
|
||||||
return PROMPT_TEMPLATE.format(
|
return PROMPT_TEMPLATE.format(
|
||||||
system_prompt=REVIEW_SYSTEM_PROMPT,
|
system_prompt=REVIEW_SYSTEM_PROMPT,
|
||||||
repo_context=context if context else "未检索到相关的仓库上下文。",
|
repo_context=context if context else "未检索到相关的仓库上下文。",
|
||||||
diff_content=combined_diff,
|
diff_content=diff_content,
|
||||||
conversation_history=history,
|
conversation_history=history,
|
||||||
task_description=PR_SYNC_TASK,
|
task_description=PR_SYNC_TASK,
|
||||||
output_format=PR_SYNC_OUTPUT_FORMAT
|
output_format=PR_SYNC_OUTPUT_FORMAT
|
||||||
|
|
|
||||||
|
|
@@ -85,51 +85,51 @@ class CIService:
|
||||||
logger.warning("Empty diff or failed to fetch diff. Skipping review.")
|
logger.warning("Empty diff or failed to fetch diff. Skipping review.")
|
||||||
return
|
return
|
||||||
|
|
||||||
|
# Determine sync diff if needed
|
||||||
|
sync_diff = ""
|
||||||
|
history = ""
|
||||||
|
if action == "synchronized":
|
||||||
|
# 增量同步模式:获取全部对话历史
|
||||||
|
history = await self._get_conversation_history(repo, pr_number)
|
||||||
|
|
||||||
|
# 获取本次同步的具体差异 (commit diff)
|
||||||
|
before_sha = payload.get("before")
|
||||||
|
after_sha = payload.get("after") or commit_sha
|
||||||
|
|
||||||
|
if not before_sha:
|
||||||
|
logger.info(f"🔍 Webhook payload missing 'before' SHA, searching database for previous sync head...")
|
||||||
|
before_sha = await self._get_previous_review_sha(project.id, pr_number)
|
||||||
|
|
||||||
|
if not before_sha or not await self._is_sha_valid(repo_path, str(before_sha)):
|
||||||
|
logger.warning(f"⚠️ Baseline SHA {before_sha} is missing or invalid. Falling back to {after_sha}^")
|
||||||
|
before_sha = f"{after_sha}^"
|
||||||
|
|
||||||
|
if before_sha and after_sha and before_sha != after_sha:
|
||||||
|
logger.info(f"📂 Fetching sync diff: {before_sha} -> {after_sha}")
|
||||||
|
sync_diff = await self._get_commit_diff(repo_path, str(before_sha), str(after_sha))
|
||||||
|
|
||||||
|
if not sync_diff or (hasattr(sync_diff, "strip") and sync_diff.strip() == ""):
|
||||||
|
if str(before_sha) == str(after_sha):
|
||||||
|
sync_diff = "(推送的 HEAD 与上次评审点相同,无新增差异)"
|
||||||
|
else:
|
||||||
|
sync_diff = "(本次同步虽有 SHA 变动,但代码内容与上次评审点完全一致。)"
|
||||||
|
|
||||||
# Retrieve context relevant to the diff
|
# Retrieve context relevant to the diff
|
||||||
retriever = CodeRetriever(
|
retriever = CodeRetriever(
|
||||||
collection_name=f"ci_{project.id}",
|
collection_name=f"ci_{project.id}",
|
||||||
persist_directory=str(CI_VECTOR_DB_DIR / project.id)
|
persist_directory=str(CI_VECTOR_DB_DIR / project.id)
|
||||||
)
|
)
|
||||||
|
|
||||||
context_results = await retriever.retrieve(diff_text[:1000], top_k=5)
|
# 优先使用 sync_diff 作为检索关键词,若为空(如初次 PR)则使用全量 diff
|
||||||
|
# 增加检索字符长度到 2000 以获得更多上下文
|
||||||
|
rag_query = sync_diff if sync_diff and "---" in sync_diff else diff_text
|
||||||
|
context_results = await retriever.retrieve(rag_query[:2000], top_k=5)
|
||||||
repo_context = "\n".join([r.to_context_string() for r in context_results])
|
repo_context = "\n".join([r.to_context_string() for r in context_results])
|
||||||
|
|
||||||
# 5. 生成评审
|
# 5. 生成评审
|
||||||
if action == "synchronized":
|
if action == "synchronized":
|
||||||
# 增量同步模式:获取全部对话历史
|
prompt = build_pr_sync_prompt(sync_diff, repo_context, history)
|
||||||
history = await self._get_conversation_history(repo, pr_number)
|
|
||||||
|
|
||||||
# 获取本次同步的具体差异 (commit diff)
|
|
||||||
# 优先级 1: Webhook payload 提供的 before 记录
|
|
||||||
before_sha = payload.get("before")
|
|
||||||
after_sha = payload.get("after") or commit_sha
|
|
||||||
|
|
||||||
# 优先级 2: 如果 payload 缺失,尝试从数据库获取上一次评审点
|
|
||||||
if not before_sha:
|
|
||||||
logger.info(f"🔍 Webhook payload missing 'before' SHA, searching database for previous sync head...")
|
|
||||||
before_sha = await self._get_previous_review_sha(project.id, pr_number)
|
|
||||||
|
|
||||||
# 校验 & 优先级 3: 如果还是没有或 SHA 无效(强推后),回退到当前提交的父节点
|
|
||||||
if not before_sha or not await self._is_sha_valid(repo_path, str(before_sha)):
|
|
||||||
logger.warning(f"⚠️ Baseline SHA {before_sha} is missing or invalid (likely history rewrite). Falling back to {after_sha}^")
|
|
||||||
before_sha = f"{after_sha}^"
|
|
||||||
|
|
||||||
sync_diff = ""
|
|
||||||
if before_sha and after_sha and before_sha != after_sha:
|
|
||||||
logger.info(f"📂 Fetching sync diff: {before_sha} -> {after_sha}")
|
|
||||||
sync_diff = await self._get_commit_diff(repo_path, str(before_sha), str(after_sha))
|
|
||||||
|
|
||||||
if not sync_diff or (hasattr(sync_diff, "strip") and sync_diff.strip() == ""):
|
|
||||||
# 最终兜底说明
|
|
||||||
if str(before_sha) == str(after_sha):
|
|
||||||
sync_diff = "(推送的 HEAD 与上次评审点相同,无新增差异)"
|
|
||||||
else:
|
|
||||||
sync_diff = "(本次同步虽有 SHA 变动,但代码内容与上次评审点完全一致。可能是进行了软重置后重新提交、修改了提交信息或进行不带内容的强推。)"
|
|
||||||
|
|
||||||
prompt = build_pr_sync_prompt(diff_text, sync_diff, repo_context, history)
|
|
||||||
else:
|
else:
|
||||||
# 新建 PR 模式:历史为空
|
|
||||||
history = ""
|
|
||||||
prompt = build_pr_review_prompt(diff_text, repo_context, history)
|
prompt = build_pr_review_prompt(diff_text, repo_context, history)
|
||||||
|
|
||||||
# Call LLM
|
# Call LLM
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue