CodeReview/backend/app/services/ci_service.py

371 lines
14 KiB
Python

"""
CI Service
Handles Gitea webhook events, manages RAG indexing for CI projects, and performs automated code reviews.
"""
import os
import shutil
import logging
import subprocess
import json
from typing import Dict, Any, List, Optional
from pathlib import Path
from datetime import datetime
import asyncio
import httpx
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select
from app.core.config import settings
from app.models.project import Project
from app.models.ci import PRReview
from app.core.ci_prompts import (
build_pr_review_prompt,
build_chat_prompt,
PR_SYNC_TASK
)
from app.services.rag.indexer import CodeIndexer, IndexUpdateMode
from app.services.rag.retriever import CodeRetriever
from app.services.llm.service import LLMService
# Module-scoped logger; records are tagged with this module's import path.
logger = logging.getLogger(__name__)

# Root under which each CI project's git working copy is kept
# (one sub-directory per project id). Relative to the process CWD.
CI_WORKSPACE_DIR = Path("data") / "ci_workspace"

# Root handed to the RAG indexer/retriever as ``persist_directory``
# (again one sub-directory per project id). Relative to the process CWD.
CI_VECTOR_DB_DIR = Path("data") / "ci_vectordb"
class CIService:
    """Automated code review driven by Gitea webhooks.

    For pull-request events the service clones/updates the repository,
    incrementally indexes it for retrieval (RAG), fetches the PR diff, asks
    the LLM for a review, posts the review as a PR comment and stores a
    ``PRReview`` row. For comment events that mention the bot it answers the
    question using retrieved repository context.

    Both public handlers are written to never raise once the payload has been
    accepted: failures are logged so the webhook endpoint can still answer 200.
    """

    # Gitea reports new commits pushed to a PR as "synchronized"; GitHub-style
    # payloads use "synchronize". Accept both so the sync note is actually used.
    _SYNC_ACTIONS = ("synchronize", "synchronized")

    # Mention that triggers a chat reply in ``handle_comment_event``.
    _BOT_MENTION = "@ai-bot"

    def __init__(self, db: AsyncSession):
        """Bind the service to a DB session and make sure data directories exist.

        Args:
            db: Async SQLAlchemy session used for all project/review persistence.
        """
        self.db = db
        # Ensure workspaces exist
        CI_WORKSPACE_DIR.mkdir(parents=True, exist_ok=True)
        CI_VECTOR_DB_DIR.mkdir(parents=True, exist_ok=True)
        self.llm_service = LLMService()  # Use default config

    # ------------------------------------------------------------------
    # Public webhook handlers
    # ------------------------------------------------------------------

    async def handle_pr_event(self, payload: Dict[str, Any]):
        """Handle a Pull Request webhook event (e.g. opened, synchronized).

        Args:
            payload: Decoded webhook JSON. Reads ``action``, ``pull_request``
                (``number``, ``head.ref``, ``head.sha``) and ``repository``
                (``clone_url``, ``full_name``, plus the fields used by the
                API helpers).

        Returns:
            None. Every failure is logged and swallowed.
        """
        action = payload.get("action")
        pr = payload.get("pull_request")
        repo = payload.get("repository")
        if not pr or not repo:
            return

        repo_url = repo.get("clone_url")
        pr_number = pr.get("number")
        head = pr.get("head") or {}  # tolerate an explicit null "head"
        branch = head.get("ref")
        commit_sha = head.get("sha")
        logger.info(f"🚀 Handling PR Event: {repo.get('full_name')} #{pr_number} ({action})")

        # 1. Get or Create Project
        try:
            project = await self._get_or_create_project(repo, pr)
        except Exception as e:
            logger.error(f"Error creating project: {e}")
            return

        # 2. Clone/Update Repo
        try:
            repo_path = await self._prepare_repository(
                project, repo_url, branch, settings.GITEA_BOT_TOKEN
            )
        except Exception as e:
            # Without a working copy there is nothing to index or review.
            logger.error(f"Git operation failed: {e}")
            return

        try:
            store = self._vector_store_kwargs(project)

            # 3. Incremental Indexing
            indexer = CodeIndexer(**store)
            # The indexer is an async generator: it only does work while iterated.
            async for progress in indexer.smart_index_directory(
                directory=repo_path,
                update_mode=IndexUpdateMode.INCREMENTAL,
            ):
                if progress.processed_files % 10 == 0:
                    logger.info(
                        f"Indexing progress: {progress.processed_files}/{progress.total_files}"
                    )

            # 4. Analyze Diff & Retrieve Context
            diff_text = await self._get_pr_diff(repo, pr_number)
            if not diff_text:
                logger.warning("Empty diff or failed to fetch diff. Skipping review.")
                return

            retriever = CodeRetriever(**store)
            # Only the head of the diff is used as the retrieval query.
            context_results = await retriever.retrieve(diff_text[:1000], top_k=5)
            repo_context = "\n".join([r.to_context_string() for r in context_results])

            # 5. Generate Review
            history = ""
            prompt = build_pr_review_prompt(diff_text, repo_context, history)
            if action in self._SYNC_ACTIONS:
                prompt += f"\n\nNOTE: {PR_SYNC_TASK}"

            response = await self.llm_service.chat_completion_raw(
                messages=[{"role": "user", "content": prompt}],
                temperature=0.2,
            )
            review_body = response["content"]

            # 6. Post Comment
            await self._post_gitea_comment(repo, pr_number, review_body)

            # 7. Save Record
            review_record = PRReview(
                project_id=project.id,
                pr_number=pr_number,
                commit_sha=commit_sha,
                event_type=action,
                summary=review_body[:200] + "...",
                full_report=review_body,
                context_used=json.dumps([r.file_path for r in context_results]),
            )
            self.db.add(review_record)
            # Update project activity
            project.latest_pr_activity = datetime.utcnow()
            await self.db.commit()
        except Exception as e:
            # Don't raise, just log (with traceback), so webhook returns 200
            logger.error(f"Error processing PR event: {e}", exc_info=True)
            await self._rollback_quietly()
            return

    async def handle_comment_event(self, payload: Dict[str, Any]):
        """Handle an Issue Comment webhook event (chat with the bot on a PR).

        Only ``created`` comments on pull requests whose body mentions
        ``@ai-bot`` are answered; everything else is ignored.

        Args:
            payload: Decoded webhook JSON. Reads ``action``, ``issue``,
                ``comment`` and ``repository``.

        Returns:
            None. Every failure is logged and swallowed.
        """
        action = payload.get("action")
        issue = payload.get("issue")
        comment = payload.get("comment")
        repo = payload.get("repository")
        if action != "created" or not issue or not comment or not repo:
            return
        # Check if it's a PR
        if "pull_request" not in issue:
            return
        body = comment.get("body") or ""
        if self._BOT_MENTION not in body:
            return

        issue_number = issue.get("number")
        logger.info(f"💬 Handling Chat Event: {repo.get('full_name')} #{issue_number}")

        try:
            # 1. Get Project (or create it if the repo was first seen via chat)
            project = await self._get_project_by_repo(repo.get("clone_url"))
            if not project:
                default_branch = repo.get("default_branch", "main")
                # Minimal stand-in for the PR payload expected by
                # ``_get_or_create_project``; the comment event carries no
                # head/base information.
                mock_pr = {
                    "number": issue_number,
                    "head": {"ref": default_branch, "sha": "HEAD"},  # Fallback
                    "base": {"ref": default_branch},
                }
                try:
                    project = await self._get_or_create_project(repo, mock_pr)
                except Exception as e:
                    logger.error(f"Failed to auto-create project from chat: {e}")
                    return
            if not project:
                logger.warning("Project could not be determined for chat event")
                return

            # 2. Retrieve Context (RAG)
            # NOTE(review): this path never clones or indexes, so a project
            # created just above presumably has an empty index — confirm.
            retriever = CodeRetriever(**self._vector_store_kwargs(project))
            # Use the user comment (minus the mention) as query
            query = body.replace(self._BOT_MENTION, "").strip()
            context_results = await retriever.retrieve(query, top_k=5)
            repo_context = "\n".join([r.to_context_string() for r in context_results])

            # 3. Build Prompt (history is simplified to just the current comment)
            history = f"User: {query}"
            prompt = build_chat_prompt(query, repo_context, history)

            # 4. Generate Answer
            response = await self.llm_service.chat_completion_raw(
                messages=[{"role": "user", "content": prompt}],
                temperature=0.4,
            )
            answer = response["content"]

            # 5. Reply, with a footer listing the files used as context
            footer = (
                "\n\n---\n*Context used: "
                + ", ".join([f"`{r.file_path}`" for r in context_results])
                + "*"
            )
            await self._post_gitea_comment(repo, issue_number, answer + footer)

            # 6. Record
            review_record = PRReview(
                project_id=project.id,
                pr_number=issue_number,
                event_type="comment",
                summary=f"Q: {query[:50]}...",
                full_report=answer,
                context_used=json.dumps([r.file_path for r in context_results]),
            )
            self.db.add(review_record)
            await self.db.commit()
        except Exception as e:
            # Same policy as handle_pr_event: log, don't fail the webhook.
            logger.error(f"Error processing comment event: {e}", exc_info=True)
            await self._rollback_quietly()

    # ------------------------------------------------------------------
    # Persistence helpers
    # ------------------------------------------------------------------

    async def _get_or_create_project(self, repo: Dict, pr: Dict) -> Project:
        """Return the project whose ``repository_url`` matches, creating it if absent.

        Args:
            repo: ``repository`` object from the webhook payload.
            pr: Pull-request object from the payload (currently unused; kept
                for interface compatibility).

        Returns:
            The existing or newly committed ``Project``.

        Raises:
            Exception: Whatever the session raises while persisting a new
                project; the session is rolled back before re-raising.
        """
        repo_url = repo.get("clone_url")
        stmt = select(Project).where(Project.repository_url == repo_url)
        result = await self.db.execute(stmt)
        project = result.scalars().first()
        if project:
            return project

        # The owner is a required field: use the first user found, if any.
        from app.models.user import User

        user_res = await self.db.execute(select(User).limit(1))
        default_user = user_res.scalars().first()
        owner_id = default_user.id if default_user else "system_fallback_user"

        project = Project(
            name=repo.get("name"),
            description=repo.get("description"),
            source_type="repository",
            repository_url=repo_url,
            repository_type="gitea",
            default_branch=repo.get("default_branch", "main"),
            owner_id=owner_id,
            is_ci_managed=True,
        )
        try:
            self.db.add(project)
            await self.db.commit()
            await self.db.refresh(project)
            logger.info(f"🆕 Created CI Project: {project.name}")
        except Exception as e:
            logger.error(f"Failed to create project: {e}")
            await self.db.rollback()
            raise
        return project

    async def _get_project_by_repo(self, repo_url: str) -> Optional[Project]:
        """Look up a project by its repository URL; ``None`` when not found."""
        stmt = select(Project).where(Project.repository_url == repo_url)
        result = await self.db.execute(stmt)
        return result.scalars().first()

    async def _rollback_quietly(self) -> None:
        """Roll back the session after a failed unit of work, never raising."""
        try:
            await self.db.rollback()
        except Exception as e:
            logger.error(f"Session rollback failed: {e}")

    @staticmethod
    def _vector_store_kwargs(project: Project) -> Dict[str, str]:
        """Constructor kwargs shared by ``CodeIndexer`` and ``CodeRetriever``."""
        return {
            "collection_name": f"ci_{project.id}",
            # str() so a non-string id cannot break the path join.
            "persist_directory": str(CI_VECTOR_DB_DIR / str(project.id)),
        }

    # ------------------------------------------------------------------
    # Git helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _inject_token(repo_url: str, token: Optional[str]) -> str:
        """Return ``repo_url`` with ``token`` embedded as userinfo.

        ``http://host/repo.git`` becomes ``http://token@host/repo.git``. URLs
        without a ``scheme://`` prefix, or calls without a token, are returned
        unchanged (so a missing token never yields ``http://None@...``).
        """
        if not token or "://" not in repo_url:
            return repo_url
        scheme, rest = repo_url.split("://", 1)
        return f"{scheme}://{token}@{rest}"

    @staticmethod
    def _redact(text: str, secret: Optional[str]) -> str:
        """Mask every occurrence of ``secret`` in ``text`` (no-op if empty)."""
        return text.replace(secret, "***") if secret else text

    @staticmethod
    async def _run_git(
        args: List[str],
        cwd: Optional[Path] = None,
        secret: Optional[str] = None,
    ) -> None:
        """Run ``git <args>`` without blocking the event loop.

        Args:
            args: Arguments following ``git``.
            cwd: Working directory for the command, if any.
            secret: Value to mask in the error message (the auth token).

        Raises:
            RuntimeError: If git exits non-zero. The original
                ``CalledProcessError`` is deliberately not chained because its
                message contains the full command line, token included.
        """
        cmd = ["git", *args]
        loop = asyncio.get_running_loop()
        try:
            await loop.run_in_executor(
                None, lambda: subprocess.run(cmd, cwd=cwd, check=True)
            )
        except subprocess.CalledProcessError as exc:
            shown = CIService._redact(" ".join(cmd), secret)
            raise RuntimeError(
                f"git exited with status {exc.returncode}: {shown}"
            ) from None

    async def _prepare_repository(
        self, project: Project, repo_url: str, branch: str, token: str
    ) -> str:
        """Clone the repository, or update an existing clone, to ``branch``.

        An existing clone is fetched, checked out and hard-reset to
        ``origin/<branch>``; if that fails the directory is deleted and a
        fresh clone is made.

        Args:
            project: Project whose id names the workspace sub-directory.
            repo_url: Clone URL from the webhook payload.
            branch: Branch to check out.
            token: Access token injected into the clone URL for auth.

        Returns:
            Path of the working copy, as a string.

        Raises:
            ValueError: If ``branch`` or ``repo_url`` is missing, or ``branch``
                starts with ``-`` (it would be parsed by git as an option).
            RuntimeError: If the (re-)clone fails.
        """
        # Validate before touching the filesystem: the branch name comes from
        # the webhook payload, and a bad value must not wipe a healthy clone.
        if not branch or branch.startswith("-"):
            raise ValueError(f"Refusing to use branch name: {branch!r}")
        if not repo_url:
            raise ValueError("Repository clone URL is missing")

        target_dir = CI_WORKSPACE_DIR / str(project.id)
        auth_url = self._inject_token(repo_url, token)

        if target_dir.exists():
            logger.info(f"🔄 Updating repo at {target_dir}")
            try:
                await self._run_git(["fetch", "--all"], cwd=target_dir, secret=token)
                await self._run_git(["checkout", branch], cwd=target_dir, secret=token)
                await self._run_git(
                    ["reset", "--hard", f"origin/{branch}"],
                    cwd=target_dir,
                    secret=token,
                )
                return str(target_dir)
            except Exception as e:
                logger.error(f"Git update failed: {e}. Re-cloning...")
                shutil.rmtree(target_dir)  # Nuke, then fall through to clone

        logger.info(f"📥 Cloning repo to {target_dir}")
        try:
            await self._run_git(
                ["clone", "-b", branch, auth_url, str(target_dir)], secret=token
            )
        except Exception as e:
            logger.error(f"Git clone failed: {e}")
            raise
        return str(target_dir)

    # ------------------------------------------------------------------
    # Gitea API helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _repo_api_url(repo: Dict) -> str:
        """Base REST URL for ``repo``: ``<host>/api/v1/repos/<owner>/<name>``."""
        host = settings.GITEA_HOST_URL.rstrip("/")
        return f"{host}/api/v1/repos/{repo['owner']['login']}/{repo['name']}"

    async def _get_pr_diff(self, repo: Dict, pr_number: int) -> str:
        """Fetch the PR diff from the Gitea API.

        Returns:
            The diff text, or ``""`` when Gitea is not configured, the request
            fails, or the response status is not 200.
        """
        if not settings.GITEA_HOST_URL or not settings.GITEA_BOT_TOKEN:
            logger.error("GITEA_HOST_URL or GITEA_BOT_TOKEN not configured")
            return ""
        api_url = f"{self._repo_api_url(repo)}/pulls/{pr_number}.diff"
        headers = {"Authorization": f"token {settings.GITEA_BOT_TOKEN}"}
        try:
            async with httpx.AsyncClient() as client:
                resp = await client.get(api_url, headers=headers)
        except Exception as e:
            logger.error(f"Failed to fetch PR diff: {e}")
            return ""
        if resp.status_code == 200:
            return resp.text
        logger.error(f"Failed to fetch diff: {resp.status_code} - {resp.text[:200]}")
        return ""

    async def _post_gitea_comment(self, repo: Dict, issue_number: int, body: str):
        """Post ``body`` as a comment on issue/PR ``issue_number``.

        Best effort: configuration problems, transport errors and HTTP error
        statuses are logged, never raised.
        """
        if not settings.GITEA_HOST_URL or not settings.GITEA_BOT_TOKEN:
            logger.error("GITEA_HOST_URL or GITEA_BOT_TOKEN not configured")
            return
        api_url = f"{self._repo_api_url(repo)}/issues/{issue_number}/comments"
        headers = {
            "Authorization": f"token {settings.GITEA_BOT_TOKEN}",
            "Content-Type": "application/json",
        }
        try:
            async with httpx.AsyncClient() as client:
                resp = await client.post(api_url, headers=headers, json={"body": body})
                if resp.status_code >= 400:
                    logger.error(f"Gitea API Error: {resp.status_code} - {resp.text}")
        except Exception as e:
            logger.error(f"Failed to post Gitea comment: {e}")