371 lines
14 KiB
Python
371 lines
14 KiB
Python
|
|
"""
|
|
CI Service
|
|
Handles Gitea webhook events, manages RAG indexing for CI projects, and performs automated code reviews.
|
|
"""
|
|
|
|
import os
|
|
import shutil
|
|
import logging
|
|
import subprocess
|
|
import json
|
|
from typing import Dict, Any, List, Optional
|
|
from pathlib import Path
|
|
from datetime import datetime
|
|
import asyncio
|
|
import httpx
|
|
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
from sqlalchemy import select
|
|
|
|
from app.core.config import settings
|
|
from app.models.project import Project
|
|
from app.models.ci import PRReview
|
|
from app.core.ci_prompts import (
|
|
build_pr_review_prompt,
|
|
build_chat_prompt,
|
|
PR_SYNC_TASK
|
|
)
|
|
from app.services.rag.indexer import CodeIndexer, IndexUpdateMode
|
|
from app.services.rag.retriever import CodeRetriever
|
|
from app.services.llm.service import LLMService
|
|
|
|
# Module-wide logger, named after this module.
logger = logging.getLogger(__name__)

# On-disk locations used by the CI service, relative to the process working
# directory: git checkouts live under the workspace directory and the RAG
# vector stores under the vector DB directory (one sub-directory per project).
CI_WORKSPACE_DIR = Path("data") / "ci_workspace"
CI_VECTOR_DB_DIR = Path("data") / "ci_vectordb"
|
|
|
|
class CIService:
    """Process Gitea webhooks: keep a per-project RAG index and post LLM replies.

    Two entry points are exposed:

    * ``handle_pr_event`` clones/updates the repository, refreshes the vector
      index, asks the LLM for a review of the PR diff and posts it as a comment.
    * ``handle_comment_event`` answers PR comments that mention the bot.

    Both entry points log failures instead of raising so the webhook endpoint
    can still acknowledge the delivery.
    """

    # PR actions meaning "new commits were pushed to an open PR". GitHub-style
    # payloads say "synchronize"; Gitea is documented to send "synchronized"
    # (NOTE(review): confirm against the deployed Gitea version). Accept both.
    _SYNC_ACTIONS = ("synchronize", "synchronized")

    # Mention that triggers a chat answer on a PR comment.
    _BOT_MENTION = "@ai-bot"

    # Maximum number of characters kept in stored summaries.
    _SUMMARY_LIMIT = 200
    _QUESTION_LIMIT = 50

    # Only the head of the diff is used as the retrieval query.
    _DIFF_QUERY_CHARS = 1000

    def __init__(self, db: AsyncSession):
        """Bind the service to a DB session and make sure the data directories exist."""
        self.db = db
        CI_WORKSPACE_DIR.mkdir(parents=True, exist_ok=True)
        CI_VECTOR_DB_DIR.mkdir(parents=True, exist_ok=True)

        self.llm_service = LLMService()  # Use default config

    # ------------------------------------------------------------------
    # Small pure helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _truncate(text: str, limit: int) -> str:
        """Return ``text`` cut to ``limit`` characters, adding "..." only if it was cut."""
        return text if len(text) <= limit else text[:limit] + "..."

    @staticmethod
    def _redact(text: str, secret: Optional[str]) -> str:
        """Return ``text`` with every occurrence of ``secret`` replaced by ``***``."""
        return text.replace(secret, "***") if secret else text

    @staticmethod
    def _with_token(repo_url: str, token: Optional[str]) -> str:
        """Inject ``token`` into ``repo_url`` as ``scheme://token@host/...``.

        URLs without a scheme (e.g. scp-style SSH URLs) and empty tokens are
        returned unchanged.
        """
        if not token or "://" not in repo_url:
            return repo_url
        protocol, rest = repo_url.split("://", 1)
        return f"{protocol}://{token}@{rest}"

    @staticmethod
    def _collection_name(project: Project) -> str:
        """Name of the vector collection that belongs to ``project``."""
        return f"ci_{project.id}"

    @staticmethod
    def _vector_db_path(project: Project) -> str:
        """Directory holding the vector store of ``project``."""
        # str() so the join also works when the id is not a plain string.
        return str(CI_VECTOR_DB_DIR / str(project.id))

    @staticmethod
    def _repo_api_url(repo: Dict[str, Any]) -> str:
        """Base Gitea API URL for ``repo``; raises ``KeyError`` on a malformed payload."""
        host = str(settings.GITEA_HOST_URL).rstrip("/")
        return f"{host}/api/v1/repos/{repo['owner']['login']}/{repo['name']}"

    # ------------------------------------------------------------------
    # Webhook entry points
    # ------------------------------------------------------------------

    async def handle_pr_event(self, payload: Dict[str, Any]):
        """
        Handle Pull Request events (opened, synchronized)

        Steps: resolve the project, sync the checkout, refresh the index,
        fetch the diff, retrieve context, ask the LLM, post the review and
        store a ``PRReview`` record. Errors are logged, never raised.
        """
        action = payload.get("action")
        pr = payload.get("pull_request")
        repo = payload.get("repository")

        if not pr or not repo:
            return

        repo_url = repo.get("clone_url")
        pr_number = pr.get("number")
        head = pr.get("head") or {}
        branch = head.get("ref")
        commit_sha = head.get("sha")

        logger.info(f"🚀 Handling PR Event: {repo.get('full_name')} #{pr_number} ({action})")

        # Without these the clone cannot work; bail out before creating a project.
        if not repo_url or not branch:
            logger.warning("PR event is missing the clone URL or head branch. Skipping review.")
            return

        # 1. Get or Create Project
        try:
            project = await self._get_or_create_project(repo, pr)
        except Exception as e:
            logger.error(f"Error creating project: {e}")
            return

        # 2. Clone/Update Repo
        try:
            repo_path = await self._prepare_repository(project, repo_url, branch, settings.GITEA_BOT_TOKEN)
        except Exception as e:
            # The message may echo the git command line, which contains the token.
            safe_error = self._redact(str(e), settings.GITEA_BOT_TOKEN)
            logger.error(f"Git operation failed: {safe_error}")
            # If clone fails, we can't proceed with RAG, but we shouldn't crash
            return

        try:
            collection = self._collection_name(project)
            vector_dir = self._vector_db_path(project)

            # 3. Incremental Indexing
            indexer = CodeIndexer(collection_name=collection, persist_directory=vector_dir)
            # The indexer is an async generator: it only works while being iterated.
            async for progress in indexer.smart_index_directory(
                directory=repo_path,
                update_mode=IndexUpdateMode.INCREMENTAL,
            ):
                if progress.processed_files % 10 == 0:
                    logger.info(f"Indexing progress: {progress.processed_files}/{progress.total_files}")

            # 4. Analyze Diff & Retrieve Context
            diff_text = await self._get_pr_diff(repo, pr_number)
            if not diff_text:
                logger.warning("Empty diff or failed to fetch diff. Skipping review.")
                return

            retriever = CodeRetriever(collection_name=collection, persist_directory=vector_dir)
            context_results = await retriever.retrieve(diff_text[: self._DIFF_QUERY_CHARS], top_k=5)
            repo_context = "\n".join(r.to_context_string() for r in context_results)

            # 5. Generate Review
            history = ""
            prompt = build_pr_review_prompt(diff_text, repo_context, history)
            if action in self._SYNC_ACTIONS:
                prompt += f"\n\nNOTE: {PR_SYNC_TASK}"

            response = await self.llm_service.chat_completion_raw(
                messages=[{"role": "user", "content": prompt}],
                temperature=0.2,
            )
            review_body = response["content"]
            if not review_body:
                logger.warning("LLM returned an empty review. Skipping comment.")
                return

            # 6. Post Comment
            await self._post_gitea_comment(repo, pr_number, review_body)

            # 7. Save Record
            review_record = PRReview(
                project_id=project.id,
                pr_number=pr_number,
                commit_sha=commit_sha,
                event_type=action,
                summary=self._truncate(review_body, self._SUMMARY_LIMIT),
                full_report=review_body,
                context_used=json.dumps([r.file_path for r in context_results]),
            )
            self.db.add(review_record)

            # Update project activity (naive UTC timestamp, as before).
            project.latest_pr_activity = datetime.utcnow()
            await self.db.commit()

        except Exception as e:
            # Don't raise, just log (with traceback), so webhook returns 200
            logger.exception(f"Error processing PR event: {e}")
            return

    async def handle_comment_event(self, payload: Dict[str, Any]):
        """
        Handle Issue Comment events (chat)

        Only newly created comments on pull requests that mention the bot are
        answered. Errors are logged, never raised.
        """
        action = payload.get("action")
        issue = payload.get("issue")
        comment = payload.get("comment")
        repo = payload.get("repository")

        if action != "created" or not issue or not comment or not repo:
            return

        # Check if it's a PR. Testing the value (not just the key) also rejects
        # payloads that carry an explicit null "pull_request" for plain issues.
        if not issue.get("pull_request"):
            return

        body = comment.get("body") or ""
        if self._BOT_MENTION not in body:
            return

        pr_number = issue.get("number")
        logger.info(f"💬 Handling Chat Event: {repo.get('full_name')} #{pr_number}")

        try:
            # 1. Get Project (or create it if the repo is first seen via chat)
            project = await self._get_project_by_repo(repo.get("clone_url"))

            if not project:
                try:
                    # Minimal stand-in for the PR payload expected by the creator.
                    default_branch = repo.get("default_branch", "main")
                    mock_pr = {
                        "number": pr_number,
                        "head": {"ref": default_branch, "sha": "HEAD"},  # Fallback
                        "base": {"ref": default_branch},
                    }
                    project = await self._get_or_create_project(repo, mock_pr)
                except Exception as e:
                    logger.error(f"Failed to auto-create project from chat: {e}")
                    return

            if not project:
                logger.warning("Project could not be determined for chat event")
                return

            # 2. Retrieve Context (RAG), using the user comment as query
            retriever = CodeRetriever(
                collection_name=self._collection_name(project),
                persist_directory=self._vector_db_path(project),
            )
            query = body.replace(self._BOT_MENTION, "").strip()
            context_results = await retriever.retrieve(query, top_k=5)
            repo_context = "\n".join(r.to_context_string() for r in context_results)

            # 3. Build Prompt (history is simplified: just the current comment)
            history = f"User: {query}"
            prompt = build_chat_prompt(query, repo_context, history)

            # 4. Generate Answer
            response = await self.llm_service.chat_completion_raw(
                messages=[{"role": "user", "content": prompt}],
                temperature=0.4,
            )
            answer = response["content"]
            if not answer:
                logger.warning("LLM returned an empty answer. Skipping reply.")
                return

            # 5. Reply, listing the files used as context (if any)
            footer = ""
            if context_results:
                used_files = ", ".join(f"`{r.file_path}`" for r in context_results)
                footer = "\n\n---\n*Context used: " + used_files + "*"
            await self._post_gitea_comment(repo, pr_number, answer + footer)

            # 6. Record
            review_record = PRReview(
                project_id=project.id,
                pr_number=pr_number,
                event_type="comment",
                summary=f"Q: {self._truncate(query, self._QUESTION_LIMIT)}",
                full_report=answer,
                context_used=json.dumps([r.file_path for r in context_results]),
            )
            self.db.add(review_record)
            await self.db.commit()

        except Exception as e:
            # Same policy as handle_pr_event: log, don't propagate to the webhook.
            logger.exception(f"Error processing chat event: {e}")
            return

    # ------------------------------------------------------------------
    # Persistence helpers
    # ------------------------------------------------------------------

    async def _get_or_create_project(self, repo: Dict, pr: Dict) -> Project:
        """Return the project tracking ``repo``, creating it on first sight.

        ``pr`` is accepted for interface compatibility; it is not read here.
        Raises whatever the DB layer raises when the insert fails (after a
        rollback).
        """
        repo_url = repo.get("clone_url")

        project = await self._get_project_by_repo(repo_url)
        if project:
            return project

        # Owner is a required field: fall back to the first user in the table.
        # Imported locally, as in the original code (presumably to avoid an
        # import cycle — TODO confirm).
        from app.models.user import User

        user_res = await self.db.execute(select(User).limit(1))
        default_user = user_res.scalars().first()
        owner_id = default_user.id if default_user else "system_fallback_user"

        project = Project(
            name=repo.get("name"),
            description=repo.get("description"),
            source_type="repository",
            repository_url=repo_url,
            repository_type="gitea",
            default_branch=repo.get("default_branch", "main"),
            owner_id=owner_id,
            is_ci_managed=True,
        )

        try:
            self.db.add(project)
            await self.db.commit()
            await self.db.refresh(project)
            logger.info(f"🆕 Created CI Project: {project.name}")
        except Exception as e:
            logger.error(f"Failed to create project: {e}")
            await self.db.rollback()
            raise

        return project

    async def _get_project_by_repo(self, repo_url: str) -> Optional[Project]:
        """Return the project whose ``repository_url`` equals ``repo_url``, if any."""
        stmt = select(Project).where(Project.repository_url == repo_url)
        result = await self.db.execute(stmt)
        return result.scalars().first()

    # ------------------------------------------------------------------
    # Git helpers
    # ------------------------------------------------------------------

    async def _run_git(self, *args: str, cwd: Optional[Path] = None, secret: Optional[str] = None) -> None:
        """Run ``git <args>`` in a worker thread so the event loop is not blocked.

        Raises ``subprocess.CalledProcessError`` on a non-zero exit status. The
        command line and stderr attached to the error have ``secret`` redacted
        so they are safe to log.
        """
        cmd = ["git", *args]
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(
            None,
            lambda: subprocess.run(
                cmd,
                cwd=str(cwd) if cwd is not None else None,
                stderr=subprocess.PIPE,
                text=True,
                errors="replace",
            ),
        )
        if result.returncode != 0:
            safe_cmd = [self._redact(part, secret) for part in cmd]
            detail = self._redact((result.stderr or "").strip(), secret)
            if detail:
                logger.error(f"git stderr: {detail}")
            raise subprocess.CalledProcessError(result.returncode, safe_cmd, stderr=detail)

    async def _prepare_repository(self, project: Project, repo_url: str, branch: str, token: str) -> str:
        """
        Clones or Updates the repository locally.

        An existing checkout is fetched and hard-reset to ``origin/<branch>``;
        if that fails the checkout is deleted and cloned from scratch.

        Returns the checkout directory as a string. Raises ``ValueError`` for
        an unusable branch name and re-raises git failures of the clone step.
        """
        # The branch comes from the webhook payload: never let it be parsed as
        # a git command-line option.
        if not branch or branch.startswith("-"):
            raise ValueError(f"Refusing to use unsafe branch name: {branch!r}")

        target_dir = CI_WORKSPACE_DIR / str(project.id)

        # Inject Token into URL for auth. Format: http://token@host/repo.git
        # Note: git stores this URL (token included) in the checkout's config.
        auth_url = self._with_token(repo_url, token)

        if target_dir.exists():
            logger.info(f"🔄 Updating repo at {target_dir}")
            try:
                await self._run_git("fetch", "--all", cwd=target_dir, secret=token)
                await self._run_git("checkout", branch, cwd=target_dir, secret=token)
                await self._run_git("reset", "--hard", f"origin/{branch}", cwd=target_dir, secret=token)
                return str(target_dir)
            except Exception as e:
                logger.error(f"Git update failed: {self._redact(str(e), token)}. Re-cloning...")
                # Nuke the broken checkout and fall through to a fresh clone.
                await asyncio.get_running_loop().run_in_executor(None, shutil.rmtree, target_dir)

        logger.info(f"📥 Cloning repo to {target_dir}")
        try:
            # "--" ends option parsing, so the URL can never be read as a flag.
            await self._run_git("clone", "-b", branch, "--", auth_url, str(target_dir), secret=token)
        except Exception as e:
            logger.error(f"Git clone failed: {self._redact(str(e), token)}")
            raise

        return str(target_dir)

    # ------------------------------------------------------------------
    # Gitea API helpers
    # ------------------------------------------------------------------

    async def _get_pr_diff(self, repo: Dict, pr_number: int) -> str:
        """
        Fetch the PR diff from Gitea API

        Returns the diff text, or an empty string when Gitea is not
        configured, the request fails or the response status is not 200.
        """
        if not settings.GITEA_HOST_URL or not settings.GITEA_BOT_TOKEN:
            logger.error("GITEA_HOST_URL or GITEA_BOT_TOKEN not configured")
            return ""

        headers = {"Authorization": f"token {settings.GITEA_BOT_TOKEN}"}

        try:
            api_url = f"{self._repo_api_url(repo)}/pulls/{pr_number}.diff"
            async with httpx.AsyncClient() as client:
                resp = await client.get(api_url, headers=headers)
                if resp.status_code == 200:
                    return resp.text
                logger.error(f"Failed to fetch diff: {resp.status_code} - {resp.text[:200]}")
                return ""
        except Exception as e:
            logger.error(f"Failed to fetch PR diff: {e}")
            return ""

    async def _post_gitea_comment(self, repo: Dict, issue_number: int, body: str):
        """Post ``body`` as a comment on issue/PR ``issue_number``.

        Best effort: configuration problems, HTTP errors and network failures
        are logged and swallowed.
        """
        if not settings.GITEA_HOST_URL or not settings.GITEA_BOT_TOKEN:
            logger.error("GITEA_HOST_URL or GITEA_BOT_TOKEN not configured")
            return

        headers = {
            "Authorization": f"token {settings.GITEA_BOT_TOKEN}",
            "Content-Type": "application/json",
        }

        try:
            api_url = f"{self._repo_api_url(repo)}/issues/{issue_number}/comments"
            async with httpx.AsyncClient() as client:
                resp = await client.post(api_url, headers=headers, json={"body": body})
                if resp.status_code >= 400:
                    logger.error(f"Gitea API Error: {resp.status_code} - {resp.text}")
        except Exception as e:
            logger.error(f"Failed to post Gitea comment: {e}")