""" CI Service Handles Gitea webhook events, manages RAG indexing for CI projects, and performs automated code reviews. """ import os import shutil import logging import subprocess import json from typing import Dict, Any, List, Optional from pathlib import Path from datetime import datetime import asyncio import httpx from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy import select from app.core.config import settings from app.models.project import Project from app.models.ci import PRReview from app.core.ci_prompts import ( build_pr_review_prompt, build_chat_prompt, PR_SYNC_TASK ) from app.services.rag.indexer import CodeIndexer, IndexUpdateMode from app.services.rag.retriever import CodeRetriever from app.services.llm.service import LLMService logger = logging.getLogger(__name__) # Base directory for storing CI clones CI_WORKSPACE_DIR = Path("data/ci_workspace") CI_VECTOR_DB_DIR = Path("data/ci_vectordb") class CIService: def __init__(self, db: AsyncSession): self.db = db # Ensure workspaces exist CI_WORKSPACE_DIR.mkdir(parents=True, exist_ok=True) CI_VECTOR_DB_DIR.mkdir(parents=True, exist_ok=True) self.llm_service = LLMService() # Use default config async def handle_pr_event(self, payload: Dict[str, Any]): """ Handle Pull Request events (opened, synchronized) """ action = payload.get("action") pr = payload.get("pull_request") repo = payload.get("repository") if not pr or not repo: return repo_url = repo.get("clone_url") pr_number = pr.get("number") branch = pr.get("head", {}).get("ref") commit_sha = pr.get("head", {}).get("sha") base_branch = pr.get("base", {}).get("ref") logger.info(f"🚀 Handling PR Event: {repo.get('full_name')} #{pr_number} ({action})") # 1. Get or Create Project try: project = await self._get_or_create_project(repo, pr) except Exception as e: logger.error(f"Error creating project: {e}") return # 2. Clone/Update Repo & Indexing (RAG) try: repo_path = await self._prepare_repository(project, repo_url, branch, settings.GITEA_BOT_TOKEN) except Exception as e: logger.error(f"Git operation failed: {e}") # If clone fails, we can't proceed with RAG, but we shouldn't crash return try: # 3. Incremental Indexing indexer = CodeIndexer( collection_name=f"ci_{project.id}", persist_directory=str(CI_VECTOR_DB_DIR / project.id) ) # Iterate over the generator to execute indexing async for progress in indexer.smart_index_directory( directory=repo_path, update_mode=IndexUpdateMode.INCREMENTAL ): if progress.processed_files % 10 == 0: logger.info(f"Indexing progress: {progress.processed_files}/{progress.total_files}") # 4. Analyze Diff & Retrieve Context diff_text = await self._get_pr_diff(repo, pr_number) if not diff_text: logger.warning("Empty diff or failed to fetch diff. Skipping review.") return # Retrieve context relevant to the diff retriever = CodeRetriever( collection_name=f"ci_{project.id}", persist_directory=str(CI_VECTOR_DB_DIR / project.id) ) context_results = await retriever.retrieve(diff_text[:1000], top_k=5) repo_context = "\n".join([r.to_context_string() for r in context_results]) # 5. Generate Review history = "" if action == "synchronize": prompt = build_pr_review_prompt(diff_text, repo_context, history) prompt += f"\n\nNOTE: {PR_SYNC_TASK}" else: prompt = build_pr_review_prompt(diff_text, repo_context, history) # Call LLM response = await self.llm_service.chat_completion_raw( messages=[{"role": "user", "content": prompt}], temperature=0.2 ) review_body = response["content"] # 6. Post Comment await self._post_gitea_comment(repo, pr_number, review_body) # 7. Save Record review_record = PRReview( project_id=project.id, pr_number=pr_number, commit_sha=commit_sha, event_type=action, summary=review_body[:200] + "...", full_report=review_body, context_used=json.dumps([r.file_path for r in context_results]) ) self.db.add(review_record) # Update project activity project.latest_pr_activity = datetime.utcnow() await self.db.commit() except Exception as e: logger.error(f"Error processing PR event: {e}") import traceback logger.error(traceback.format_exc()) # Don't raise, just log, so webhook returns 200 return async def handle_comment_event(self, payload: Dict[str, Any]): """ Handle Issue Comment events (chat) """ action = payload.get("action") issue = payload.get("issue") comment = payload.get("comment") repo = payload.get("repository") if action != "created" or not issue or not comment: return # Check if it's a PR if "pull_request" not in issue: return body = comment.get("body", "") if "@ai-bot" not in body: return logger.info(f"💬 Handling Chat Event: {repo.get('full_name')} #{issue.get('number')}") # 1. Get Project (or Create if discovered via Chat first) # We need a dummy PR object if we are creating project from chat, or we just fetch by repo # Since _get_or_create_project needs PR info to determine branch/owner, we might need a distinct method # or simplified flow. project = await self._get_project_by_repo(repo.get("clone_url")) if not project: # If project doesn't exist, we try to create it using available repo info # We construct a minimal "pseudo-PR" dict if needed, or better: # We assume if we are chatting on a PR, we can get PR details via API later # For now, let's just Try to Find Project. If not found, we CANNOT proceed easily without syncing. # But user wants "Auto Discovery". # Let's try to create it. try: # Mock a PR object for creation purposes (minimal fields) mock_pr = { "number": issue.get("number"), "head": {"ref": repo.get("default_branch", "main"), "sha": "HEAD"}, # Fallback "base": {"ref": repo.get("default_branch", "main")} } project = await self._get_or_create_project(repo, mock_pr) except Exception as e: logger.error(f"Failed to auto-create project from chat: {e}") return if not project: logger.warning("Project could not be determined for chat event") return # 2. Retrieve Context (RAG) retriever = CodeRetriever( collection_name=f"ci_{project.id}", persist_directory=str(CI_VECTOR_DB_DIR / project.id) ) # Use the user comment as query query = body.replace("@ai-bot", "").strip() context_results = await retriever.retrieve(query, top_k=5) repo_context = "\n".join([r.to_context_string() for r in context_results]) # 3. Build Prompt # Fetch conversation history (simplified: just current comment) history = f"User: {query}" prompt = build_chat_prompt(query, repo_context, history) # 4. Generate Answer response = await self.llm_service.chat_completion_raw( messages=[{"role": "user", "content": prompt}], temperature=0.4 ) answer = response["content"] # 5. Reply # Append context info footer footer = "\n\n---\n*Context used: " + ", ".join([f"`{r.file_path}`" for r in context_results]) + "*" await self._post_gitea_comment(repo, issue.get("number"), answer + footer) # 6. Record (Optional, maybe just log) review_record = PRReview( project_id=project.id, pr_number=issue.get("number"), event_type="comment", summary=f"Q: {query[:50]}...", full_report=answer, context_used=json.dumps([r.file_path for r in context_results]) ) self.db.add(review_record) await self.db.commit() async def _get_or_create_project(self, repo: Dict, pr: Dict) -> Project: repo_url = repo.get("clone_url") # Check if exists stmt = select(Project).where(Project.repository_url == repo_url) result = await self.db.execute(stmt) project = result.scalars().first() if not project: # Create new # Find a valid user to assign as owner (required field) from app.models.user import User user_stmt = select(User).limit(1) user_res = await self.db.execute(user_stmt) default_user = user_res.scalars().first() owner_id = default_user.id if default_user else "system_fallback_user" project = Project( name=repo.get("name"), description=repo.get("description"), source_type="repository", repository_url=repo_url, repository_type="gitea", default_branch=repo.get("default_branch", "main"), owner_id=owner_id, is_ci_managed=True ) try: self.db.add(project) await self.db.commit() await self.db.refresh(project) logger.info(f"🆕 Created CI Project: {project.name}") except Exception as e: logger.error(f"Failed to create project: {e}") # Try rollback possibly? await self.db.rollback() raise e return project async def _get_project_by_repo(self, repo_url: str) -> Optional[Project]: stmt = select(Project).where(Project.repository_url == repo_url) result = await self.db.execute(stmt) return result.scalars().first() async def _prepare_repository(self, project: Project, repo_url: str, branch: str, token: str) -> str: """ Clones or Updates the repository locally. """ target_dir = CI_WORKSPACE_DIR / project.id # Inject Token into URL for auth # Format: http://token@host/repo.git if "://" in repo_url: protocol, rest = repo_url.split("://", 1) auth_url = f"{protocol}://{token}@{rest}" else: auth_url = repo_url # Fallback if target_dir.exists(): # Update logger.info(f"🔄 Updating repo at {target_dir}") try: # git fetch --all subprocess.run(["git", "fetch", "--all"], cwd=target_dir, check=True) # git checkout branch subprocess.run(["git", "checkout", branch], cwd=target_dir, check=True) # git reset --hard origin/branch subprocess.run(["git", "reset", "--hard", f"origin/{branch}"], cwd=target_dir, check=True) except Exception as e: logger.error(f"Git update failed: {e}. Re-cloning...") shutil.rmtree(target_dir) # Nuke and retry return await self._prepare_repository(project, repo_url, branch, token) else: # Clone logger.info(f"📥 Cloning repo to {target_dir}") try: subprocess.run(["git", "clone", "-b", branch, auth_url, str(target_dir)], check=True) except Exception as e: logger.error(f"Git clone failed: {e}") raise e return str(target_dir) async def _get_pr_diff(self, repo: Dict, pr_number: int) -> str: """ Fetch the PR diff from Gitea API """ api_url = f"{settings.GITEA_HOST_URL}/api/v1/repos/{repo['owner']['login']}/{repo['name']}/pulls/{pr_number}.diff" headers = {"Authorization": f"token {settings.GITEA_BOT_TOKEN}"} try: async with httpx.AsyncClient() as client: resp = await client.get(api_url, headers=headers) if resp.status_code == 200: return resp.text else: logger.error(f"Failed to fetch diff: {resp.status_code} - {resp.text[:200]}") return "" except Exception as e: logger.error(f"Failed to fetch PR diff: {e}") return "" async def _post_gitea_comment(self, repo: Dict, issue_number: int, body: str): if not settings.GITEA_HOST_URL or not settings.GITEA_BOT_TOKEN: logger.error("GITEA_HOST_URL or GITEA_BOT_TOKEN not configured") return api_url = f"{settings.GITEA_HOST_URL}/api/v1/repos/{repo['owner']['login']}/{repo['name']}/issues/{issue_number}/comments" headers = { "Authorization": f"token {settings.GITEA_BOT_TOKEN}", "Content-Type": "application/json" } try: async with httpx.AsyncClient() as client: resp = await client.post(api_url, headers=headers, json={"body": body}) if resp.status_code >= 400: logger.error(f"Gitea API Error: {resp.status_code} - {resp.text}") except Exception as e: logger.error(f"Failed to post Gitea comment: {e}")