CodeReview/backend/app/services/agent/core/fallback.py

"""
Fallback and Graceful Degradation Module

Provides fallback strategies when components fail.
"""

import asyncio
from dataclasses import dataclass, field
from enum import Enum
from typing import Any, Awaitable, Callable, Dict, List, Optional, TypeVar

from .errors import (
    AgentError,
    LLMError,
    LLMRateLimitError,
    LLMTimeoutError,
    LLMContextLengthError,
    ToolError,
    ExternalToolError,
    RecoveryStrategy,
)
from .logging import get_logger

logger = get_logger("fallback")

T = TypeVar("T")


class FallbackAction(str, Enum):
    """Actions to take when fallback is triggered"""
    RETRY = "retry"
    RETRY_WITH_REDUCED_CONTEXT = "retry_reduced"
    USE_FALLBACK_TOOL = "use_fallback"
    SKIP = "skip"
    CONTINUE_PARTIAL = "continue_partial"
    ABORT = "abort"


@dataclass
class FallbackResult:
    """Result of a fallback operation"""
    action: FallbackAction
    success: bool
    result: Optional[Any] = None
    error: Optional[Exception] = None
    fallback_used: Optional[str] = None
    message: str = ""


@dataclass
class FallbackConfig:
    """Configuration for fallback behavior"""
    enabled: bool = True
    max_context_reduction_ratio: float = 0.5  # Reduce context to 50%
    continue_on_partial: bool = True
    tool_fallbacks: Dict[str, str] = field(default_factory=lambda: {
        "semgrep_scan": "pattern_match",
        "bandit_scan": "pattern_match",
        "gitleaks_scan": "search_code",
        "npm_audit": "search_code",
    })


class FallbackHandler:
    """
    Handles fallback logic for various failure scenarios.

    Usage:
        handler = FallbackHandler(config)
        result = await handler.handle_llm_failure(error, context)
    """

    def __init__(self, config: Optional[FallbackConfig] = None):
        self.config = config or FallbackConfig()

    async def handle_llm_failure(
        self,
        error: Exception,
        context: Dict[str, Any],
        retry_func: Optional[Callable[[], Awaitable[Any]]] = None,
    ) -> FallbackResult:
        """
        Handle LLM call failure.

        Args:
            error: The exception that occurred
            context: Context including messages, iteration, etc.
            retry_func: Optional function to retry with modified params

        Returns:
            FallbackResult with action and result
        """
        if not self.config.enabled:
            return FallbackResult(
                action=FallbackAction.ABORT,
                success=False,
                error=error,
                message="Fallback disabled",
            )

        # Handle specific error types
        if isinstance(error, LLMRateLimitError):
            return FallbackResult(
                action=FallbackAction.RETRY,
                success=False,
                error=error,
                message=f"Rate limited, retry after {error.retry_after}s",
            )

        if isinstance(error, LLMTimeoutError):
            # Try with reduced context
            if retry_func and context.get("can_reduce_context", True):
                return FallbackResult(
                    action=FallbackAction.RETRY_WITH_REDUCED_CONTEXT,
                    success=False,
                    error=error,
                    message="Timeout, will retry with reduced context",
                )
            return FallbackResult(
                action=FallbackAction.CONTINUE_PARTIAL,
                success=False,
                error=error,
                message="Timeout, continuing with partial results",
            )

        if isinstance(error, LLMContextLengthError):
            return FallbackResult(
                action=FallbackAction.RETRY_WITH_REDUCED_CONTEXT,
                success=False,
                error=error,
                message="Context too long, reducing and retrying",
            )

        # Generic LLM error
        if isinstance(error, LLMError):
            if error.recoverable:
                return FallbackResult(
                    action=FallbackAction.RETRY,
                    success=False,
                    error=error,
                    message=f"LLM error (recoverable): {error.message}",
                )
            return FallbackResult(
                action=FallbackAction.ABORT,
                success=False,
                error=error,
                message=f"LLM error (non-recoverable): {error.message}",
            )

        # Unknown error
        return FallbackResult(
            action=FallbackAction.ABORT,
            success=False,
            error=error,
            message=f"Unknown LLM error: {error}",
        )

    async def handle_tool_failure(
        self,
        tool_name: str,
        error: Exception,
        tool_input: Dict[str, Any],
        fallback_executor: Optional[Callable[[str, Dict], Awaitable[Any]]] = None,
    ) -> FallbackResult:
        """
        Handle tool execution failure.

        Args:
            tool_name: Name of the failed tool
            error: The exception that occurred
            tool_input: Original tool input
            fallback_executor: Function to execute fallback tool

        Returns:
            FallbackResult with action and result
        """
        if not self.config.enabled:
            return FallbackResult(
                action=FallbackAction.ABORT,
                success=False,
                error=error,
                message="Fallback disabled",
            )

        # Check if there's a fallback tool
        fallback_tool = self.config.tool_fallbacks.get(tool_name)

        if fallback_tool and fallback_executor:
            try:
                logger.info(
                    f"Using fallback tool {fallback_tool} for failed {tool_name}",
                    tool_name=tool_name,
                    fallback_tool=fallback_tool,
                )

                result = await fallback_executor(fallback_tool, tool_input)

                return FallbackResult(
                    action=FallbackAction.USE_FALLBACK_TOOL,
                    success=True,
                    result=result,
                    fallback_used=fallback_tool,
                    message=f"Used fallback tool: {fallback_tool}",
                )
            except Exception as fallback_error:
                logger.warning(
                    f"Fallback tool {fallback_tool} also failed: {fallback_error}",
                    tool_name=tool_name,
                    fallback_tool=fallback_tool,
                )
                return FallbackResult(
                    action=FallbackAction.SKIP,
                    success=False,
                    error=fallback_error,
                    fallback_used=fallback_tool,
                    message=f"Fallback tool also failed: {fallback_error}",
                )

        # No fallback available
        if isinstance(error, (ToolError, ExternalToolError)):
            if error.recoverable:
                return FallbackResult(
                    action=FallbackAction.RETRY,
                    success=False,
                    error=error,
                    message=f"Tool error (recoverable): {error}",
                )

        return FallbackResult(
            action=FallbackAction.SKIP,
            success=False,
            error=error,
            message=f"Tool failed, skipping: {error}",
        )

    def reduce_context(
        self,
        messages: List[Dict[str, Any]],
        reduction_ratio: Optional[float] = None,
    ) -> List[Dict[str, Any]]:
        """
        Reduce context size by removing older messages.

        Args:
            messages: List of conversation messages
            reduction_ratio: How much to keep (0.5 = keep 50%)

        Returns:
            Reduced list of messages
        """
        ratio = reduction_ratio or self.config.max_context_reduction_ratio

        if len(messages) <= 2:
            return messages

        # Always keep system message and last user message
        system_messages = [m for m in messages if m.get("role") == "system"]
        other_messages = [m for m in messages if m.get("role") != "system"]

        # Keep ratio of non-system messages (at least the last one)
        keep_count = max(1, int(len(other_messages) * ratio))
        kept_messages = other_messages[-keep_count:]

        return system_messages + kept_messages

    def truncate_content(
        self,
        content: str,
        max_length: int = 50000,
        keep_start: int = 20000,
        keep_end: int = 20000,
    ) -> str:
        """
        Truncate content while keeping start and end.

        Args:
            content: Content to truncate
            max_length: Maximum length
            keep_start: Characters to keep from start
            keep_end: Characters to keep from end

        Returns:
            Truncated content
        """
        if len(content) <= max_length:
            return content

        truncation_notice = "\n\n... [CONTENT TRUNCATED] ...\n\n"
        available = max_length - len(truncation_notice)

        start_len = min(keep_start, available // 2)
        end_len = min(keep_end, available - start_len)

        return content[:start_len] + truncation_notice + content[-end_len:]


# ============ Fallback Decorator ============

def with_fallback(
    fallback_func: Optional[Callable[..., Awaitable[T]]] = None,
    on_error: Optional[Callable[[Exception], Awaitable[None]]] = None,
    default_value: Optional[T] = None,
):
    """
    Decorator to add fallback behavior to async functions.

    Usage:
        @with_fallback(fallback_func=backup_llm_call, default_value="")
        async def call_llm(prompt: str):
            ...
    """
    def decorator(func: Callable[..., Awaitable[T]]) -> Callable[..., Awaitable[T]]:
        async def wrapper(*args, **kwargs) -> T:
            try:
                return await func(*args, **kwargs)
            except Exception as e:
                if on_error:
                    await on_error(e)

                if fallback_func:
                    try:
                        return await fallback_func(*args, **kwargs)
                    except Exception:
                        pass

                if default_value is not None:
                    return default_value

                raise

        return wrapper
    return decorator


# ============ Global Handler ============

_global_handler: Optional[FallbackHandler] = None


def get_fallback_handler() -> FallbackHandler:
    """Get global fallback handler"""
    global _global_handler
    if _global_handler is None:
        _global_handler = FallbackHandler()
    return _global_handler


def configure_fallback(config: FallbackConfig) -> None:
    """Configure global fallback handler"""
    global _global_handler
    _global_handler = FallbackHandler(config)
feat: 新增安全工具集成和漏洞知识库扩展 - 添加 Bandit 和 Safety 安全工具到依赖项 - 新增 CSRF、业务逻辑和开放重定向漏洞知识文档 - 实现安全工具一键安装脚本和文档 - 改进模式匹配工具支持直接文件扫描 - 增强遥测模块和 Agent 审计功能 - 修复验证节点中 findings 合并逻辑 - 优化前端 Agent 审计界面和状态展示 2025-12-13 12:35:03 +08:00			`"""`
			`Fallback and Graceful Degradation Module`

			`Provides fallback strategies when components fail.`
			`"""`

			`import asyncio`
			`from dataclasses import dataclass, field`
			`from enum import Enum`
			`from typing import Any, Awaitable, Callable, Dict, List, Optional, TypeVar`

			`from .errors import (`
			`AgentError,`
			`LLMError,`
			`LLMRateLimitError,`
			`LLMTimeoutError,`
			`LLMContextLengthError,`
			`ToolError,`
			`ExternalToolError,`
			`RecoveryStrategy,`
			`)`
			`from .logging import get_logger`

			`logger = get_logger("fallback")`

			`T = TypeVar("T")`


			`class FallbackAction(str, Enum):`
			`"""Actions to take when fallback is triggered"""`
			`RETRY = "retry"`
			`RETRY_WITH_REDUCED_CONTEXT = "retry_reduced"`
			`USE_FALLBACK_TOOL = "use_fallback"`
			`SKIP = "skip"`
			`CONTINUE_PARTIAL = "continue_partial"`
			`ABORT = "abort"`


			`@dataclass`
			`class FallbackResult:`
			`"""Result of a fallback operation"""`
			`action: FallbackAction`
			`success: bool`
			`result: Optional[Any] = None`
			`error: Optional[Exception] = None`
			`fallback_used: Optional[str] = None`
			`message: str = ""`


			`@dataclass`
			`class FallbackConfig:`
			`"""Configuration for fallback behavior"""`
			`enabled: bool = True`
			`max_context_reduction_ratio: float = 0.5 # Reduce context to 50%`
			`continue_on_partial: bool = True`
			`tool_fallbacks: Dict[str, str] = field(default_factory=lambda: {`
			`"semgrep_scan": "pattern_match",`
			`"bandit_scan": "pattern_match",`
			`"gitleaks_scan": "search_code",`
			`"npm_audit": "search_code",`
			`})`


			`class FallbackHandler:`
			`"""`
			`Handles fallback logic for various failure scenarios.`

			`Usage:`
			`handler = FallbackHandler(config)`
			`result = await handler.handle_llm_failure(error, context)`
			`"""`

			`def __init__(self, config: Optional[FallbackConfig] = None):`
			`self.config = config or FallbackConfig()`

			`async def handle_llm_failure(`
			`self,`
			`error: Exception,`
			`context: Dict[str, Any],`
			`retry_func: Optional[Callable[[], Awaitable[Any]]] = None,`
			`) -> FallbackResult:`
			`"""`
			`Handle LLM call failure.`

			`Args:`
			`error: The exception that occurred`
			`context: Context including messages, iteration, etc.`
			`retry_func: Optional function to retry with modified params`

			`Returns:`
			`FallbackResult with action and result`
			`"""`
			`if not self.config.enabled:`
			`return FallbackResult(`
			`action=FallbackAction.ABORT,`
			`success=False,`
			`error=error,`
			`message="Fallback disabled",`
			`)`

			`# Handle specific error types`
			`if isinstance(error, LLMRateLimitError):`
			`return FallbackResult(`
			`action=FallbackAction.RETRY,`
			`success=False,`
			`error=error,`
			`message=f"Rate limited, retry after {error.retry_after}s",`
			`)`

			`if isinstance(error, LLMTimeoutError):`
			`# Try with reduced context`
			`if retry_func and context.get("can_reduce_context", True):`
			`return FallbackResult(`
			`action=FallbackAction.RETRY_WITH_REDUCED_CONTEXT,`
			`success=False,`
			`error=error,`
			`message="Timeout, will retry with reduced context",`
			`)`
			`return FallbackResult(`
			`action=FallbackAction.CONTINUE_PARTIAL,`
			`success=False,`
			`error=error,`
			`message="Timeout, continuing with partial results",`
			`)`

			`if isinstance(error, LLMContextLengthError):`
			`return FallbackResult(`
			`action=FallbackAction.RETRY_WITH_REDUCED_CONTEXT,`
			`success=False,`
			`error=error,`
			`message="Context too long, reducing and retrying",`
			`)`

			`# Generic LLM error`
			`if isinstance(error, LLMError):`
			`if error.recoverable:`
			`return FallbackResult(`
			`action=FallbackAction.RETRY,`
			`success=False,`
			`error=error,`
			`message=f"LLM error (recoverable): {error.message}",`
			`)`
			`return FallbackResult(`
			`action=FallbackAction.ABORT,`
			`success=False,`
			`error=error,`
			`message=f"LLM error (non-recoverable): {error.message}",`
			`)`

			`# Unknown error`
			`return FallbackResult(`
			`action=FallbackAction.ABORT,`
			`success=False,`
			`error=error,`
			`message=f"Unknown LLM error: {error}",`
			`)`

			`async def handle_tool_failure(`
			`self,`
			`tool_name: str,`
			`error: Exception,`
			`tool_input: Dict[str, Any],`
			`fallback_executor: Optional[Callable[[str, Dict], Awaitable[Any]]] = None,`
			`) -> FallbackResult:`
			`"""`
			`Handle tool execution failure.`

			`Args:`
			`tool_name: Name of the failed tool`
			`error: The exception that occurred`
			`tool_input: Original tool input`
			`fallback_executor: Function to execute fallback tool`

			`Returns:`
			`FallbackResult with action and result`
			`"""`
			`if not self.config.enabled:`
			`return FallbackResult(`
			`action=FallbackAction.ABORT,`
			`success=False,`
			`error=error,`
			`message="Fallback disabled",`
			`)`

			`# Check if there's a fallback tool`
			`fallback_tool = self.config.tool_fallbacks.get(tool_name)`

			`if fallback_tool and fallback_executor:`
			`try:`
			`logger.info(`
			`f"Using fallback tool {fallback_tool} for failed {tool_name}",`
			`tool_name=tool_name,`
			`fallback_tool=fallback_tool,`
			`)`

			`result = await fallback_executor(fallback_tool, tool_input)`

			`return FallbackResult(`
			`action=FallbackAction.USE_FALLBACK_TOOL,`
			`success=True,`
			`result=result,`
			`fallback_used=fallback_tool,`
			`message=f"Used fallback tool: {fallback_tool}",`
			`)`
			`except Exception as fallback_error:`
			`logger.warning(`
			`f"Fallback tool {fallback_tool} also failed: {fallback_error}",`
			`tool_name=tool_name,`
			`fallback_tool=fallback_tool,`
			`)`
			`return FallbackResult(`
			`action=FallbackAction.SKIP,`
			`success=False,`
			`error=fallback_error,`
			`fallback_used=fallback_tool,`
			`message=f"Fallback tool also failed: {fallback_error}",`
			`)`

			`# No fallback available`
			`if isinstance(error, (ToolError, ExternalToolError)):`
			`if error.recoverable:`
			`return FallbackResult(`
			`action=FallbackAction.RETRY,`
			`success=False,`
			`error=error,`
			`message=f"Tool error (recoverable): {error}",`
			`)`

			`return FallbackResult(`
			`action=FallbackAction.SKIP,`
			`success=False,`
			`error=error,`
			`message=f"Tool failed, skipping: {error}",`
			`)`

			`def reduce_context(`
			`self,`
			`messages: List[Dict[str, Any]],`
			`reduction_ratio: Optional[float] = None,`
			`) -> List[Dict[str, Any]]:`
			`"""`
			`Reduce context size by removing older messages.`

			`Args:`
			`messages: List of conversation messages`
			`reduction_ratio: How much to keep (0.5 = keep 50%)`

			`Returns:`
			`Reduced list of messages`
			`"""`
			`ratio = reduction_ratio or self.config.max_context_reduction_ratio`

			`if len(messages) <= 2:`
			`return messages`

			`# Always keep system message and last user message`
			`system_messages = [m for m in messages if m.get("role") == "system"]`
			`other_messages = [m for m in messages if m.get("role") != "system"]`

			`# Keep ratio of non-system messages (at least the last one)`
			`keep_count = max(1, int(len(other_messages) * ratio))`
			`kept_messages = other_messages[-keep_count:]`

			`return system_messages + kept_messages`

			`def truncate_content(`
			`self,`
			`content: str,`
			`max_length: int = 50000,`
			`keep_start: int = 20000,`
			`keep_end: int = 20000,`
			`) -> str:`
			`"""`
			`Truncate content while keeping start and end.`

			`Args:`
			`content: Content to truncate`
			`max_length: Maximum length`
			`keep_start: Characters to keep from start`
			`keep_end: Characters to keep from end`

			`Returns:`
			`Truncated content`
			`"""`
			`if len(content) <= max_length:`
			`return content`

			`truncation_notice = "\n\n... [CONTENT TRUNCATED] ...\n\n"`
			`available = max_length - len(truncation_notice)`

			`start_len = min(keep_start, available // 2)`
			`end_len = min(keep_end, available - start_len)`

			`return content[:start_len] + truncation_notice + content[-end_len:]`


			`# ============ Fallback Decorator ============`

			`def with_fallback(`
			`fallback_func: Optional[Callable[..., Awaitable[T]]] = None,`
			`on_error: Optional[Callable[[Exception], Awaitable[None]]] = None,`
			`default_value: Optional[T] = None,`
			`):`
			`"""`
			`Decorator to add fallback behavior to async functions.`

			`Usage:`
			`@with_fallback(fallback_func=backup_llm_call, default_value="")`
			`async def call_llm(prompt: str):`
			`...`
			`"""`
			`def decorator(func: Callable[..., Awaitable[T]]) -> Callable[..., Awaitable[T]]:`
			`async def wrapper(args, *kwargs) -> T:`
			`try:`
			`return await func(args, *kwargs)`
			`except Exception as e:`
			`if on_error:`
			`await on_error(e)`

			`if fallback_func:`
			`try:`
			`return await fallback_func(args, *kwargs)`
			`except Exception:`
			`pass`

			`if default_value is not None:`
			`return default_value`

			`raise`

			`return wrapper`
			`return decorator`


			`# ============ Global Handler ============`

			`_global_handler: Optional[FallbackHandler] = None`


			`def get_fallback_handler() -> FallbackHandler:`
			`"""Get global fallback handler"""`
			`global _global_handler`
			`if _global_handler is None:`
			`_global_handler = FallbackHandler()`
			`return _global_handler`


			`def configure_fallback(config: FallbackConfig) -> None:`
			`"""Configure global fallback handler"""`
			`global _global_handler`
			`_global_handler = FallbackHandler(config)`