CodeReview/backend/app/services/llm/prompt_cache.py

"""
Prompt Caching 模块

为支持缓存的 LLM（如 Anthropic Claude）提供 Prompt 缓存功能。
通过在系统提示词和早期对话中添加缓存标记，减少重复处理，
显著降低 Token 消耗和响应延迟。

支持的 LLM:
- Anthropic Claude (claude-3-5-sonnet, claude-3-opus, claude-3-haiku)
- OpenAI (部分模型支持)

缓存策略:
- 短对话（<10轮）: 仅缓存系统提示词
- 中等对话（10-30轮）: 缓存系统提示词 + 前5轮对话
- 长对话（>30轮）: 多个缓存点，动态调整
"""

import logging
from typing import Dict, Any, List, Optional, Tuple
from dataclasses import dataclass, field
from enum import Enum

logger = logging.getLogger(__name__)


class CacheStrategy(str, Enum):
    """缓存策略"""
    NONE = "none"                    # 不缓存
    SYSTEM_ONLY = "system_only"      # 仅缓存系统提示词
    SYSTEM_AND_EARLY = "system_early"  # 缓存系统提示词和早期对话
    MULTI_POINT = "multi_point"      # 多缓存点


@dataclass
class CacheConfig:
    """缓存配置"""
    enabled: bool = True
    strategy: CacheStrategy = CacheStrategy.SYSTEM_AND_EARLY
    
    # 缓存阈值
    min_system_prompt_tokens: int = 1000  # 系统提示词最小 token 数才启用缓存
    early_messages_count: int = 5         # 早期对话缓存的消息数
    
    # 多缓存点配置
    multi_point_interval: int = 10        # 多缓存点间隔（消息数）
    max_cache_points: int = 4             # 最大缓存点数量


@dataclass
class CacheStats:
    """缓存统计"""
    cache_hits: int = 0
    cache_misses: int = 0
    cached_tokens: int = 0
    total_tokens: int = 0
    
    @property
    def hit_rate(self) -> float:
        total = self.cache_hits + self.cache_misses
        return self.cache_hits / total if total > 0 else 0.0
    
    @property
    def token_savings(self) -> float:
        return self.cached_tokens / self.total_tokens if self.total_tokens > 0 else 0.0


class PromptCacheManager:
    """
    Prompt 缓存管理器
    
    负责:
    1. 检测 LLM 是否支持缓存
    2. 根据对话长度选择缓存策略
    3. 为消息添加缓存标记
    4. 统计缓存效果
    """
    
    # 支持缓存的模型
    CACHEABLE_MODELS = {
        # Anthropic Claude
        "claude-3-5-sonnet": True,
        "claude-3-5-sonnet-20241022": True,
        "claude-3-opus": True,
        "claude-3-opus-20240229": True,
        "claude-3-haiku": True,
        "claude-3-haiku-20240307": True,
        "claude-3-sonnet": True,
        "claude-3-sonnet-20240229": True,
        # OpenAI (部分支持)
        "gpt-4-turbo": False,  # 暂不支持
        "gpt-4o": False,
        "gpt-4o-mini": False,
    }
    
    # Anthropic 缓存标记
    ANTHROPIC_CACHE_CONTROL = {"type": "ephemeral"}
    
    def __init__(self, config: Optional[CacheConfig] = None):
        self.config = config or CacheConfig()
        self.stats = CacheStats()
        self._cache_enabled_for_session = True
    
    def supports_caching(self, model: str, provider: str) -> bool:
        """
        检查模型是否支持缓存
        
        Args:
            model: 模型名称
            provider: 提供商名称
            
        Returns:
            是否支持缓存
        """
        if not self.config.enabled:
            return False
        
        # Anthropic Claude 支持缓存
        if provider.lower() in ["anthropic", "claude"]:
            # 检查模型名称
            for cacheable_model in self.CACHEABLE_MODELS:
                if cacheable_model in model.lower():
                    return self.CACHEABLE_MODELS.get(cacheable_model, False)
        
        return False
    
    def determine_strategy(
        self,
        messages: List[Dict[str, Any]],
        system_prompt_tokens: int = 0,
    ) -> CacheStrategy:
        """
        根据对话状态确定缓存策略
        
        Args:
            messages: 消息列表
            system_prompt_tokens: 系统提示词的 token 数
            
        Returns:
            缓存策略
        """
        if not self.config.enabled:
            return CacheStrategy.NONE
        
        # 系统提示词太短，不值得缓存
        if system_prompt_tokens < self.config.min_system_prompt_tokens:
            return CacheStrategy.NONE
        
        message_count = len(messages)
        
        # 短对话：仅缓存系统提示词
        if message_count < 10:
            return CacheStrategy.SYSTEM_ONLY
        
        # 中等对话：缓存系统提示词和早期对话
        if message_count < 30:
            return CacheStrategy.SYSTEM_AND_EARLY
        
        # 长对话：多缓存点
        return CacheStrategy.MULTI_POINT
    
    def add_cache_markers_anthropic(
        self,
        messages: List[Dict[str, Any]],
        strategy: CacheStrategy,
    ) -> List[Dict[str, Any]]:
        """
        为 Anthropic Claude 消息添加缓存标记
        
        Anthropic 的缓存格式:
        - 在 content 中使用 cache_control 字段
        - 支持 text 类型的 content block
        
        Args:
            messages: 原始消息列表
            strategy: 缓存策略
            
        Returns:
            添加了缓存标记的消息列表
        """
        if strategy == CacheStrategy.NONE:
            return messages
        
        cached_messages = []
        
        for i, msg in enumerate(messages):
            new_msg = msg.copy()
            
            # 系统提示词缓存
            if msg.get("role") == "system":
                new_msg = self._add_cache_to_message(new_msg)
                cached_messages.append(new_msg)
                continue
            
            # 早期对话缓存
            if strategy in [CacheStrategy.SYSTEM_AND_EARLY, CacheStrategy.MULTI_POINT]:
                if i <= self.config.early_messages_count:
                    new_msg = self._add_cache_to_message(new_msg)
            
            # 多缓存点
            if strategy == CacheStrategy.MULTI_POINT:
                if i > 0 and i % self.config.multi_point_interval == 0:
                    cache_point_count = i // self.config.multi_point_interval
                    if cache_point_count <= self.config.max_cache_points:
                        new_msg = self._add_cache_to_message(new_msg)
            
            cached_messages.append(new_msg)
        
        return cached_messages
    
    def _add_cache_to_message(self, msg: Dict[str, Any]) -> Dict[str, Any]:
        """
        为单条消息添加缓存标记
        
        Args:
            msg: 原始消息
            
        Returns:
            添加了缓存标记的消息
        """
        content = msg.get("content", "")
        
        # 如果 content 是字符串，转换为 content block 格式
        if isinstance(content, str):
            msg["content"] = [
                {
                    "type": "text",
                    "text": content,
                    "cache_control": self.ANTHROPIC_CACHE_CONTROL,
                }
            ]
        elif isinstance(content, list):
            # 已经是 content block 格式，为最后一个 block 添加缓存
            if content:
                last_block = content[-1]
                if isinstance(last_block, dict):
                    last_block["cache_control"] = self.ANTHROPIC_CACHE_CONTROL
        
        return msg
    
    def process_messages(
        self,
        messages: List[Dict[str, Any]],
        model: str,
        provider: str,
        system_prompt_tokens: int = 0,
    ) -> Tuple[List[Dict[str, Any]], bool]:
        """
        处理消息，添加缓存标记
        
        Args:
            messages: 原始消息列表
            model: 模型名称
            provider: 提供商名称
            system_prompt_tokens: 系统提示词 token 数
            
        Returns:
            (处理后的消息列表, 是否启用了缓存)
        """
        if not self.supports_caching(model, provider):
            return messages, False
        
        strategy = self.determine_strategy(messages, system_prompt_tokens)
        
        if strategy == CacheStrategy.NONE:
            return messages, False
        
        # 根据提供商选择缓存方法
        if provider.lower() in ["anthropic", "claude"]:
            cached_messages = self.add_cache_markers_anthropic(messages, strategy)
            logger.debug(f"Applied {strategy.value} caching strategy for Anthropic")
            return cached_messages, True
        
        return messages, False
    
    def update_stats(
        self,
        cache_creation_input_tokens: int = 0,
        cache_read_input_tokens: int = 0,
        total_input_tokens: int = 0,
    ):
        """
        更新缓存统计
        
        Args:
            cache_creation_input_tokens: 缓存创建的 token 数
            cache_read_input_tokens: 缓存读取的 token 数
            total_input_tokens: 总输入 token 数
        """
        if cache_read_input_tokens > 0:
            self.stats.cache_hits += 1
            self.stats.cached_tokens += cache_read_input_tokens
        else:
            self.stats.cache_misses += 1
        
        self.stats.total_tokens += total_input_tokens
    
    def get_stats_summary(self) -> Dict[str, Any]:
        """获取缓存统计摘要"""
        return {
            "cache_hits": self.stats.cache_hits,
            "cache_misses": self.stats.cache_misses,
            "hit_rate": f"{self.stats.hit_rate:.2%}",
            "cached_tokens": self.stats.cached_tokens,
            "total_tokens": self.stats.total_tokens,
            "token_savings": f"{self.stats.token_savings:.2%}",
        }


# 全局缓存管理器实例
prompt_cache_manager = PromptCacheManager()


def estimate_tokens(text: str) -> int:
    """
    估算文本的 token 数量
    
    简单估算：英文约 4 字符/token，中文约 2 字符/token
    
    Args:
        text: 文本内容
        
    Returns:
        估算的 token 数
    """
    if not text:
        return 0
    
    # 统计中文字符
    chinese_chars = sum(1 for c in text if '\u4e00' <= c <= '\u9fff')
    other_chars = len(text) - chinese_chars
    
    # 中文约 2 字符/token，其他约 4 字符/token
    return int(chinese_chars / 2 + other_chars / 4)
-												feat(agent): implement comprehensive agent architecture with knowledge base and persistence layer

- Add database migrations for agent checkpoints and tree node tracking
- Implement core agent execution framework with executor, state management, and message handling
- Create knowledge base system with framework-specific modules (Django, FastAPI, Flask, Express, React, Supabase)
- Add vulnerability knowledge modules covering authentication, cryptography, injection, XSS, XXE, SSRF, path traversal, deserialization, and race conditions
- Introduce new agent tools: thinking tool, reporting tool, and agent-specific utilities
- Implement LLM memory compression and prompt caching for improved performance
- Add agent registry and persistence layer for checkpoint management
- Refactor agent implementations (analysis, recon, verification, orchestrator) with enhanced capabilities
- Remove legacy agent implementations (analysis_v2, react_agent)
- Update API endpoints for agent task creation and project management
- Add frontend components for agent task creation and enhanced audit UI
- Consolidate agent service architecture with improved separation of concerns
- This refactoring provides a scalable foundation for multi-agent collaboration with knowledge-driven decision making and state persistence

											
										
										
											2025-12-12 15:27:12 +08:00
+								"""
 								Prompt Caching 模块
 								为支持缓存的 LLM（如 Anthropic Claude）提供 Prompt 缓存功能。
 								通过在系统提示词和早期对话中添加缓存标记，减少重复处理，
 								显著降低 Token 消耗和响应延迟。
 								支持的 LLM:
 								- Anthropic Claude (claude-3-5-sonnet, claude-3-opus, claude-3-haiku)
 								- OpenAI (部分模型支持)
 								缓存策略:
 								- 短对话（<10轮）: 仅缓存系统提示词
 								- 中等对话（10-30轮）: 缓存系统提示词 + 前5轮对话
 								- 长对话（>30轮）: 多个缓存点，动态调整
 								"""
 								import logging
 								from typing import Dict, Any, List, Optional, Tuple
 								from dataclasses import dataclass, field
 								from enum import Enum
 								logger = logging.getLogger(__name__)
 								class CacheStrategy(str, Enum):
 								    """缓存策略"""
 								    NONE = "none"                    # 不缓存
 								    SYSTEM_ONLY = "system_only"      # 仅缓存系统提示词
 								    SYSTEM_AND_EARLY = "system_early"  # 缓存系统提示词和早期对话
 								    MULTI_POINT = "multi_point"      # 多缓存点
 								@dataclass
 								class CacheConfig:
 								    """缓存配置"""
 								    enabled: bool = True
 								    strategy: CacheStrategy = CacheStrategy.SYSTEM_AND_EARLY
 								    # 缓存阈值
 								    min_system_prompt_tokens: int = 1000  # 系统提示词最小 token 数才启用缓存
 								    early_messages_count: int = 5         # 早期对话缓存的消息数
 								    # 多缓存点配置
 								    multi_point_interval: int = 10        # 多缓存点间隔（消息数）
 								    max_cache_points: int = 4             # 最大缓存点数量
 								@dataclass
 								class CacheStats:
 								    """缓存统计"""
 								    cache_hits: int = 0
 								    cache_misses: int = 0
 								    cached_tokens: int = 0
 								    total_tokens: int = 0
 								    @property
 								    def hit_rate(self) -> float:
 								        total = self.cache_hits + self.cache_misses
 								        return self.cache_hits / total if total > 0 else 0.0
 								    @property
 								    def token_savings(self) -> float:
 								        return self.cached_tokens / self.total_tokens if self.total_tokens > 0 else 0.0
 								class PromptCacheManager:
 								    """
 								    Prompt 缓存管理器
 								    负责:
 . 检测 LLM 是否支持缓存
 . 根据对话长度选择缓存策略
 . 为消息添加缓存标记
 . 统计缓存效果
 								    """
 								    # 支持缓存的模型
 								    CACHEABLE_MODELS = {
 								        # Anthropic Claude
 								        "claude-3-5-sonnet": True,
 								        "claude-3-5-sonnet-20241022": True,
 								        "claude-3-opus": True,
 								        "claude-3-opus-20240229": True,
 								        "claude-3-haiku": True,
 								        "claude-3-haiku-20240307": True,
 								        "claude-3-sonnet": True,
 								        "claude-3-sonnet-20240229": True,
 								        # OpenAI (部分支持)
 								        "gpt-4-turbo": False,  # 暂不支持
 								        "gpt-4o": False,
 								        "gpt-4o-mini": False,
 								    }
 								    # Anthropic 缓存标记
 								    ANTHROPIC_CACHE_CONTROL = {"type": "ephemeral"}
 								    def __init__(self, config: Optional[CacheConfig] = None):
 								        self.config = config or CacheConfig()
 								        self.stats = CacheStats()
 								        self._cache_enabled_for_session = True
 								    def supports_caching(self, model: str, provider: str) -> bool:
 								        """
 								        检查模型是否支持缓存
 								        Args:
 								            model: 模型名称
 								            provider: 提供商名称
 								        Returns:
 								            是否支持缓存
 								        """
 								        if not self.config.enabled:
 								            return False
 								        # Anthropic Claude 支持缓存
 								        if provider.lower() in ["anthropic", "claude"]:
 								            # 检查模型名称
 								            for cacheable_model in self.CACHEABLE_MODELS:
 								                if cacheable_model in model.lower():
 								                    return self.CACHEABLE_MODELS.get(cacheable_model, False)
 								        return False
 								    def determine_strategy(
 								        self,
 								        messages: List[Dict[str, Any]],
 								        system_prompt_tokens: int = 0,
 								    ) -> CacheStrategy:
 								        """
 								        根据对话状态确定缓存策略
 								        Args:
 								            messages: 消息列表
 								            system_prompt_tokens: 系统提示词的 token 数
 								        Returns:
 								            缓存策略
 								        """
 								        if not self.config.enabled:
 								            return CacheStrategy.NONE
 								        # 系统提示词太短，不值得缓存
 								        if system_prompt_tokens < self.config.min_system_prompt_tokens:
 								            return CacheStrategy.NONE
 								        message_count = len(messages)
 								        # 短对话：仅缓存系统提示词
 								        if message_count < 10:
 								            return CacheStrategy.SYSTEM_ONLY
 								        # 中等对话：缓存系统提示词和早期对话
 								        if message_count < 30:
 								            return CacheStrategy.SYSTEM_AND_EARLY
 								        # 长对话：多缓存点
 								        return CacheStrategy.MULTI_POINT
 								    def add_cache_markers_anthropic(
 								        self,
 								        messages: List[Dict[str, Any]],
 								        strategy: CacheStrategy,
 								    ) -> List[Dict[str, Any]]:
 								        """
 								        为 Anthropic Claude 消息添加缓存标记
 								        Anthropic 的缓存格式:
 								        - 在 content 中使用 cache_control 字段
 								        - 支持 text 类型的 content block
 								        Args:
 								            messages: 原始消息列表
 								            strategy: 缓存策略
 								        Returns:
 								            添加了缓存标记的消息列表
 								        """
 								        if strategy == CacheStrategy.NONE:
 								            return messages
 								        cached_messages = []
 								        for i, msg in enumerate(messages):
 								            new_msg = msg.copy()
 								            # 系统提示词缓存
 								            if msg.get("role") == "system":
 								                new_msg = self._add_cache_to_message(new_msg)
 								                cached_messages.append(new_msg)
 								                continue
 								            # 早期对话缓存
 								            if strategy in [CacheStrategy.SYSTEM_AND_EARLY, CacheStrategy.MULTI_POINT]:
 								                if i <= self.config.early_messages_count:
 								                    new_msg = self._add_cache_to_message(new_msg)
 								            # 多缓存点
 								            if strategy == CacheStrategy.MULTI_POINT:
 								                if i > 0 and i % self.config.multi_point_interval == 0:
 								                    cache_point_count = i // self.config.multi_point_interval
 								                    if cache_point_count <= self.config.max_cache_points:
 								                        new_msg = self._add_cache_to_message(new_msg)
 								            cached_messages.append(new_msg)
 								        return cached_messages
 								    def _add_cache_to_message(self, msg: Dict[str, Any]) -> Dict[str, Any]:
 								        """
 								        为单条消息添加缓存标记
 								        Args:
 								            msg: 原始消息
 								        Returns:
 								            添加了缓存标记的消息
 								        """
 								        content = msg.get("content", "")
 								        # 如果 content 是字符串，转换为 content block 格式
 								        if isinstance(content, str):
 								            msg["content"] = [
 								                {
 								                    "type": "text",
 								                    "text": content,
 								                    "cache_control": self.ANTHROPIC_CACHE_CONTROL,
 								                }
 								            ]
 								        elif isinstance(content, list):
 								            # 已经是 content block 格式，为最后一个 block 添加缓存
 								            if content:
 								                last_block = content[-1]
 								                if isinstance(last_block, dict):
 								                    last_block["cache_control"] = self.ANTHROPIC_CACHE_CONTROL
 								        return msg
 								    def process_messages(
 								        self,
 								        messages: List[Dict[str, Any]],
 								        model: str,
 								        provider: str,
 								        system_prompt_tokens: int = 0,
 								    ) -> Tuple[List[Dict[str, Any]], bool]:
 								        """
 								        处理消息，添加缓存标记
 								        Args:
 								            messages: 原始消息列表
 								            model: 模型名称
 								            provider: 提供商名称
 								            system_prompt_tokens: 系统提示词 token 数
 								        Returns:
 								            (处理后的消息列表, 是否启用了缓存)
 								        """
 								        if not self.supports_caching(model, provider):
 								            return messages, False
 								        strategy = self.determine_strategy(messages, system_prompt_tokens)
 								        if strategy == CacheStrategy.NONE:
 								            return messages, False
 								        # 根据提供商选择缓存方法
 								        if provider.lower() in ["anthropic", "claude"]:
 								            cached_messages = self.add_cache_markers_anthropic(messages, strategy)
 								            logger.debug(f"Applied {strategy.value} caching strategy for Anthropic")
 								            return cached_messages, True
 								        return messages, False
 								    def update_stats(
 								        self,
 								        cache_creation_input_tokens: int = 0,
 								        cache_read_input_tokens: int = 0,
 								        total_input_tokens: int = 0,
 								    ):
 								        """
 								        更新缓存统计
 								        Args:
 								            cache_creation_input_tokens: 缓存创建的 token 数
 								            cache_read_input_tokens: 缓存读取的 token 数
 								            total_input_tokens: 总输入 token 数
 								        """
 								        if cache_read_input_tokens > 0:
 								            self.stats.cache_hits += 1
 								            self.stats.cached_tokens += cache_read_input_tokens
 								        else:
 								            self.stats.cache_misses += 1
 								        self.stats.total_tokens += total_input_tokens
 								    def get_stats_summary(self) -> Dict[str, Any]:
 								        """获取缓存统计摘要"""
 								        return {
 								            "cache_hits": self.stats.cache_hits,
 								            "cache_misses": self.stats.cache_misses,
 								            "hit_rate": f"{self.stats.hit_rate:.2%}",
 								            "cached_tokens": self.stats.cached_tokens,
 								            "total_tokens": self.stats.total_tokens,
 								            "token_savings": f"{self.stats.token_savings:.2%}",
 								        }
 								# 全局缓存管理器实例
 								prompt_cache_manager = PromptCacheManager()
 								def estimate_tokens(text: str) -> int:
 								    """
 								    估算文本的 token 数量
 								    简单估算：英文约 4 字符/token，中文约 2 字符/token
 								    Args:
 								        text: 文本内容
 								    Returns:
 								        估算的 token 数
 								    """
 								    if not text:
 								        return 0
 								    # 统计中文字符
 								    chinese_chars = sum(1 for c in text if '\u4e00' <= c <= '\u9fff')
 								    other_chars = len(text) - chinese_chars
 								    # 中文约 2 字符/token，其他约 4 字符/token
 								    return int(chinese_chars / 2 + other_chars / 4)