diff --git a/backend/app/services/rag/embeddings.py b/backend/app/services/rag/embeddings.py index 894024b..f7286c3 100644 --- a/backend/app/services/rag/embeddings.py +++ b/backend/app/services/rag/embeddings.py @@ -639,7 +639,9 @@ class EmbeddingService: ) # 🔥 控制并发请求数 (RPS 限制) - self._semaphore = asyncio.Semaphore(30) + # The global RPS limit is 30 and is shared by 4 gunicorn workers. + # Each worker is capped at floor(30 / 4) = 7 concurrent (in-flight) requests, 28 in total, to stay under that limit. + self._semaphore = asyncio.Semaphore(7) # 🔥 设置默认批次大小 (对于 remote 模型,用户要求为 10) is_remote = self.provider.lower() in ["openai", "qwen", "azure", "cohere", "jina", "huggingface"]