From 14b75beb9ca8fa45d04427770a6ff0ae0bc8e761 Mon Sep 17 00:00:00 2001 From: vinland100 Date: Thu, 8 Jan 2026 18:40:23 +0800 Subject: [PATCH] Adjust the number of concurrent requests according to the number of workers. --- backend/app/services/rag/embeddings.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/backend/app/services/rag/embeddings.py b/backend/app/services/rag/embeddings.py index 894024b..f7286c3 100644 --- a/backend/app/services/rag/embeddings.py +++ b/backend/app/services/rag/embeddings.py @@ -639,7 +639,9 @@ class EmbeddingService: ) # 🔥 控制并发请求数 (RPS 限制) - self._semaphore = asyncio.Semaphore(30) + # 全局 RPS 限制为 30,由 4 个 gunicorn worker 共享 + # 每个 worker 限制为 30 // 4 = 7 个并发请求(向下取整),确保不触发限流 + self._semaphore = asyncio.Semaphore(7) # 🔥 设置默认批次大小 (对于 remote 模型,用户要求为 10) is_remote = self.provider.lower() in ["openai", "qwen", "azure", "cohere", "jina", "huggingface"]