diff --git a/backend/app/services/rag/indexer.py b/backend/app/services/rag/indexer.py
index 2f22c60..168d489 100644
--- a/backend/app/services/rag/indexer.py
+++ b/backend/app/services/rag/indexer.py
@@ -1241,6 +1241,21 @@ class CodeIndexer:
         if not chunks:
             return
 
+        # Deduplicate by chunk ID: keep the first chunk seen for each ID, skip and log later ones
+        seen_ids: Set[str] = set()
+        unique_chunks: List[CodeChunk] = []
+        for chunk in chunks:
+            if chunk.id not in seen_ids:
+                seen_ids.add(chunk.id)
+                unique_chunks.append(chunk)
+            else:
+                logger.warning(f"跳过重复 ID 的代码块: {chunk.id} ({chunk.file_path}:{chunk.line_start})")
+
+        if len(unique_chunks) < len(chunks):
+            logger.info(f"🔄 去重: {len(chunks)} -> {len(unique_chunks)} 个代码块")
+
+        chunks = unique_chunks
+
         # 准备嵌入文本
         texts = [chunk.to_embedding_text() for chunk in chunks]
 
diff --git a/backend/app/services/rag/splitter.py b/backend/app/services/rag/splitter.py
index 2144f1c..4dbc89e 100644
--- a/backend/app/services/rag/splitter.py
+++ b/backend/app/services/rag/splitter.py
@@ -78,7 +78,8 @@ class CodeChunk:
             self.estimated_tokens = self._estimate_tokens()
     
     def _generate_id(self) -> str:
-        content = f"{self.file_path}:{self.line_start}:{self.line_end}:{self.content[:100]}"
+        # Hash the full content (not just its first 100 characters) so chunks sharing a location and prefix get distinct IDs
+        content = f"{self.file_path}:{self.line_start}:{self.line_end}:{self.content}"
         return hashlib.sha256(content.encode()).hexdigest()[:16]
     
     def _estimate_tokens(self) -> int: