From f95efd7089deaa7cad8bccbd7186b60b678d7efd Mon Sep 17 00:00:00 2001
From: Jack Luar
Date: Fri, 5 Jun 2026 15:54:03 +0000
Subject: [PATCH] fix: batch FAISS embedding to avoid 429 rate limit exhaustion

Split document embedding into 100-chunk batches with a 1s delay between
batches so a 429 only retries one batch (~1 API call) rather than
restarting FAISS.from_documents from scratch (~87 calls). Also raise
retry wait times from max 120s to max 600s to give the quota time to
reset before the next attempt.

Signed-off-by: Jack Luar
---
 backend/src/vectorstores/faiss.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/backend/src/vectorstores/faiss.py b/backend/src/vectorstores/faiss.py
index 8c7c93b3..52cdc15f 100644
--- a/backend/src/vectorstores/faiss.py
+++ b/backend/src/vectorstores/faiss.py
@@ -1,4 +1,5 @@
 import os
+import time
 import logging
 from typing import Optional, Union
 from dotenv import load_dotenv
@@ -74,13 +75,13 @@ def faiss_db(self) -> Optional[FAISS]:
 
     @retry(
         stop=stop_after_attempt(5),
-        wait=wait_exponential(multiplier=2, min=10, max=120),
+        wait=wait_exponential(multiplier=2, min=60, max=600),
         retry=retry_if_exception(
             lambda e: "RESOURCE_EXHAUSTED" in str(e) or "429" in str(e)
         ),
         reraise=True,
     )
-    def _add_to_db(self, documents: list[Document]) -> None:
+    def _embed_and_add(self, documents: list[Document]) -> None:
         if self._faiss_db is None:
             self._faiss_db = FAISS.from_documents(
                 documents=documents,
@@ -90,6 +91,13 @@ def _add_to_db(self, documents: list[Document]) -> None:
         else:
             self._faiss_db.add_documents(documents)
 
+    def _add_to_db(self, documents: list[Document], batch_size: int = 100) -> None:
+        for i in range(0, len(documents), batch_size):
+            batch = documents[i : i + batch_size]
+            self._embed_and_add(batch)
+            if i + batch_size < len(documents):
+                time.sleep(1)
+
     def add_md_docs(
         self, folder_paths: list[str], chunk_size: int = 500, return_docs: bool = False
     ) -> Optional[list[Document]]:
@@ -229,7 +237,7 @@ def get_documents(self) -> list[Document]:
 
     @retry(
         stop=stop_after_attempt(5),
-        wait=wait_exponential(multiplier=2, min=10, max=120),
+        wait=wait_exponential(multiplier=2, min=60, max=600),
         retry=retry_if_exception(
             lambda e: "RESOURCE_EXHAUSTED" in str(e) or "429" in str(e)
         ),