diff --git a/infra/docker-compose.override.yml b/infra/docker-compose.override.yml index 0be7631..fa850f7 100644 --- a/infra/docker-compose.override.yml +++ b/infra/docker-compose.override.yml @@ -113,6 +113,9 @@ services: - "traefik.http.services.api-docs.loadbalancer.server.port=8080" web-client: + build: + args: + VITE_KEYCLOAK_URL: http://localhost:8081/auth labels: !override - "traefik.enable=true" - "traefik.http.routers.web-client.entrypoints=web" diff --git a/services/py-genai-helper/file-storage/faust.pdf b/services/py-genai-helper/file-storage/faust.pdf new file mode 100644 index 0000000..9f74428 Binary files /dev/null and b/services/py-genai-helper/file-storage/faust.pdf differ diff --git a/services/py-genai-helper/rag.py b/services/py-genai-helper/rag.py index 5fa3abc..903d516 100644 --- a/services/py-genai-helper/rag.py +++ b/services/py-genai-helper/rag.py @@ -1,30 +1,41 @@ +from pathlib import Path + from dotenv import load_dotenv from langchain.agents import create_agent +from langchain_community.document_loaders import PyPDFLoader from langchain_community.vectorstores import FAISS from langchain_core.tools import create_retriever_tool from langchain_openai import OpenAIEmbeddings +from langchain_text_splitters import RecursiveCharacterTextSplitter load_dotenv() -# Loads an existing embedding model embeddings = OpenAIEmbeddings(model="text-embedding-3-large") -texts = [ - "I enjoy oranges.", - "I love apples.", - "I think pears taste very good", - "I hate bananas.", - "I dislike raspberries", - "I despise mangos.", - "I love Linux.", - "I hate Windows.", -] +_FILE_STORAGE = Path(__file__).parent / "file-storage" + + +def _load_pdfs() -> FAISS | None: + pdf_files = list(_FILE_STORAGE.glob("*.pdf")) + if not pdf_files: + return None + + splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200) + docs = [] + for path in pdf_files: + loader = PyPDFLoader(str(path)) + docs.extend(loader.load_and_split(splitter)) -# Creates a vector store of our inputs using embeddings -vector_store = FAISS.from_texts(texts, embedding=embeddings) + return FAISS.from_documents(docs, embedding=embeddings) + + +vector_store = _load_pdfs() def get_rag_agent(): + if vector_store is None: + raise RuntimeError("No PDFs found in file-storage/") + retriever = vector_store.as_retriever(search_kwargs={"k": 3}) retriever_tool = create_retriever_tool( diff --git a/services/py-genai-helper/requirements.txt b/services/py-genai-helper/requirements.txt index d80ce14..874ce76 100644 --- a/services/py-genai-helper/requirements.txt +++ b/services/py-genai-helper/requirements.txt @@ -52,6 +52,7 @@ propcache==0.5.2 pydantic==2.13.4 pydantic-settings==2.14.1 pydantic_core==2.46.4 +pypdf==5.6.0 python-dotenv==1.2.2 PyYAML==6.0.3 regex==2026.5.9