Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
81826b7
wip(rag): V1 of a rag working with an ollama llm working with the cur…
MatthiasvonRakowski May 7, 2026
87588fb
wip(rag): set the filter at None to be able to restrieve collections …
MatthiasvonRakowski May 7, 2026
fe78725
Merge remote-tracking branch 'origin/dev' into mvr/#38/setupRAG
MatthiasvonRakowski May 11, 2026
9b7dade
feat(id): add ids to make it work with the rag system + an ingestion …
MatthiasvonRakowski May 11, 2026
52b2cc5
clean(id): clean code
MatthiasvonRakowski May 11, 2026
209969e
feat(ingestion): ingestion done with the possibility of semantic and …
MatthiasvonRakowski May 12, 2026
b5f810f
wip(todo): Add some todos to not forget the work I have to do
MatthiasvonRakowski May 12, 2026
a0418a9
refacto(user_ids): user_id -> user_id
MatthiasvonRakowski May 15, 2026
529297a
wip(pr): add a module with ids and generate a rag class with module w…
MatthiasvonRakowski May 15, 2026
9cc6083
Merge remote-tracking branch 'origin/mvr/#14/ids_managment' into mvr/…
MatthiasvonRakowski May 15, 2026
04073a0
wip(docker): add a docker that launch with one commande. Only work wi…
MatthiasvonRakowski May 15, 2026
eebb216
wip(config): move qdrant, ollama into a config file.
MatthiasvonRakowski May 18, 2026
52c03bb
wip(config): config file client updated
MatthiasvonRakowski May 18, 2026
7bdcc71
feat(huri): update config file
MatthiasvonRakowski May 18, 2026
452fc4d
fix(ingestion): update for the wrong branch now fixed
MatthiasvonRakowski May 18, 2026
2f691fa
merge: dev -> launch docker
MatthiasvonRakowski May 18, 2026
96d1da7
merge: dev -> launch docker
MatthiasvonRakowski May 18, 2026
eb4f98c
fix(config): huri.yaml fix
MatthiasvonRakowski May 18, 2026
450f21d
remove(main): remove unnecessary function
MatthiasvonRakowski May 18, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions config/client_aux2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
huri_url: ws://localhost:8000/session

topic_list: ["transcript", "question", "rag_response"]
sample_rate: 16000
frame_duration: 0.030
modules:
mic:
name: mic
args:
vad_agressiveness: 3
silence_duration: 1.5
block_duration: ${frame_duration}
stt:
name: stt
args:
language: "en"
block_duration: ${frame_duration}
logging: INFO
tag:
name: tag
logging: INFO
rag:
name: rag
args:
language: "en"
tone: "formal"
15 changes: 15 additions & 0 deletions config/huri.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,25 @@ logging_config:
enable_access_log: true
additional_log_standard_attrs: []

services:
qdrant:
port: 6333
image: "qdrant/qdrant:latest"
storage_volume: "qdrant_data"
ollama:
model: "mistral:7b"
image: "ollama/ollama:rocm"
gpu_devices: true
num_replicas: 1

applications:
- name: huri-app
route_prefix: /
import_path: src.app:app
runtime_env: { RAY_COLOR_PREFIX=1 }
deployments:
- name: HuRI
- name: RAGHandle
num_replicas: 2
- name: OllamaService
- name: QdrantService
37 changes: 35 additions & 2 deletions src/app.py
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tu pourrais possiblement faire la config de qdrant et OllamaService danss le config file huri.yaml je pense, ce srait plus clean

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

T as raison je vais regarder pour le faire

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bizarement fait

Original file line number Diff line number Diff line change
@@ -1,15 +1,48 @@
from pathlib import Path

import yaml
from ray.serve import Application

from src.core.huri import HuRI
from src.modules.factory import bind_deployment_handles
from src.modules.modules import get_modules
from src.modules.rag.docker_services import OllamaService, QdrantService


def load_services_config() -> dict:
config_path = Path(__file__).resolve().parents[1] / "config" / "huri.yaml"
with open(config_path) as f:
config = yaml.safe_load(f)
return config.get("services", {})


def build_qdrant(config: dict):
return QdrantService.bind(
port=config.get("port", 6333),
image=config.get("image", "qdrant/qdrant:latest"),
storage_volume=config.get("storage_volume", "qdrant_data"),
)


def build_ollama(config: dict):
return OllamaService.options(
num_replicas=config.get("num_replicas", 1),
).bind(
model=config.get("model", "mistral:7b"),
image=config.get("image", "ollama/ollama:latest"),
gpu_devices=config.get("gpu_devices", False),
)


def build_app() -> Application:
modules = get_modules()
handles = bind_deployment_handles(modules)
services_config = load_services_config()

qdrant = build_qdrant(services_config.get("qdrant", {}))
ollama = build_ollama(services_config.get("ollama", {}))

app: Application = HuRI.bind(modules, handles) # type: ignore[attr-defined]
handles = bind_deployment_handles(modules, ollama=ollama, qdrant=qdrant)
app: Application = HuRI.bind(modules, handles)
return app


Expand Down
13 changes: 11 additions & 2 deletions src/modules/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,15 +63,24 @@ def create_from_config(

def bind_deployment_handles(
modules: Dict[str, Type[Module]],
**service_handles,
) -> Dict[str, handle.DeploymentHandle]:
handles: Dict[str, handle.DeploymentHandle] = {}
for name, module_cls in modules.items():
if not issubclass(module_cls, ModuleWithHandle):
continue

if not hasattr(module_cls, "_handle_cls"):
raise TypeError(f"{module_cls.__name__} must define _handle_cls")

handle_cls = module_cls._handle_cls
handles[name] = handle_cls.bind()

if name == "rag" and service_handles:
handles[name] = handle_cls.bind(
ollama_handle=service_handles.get("ollama"),
qdrant_handle=service_handles.get("qdrant"),
)
else:
handles[name] = handle_cls.bind()

return handles
220 changes: 220 additions & 0 deletions src/modules/rag/docker_services.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,220 @@
import time
import socket
import subprocess

import httpx
from ray import serve


def find_free_port() -> int:
"""
Ask the OS for a random free port.
We need this because if we run multiple Ollama containers,
they can't all use port 11434 — each needs its own.
"""
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
s.bind(("", 0))
return s.getsockname()[1]


def wait_for_service(url: str, timeout: int = 120) -> bool:
"""
Returns True if ready, False if timeout.
"""
start = time.time()
while time.time() - start < timeout:
try:
resp = httpx.get(url, timeout=5)
if resp.status_code == 200:
return True
except Exception:
pass
time.sleep(2)
return False


def is_container_running(name: str) -> bool:
"""Check if a Docker container with this name is already running."""
result = subprocess.run(
["docker", "ps", "-q", "-f", f"name=^{name}$"],
capture_output=True, text=True,
)
return bool(result.stdout.strip())


def remove_container(name: str):
"""Force remove a container by name (ignores errors if it doesn't exist)."""
subprocess.run(["docker", "rm", "-f", name], capture_output=True)


@serve.deployment
class OllamaService:
"""
Manages one Ollama Docker container.

LIFECYCLE:
__init__: starts container -> waits for it -> pulls model
generate: sends a prompt to the container, returns the answer
__del__: stops and removes the container
"""

def __init__(
self,
model: str = "mistral:7b",
image: str = "ollama/ollama:latest",
gpu_devices: bool = False,
):
self.model = model
self.port = find_free_port()
self.container_name = f"ollama-ray-{self.port}"
self.base_url = f"http://localhost:{self.port}"

remove_container(self.container_name)

cmd = [
"docker", "run", "-d",
"--name", self.container_name,
"-p", f"{self.port}:11434",
"-v", "ollama_shared:/root/.ollama",
]

if gpu_devices:
cmd.extend([
"--device=/dev/kfd",
"--device=/dev/dri",
"--group-add=video",
])

cmd.append(image)

print(f"[OllamaService] Starting container '{self.container_name}' on port {self.port}...")
result = subprocess.run(cmd, capture_output=True, text=True)
if result.returncode != 0:
raise RuntimeError(f"Docker failed: {result.stderr}")

print(f"[OllamaService] Waiting for Ollama to be ready...")
if not wait_for_service(f"{self.base_url}/api/tags"):
raise RuntimeError(f"Ollama didn't start within timeout on port {self.port}")

print(f"[OllamaService] Pulling model '{model}'...")
pull_result = subprocess.run(
["docker", "exec", self.container_name, "ollama", "pull", model],
capture_output=True, text=True,
)
if pull_result.returncode != 0:
raise RuntimeError(f"Failed to pull model: {pull_result.stderr}")

print(f"[OllamaService] Ready! container='{self.container_name}', port={self.port}, model='{model}'")


async def generate(
self,
messages: list,
max_tokens: int = 1024,
temperature: float = 0.1,
) -> str:
"""
Send messages to Ollama and return the response.
This is what RAGHandle calls to get LLM answers.
"""
async with httpx.AsyncClient(timeout=60.0) as client:
resp = await client.post(
f"{self.base_url}/api/chat",
json={
"model": self.model,
"messages": messages,
"stream": False,
"options": {
"num_predict": max_tokens,
"temperature": temperature,
},
},
)
resp.raise_for_status()
return resp.json()["message"]["content"]

async def health(self) -> dict:
"""Check if this Ollama instance is alive."""
try:
async with httpx.AsyncClient(timeout=5.0) as client:
resp = await client.get(f"{self.base_url}/api/tags")
return {"status": "ok", "port": self.port, "container": self.container_name}
except Exception as e:
return {"status": "error", "error": str(e)}

def __del__(self):
"""Cleanup when Ray destroys this replica."""
print(f"[OllamaService] Removing container '{self.container_name}'")
remove_container(self.container_name)


@serve.deployment(num_replicas=1)
class QdrantService:
"""
Manages a Qdrant Docker container.

LIFECYCLE:
__init__: starts container (or reuses if already running)
get_url: returns the URL other services should connect to
__del__: leaves the container running (it has data!)
"""

def __init__(
self,
port: int = 6333,
image: str = "qdrant/qdrant:latest",
storage_volume: str = "qdrant_data",
):
self.port = port
self.container_name = "qdrant-ray"
self.url = f"http://localhost:{self.port}"

if self._is_healthy():
print(f"[QdrantService] Qdrant already running on port {self.port}")
return

remove_container(self.container_name)

cmd = [
"docker", "run", "-d",
"--name", self.container_name,
"-p", f"{self.port}:6333",
"-v", f"{storage_volume}:/qdrant/storage",
image,
]

print(f"[QdrantService] Starting Qdrant on port {self.port}...")
result = subprocess.run(cmd, capture_output=True, text=True)
if result.returncode != 0:
raise RuntimeError(f"Docker failed: {result.stderr}")

if not wait_for_service(f"{self.url}/healthz"):
raise RuntimeError(f"Qdrant didn't start within timeout on port {self.port}")

print(f"[QdrantService] Ready on port {self.port}")


def _is_healthy(self) -> bool:
try:
resp = httpx.get(f"{self.url}/healthz", timeout=3)
return resp.status_code == 200
except Exception:
return False


async def get_url(self) -> str:
"""Return the URL. Called by RAGHandle to know where Qdrant is."""
return self.url


async def health(self) -> dict:
try:
async with httpx.AsyncClient(timeout=5.0) as client:
resp = await client.get(f"{self.url}/healthz")
return {"status": "ok", "port": self.port, "url": self.url}
except Exception as e:
return {"status": "error", "error": str(e)}


def __del__(self):
print(f"[QdrantService] Actor destroyed. Container '{self.container_name}' left running.")
Loading