From 53eb24ac99bc4bc2046183003c618c198d9f96bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9?= Date: Sun, 17 May 2026 15:54:35 +0200 Subject: [PATCH 1/2] feat(logosdb): add LogosDB embedded vector database engine MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Implement LogosDB engine: LogosDBConfigurator, LogosDBUploader, LogosDBSearcher - DB path configured via connection_params.path (default /tmp/logosdb_vdb_bench) - max_elements configurable via collection_params (default 2,000,000) - Distance mapping: cosine → DIST_COSINE, dot → DIST_IP, l2 → DIST_L2 - Metadata sidecar (.meta.json) bridges configure → upload → search phases - Add experiment preset logosdb-m16-ef200 (glove-25-angular baseline) - Add logosdb>=0.9.0 to pyproject.toml dependencies Benchmark result (glove-25-angular, 1.18M vectors, dim=25, cosine): upload_time=345s rps=9401 mean_precision=0.939 p99=0.15ms Co-authored-by: Cursor --- engine/clients/client_factory.py | 4 +++ engine/clients/logosdb/__init__.py | 9 ++++++ engine/clients/logosdb/configure.py | 43 +++++++++++++++++++++++++ engine/clients/logosdb/search.py | 34 +++++++++++++++++++ engine/clients/logosdb/upload.py | 40 +++++++++++++++++++++++ experiments/configurations/logosdb.json | 22 +++++++++++++ pyproject.toml | 1 + 7 files changed, 153 insertions(+) create mode 100644 engine/clients/logosdb/__init__.py create mode 100644 engine/clients/logosdb/configure.py create mode 100644 engine/clients/logosdb/search.py create mode 100644 engine/clients/logosdb/upload.py create mode 100644 experiments/configurations/logosdb.json diff --git a/engine/clients/client_factory.py b/engine/clients/client_factory.py index c5f47521c..89888a3b8 100644 --- a/engine/clients/client_factory.py +++ b/engine/clients/client_factory.py @@ -34,6 +34,7 @@ QdrantNativeSearcher, QdrantNativeUploader, ) +from engine.clients.logosdb import LogosDBConfigurator, LogosDBSearcher, LogosDBUploader from engine.clients.redis import RedisConfigurator, RedisSearcher, RedisUploader from engine.clients.weaviate import ( WeaviateConfigurator, @@ -42,6 +43,7 @@ ) ENGINE_CONFIGURATORS = { + "logosdb": LogosDBConfigurator, "qdrant": QdrantConfigurator, "qdrant_native": QdrantNativeConfigurator, "qdrant_hybrid": QdrantHybridConfigurator, @@ -54,6 +56,7 @@ } ENGINE_UPLOADERS = { + "logosdb": LogosDBUploader, "qdrant": QdrantUploader, "qdrant_native": QdrantNativeUploader, "qdrant_hybrid": QdrantHybridUploader, @@ -66,6 +69,7 @@ } ENGINE_SEARCHERS = { + "logosdb": LogosDBSearcher, "qdrant": QdrantSearcher, "qdrant_native": QdrantNativeSearcher, "qdrant_hybrid": QdrantHybridSearcher, diff --git a/engine/clients/logosdb/__init__.py b/engine/clients/logosdb/__init__.py new file mode 100644 index 000000000..604a59a49 --- /dev/null +++ b/engine/clients/logosdb/__init__.py @@ -0,0 +1,9 @@ +from engine.clients.logosdb.configure import LogosDBConfigurator +from engine.clients.logosdb.search import LogosDBSearcher +from engine.clients.logosdb.upload import LogosDBUploader + +__all__ = [ + "LogosDBConfigurator", + "LogosDBSearcher", + "LogosDBUploader", +] diff --git a/engine/clients/logosdb/configure.py b/engine/clients/logosdb/configure.py new file mode 100644 index 000000000..48cfae7c0 --- /dev/null +++ b/engine/clients/logosdb/configure.py @@ -0,0 +1,43 @@ +import json +import os +import shutil + +from benchmark.dataset import Dataset +from engine.base_client.configure import BaseConfigurator +from engine.base_client.distances import Distance + +DISTANCE_MAP = { + Distance.COSINE: 1, # logosdb.DIST_COSINE + Distance.DOT: 0, # logosdb.DIST_IP + Distance.L2: 2, # logosdb.DIST_L2 +} + +DEFAULT_PATH = "/tmp/logosdb_vdb_bench" + + +class LogosDBConfigurator(BaseConfigurator): + def __init__(self, host, collection_params: dict, connection_params: dict): + super().__init__(host, collection_params, connection_params) + self.path = connection_params.get("path", DEFAULT_PATH) + + def clean(self): + if os.path.exists(self.path): + shutil.rmtree(self.path) + meta = self.path + ".meta.json" + if os.path.exists(meta): + os.remove(meta) + + def recreate(self, dataset: Dataset, collection_params): + import logosdb + + dim = dataset.config.vector_size + dist = DISTANCE_MAP.get(dataset.config.distance, logosdb.DIST_COSINE) + max_elements = collection_params.get("max_elements", 2_000_000) + + db = logosdb.DB(self.path, dim=dim, distance=dist, max_elements=max_elements) + del db + + with open(self.path + ".meta.json", "w") as f: + json.dump({"dim": dim, "distance": int(dist), "max_elements": max_elements}, f) + + return {} diff --git a/engine/clients/logosdb/search.py b/engine/clients/logosdb/search.py new file mode 100644 index 000000000..9dc60a8df --- /dev/null +++ b/engine/clients/logosdb/search.py @@ -0,0 +1,34 @@ +import json +from typing import List, Tuple + +import numpy as np + +from dataset_reader.base_reader import Query +from engine.base_client.search import BaseSearcher + +DEFAULT_PATH = "/tmp/logosdb_vdb_bench" + + +class LogosDBSearcher(BaseSearcher): + client = None + + @classmethod + def init_client(cls, host: str, distance, connection_params: dict, search_params: dict): + import logosdb + + path = connection_params.get("path", DEFAULT_PATH) + with open(path + ".meta.json") as f: + meta = json.load(f) + cls.client = logosdb.DB(path, dim=meta["dim"], distance=meta["distance"], max_elements=meta.get("max_elements", 2_000_000)) + + @classmethod + def search_one(cls, query: Query, top: int) -> List[Tuple[int, float]]: + q = np.array(query.vector, dtype=np.float32) + hits = cls.client.search(q, top_k=top) + return [(int(h.text), h.score) for h in hits] + + @classmethod + def delete_client(cls): + if cls.client is not None: + del cls.client + cls.client = None diff --git a/engine/clients/logosdb/upload.py b/engine/clients/logosdb/upload.py new file mode 100644 index 000000000..6fab4d135 --- /dev/null +++ b/engine/clients/logosdb/upload.py @@ -0,0 +1,40 @@ +import json +from typing import List + +import numpy as np + +from dataset_reader.base_reader import Record +from engine.base_client.upload import BaseUploader + +DEFAULT_PATH = "/tmp/logosdb_vdb_bench" + + +class LogosDBUploader(BaseUploader): + client = None + upload_params = {} + + @classmethod + def init_client(cls, host, distance, connection_params: dict, upload_params: dict): + import logosdb + + path = connection_params.get("path", DEFAULT_PATH) + with open(path + ".meta.json") as f: + meta = json.load(f) + cls.client = logosdb.DB(path, dim=meta["dim"], distance=meta["distance"], max_elements=meta.get("max_elements", 2_000_000)) + cls.upload_params = upload_params + + @classmethod + def upload_batch(cls, batch: List[Record]): + vectors = np.array([r.vector for r in batch], dtype=np.float32) + texts = [str(r.id) for r in batch] + cls.client.put_batch(vectors, texts=texts) + + @classmethod + def post_upload(cls, distance): + return {} + + @classmethod + def delete_client(cls): + if cls.client is not None: + del cls.client + cls.client = None diff --git a/experiments/configurations/logosdb.json b/experiments/configurations/logosdb.json new file mode 100644 index 000000000..3c5d0e366 --- /dev/null +++ b/experiments/configurations/logosdb.json @@ -0,0 +1,22 @@ +[ + { + "name": "logosdb-m16-ef200", + "engine": "logosdb", + "connection_params": { + "path": "/tmp/logosdb_vdb_bench" + }, + "collection_params": { + "max_elements": 2000000 + }, + "upload_params": { + "parallel": 1, + "batch_size": 1000 + }, + "search_params": [ + { + "parallel": 1, + "top": 10 + } + ] + } +] diff --git a/pyproject.toml b/pyproject.toml index 79e4c17ad..a2d693e58 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,6 +20,7 @@ opensearch-py = "^2.3.2" tqdm = "^4.66.1" psycopg = {extras = ["binary"], version = "^3.1.17"} pgvector = "^0.2.4" +logosdb = ">=0.9.0" [tool.poetry.group.dev.dependencies] pre-commit = "^2.20.0" From afdb01501fa5dc69e5ed618d67f260c981df2bfc Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 17 May 2026 13:54:52 +0000 Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- engine/clients/client_factory.py | 2 +- engine/clients/logosdb/configure.py | 8 +++++--- engine/clients/logosdb/search.py | 11 +++++++++-- engine/clients/logosdb/upload.py | 7 ++++++- 4 files changed, 21 insertions(+), 7 deletions(-) diff --git a/engine/clients/client_factory.py b/engine/clients/client_factory.py index 89888a3b8..be5d06b25 100644 --- a/engine/clients/client_factory.py +++ b/engine/clients/client_factory.py @@ -12,6 +12,7 @@ ElasticSearcher, ElasticUploader, ) +from engine.clients.logosdb import LogosDBConfigurator, LogosDBSearcher, LogosDBUploader from engine.clients.milvus import MilvusConfigurator, MilvusSearcher, MilvusUploader from engine.clients.opensearch import ( OpenSearchConfigurator, @@ -34,7 +35,6 @@ QdrantNativeSearcher, QdrantNativeUploader, ) -from engine.clients.logosdb import LogosDBConfigurator, LogosDBSearcher, LogosDBUploader from engine.clients.redis import RedisConfigurator, RedisSearcher, RedisUploader from engine.clients.weaviate import ( WeaviateConfigurator, diff --git a/engine/clients/logosdb/configure.py b/engine/clients/logosdb/configure.py index 48cfae7c0..d50e41cd3 100644 --- a/engine/clients/logosdb/configure.py +++ b/engine/clients/logosdb/configure.py @@ -8,8 +8,8 @@ DISTANCE_MAP = { Distance.COSINE: 1, # logosdb.DIST_COSINE - Distance.DOT: 0, # logosdb.DIST_IP - Distance.L2: 2, # logosdb.DIST_L2 + Distance.DOT: 0, # logosdb.DIST_IP + Distance.L2: 2, # logosdb.DIST_L2 } DEFAULT_PATH = "/tmp/logosdb_vdb_bench" @@ -38,6 +38,8 @@ def recreate(self, dataset: Dataset, collection_params): del db with open(self.path + ".meta.json", "w") as f: - json.dump({"dim": dim, "distance": int(dist), "max_elements": max_elements}, f) + json.dump( + {"dim": dim, "distance": int(dist), "max_elements": max_elements}, f + ) return {} diff --git a/engine/clients/logosdb/search.py b/engine/clients/logosdb/search.py index 9dc60a8df..104cbf2ef 100644 --- a/engine/clients/logosdb/search.py +++ b/engine/clients/logosdb/search.py @@ -13,13 +13,20 @@ class LogosDBSearcher(BaseSearcher): client = None @classmethod - def init_client(cls, host: str, distance, connection_params: dict, search_params: dict): + def init_client( + cls, host: str, distance, connection_params: dict, search_params: dict + ): import logosdb path = connection_params.get("path", DEFAULT_PATH) with open(path + ".meta.json") as f: meta = json.load(f) - cls.client = logosdb.DB(path, dim=meta["dim"], distance=meta["distance"], max_elements=meta.get("max_elements", 2_000_000)) + cls.client = logosdb.DB( + path, + dim=meta["dim"], + distance=meta["distance"], + max_elements=meta.get("max_elements", 2_000_000), + ) @classmethod def search_one(cls, query: Query, top: int) -> List[Tuple[int, float]]: diff --git a/engine/clients/logosdb/upload.py b/engine/clients/logosdb/upload.py index 6fab4d135..ce0474c74 100644 --- a/engine/clients/logosdb/upload.py +++ b/engine/clients/logosdb/upload.py @@ -20,7 +20,12 @@ def init_client(cls, host, distance, connection_params: dict, upload_params: dic path = connection_params.get("path", DEFAULT_PATH) with open(path + ".meta.json") as f: meta = json.load(f) - cls.client = logosdb.DB(path, dim=meta["dim"], distance=meta["distance"], max_elements=meta.get("max_elements", 2_000_000)) + cls.client = logosdb.DB( + path, + dim=meta["dim"], + distance=meta["distance"], + max_elements=meta.get("max_elements", 2_000_000), + ) cls.upload_params = upload_params @classmethod