Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions engine/clients/client_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
ElasticSearcher,
ElasticUploader,
)
from engine.clients.logosdb import LogosDBConfigurator, LogosDBSearcher, LogosDBUploader
from engine.clients.milvus import MilvusConfigurator, MilvusSearcher, MilvusUploader
from engine.clients.opensearch import (
OpenSearchConfigurator,
Expand Down Expand Up @@ -42,6 +43,7 @@
)

ENGINE_CONFIGURATORS = {
"logosdb": LogosDBConfigurator,
"qdrant": QdrantConfigurator,
"qdrant_native": QdrantNativeConfigurator,
"qdrant_hybrid": QdrantHybridConfigurator,
Expand All @@ -54,6 +56,7 @@
}

ENGINE_UPLOADERS = {
"logosdb": LogosDBUploader,
"qdrant": QdrantUploader,
"qdrant_native": QdrantNativeUploader,
"qdrant_hybrid": QdrantHybridUploader,
Expand All @@ -66,6 +69,7 @@
}

ENGINE_SEARCHERS = {
"logosdb": LogosDBSearcher,
"qdrant": QdrantSearcher,
"qdrant_native": QdrantNativeSearcher,
"qdrant_hybrid": QdrantHybridSearcher,
Expand Down
9 changes: 9 additions & 0 deletions engine/clients/logosdb/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from engine.clients.logosdb.configure import LogosDBConfigurator
from engine.clients.logosdb.search import LogosDBSearcher
from engine.clients.logosdb.upload import LogosDBUploader

# Public names of the LogosDB client package: the configurator, searcher and
# uploader classes imported at the top of this module.
__all__ = [
"LogosDBConfigurator",
"LogosDBSearcher",
"LogosDBUploader",
]
45 changes: 45 additions & 0 deletions engine/clients/logosdb/configure.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import json
import os
import shutil

from benchmark.dataset import Dataset
from engine.base_client.configure import BaseConfigurator
from engine.base_client.distances import Distance

# Translation from the benchmark's Distance enum to the integer metric codes
# passed to logosdb.DB(distance=...). The codes are hard-coded here; the
# trailing comment on each entry names the logosdb constant it is meant to
# mirror. NOTE(review): confirm the values still match the installed logosdb.
DISTANCE_MAP = {
Distance.COSINE: 1, # logosdb.DIST_COSINE
Distance.DOT: 0, # logosdb.DIST_IP
Distance.L2: 2, # logosdb.DIST_L2
}

# Index location used when connection_params has no "path" entry.
DEFAULT_PATH = "/tmp/logosdb_vdb_bench"


class LogosDBConfigurator(BaseConfigurator):
    """Create and remove the on-disk LogosDB index used by the benchmark.

    Alongside the index at ``self.path`` a sidecar file
    ``<path>.meta.json`` records the parameters the index was created with.
    """

    def __init__(self, host, collection_params: dict, connection_params: dict):
        super().__init__(host, collection_params, connection_params)
        # The storage location is configurable; otherwise use the module default.
        self.path = connection_params.get("path", DEFAULT_PATH)

    def clean(self):
        """Delete the index tree and its metadata sidecar when they exist."""
        sidecar = self.path + ".meta.json"
        if os.path.exists(self.path):
            shutil.rmtree(self.path)
        if os.path.exists(sidecar):
            os.remove(sidecar)

    def recreate(self, dataset: Dataset, collection_params):
        """Create a fresh index for ``dataset`` and write its metadata file.

        Args:
            dataset: Supplies ``config.vector_size`` and ``config.distance``.
            collection_params: May carry ``max_elements``; defaults to
                2,000,000 when absent.

        Returns:
            An empty dict.
        """
        import logosdb

        creation_args = {
            "dim": dataset.config.vector_size,
            # Distances missing from DISTANCE_MAP fall back to cosine.
            "distance": DISTANCE_MAP.get(
                dataset.config.distance, logosdb.DIST_COSINE
            ),
            "max_elements": collection_params.get("max_elements", 2_000_000),
        }

        # Open the database once so it is created, then drop the handle
        # straight away.
        handle = logosdb.DB(self.path, **creation_args)
        del handle

        # The metadata stores the distance as a plain int; key order is kept.
        metadata = {**creation_args, "distance": int(creation_args["distance"])}
        with open(self.path + ".meta.json", "w") as sidecar:
            sidecar.write(json.dumps(metadata))

        return {}
41 changes: 41 additions & 0 deletions engine/clients/logosdb/search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import json
from typing import List, Tuple

import numpy as np

from dataset_reader.base_reader import Query
from engine.base_client.search import BaseSearcher

# Index location used when connection_params has no "path" entry. The
# configurator defaults to the same literal and writes the
# "<path>.meta.json" file that init_client reads.
DEFAULT_PATH = "/tmp/logosdb_vdb_bench"


class LogosDBSearcher(BaseSearcher):
    """Run benchmark queries against a LogosDB index.

    The database handle is kept on the class so every classmethod shares it.
    """

    client = None

    @classmethod
    def init_client(
        cls, host: str, distance, connection_params: dict, search_params: dict
    ):
        """Open the index described by the ``<path>.meta.json`` sidecar.

        ``host``, ``distance`` and ``search_params`` are accepted for
        interface compatibility but are not used here.
        """
        import logosdb

        db_path = connection_params.get("path", DEFAULT_PATH)
        with open(db_path + ".meta.json") as sidecar:
            settings = json.loads(sidecar.read())

        cls.client = logosdb.DB(
            db_path,
            dim=settings["dim"],
            distance=settings["distance"],
            max_elements=settings.get("max_elements", 2_000_000),
        )

    @classmethod
    def search_one(cls, query: Query, top: int) -> List[Tuple[int, float]]:
        """Return ``(id, score)`` pairs for the ``top`` hits of ``query``.

        Each hit's ``text`` field is parsed as the integer record id.
        """
        vector = np.array(query.vector, dtype=np.float32)
        results = []
        for hit in cls.client.search(vector, top_k=top):
            results.append((int(hit.text), hit.score))
        return results

    @classmethod
    def delete_client(cls):
        """Drop the shared database handle, if one is open."""
        if cls.client is None:
            return
        del cls.client
        cls.client = None
45 changes: 45 additions & 0 deletions engine/clients/logosdb/upload.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import json
from typing import List

import numpy as np

from dataset_reader.base_reader import Record
from engine.base_client.upload import BaseUploader

# Index location used when connection_params has no "path" entry. The
# configurator defaults to the same literal and writes the
# "<path>.meta.json" file that init_client reads.
DEFAULT_PATH = "/tmp/logosdb_vdb_bench"


class LogosDBUploader(BaseUploader):
    """Load benchmark records into a LogosDB index.

    The database handle and the upload parameters are kept on the class so
    every classmethod shares them.
    """

    client = None
    upload_params = {}

    @classmethod
    def init_client(cls, host, distance, connection_params: dict, upload_params: dict):
        """Open the index described by the ``<path>.meta.json`` sidecar.

        ``host`` and ``distance`` are accepted for interface compatibility
        but are not used here; ``upload_params`` is stored on the class.
        """
        import logosdb

        db_path = connection_params.get("path", DEFAULT_PATH)
        with open(db_path + ".meta.json") as sidecar:
            settings = json.loads(sidecar.read())

        cls.client = logosdb.DB(
            db_path,
            dim=settings["dim"],
            distance=settings["distance"],
            max_elements=settings.get("max_elements", 2_000_000),
        )
        cls.upload_params = upload_params

    @classmethod
    def upload_batch(cls, batch: List[Record]):
        """Insert one batch, storing each record id as the entry's text."""
        rows = []
        labels = []
        for record in batch:
            rows.append(record.vector)
            labels.append(str(record.id))
        matrix = np.array(rows, dtype=np.float32)
        cls.client.put_batch(matrix, texts=labels)

    @classmethod
    def post_upload(cls, distance):
        """No post-processing is performed; return an empty dict."""
        return {}

    @classmethod
    def delete_client(cls):
        """Drop the shared database handle, if one is open."""
        if cls.client is None:
            return
        del cls.client
        cls.client = None
22 changes: 22 additions & 0 deletions experiments/configurations/logosdb.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
[
{
"name": "logosdb-m16-ef200",
"engine": "logosdb",
"connection_params": {
"path": "/tmp/logosdb_vdb_bench"
},
"collection_params": {
"max_elements": 2000000
},
"upload_params": {
"parallel": 1,
"batch_size": 1000
},
"search_params": [
{
"parallel": 1,
"top": 10
}
]
}
]
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ opensearch-py = "^2.3.2"
tqdm = "^4.66.1"
psycopg = {extras = ["binary"], version = "^3.1.17"}
pgvector = "^0.2.4"
logosdb = ">=0.9.0"

[tool.poetry.group.dev.dependencies]
pre-commit = "^2.20.0"
Expand Down