Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .vscode/project-words.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ appleboy
colour
easyocr
embedder
fabricai
lancedb
mcpserver
metadatas
Expand Down
21 changes: 21 additions & 0 deletions backend/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Backend image: Python 3.11 (slim) with dependencies installed by Poetry
# directly into the system interpreter (no virtualenv inside the container).
FROM python:3.11-slim
WORKDIR /app

# System deps
# The apt package index is deleted in the same RUN so it never persists in a layer.
# NOTE(review): build-essential is presumably needed for dependencies that
# compile native extensions - confirm it is still required.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential curl && \
    rm -rf /var/lib/apt/lists/*

# Poetry setup
# --no-cache-dir keeps pip's download cache out of the image layer.
# virtualenvs.create=false makes Poetry install into the system site-packages.
# NOTE(review): Poetry is unpinned here; consider pinning it to the version
# that generated poetry.lock for reproducible builds.
RUN pip install --no-cache-dir poetry && \
    poetry config virtualenvs.create false

# Step 1: Heavy Dependencies (Cached)
# Only the project metadata files are copied before this install, so the layer
# is rebuilt only when those files change. --no-root skips installing the
# project package itself because the source tree has not been copied yet.
COPY pyproject.toml poetry.lock* LICENSE-PYTHON README.md ./
RUN poetry install --all-extras --with dev --no-interaction --no-ansi --no-root

# Step 2: Your Code
COPY . .

# Step 3: Fast metadata install (to enable 'fabric' command)
# Dependencies are already present from Step 1; this run adds the project itself.
RUN poetry install --all-extras --with dev --no-interaction --no-ansi
3 changes: 2 additions & 1 deletion backend/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,14 @@
# Runtime configuration read from environment variables.
ENVIRONMENT = os.getenv("ENVIRONMENT", "development")
GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")  # None when the variable is unset
# NOTE(review): the fallback is a fixed key visible in source control - confirm
# that production deployments always set INTERNAL_API_KEY.
API_KEY = os.getenv("INTERNAL_API_KEY", "abcd1234")
BUCKET_NAME = os.getenv("AWS_BUCKET_NAME")  # None when the variable is unset

IS_PROD = ENVIRONMENT == "production"
IS_DEV = ENVIRONMENT == "development"


# Directory containing this module; the asset paths below are derived from it.
PROJECT_DIR = Path(__file__).parent
# S3 URI prefix built from BUCKET_NAME (no hard-coded bucket).
# NOTE(review): when AWS_BUCKET_NAME is unset this evaluates to
# "s3://None/images/" - confirm the variable is always provided.
RELATIVE_GENERATED_FOLDER = f"s3://{BUCKET_NAME}/images/"
ASSETS = PROJECT_DIR / "assets"
UPLOAD_FOLDER_FABRIC = ASSETS / "search"
IMAGE_DIR = ASSETS / "images"
Expand Down
5 changes: 4 additions & 1 deletion backend/image_search/db/create_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def get_file_info(image_path: str) -> Optional[Dict[Any, Any]]:
return None


# NOTE(review): presumably the set of folder names accepted as image roots -
# confirm against the code that reads this constant.
ALLOWED_ROOTS = {"stock", "fabric", "design", "product"}


def collect_image_data(root_folder: str) -> list:
Expand All @@ -79,6 +79,7 @@ def collect_image_data(root_folder: str) -> list:

if root_folder.startswith("s3://"):
parsed = urlparse(root_folder)
print(f"Parsed S3 URI: {parsed}")
bucket_name = parsed.netloc
base_prefix = parsed.path.lstrip("/")

Expand Down Expand Up @@ -279,6 +280,8 @@ def process_table(
logger.info(TABLE_MESSAGES.info.connecting)

db = lancedb.connect(database)
if not hasattr(db, "list_tables"):
db.list_tables = db.table_names
logger.info(TABLE_MESSAGES.info.available_tables.format(tables=db.list_tables()))
logger.info(TABLE_MESSAGES.info.looking_for_table.format(table_name=table_name))
logger.info(
Expand Down
21 changes: 15 additions & 6 deletions backend/image_search/vector_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@


def run_vector_search(
table, schema, search_query: Any, limit: int = 6, category: List | None = None
table, schema, search_query: Any, limit: int = 6, category: List = []
) -> Tuple[List[Any], List[str]]:
print(category)
"""Optimized vector search with same interface but faster performance.

Args:
Expand All @@ -24,8 +25,9 @@ def run_vector_search(

# Perform the vector search
where_clause = " OR ".join(f"tag == '{c}'" for c in (category or []))

print("WHERE CLAUSE:", where_clause)
query = table.search(search_query)
print(f"Initial search query constructed: {query}") # Debug log

if category:
query = query.where(where_clause, prefilter=True)
Expand All @@ -41,12 +43,19 @@ def run_vector_search(
image_uris.append(result.image_uri)
# Optimized path processing
full_path = result.image_uri.replace("\\", "/")
parts = full_path.rsplit("/", 2)
if len(parts) >= 2:
image_paths.append(f"{parts[-2]}/{parts[-1]}")
# parts = full_path.rsplit("/", 2)
parts = full_path.split("/")
print(full_path, parts)

if "product" in parts:
idx = parts.index("product")
image_paths.append("/".join(parts[idx:]))
else:
image_paths.append("/".join(parts[-2:]))
# if len(parts) >= 2:
# image_paths.append(f"{parts[-2]}/{parts[-1]}")

# Debug timing (comment out in production)
search_time = time.perf_counter() - start_time
print(f"Vector search executed in {search_time:.2f}s")

return image_uris, image_paths
4 changes: 3 additions & 1 deletion backend/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ class MyApp(FastAPI):
"https://lab.threadzip.com",
"https://app.threadzip.com",
"https://threadzip.com",
"https://recursivezero.github.io"
"https://recursivezero.github.io",
]

# Select origins based on the environment
Expand All @@ -89,6 +89,8 @@ class MyApp(FastAPI):
else (PROD_ORIGINS + DEV_ORIGINS if ALLOW_LOCAL else PROD_ORIGINS)
)

print(f"Allowed CORS origins: {origins}")


@asynccontextmanager
async def lifespan(app: MyApp):
Expand Down
523 changes: 267 additions & 256 deletions backend/poetry.lock

Large diffs are not rendered by default.

Loading
Loading