Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ dependencies = [
"numpy>=1.26.0",
"portalocker>=2.8.0", # Cross-platform file locking
"metaclass-registry",
"imageio>=2.37.0",
"zarr>=2.18.0,<3.0", # Required for ZarrStorageBackend
"ome-zarr>=0.11.0", # Required for OME-ZARR HCS compliance
]
Expand Down Expand Up @@ -197,4 +198,4 @@ ignore = [
]

[tool.ruff.per-file-ignores]
"__init__.py" = ["F401"] # unused imports
"__init__.py" = ["F401"] # unused imports
13 changes: 7 additions & 6 deletions src/polystore/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -546,15 +546,16 @@ def reset_memory_backend() -> None:
# Clear files from existing memory backend while preserving directories
memory_backend = storage_registry[Backend.MEMORY.value]

# DEBUG: Log what's in memory before clearing
existing_keys = list(memory_backend._memory_store.keys())
logger.info(f"🔍 VFS_CLEAR: Memory backend has {len(existing_keys)} entries BEFORE clear")
logger.info(f"🔍 VFS_CLEAR: First 10 keys: {existing_keys[:10]}")
logger.debug("Memory backend has %s entries before clear", len(existing_keys))
logger.debug("First memory backend keys before clear: %s", existing_keys[:10])

memory_backend.clear_files_only()

# DEBUG: Log what's in memory after clearing
remaining_keys = list(memory_backend._memory_store.keys())
logger.info(f"🔍 VFS_CLEAR: Memory backend has {len(remaining_keys)} entries AFTER clear (directories only)")
logger.info(f"🔍 VFS_CLEAR: First 10 remaining keys: {remaining_keys[:10]}")
logger.debug(
"Memory backend has %s entries after clear (directories only)",
len(remaining_keys),
)
logger.debug("First memory backend keys after clear: %s", remaining_keys[:10])
logger.info("Memory backend reset - files cleared, directories preserved")
15 changes: 13 additions & 2 deletions src/polystore/disk.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import logging
import os
import shutil
import importlib
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Set, Union

Expand All @@ -23,7 +24,7 @@

def optional_import(module_name):
    """Import ``module_name`` if it is available, otherwise return ``None``.

    ``importlib.import_module`` is used instead of ``__import__`` because,
    for a dotted name such as ``"imageio.v3"``, ``__import__`` returns the
    top-level package rather than the requested submodule.

    Args:
        module_name: Absolute (optionally dotted) name of the module.

    Returns:
        The imported module object, or ``None`` when importing it raises
        ``ImportError``.
    """
    try:
        return importlib.import_module(module_name)
    except ImportError:
        # Missing optional dependency: callers check for None.
        return None

Expand All @@ -44,6 +45,7 @@ def optional_import(module_name):
cupy = get_cupy()
tf = get_tf()
tifffile = optional_import("tifffile")
imageio = optional_import("imageio.v3")

# Optional arraybridge integration for memory conversion
try:
Expand Down Expand Up @@ -99,6 +101,7 @@ def _register_formats(self):

# Complex formats - use custom handlers
(FileFormat.TIFF, tifffile, self._tiff_writer, self._tiff_reader),
(FileFormat.RASTER_IMAGE, imageio, self._image_writer, self._image_reader),
(FileFormat.TEXT, True, self._text_writer, self._text_reader),
(FileFormat.JSON, True, self._json_writer, self._json_reader),
(FileFormat.CSV, True, self._csv_writer, self._csv_reader),
Expand Down Expand Up @@ -164,6 +167,14 @@ def _tiff_reader(self, path):
else:
return tifffile.imread(str(path))

def _image_writer(self, path, data, **kwargs):
    """Save ``data`` to ``path`` as a raster image through imageio.

    ``data`` is converted with ``np.asarray`` before being written. Extra
    keyword arguments are accepted but not forwarded to imageio.
    """
    pixels = np.asarray(data)
    imageio.imwrite(path, pixels)

def _image_reader(self, path):
Comment on lines +170 to +174
Copy link

Copilot AI Apr 29, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Raster image support is introduced here, but the test suite doesn’t cover saving/loading any of the new extensions (e.g., .png/.jpg/.bmp). Please add pytest coverage that round-trips a small array through at least one raster format and asserts the extension is registered/usable (and ideally verifies case-insensitive extension handling, e.g., '.PNG').

Copilot uses AI. Check for mistakes.
"""Read standard raster images using imageio."""
return imageio.imread(path)
Comment on lines +174 to +176
Copy link

Copilot AI Apr 29, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

DiskStorageBackend.load() calls the registered reader as reader(disk_path, **kwargs). The new _image_reader does not accept **kwargs, so any non-empty kwargs (even benign ones) will raise a TypeError when loading raster images. Please update _image_reader to accept **kwargs (and either ignore them or pass supported options through to imageio.imread).

Suggested change
def _image_reader(self, path):
"""Read standard raster images using imageio."""
return imageio.imread(path)
def _image_reader(self, path, **kwargs):
"""Read standard raster images using imageio."""
return imageio.imread(path, **kwargs)

Copilot uses AI. Check for mistakes.

def _text_writer(self, path, data, **kwargs):
"""Write text data to file. Accepts and ignores extra kwargs for compatibility."""
path.write_text(str(data))
Expand Down Expand Up @@ -261,7 +272,7 @@ def load(self, file_path: Union[str, Path], **kwargs) -> Any:
ext = disk_path.suffix.lower()

if not self.format_registry.is_registered(ext):
raise ValueError(f"No writer registered for extension '{ext}'")
raise ValueError(f"No reader registered for extension '{ext}'")

try:
reader = self.format_registry.get_reader(ext)
Expand Down
11 changes: 10 additions & 1 deletion src/polystore/formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ class FileFormat(Enum):

# Image formats
TIFF = "tiff"
RASTER_IMAGE = "raster_image"

# Data formats
CSV = "csv"
Expand All @@ -44,14 +45,22 @@ def extensions(self):
FileFormat.TENSORFLOW: [".tf"],
FileFormat.ZARR: [".zarr"],
FileFormat.TIFF: [".tif", ".tiff"],
FileFormat.RASTER_IMAGE: [".bmp", ".gif", ".jpeg", ".jpg", ".png"],
FileFormat.CSV: [".csv"],
FileFormat.JSON: [".json"],
FileFormat.TEXT: [".txt"],
FileFormat.ROI: [".roi.zip"],
}

# Default image extensions: the union of the extensions registered for the
# TIFF and RASTER_IMAGE formats in FILE_FORMAT_EXTENSIONS.
DEFAULT_IMAGE_EXTENSIONS = set(FILE_FORMAT_EXTENSIONS[FileFormat.TIFF]) | set(
    FILE_FORMAT_EXTENSIONS[FileFormat.RASTER_IMAGE]
)


def get_format_from_extension(ext: str) -> FileFormat:
Expand Down
8 changes: 7 additions & 1 deletion src/polystore/memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,9 @@ def list_files(
if self._memory_store[dir_key] is not None:
raise NotADirectoryError(f"Path is not a directory: {directory}")

lowercase_extensions = (
None if extensions is None else {extension.lower() for extension in extensions}
)
result = []
dir_prefix = dir_key + "/" if not dir_key.endswith("/") else dir_key

Expand All @@ -159,7 +162,10 @@ def list_files(
filename = Path(rel_path).name
# If pattern is None, match all files
if pattern is None or fnmatch(filename, pattern):
if not extensions or Path(filename).suffix in extensions:
if (
lowercase_extensions is None
or Path(filename).suffix.lower() in lowercase_extensions
):
# Calculate depth for breadth-first sorting
depth = rel_path.count('/')
result.append((Path(path), depth))
Expand Down
26 changes: 18 additions & 8 deletions src/polystore/virtual_workspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,10 +205,20 @@ def list_files(self, directory: Union[str, Path], pattern: Optional[str] = None,
if self._mapping_cache is None:
self._load_mapping()

logger.info(f"VirtualWorkspace.list_files called: directory={directory}, recursive={recursive}, pattern={pattern}, extensions={extensions}")
logger.info(f" plate_root={self.plate_root}")
logger.info(f" relative_dir_str='{relative_dir_str}'")
logger.info(f" mapping has {len(self._mapping_cache)} entries")
logger.debug(
"VirtualWorkspace.list_files directory=%s recursive=%s pattern=%s extensions=%s",
directory,
recursive,
pattern,
extensions,
)
logger.debug(" plate_root=%s", self.plate_root)
logger.debug(" relative_dir_str=%r", relative_dir_str)
logger.debug(" mapping has %s entries", len(self._mapping_cache))

lowercase_extensions = (
None if extensions is None else {ext.lower() for ext in extensions}
)

# Filter paths in this directory
results = []
Expand All @@ -230,20 +240,20 @@ def list_files(self, directory: Union[str, Path], pattern: Optional[str] = None,
vpath = Path(virtual_relative)
if pattern and not fnmatch(vpath.name, pattern):
continue
if extensions and vpath.suffix not in extensions:
if lowercase_extensions and vpath.suffix.lower() not in lowercase_extensions:
continue

# Return absolute path
results.append(str(self.plate_root / virtual_relative))

logger.info(f" VirtualWorkspace.list_files returning {len(results)} files")
logger.debug(" VirtualWorkspace.list_files returning %s files", len(results))
if len(results) == 0 and len(self._mapping_cache) > 0:
# Log first few mapping keys to help debug
sample_keys = list(self._mapping_cache.keys())[:3]
logger.info(f" Sample mapping keys: {sample_keys}")
logger.debug(" Sample mapping keys: %s", sample_keys)
if not recursive and relative_dir_str == '':
sample_parents = [str(Path(k).parent).replace('\\', '/') for k in sample_keys]
logger.info(f" Sample parent dirs: {sample_parents}")
logger.debug(" Sample parent dirs: %s", sample_parents)
logger.info(f" Expected parent to match: '{relative_dir_str}'")

return sorted(results)
Expand Down
11 changes: 11 additions & 0 deletions tests/test_memory_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,17 @@ def test_list_files_with_extension_filter(self):
npy_files = self.backend.list_files("/test", extensions={".npy"})
assert len(npy_files) == 2

def test_list_files_extension_filter_is_case_insensitive(self):
    """Extension filters must match file suffixes regardless of letter case."""
    # Two files differing only in suffix case, plus one unrelated file.
    fixtures = (
        (np.array([1]), "/test/image.TIF"),
        (np.array([2]), "/test/image.tif"),
        ("text", "/test/notes.TXT"),
    )
    for payload, target in fixtures:
        self.backend.save(payload, target)

    matched = self.backend.list_files("/test", extensions={".tif"})

    assert len(matched) == 2
    assert {entry.name for entry in matched} == {"image.TIF", "image.tif"}

def test_list_files_recursive(self):
"""Test recursive file listing."""
# Create files in multiple levels
Expand Down
Loading