Skip to content
This repository was archived by the owner on Aug 27, 2025. It is now read-only.

Commit 114da2c

Browse files
write backbone
1 parent 8b7e32a commit 114da2c

8 files changed

Lines changed: 98 additions & 67 deletions

File tree

spatialdata_xenium_explorer/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,4 +2,8 @@
22

33
__version__ = importlib.metadata.version("spatialdata_xenium_explorer")
44

5+
from .images import write_multiscale
6+
from .points import write_transcripts
7+
from .table import write_cell_categories, write_gene_counts
8+
from .shapes import write_polygons
59
from .converter import write

spatialdata_xenium_explorer/constants.py

Lines changed: 23 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1,15 +1,24 @@
1-
class Versions:
2-
EXPERIMENT = [2, 0]
3-
GROUPS = [5, 0]
4-
CELL_CATEGORIES = [1, 0]
1+
class FileNames:
2+
IMAGE = "morphology.ome.tif"
3+
POINTS = "transcripts.zarr.zip"
4+
SHAPES = "cells.zarr.zip"
5+
TABLE = "cell_feature_matrix.zarr.zip"
6+
CELL_CATEGORIES = "analysis.zarr.zip"
7+
METADATA = "experiment.xenium"
58

69

710
class ExplorerConstants:
811
GRID_SIZE = 250
912
QUALITY_SCORE = 40
1013

1114

12-
def cell_categories_attrs():
15+
class Versions:
16+
EXPERIMENT = [2, 0]
17+
GROUPS = [5, 0]
18+
CELL_CATEGORIES = [1, 0]
19+
20+
21+
def cell_categories_attrs() -> dict:
1322
return {
1423
"major_version": Versions.CELL_CATEGORIES[0],
1524
"minor_version": Versions.CELL_CATEGORIES[1],
@@ -19,7 +28,7 @@ def cell_categories_attrs():
1928
}
2029

2130

22-
def cell_summary_attrs():
31+
def cell_summary_attrs() -> dict:
2332
return {
2433
"column_descriptions": [
2534
"Cell centroid in X",
@@ -42,7 +51,7 @@ def cell_summary_attrs():
4251
}
4352

4453

45-
def group_attrs():
54+
def group_attrs() -> dict:
4655
return {
4756
"major_version": Versions.GROUPS[0],
4857
"minor_version": Versions.GROUPS[1],
@@ -57,13 +66,17 @@ def group_attrs():
5766
}
5867

5968

60-
def experiment_dict():
69+
def experiment_dict(run_name: str, region_name: str, num_cells: int) -> dict:
6170
return {
6271
"major_version": Versions.EXPERIMENT[0],
6372
"minor_version": Versions.EXPERIMENT[1],
73+
"run_name": run_name,
74+
"region_name": region_name,
75+
"experiment_uuid": "N/A",
76+
"panel_tissue_type": "N/A",
6477
"run_start_time": "N/A",
6578
"preservation_method": "N/A",
66-
"num_cells": 0,
79+
"num_cells": num_cells,
6780
"transcripts_per_cell": 0,
6881
"transcripts_per_100um": 0,
6982
"cassette_name": "N/A",
@@ -98,7 +111,7 @@ def experiment_dict():
98111
}
99112

100113

101-
def image_options():
114+
def image_options() -> dict:
102115
return {
103116
"photometric": "minisblack",
104117
"tile": (1024, 1024),
spatialdata_xenium_explorer/converter.py

Lines changed: 33 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1,17 +1,41 @@
11
import json
2+
from pathlib import Path
23

3-
from .constants import EXPERIMENT
4+
from spatialdata import SpatialData
45

6+
from . import (
7+
write_cell_categories,
8+
write_gene_counts,
9+
write_multiscale,
10+
write_polygons,
11+
write_transcripts,
12+
)
13+
from .constants import FileNames, experiment_dict
514

6-
def write_experiment(path, run_name, tissue, region_name, uuid):
7-
EXPERIMENT["run_name"] = run_name
8-
EXPERIMENT["panel_tissue_type"] = tissue
9-
EXPERIMENT["region_name"] = region_name
10-
EXPERIMENT["experiment_uuid"] = uuid
1115

12-
with open(path, "w") as f:
16+
def write(path: str, sdata: SpatialData, image_key: str, gene_column: str) -> None:
17+
path: Path = Path(path)
18+
assert (
19+
not path.exists() or path.is_dir()
20+
), f"A path to an existing file was provided. It should be a path to a directory."
21+
22+
path.mkdir(parents=True, exist_ok=True)
23+
24+
adata = sdata.table
25+
26+
EXPERIMENT = experiment_dict(..., ..., adata.n_obs)
27+
with open(path / FileNames.METADATA, "w") as f:
1328
json.dump(EXPERIMENT, f, indent=4)
1429

30+
write_gene_counts(path / FileNames.TABLE, adata)
31+
write_cell_categories(path / FileNames.CELL_CATEGORIES, adata)
32+
33+
polygons = sdata.shapes["..."]
34+
# TODO: transform polygon coords to pixel
35+
write_polygons(path / FileNames.SHAPES, polygons)
36+
37+
# TODO : make it memory efficient
38+
write_multiscale(path / FileNames.IMAGE, sdata.images[image_key])
1539

16-
def write(path, sdata):
17-
...
40+
df = sdata.points["..."]
41+
write_transcripts(path / FileNames.POINTS, df, gene_column)

spatialdata_xenium_explorer/images.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
from .constants import image_metadata, image_options
88

99

10-
def to_uint8(arr):
10+
def to_uint8(arr: np.ndarray) -> np.ndarray:
1111
print(f"Writing image of shape {arr.shape}")
1212
return (arr // 256).astype(np.uint8)
1313

spatialdata_xenium_explorer/points.py

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -16,15 +16,13 @@ def subsample_indices(n_samples, factor: int = 4):
1616
def write_transcripts(
1717
path: Path,
1818
df: pd.DataFrame,
19-
x: str = "x",
20-
y: str = "y",
2119
gene: str = "gene",
2220
max_levels: int = 15,
2321
):
2422
num_transcripts = len(df)
2523
df[gene] = df[gene].astype("category")
2624

27-
location = df[[x, y]]
25+
location = df[["x", "y"]]
2826
location = np.concatenate([location, np.zeros((num_transcripts, 1))], axis=1)
2927

3028
xmax, ymax = location[:, :2].max(axis=0)

spatialdata_xenium_explorer/shapes.py

Lines changed: 24 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,25 +1,42 @@
11
from math import ceil
22
from pathlib import Path
3+
from typing import Iterable
34

45
import numpy as np
56
import zarr
67
from shapely.geometry import Polygon
78

89
from .constants import cell_summary_attrs, group_attrs
9-
from .utils import pad
1010

1111

12-
def write_polygons(path: Path, polygons: list[Polygon], area: np.ndarray) -> None:
13-
coordinates = np.stack([pad(p, 3, 13) for p in polygons])
12+
def pad_polygon(polygon: Polygon, max_vertices: int, tolerance: float = 1) -> np.ndarray:
13+
n_vertices = len(polygon.exterior.coords)
14+
assert n_vertices >= 3
15+
16+
coords = polygon.exterior.coords._coords
17+
18+
if n_vertices == max_vertices:
19+
return coords.flatten()
20+
21+
if n_vertices < max_vertices:
22+
return np.pad(
23+
coords, ((0, max_vertices - n_vertices), (0, 0)), mode="edge"
24+
).flatten()
25+
26+
# TODO: improve it: how to choose the right tolerance?
27+
polygon = polygon.simplify(tolerance=tolerance)
28+
return pad_polygon(polygon, max_vertices, tolerance + 1)
29+
30+
31+
def write_polygons(path: Path, polygons: Iterable[Polygon], max_vertices: int) -> None:
32+
coordinates = np.stack([pad_polygon(p, max_vertices) for p in polygons])
1433
coordinates /= 4.705882
1534

1635
num_cells = len(coordinates)
1736
cells_fourth = ceil(num_cells / 4)
1837
cells_half = ceil(num_cells / 2)
1938

20-
CELLS_SUMMARY_ATTRS = cell_summary_attrs()
2139
GROUP_ATTRS = group_attrs()
22-
2340
GROUP_ATTRS["number_cells"] = num_cells
2441

2542
polygon_vertices = np.stack([coordinates, coordinates])
@@ -42,14 +59,14 @@ def write_polygons(path: Path, polygons: list[Polygon], area: np.ndarray) -> Non
4259
g.array("cell_id", cell_id, dtype="uint32", chunks=(cells_half, 1))
4360

4461
cell_summary = np.zeros((num_cells, 7))
45-
cell_summary[:, 2] = area
62+
cell_summary[:, 2] = [p.area for p in polygons]
4663
g.array(
4764
"cell_summary",
4865
cell_summary,
4966
dtype="float64",
5067
chunks=(num_cells, 1),
5168
)
52-
g["cell_summary"].attrs.put(CELLS_SUMMARY_ATTRS)
69+
g["cell_summary"].attrs.put(cell_summary_attrs())
5370

5471
g.array(
5572
"polygon_num_vertices",

spatialdata_xenium_explorer/table.py

Lines changed: 12 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,11 @@
1-
import anndata
21
import numpy as np
32
import zarr
3+
from anndata import AnnData
44

55
from .constants import cell_categories_attrs
66

77

8-
def write_gene_counts(path, adata: anndata.AnnData):
8+
def write_gene_counts(path: str, adata: AnnData) -> None:
99
counts = adata.layers["counts"]
1010

1111
feature_keys = list(adata.var_names) + ["Total transcripts"]
@@ -54,7 +54,9 @@ def write_gene_counts(path, adata: anndata.AnnData):
5454
cells_group.array("indptr", indptr, dtype="uint32", chunks=indptr.shape)
5555

5656

57-
def add_group(root: zarr.Group, index: int, values: np.ndarray, categories: list[str]):
57+
def _write_categorical_column(
58+
root: zarr.Group, index: int, values: np.ndarray, categories: list[str]
59+
) -> None:
5860
group = root.create_group(index)
5961
values_indices = [np.where(values == cat)[0] for cat in categories]
6062
values_cum_len = np.cumsum([len(indices) for indices in values_indices])
@@ -66,23 +68,23 @@ def add_group(root: zarr.Group, index: int, values: np.ndarray, categories: list
6668
group.array("indptr", indptr, dtype="uint32", chunks=(len(indptr),))
6769

6870

69-
def write_cell_categories(path: str, adata: anndata.AnnData):
70-
categorical_columns = [
71-
name for name, cat in adata.obs.dtypes.items() if cat == "category"
72-
]
71+
def write_cell_categories(path: str, adata: AnnData) -> None:
72+
cat_columns = [name for name, cat in adata.obs.dtypes.items() if cat == "category"]
73+
74+
print(f"Saving {len(cat_columns)} cell categories: {', '.join(cat_columns)}")
7375

7476
ATTRS = cell_categories_attrs()
75-
ATTRS["number_groupings"] = len(categorical_columns)
77+
ATTRS["number_groupings"] = len(cat_columns)
7678

7779
with zarr.ZipStore(path, mode="w") as store:
7880
g = zarr.group(store=store)
7981
cell_groups = g.create_group("cell_groups")
8082

81-
for i, name in enumerate(categorical_columns):
83+
for i, name in enumerate(cat_columns):
8284
categories = list(adata.obs[name].cat.categories)
8385
ATTRS["grouping_names"].append(name)
8486
ATTRS["group_names"].append(categories)
8587

86-
add_group(cell_groups, i, adata.obs[name], categories)
88+
_write_categorical_column(cell_groups, i, adata.obs[name], categories)
8789

8890
cell_groups.attrs.put(ATTRS)

spatialdata_xenium_explorer/utils.py

Lines changed: 0 additions & 27 deletions
This file was deleted.

0 commit comments

Comments
 (0)