Skip to content
This repository was archived by the owner on Aug 27, 2025. It is now read-only.

Commit 4bc4c2e

Browse files
first version working
1 parent 8e2d0b8 commit 4bc4c2e

7 files changed

Lines changed: 37 additions & 32 deletions

File tree

README.md

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,17 @@ import spatialdata
1414
import spatialdata_xenium_explorer
1515

1616
sdata = spatialdata.read_zarr("...")
17-
image_key = "..." # The name of the MultiscaleSpatialImage to be exported
1817

19-
spatialdata_xenium_explorer.write("/path/to/directory", sdata, image_key)
18+
spatialdata_xenium_explorer.write("/path/to/directory", sdata, image_key, shapes_key, points_key, gene_column)
2019
```
2120

22-
This will create up to 6 files, among which a file called `experiment.xenium`. Double-click on this file to open it on the [Xenium Explorer](https://www.10xgenomics.com/support/software/xenium-explorer/downloads) (make sure you have the latest version of the software).
21+
For more details about the arguments, see the [function docstrings](https://github.com/quentinblampey/spatialdata_xenium_explorer/blob/master/spatialdata_xenium_explorer/converter.py#L29).
22+
23+
This will create up to 6 files, including one called `experiment.xenium`. Double-click this file to open it in the [Xenium Explorer](https://www.10xgenomics.com/support/software/xenium-explorer/downloads) (make sure you have the latest version of the software).
24+
25+
## Future improvements
26+
27+
- Support all types of images (not just `MultiscaleSpatialImage`)
28+
- Better user experience (fewer arguments)
29+
- Write `.tif` image without loading the spatial image in memory
30+
- Write transcripts without computing all the coordinates at once
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ class FileNames:
1010
class ExplorerConstants:
1111
GRID_SIZE = 250
1212
QUALITY_SCORE = 40
13+
MICRONS_TO_PIXELS = 4.705882
1314

1415

1516
class Versions:

spatialdata_xenium_explorer/converter.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,10 @@
1111
write_polygons,
1212
write_transcripts,
1313
)
14-
from .constants import FileNames, experiment_dict
14+
from ._constants import FileNames, experiment_dict
1515

1616

17-
def _order_instances(sdata: SpatialData, shapes_key: str):
17+
def _reorder_instances(sdata: SpatialData, shapes_key: str):
1818
adata = sdata.table
1919

2020
instance_key = adata.uns["spatialdata_attrs"]["instance_key"]
@@ -34,6 +34,7 @@ def write(
3434
points_key: str,
3535
gene_column: str,
3636
layer: str | None = None,
37+
polygon_max_vertices: int = 13,
3738
) -> None:
3839
"""
3940
Transform a SpatialData object into inputs for the Xenium Explorer.
@@ -47,6 +48,7 @@ def write(
4748
points_key: Name of the transcripts (key of `sdata.points`).
4849
gene_column: Column name of the points dataframe containing the gene names.
4950
layer: Layer of `sdata.table` where the gene counts are saved. If `None`, uses `sdata.table.X`.
51+
polygon_max_vertices: Maximum number of vertices for the cell polygons.
5052
"""
5153
path: Path = Path(path)
5254
assert (
@@ -55,7 +57,7 @@ def write(
5557

5658
path.mkdir(parents=True, exist_ok=True)
5759

58-
adata = _order_instances(sdata, shapes_key)
60+
adata = _reorder_instances(sdata, shapes_key)
5961

6062
EXPERIMENT = experiment_dict(image_key, shapes_key, adata.n_obs)
6163
with open(path / FileNames.METADATA, "w") as f:
@@ -68,9 +70,9 @@ def write(
6870
set_transformation(sdata.images[image_key], Identity(), pixels_cs)
6971

7072
gdf = sdata.transform_element_to_coordinate_system(sdata.shapes[shapes_key], pixels_cs)
71-
write_polygons(path / FileNames.SHAPES, gdf.geometry)
72-
73-
write_multiscale(path / FileNames.IMAGE, sdata.images[image_key])
73+
write_polygons(path / FileNames.SHAPES, gdf.geometry, polygon_max_vertices)
7474

7575
df = sdata.transform_element_to_coordinate_system(sdata.points[points_key], pixels_cs)
7676
write_transcripts(path / FileNames.POINTS, df, gene_column)
77+
78+
write_multiscale(path / FileNames.IMAGE, sdata.images[image_key])

spatialdata_xenium_explorer/images.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,11 @@
22
import tifffile as tf
33
from multiscale_spatial_image import MultiscaleSpatialImage
44

5-
from .constants import image_metadata, image_options
5+
from ._constants import image_metadata, image_options
66

77

88
def _astype_uint8(arr: np.ndarray) -> np.ndarray:
9+
print(f" Image of shape {arr.shape}")
910
assert np.issubdtype(
1011
arr.dtype, np.integer
1112
), f"The image dtype has to be an integer dtype. Found {arr.dtype}"
@@ -22,6 +23,7 @@ def write_multiscale(
2223
multiscale: MultiscaleSpatialImage,
2324
pixelsize: float = 0.2125,
2425
):
26+
print("Writing multiscale image")
2527
scale_names = list(multiscale.children)
2628
channel_names = list(multiscale[scale_names[0]].c.values)
2729

spatialdata_xenium_explorer/points.py

Lines changed: 7 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import numpy as np
66
import zarr
77

8-
from .constants import ExplorerConstants
8+
from ._constants import ExplorerConstants
99

1010

1111
def subsample_indices(n_samples, factor: int = 4):
@@ -20,33 +20,28 @@ def write_transcripts(
2020
max_levels: int = 15,
2121
):
2222
# TODO: do everything with dask instead of pandas
23+
print(f"Writing {len(df)} transcripts")
2324
df = df.compute()
2425

2526
num_transcripts = len(df)
2627
df[gene] = df[gene].astype("category")
2728

2829
location = df[["x", "y"]]
30+
location /= ExplorerConstants.MICRONS_TO_PIXELS
2931
location = np.concatenate([location, np.zeros((num_transcripts, 1))], axis=1)
3032

3133
xmax, ymax = location[:, :2].max(axis=0)
3234

33-
assert location[:, 0].min() >= 0
34-
assert location[:, 1].min() >= 0
35-
3635
gene_names = list(df[gene].cat.categories)
3736
num_genes = len(gene_names)
3837

3938
codeword_gene_mapping = list(range(num_genes))
4039

4140
valid = np.ones((num_transcripts, 1))
4241
uuid = np.stack([np.arange(num_transcripts), np.full(num_transcripts, 65535)], axis=1)
43-
transcript_id = np.stack(
44-
[np.arange(num_transcripts), np.full(num_transcripts, 65535)], axis=1
45-
)
42+
transcript_id = np.stack([np.arange(num_transcripts), np.full(num_transcripts, 65535)], axis=1)
4643
gene_identity = df[gene].cat.codes.values[:, None]
47-
codeword_identity = np.stack(
48-
[gene_identity[:, 0], np.full(num_transcripts, 65535)], axis=1
49-
)
44+
codeword_identity = np.stack([gene_identity[:, 0], np.full(num_transcripts, 65535)], axis=1)
5045
status = np.zeros((num_transcripts, 1))
5146
quality_score = np.full((num_transcripts, 1), ExplorerConstants.QUALITY_SCORE)
5247

@@ -55,9 +50,7 @@ def write_transcripts(
5550
"codeword_gene_mapping": codeword_gene_mapping,
5651
"codeword_gene_names": gene_names,
5752
"gene_names": gene_names,
58-
"gene_index_map": {
59-
name: index for name, index in zip(gene_names, codeword_gene_mapping)
60-
},
53+
"gene_index_map": {name: index for name, index in zip(gene_names, codeword_gene_mapping)},
6154
"number_genes": num_genes,
6255
"spatial_units": "micron",
6356
"coordinate_space": "refined-final_global_micron",
@@ -85,12 +78,11 @@ def write_transcripts(
8578
grids = g.create_group("grids")
8679

8780
for level in range(max_levels):
81+
print(f" Level {level}: {len(location)} transcripts")
8882
level_group = grids.create_group(level)
8983

9084
tile_size = ExplorerConstants.GRID_SIZE * 2**level
9185

92-
print(f"Level {level}: {len(location)} transcripts")
93-
9486
indices = np.floor(location[:, :2] / tile_size).clip(0).astype(int)
9587
tiles_str_indices = np.array([f"{tx},{ty}" for (tx, ty) in indices])
9688

spatialdata_xenium_explorer/shapes.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import zarr
77
from shapely.geometry import Polygon
88

9-
from .constants import cell_summary_attrs, group_attrs
9+
from ._constants import ExplorerConstants, cell_summary_attrs, group_attrs
1010

1111

1212
def pad_polygon(polygon: Polygon, max_vertices: int, tolerance: float = 1) -> np.ndarray:
@@ -19,18 +19,17 @@ def pad_polygon(polygon: Polygon, max_vertices: int, tolerance: float = 1) -> np
1919
return coords.flatten()
2020

2121
if n_vertices < max_vertices:
22-
return np.pad(
23-
coords, ((0, max_vertices - n_vertices), (0, 0)), mode="edge"
24-
).flatten()
22+
return np.pad(coords, ((0, max_vertices - n_vertices), (0, 0)), mode="edge").flatten()
2523

2624
# TODO: improve it: how to choose the right tolerance?
2725
polygon = polygon.simplify(tolerance=tolerance)
2826
return pad_polygon(polygon, max_vertices, tolerance + 1)
2927

3028

3129
def write_polygons(path: Path, polygons: Iterable[Polygon], max_vertices: int) -> None:
30+
print(f"Writing {len(polygons)} cell polygons")
3231
coordinates = np.stack([pad_polygon(p, max_vertices) for p in polygons])
33-
coordinates /= 4.705882
32+
coordinates /= ExplorerConstants.MICRONS_TO_PIXELS
3433

3534
num_cells = len(coordinates)
3635
cells_fourth = ceil(num_cells / 4)

spatialdata_xenium_explorer/table.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,11 @@
33
from anndata import AnnData
44
from scipy.sparse import csr_matrix
55

6-
from .constants import cell_categories_attrs
6+
from ._constants import cell_categories_attrs
77

88

99
def write_gene_counts(path: str, adata: AnnData, layer: str | None) -> None:
10+
print(f"Writing table of {adata.n_vars} genes")
1011
counts = adata.X if layer is None else adata.layers[layer]
1112
counts = csr_matrix(counts)
1213

@@ -74,7 +75,7 @@ def write_cell_categories(path: str, adata: AnnData) -> None:
7475
# TODO: consider also columns that can be transformed to a categorical column?
7576
cat_columns = [name for name, cat in adata.obs.dtypes.items() if cat == "category"]
7677

77-
print(f"Saving {len(cat_columns)} cell categories: {', '.join(cat_columns)}")
78+
print(f"Writing {len(cat_columns)} cell categories: {', '.join(cat_columns)}")
7879

7980
ATTRS = cell_categories_attrs()
8081
ATTRS["number_groupings"] = len(cat_columns)

0 commit comments

Comments
 (0)