From 0f543dda84ed8bc8a345a350ef96d4d7ee74cadb Mon Sep 17 00:00:00 2001 From: alsmith151 Date: Wed, 27 May 2026 13:17:49 +0100 Subject: [PATCH 1/2] Enhance PlotNado with new features and improvements - Updated index.qmd to include a new example figure using GenomicFigure. - Added a new BED file for candidate peaks in the data directory. - Introduced a register_genome function to allow users to register custom genome files. - Enhanced Bigwig and Bed aesthetics to support binning options. - Improved label positioning for BED tracks with new BedLabelPosition enum. - Added synthetic data generation functions for testing and examples. - Updated tests to cover new genome registration functionality and ensure proper error handling. - Refactored code for better readability and maintainability. --- _quarto.yml | 4 +- docs/_examples.py | 145 +-------- docs/aesthetics.qmd | 124 ++++++-- docs/best_practices.qmd | 4 +- docs/examples.qmd | 164 +++++++++++ docs/figure_workflows.qmd | 13 +- docs/quickstart.qmd | 29 +- docs/quickstart_tracks.qmd | 39 ++- docs/recipes.qmd | 69 ++++- docs/reference.qmd | 19 +- docs/track_aliases.qmd | 6 +- docs/track_catalog.qmd | 23 +- index.qmd | 56 ++-- plotnado/__init__.py | 4 + plotnado/_kwargs.py | 4 + ...blueprint_monocyte_lyz_candidate_peaks.bed | 4 + plotnado/examples.py | 275 ++++++++++++++++++ plotnado/figure.pyi | 8 +- plotnado/figure_methods.py | 4 + plotnado/tracks/__init__.py | 2 + plotnado/tracks/axis.py | 3 +- plotnado/tracks/bed.py | 48 ++- plotnado/tracks/bigwig.py | 53 ++++ plotnado/tracks/enums.py | 8 + plotnado/tracks/genes.py | 43 ++- plotnado/tracks/peaks.py | 44 +-- pyproject.toml | 2 +- tests/test_genes.py | 97 ++++++ uv.lock | 2 +- 29 files changed, 1027 insertions(+), 269 deletions(-) create mode 100644 docs/examples.qmd create mode 100644 plotnado/data/blueprint_monocyte_lyz_candidate_peaks.bed create mode 100644 plotnado/examples.py diff --git a/_quarto.yml b/_quarto.yml index 2e4a7c5..d0ad03d 100644 --- a/_quarto.yml +++ 
b/_quarto.yml @@ -29,6 +29,8 @@ website: href: docs/quickstart.qmd - text: Track Construction href: docs/quickstart_tracks.qmd + - text: Examples + href: docs/examples.qmd - text: Guides menu: - text: Track Catalog @@ -73,7 +75,7 @@ format: toc-depth: 4 code-copy: true code-overflow: wrap - page-layout: full + page-layout: article execute: enabled: true diff --git a/docs/_examples.py b/docs/_examples.py index e2eb597..a470498 100644 --- a/docs/_examples.py +++ b/docs/_examples.py @@ -1,142 +1,3 @@ -from __future__ import annotations - -import importlib.util - -import matplotlib.pyplot as plt -import numpy as np -import pandas as pd - -from plotnado import GenomicFigure - -REGION = "chr1:1,010,000-1,080,000" -WIDE_REGION = "chr1:1,000,000-1,110,000" - - -def has_module(module_name: str) -> bool: - return importlib.util.find_spec(module_name) is not None - - -def unavailable_figure(title: str, requirement: str): - fig, ax = plt.subplots(figsize=(9, 1.8)) - ax.axis("off") - ax.text( - 0.5, - 0.5, - f"{title} requires {requirement}", - ha="center", - va="center", - fontsize=10, - ) - plt.close(fig) - return fig - - -def signal( - start: int = 1_000_000, - end: int = 1_100_000, - step: int = 1_000, - phase: float = 0.0, - scale: float = 1.0, - baseline: float = 5.0, -) -> pd.DataFrame: - bins = np.arange(start, end, step) - values = scale * ( - baseline + 2.0 * np.sin(np.linspace(phase, 6 * np.pi + phase, bins.shape[0])) - ) - return pd.DataFrame({"chrom": "chr1", "start": bins, "end": bins + step, "value": values}) - - -def review_signal(scale: float = 1.0, phase: float = 0.0) -> pd.DataFrame: - bins = np.arange(1_000_000, 1_120_000, 1_000) - values = scale * (1.2 + np.sin(np.linspace(phase, 6 + phase, bins.shape[0]))) - return pd.DataFrame({"chrom": "chr1", "start": bins, "end": bins + 1_000, "value": values}) - - -def intervals() -> pd.DataFrame: - return pd.DataFrame( - { - "chrom": ["chr1", "chr1", "chr1", "chr1"], - "start": [1_008_000, 1_020_000, 1_050_000, 
1_066_000], - "end": [1_014_000, 1_032_000, 1_061_000, 1_074_000], - "name": ["enhancer_a", "enhancer_b", "promoter", "domain"], - } - ) - - -def narrowpeaks() -> pd.DataFrame: - return pd.DataFrame( - { - "chrom": ["chr1", "chr1", "chr1"], - "start": [1_012_000, 1_038_000, 1_060_000], - "end": [1_018_000, 1_047_000, 1_070_000], - "name": ["np1", "np2", "np3"], - "score": [300, 700, 500], - "strand": [".", ".", "."], - "signalValue": [12.0, 48.0, 30.0], - "pValue": [5.2, 12.3, 8.1], - "qValue": [4.1, 10.0, 6.2], - "peak": [1200, 1800, 2200], - } - ) - - -def links() -> pd.DataFrame: - return pd.DataFrame( - { - "chrom1": ["chr1", "chr1", "chr1"], - "start1": [1_010_000, 1_022_000, 1_042_000], - "end1": [1_012_000, 1_024_000, 1_045_000], - "chrom2": ["chr1", "chr1", "chr1"], - "start2": [1_035_000, 1_054_000, 1_072_000], - "end2": [1_037_000, 1_056_000, 1_074_000], - "score": [2.2, 6.5, 9.8], - } - ) - - -def quickstart_figure() -> GenomicFigure: - fig = GenomicFigure(width=11, track_height=1.25) - fig.scalebar() - fig.axis() - fig.bigwig(signal(scale=1.15), title="Synthetic signal", style="fill", color="#1f77b4") - fig.bed(intervals(), title="Intervals", display="expanded", show_labels=True) - return fig - - -def style_comparison() -> GenomicFigure: - fig = GenomicFigure(track_height=1.15) - fig.scalebar() - fig.bigwig(signal(phase=0.0), title="fill", style="fill", color="#1f77b4") - fig.bigwig(signal(phase=0.8), title="fragment", style="fragment", color="#d62728") - fig.bigwig( - signal(phase=1.6), - title="scatter", - style="scatter", - color="#2ca02c", - scatter_point_size=10, - ) - fig.bigwig(signal(phase=2.4), title="std", style="std", color="#9467bd") - return fig - - -def overlay_comparison() -> GenomicFigure: - fig = GenomicFigure(track_height=1.2) - fig.autoscale(True) - fig.highlight("chr1:1,032,000-1,046,000") - fig.highlight_style(color="#ffdd57", alpha=0.22) - fig.axis() - fig.bigwig(review_signal(2.0), title="Control", autoscale_group="signal", 
color="#1f77b4") - fig.bigwig( - review_signal(10.0, 1.2), - title="Treatment", - autoscale_group="signal", - color="#d62728", - ) - fig.overlay( - [review_signal(5.5, 2.0), review_signal(6.5, 2.8)], - title="Overlay", - autoscale_group="signal", - colors=["#2ca02c", "#9467bd"], - alpha=0.55, - ) - return fig +# Re-export shim — all names live in plotnado.examples. +# Quarto docs import from here; user code should import from plotnado.examples directly. +from plotnado.examples import * # noqa: F401, F403 diff --git a/docs/aesthetics.qmd b/docs/aesthetics.qmd index 20f8856..5a9192e 100644 --- a/docs/aesthetics.qmd +++ b/docs/aesthetics.qmd @@ -14,19 +14,62 @@ This page focuses on choices you can see: signal styles, color grouping, label p `style` changes how a quantitative signal is drawn. ```{python} -from docs._examples import REGION, style_comparison +#| fig-cap: "`fill` is the default for continuous signal, `fragment` emphasizes bins, `scatter` shows individual values, and `std` draws a band-style summary." +from plotnado import GenomicFigure +from plotnado.examples import REGION, signal +# signal() returns a synthetic ChIP-seq-like DataFrame(chrom, start, end, value) +# In real use pass a BigWig path/URL or any DataFrame with those columns +# REGION is a string like "chr1:1,000,000-1,100,000" + + +fig = GenomicFigure(track_height=1.15) +fig.scalebar() +fig.bigwig(signal(phase=0.0), title="fill", style="fill", color="#1f77b4") +fig.bigwig(signal(phase=0.8), title="fragment", style="fragment", color="#d62728") +fig.bigwig(signal(phase=1.6), title="scatter", style="scatter", color="#2ca02c", scatter_point_size=10) +fig.bigwig(signal(phase=2.4), title="std", style="std", color="#9467bd") +fig.plot(REGION) +``` + +## Signal resolution -fig = style_comparison() +`n_bins` divides the plotted region into a fixed number of equal bins regardless of the source resolution. `bin_size` sets the bin width in base pairs instead. 
Both work with BigWig files and DataFrames; overlapping source intervals are averaged by overlap length. + +```{python} +#| fig-cap: "Coarser binning compresses the signal into broader summaries; finer bins preserve peak shape. The third track shows the native 200 bp resolution of the synthetic data." +from plotnado import GenomicFigure +from plotnado.examples import REGION, signal +# signal() returns a synthetic ChIP-seq-like DataFrame(chrom, start, end, value) at 200 bp bins +# In real use pass a BigWig path/URL or any DataFrame with those columns + +fig = GenomicFigure(track_height=1.15) +fig.scalebar() +fig.bigwig(signal(), title="bin_size=5000 (coarse)", style="fill", color="#9467bd", bin_size=5000) +fig.bigwig(signal(), title="bin_size=1000", style="fill", color="#d62728", bin_size=1000) +fig.bigwig(signal(), title="bin_size=200 (native)", style="fill", color="#1f77b4") +fig.bigwig(signal(), title="n_bins=50", style="fill", color="#2ca02c", n_bins=50) +fig.bigwig(signal(), title="n_bins=200", style="fill", color="#ff7f0e", n_bins=200) +fig.axis() fig.plot(REGION) ``` -Caption: `fill` is the default for continuous signal, `fragment` emphasizes bins, `scatter` shows individual values, and `std` draws a band-style summary. +Use `n_bins` when you want consistent resolution across regions of different sizes: + +```python +# Always 200 bins regardless of zoom level +fig.bigwig("signal.bw", n_bins=200) + +# Fixed bin width across any region +fig.bigwig("signal.bw", bin_size=500) +``` ## Color and alpha ```{python} +#| fig-cap: "Use opacity to reduce visual dominance when several panels are compared." 
from plotnado import GenomicFigure -from docs._examples import REGION, signal +from plotnado.examples import REGION, signal +# signal() → DataFrame(chrom, start, end, value) — replace with a BigWig path/URL or DataFrame fig = GenomicFigure(track_height=1.0) fig.bigwig(signal(), title="alpha=1.0", style="fill", color="#1f77b4", alpha=1.0) @@ -35,15 +78,16 @@ fig.bigwig(signal(phase=1.6), title="alpha=0.35", style="fill", color="#2ca02c", fig.plot(REGION) ``` -Caption: use opacity to reduce visual dominance when several panels are compared. - ## Autocolor and groups Use `autocolor()` once, then assign related tracks the same `color_group`. ```{python} +#| fig-cap: "Color groups keep semantically related tracks aligned without hard-coding every color." from plotnado import GenomicFigure -from docs._examples import REGION, intervals, signal +from plotnado.examples import REGION, intervals, signal +# signal() → DataFrame(chrom, start, end, value) — replace with a BigWig path/URL or DataFrame +# intervals() → DataFrame(chrom, start, end, name) — replace with a BED/BigBed path, URL, or DataFrame fig = GenomicFigure(track_height=1.1).autocolor("Set2") fig.bigwig(signal(phase=0.0), title="Sample A signal", color_group="A") @@ -53,56 +97,86 @@ fig.bed(intervals().assign(name=["b1", "b2", "b3", "b4"]), title="Sample B peaks fig.plot(REGION) ``` -Caption: color groups keep semantically related tracks aligned without hard-coding every color. - ## Label placement ```{python} +#| fig-cap: "`title_location` anchors the label left or right; `label_box_enabled` adds a legibility box; `label_on_track` with the box is useful for compact multi-track figures." 
from plotnado import GenomicFigure -from docs._examples import REGION, signal +from plotnado.examples import REGION, signal +# signal() → DataFrame(chrom, start, end, value) — replace with a BigWig path/URL or DataFrame fig = GenomicFigure(track_height=1.0) -fig.bigwig(signal(), title="left margin", title_location="left") -fig.bigwig(signal(phase=0.7), title="right margin", title_location="right") -fig.bigwig(signal(phase=1.4), title="on track", label_on_track=True, label_box_enabled=True) +fig.bigwig(signal(), title="title_location='left'", title_location="left") +fig.bigwig(signal(phase=0.7), title="title_location='right'", title_location="right") +fig.bigwig(signal(phase=1.4), title="label_on_track, no box", label_on_track=True, label_box_enabled=False) +fig.bigwig(signal(phase=2.1), title="label_on_track + label_box_enabled", label_on_track=True, label_box_enabled=True) fig.plot(REGION) ``` -Caption: on-track labels are useful for compact figures, especially with a label box. - ## Overlay, autoscale, and highlights ```{python} -from docs._examples import REGION, overlay_comparison - -fig = overlay_comparison() +#| fig-cap: "`highlight()` marks a locus without changing y-limits; `autoscale_group` on the overlay synchronizes the panel with neighboring signal tracks." 
+from plotnado import GenomicFigure +from plotnado.examples import REGION, review_signal +# review_signal() → DataFrame(chrom, start, end, value) — replace with BigWig paths/URLs or DataFrames + +signal_a = review_signal(2.0) +signal_b = review_signal(10.0, 1.2) + + +fig = GenomicFigure(track_height=1.2) +fig.autoscale(True) +fig.highlight("chr1:1,032,000-1,046,000") +fig.highlight_style(color="#ffdd57", alpha=0.22) +fig.bigwig(signal_a, title="Control", autoscale_group="signal", color="#1f77b4") +fig.bigwig(signal_b, title="Treatment", autoscale_group="signal", color="#d62728") +fig.overlay( + [signal_a, signal_b], + title="Overlay", + autoscale_group="signal", + colors=["#1f77b4", "#d62728"], + alpha=0.55, +) fig.plot(REGION) ``` -Caption: `highlight()` marks a locus without changing y-limits; `autoscale_group` on the overlay synchronizes the panel with neighboring signal tracks. - ## BED, narrowPeak, and links ```{python} +#| fig-cap: "Interval, peak, and link tracks use tabular inputs with genomic coordinate columns." 
from plotnado import GenomicFigure -from docs._examples import REGION, intervals, links, narrowpeaks +from plotnado.examples import REGION, intervals, links, narrowpeaks +# intervals() → DataFrame(chrom, start, end, name) — replace with a BED/BigBed path, URL, or DataFrame +# narrowpeaks() → DataFrame(chrom, start, end, name, score, strand, signalValue, pValue, qValue, peak) — replace with a .narrowPeak path or DataFrame +# links() → DataFrame(chrom1, start1, end1, chrom2, start2, end2, score) — replace with a BEDPE path or DataFrame fig = GenomicFigure(track_height=1.1) -fig.axis() fig.bed(intervals(), title="BED intervals", display="expanded", show_labels=True) fig.narrowpeak(narrowpeaks(), title="narrowPeak", color_by="signalValue", cmap="Oranges", show_summit=True) fig.links(links(), title="Links", color_by_score=True, cmap="viridis", alpha=0.8) +fig.axis() fig.plot(REGION) ``` -Caption: interval, peak, and link tracks use tabular inputs with genomic coordinate columns. - ## Themes -```python +```{python} from plotnado import GenomicFigure +from plotnado.examples import signal, REGION fig = GenomicFigure(theme="publication") +fig.scalebar() +fig.bigwig(signal(), title="Publication theme", style="fill", color="#1f77b4") +fig.axis() +fig.plot("chr1:1,000,000-1,100,000", show=True) + +fig = GenomicFigure(theme="minimal") +fig.scalebar() +fig.bigwig(signal(), title="Minimal theme", style="fill", color="#1f77b4") +fig.axis() +fig.plot("chr1:1,000,000-1,100,000", show=False) # Note: this is just for demonstration; the theme doesn't affect the plot command, but it does affect the default styling of the figure and tracks. + ``` Built-in themes include `"default"`, `"minimal"`, and `"publication"`. diff --git a/docs/best_practices.qmd b/docs/best_practices.qmd index a496e2f..75f40ca 100644 --- a/docs/best_practices.qmd +++ b/docs/best_practices.qmd @@ -28,7 +28,9 @@ Call `autocolor()` once and reuse `color_group` for related tracks. 
```{python} from plotnado import GenomicFigure -from docs._examples import REGION, intervals, signal +from plotnado.examples import REGION, intervals, signal +# signal() → DataFrame(chrom, start, end, value) — replace with a BigWig path/URL or DataFrame +# intervals() → DataFrame(chrom, start, end, name) — replace with a BED/BigBed path, URL, or DataFrame fig = GenomicFigure(track_height=1.05).autocolor("Set2") fig.bigwig(signal(phase=0.0), title="A signal", color_group="A") diff --git a/docs/examples.qmd b/docs/examples.qmd new file mode 100644 index 0000000..6050269 --- /dev/null +++ b/docs/examples.qmd @@ -0,0 +1,164 @@ +--- +jupyter: python3 +execute: + warning: false + message: false +--- + +# Worked Examples + +Real-data examples using public BigWig and BigBed files from the +[Blueprint Epigenome Project](http://www.blueprint-epigenome.eu/) (GRCh38). +These examples require network access to the EBI FTP server. + +```{python} +#| echo: false +from pathlib import Path +import plotnado +from plotnado import GenomicFigure, PlotStyle + +repo_root = Path(plotnado.__file__).resolve().parent.parent + +BLUEPRINT_MONOCYTE_BW = ( + "http://ftp.ebi.ac.uk/pub/databases/blueprint/data/homo_sapiens/GRCh38/venous_blood" + "/C000S5/CD14-positive_CD16-negative_classical_monocyte/ChIP-Seq/NCMLS" + "/C000S5H2.ERX173536.H3K27ac.bwa.GRCh38.20150529.bw" +) +BLUEPRINT_MONOCYTE_BB = ( + "http://ftp.ebi.ac.uk/pub/databases/blueprint/data/homo_sapiens/GRCh38/venous_blood" + "/C000S5/CD14-positive_CD16-negative_classical_monocyte/ChIP-Seq/NCMLS" + "/C000S5H2.ERX173536.H3K27ac.bwa.GRCh38.20150527.bb" +) +BLUEPRINT_RNA_FILES = { + "T12-15 plasma cell RNA": ( + "http://ftp.ebi.ac.uk/pub/databases/blueprint/data/homo_sapiens/GRCh38" + "/tonsil/T12-15/plasma_cell/RNA-Seq/IDIBAPS" + "/T12-15-PC.signal.star_grape2_crg.GRCh38.20150815.bw" + ), + "T12-16 plasma cell RNA": ( + "http://ftp.ebi.ac.uk/pub/databases/blueprint/data/homo_sapiens/GRCh38" + "/tonsil/T12-16/plasma_cell/RNA-Seq/IDIBAPS" + 
"/T12-16-PC.signal.star_grape2_crg.GRCh38.20150815.bw" + ), +} +CANDIDATE_BED = str(repo_root / "examples" / "data" / "blueprint_monocyte_lyz_candidate_peaks.bed") + +LYZ_REGION = "chr12:69,310,000-69,400,000" +ACTB_REGION = "chr7:5,527,000-5,535,000" +``` + +--- + +## Example 1: stacked BigWig tracks with on-track labels + +Two Blueprint plasma-cell RNA signal tracks rendered in separate panels with +on-track label boxes and a shared autoscale group so both panels use the same +y-axis limits. + +```{python} +#| fig-cap: "Blueprint plasma-cell RNA at the ACTB locus. Both tracks share one autoscale group, so differences in expression level are directly comparable." +fig = GenomicFigure(theme="publication") +fig.scalebar() +for name, url in BLUEPRINT_RNA_FILES.items(): + fig.bigwig( + url, + title=name, + style="fragment", + height=0.55, + autoscale_group="blueprint_rna", + label_on_track=True, + label_box_enabled=True, + label_box_alpha=0.95, + title_height=0.5, + scale_height=0.5, + plot_scale=True, + ) +fig.genes("hg38", height=0.55) +fig.axis() +fig.plot(ACTB_REGION, extend=10_000) +``` + +--- + +## Example 2: `bigwig_overlay()` with a shared axis + +`bigwig_overlay()` places multiple signals in one panel on a shared y-axis. +Use this when you want to compare signal shape rather than absolute levels. + +```{python} +#| fig-cap: "Overlay of two Blueprint RNA tracks at the ACTB locus. One panel, one y-axis, colors distinguish the two samples." 
+fig = GenomicFigure(theme="publication") +fig.scalebar() +fig.bigwig_overlay( + list(BLUEPRINT_RNA_FILES.values()), + title="Blueprint plasma-cell RNA overlay", + colors=["#FF9D1B", "#1E5DF8"], + alpha=0.65, + height=0.9, + label_on_track=True, + label_box_enabled=True, + label_box_alpha=0.95, + title_height=0.5, + scale_height=0.5, + plot_scale=True, + style=PlotStyle.FRAGMENT, +) +fig.genes("hg38", height=0.55) +fig.axis() +fig.plot(ACTB_REGION, extend=10_000) +``` + +--- + +## Example 3: signal + peaks + curated BED at the LYZ locus + +A three-layer review plot: BigWig signal, hub peak calls from a remote BigBed, +and a checked-in BED of candidate loci for follow-up. + +```{python} +#| fig-cap: "Blueprint monocyte H3K27ac signal, hub peak calls, and curated candidate peaks at the LYZ locus." +fig = GenomicFigure(theme="publication") +fig.scalebar() +fig.bigwig( + BLUEPRINT_MONOCYTE_BW, + title="Monocyte H3K27ac", + style=PlotStyle.FRAGMENT, + height=0.6, + color="#d9485f", + label_on_track=True, + label_box_enabled=True, + label_box_alpha=0.95, + title_height=0.5, + scale_height=0.5, + plot_scale=True, +) +fig.bed( + BLUEPRINT_MONOCYTE_BB, + title="H3K27ac peaks (bigBed)", + color="#f59e0b", + draw_edges=False, + height=0.42, + label_on_track=True, + label_box_enabled=True, + label_box_alpha=0.95, + title_height=0.7, + show_labels=False, +) +fig.bed( + CANDIDATE_BED, + title="Curated follow-up peaks", + color="#0f766e", + draw_edges=True, + show_labels=True, + label_field="name", + font_size=7, + height=0.5, + label_on_track=True, + label_box_enabled=True, + label_box_alpha=0.95, + title_height=0.7, +) +fig.genes("hg38", height=0.55) +fig.axis() +fig.plot(LYZ_REGION, extend=10_000) +``` diff --git a/docs/figure_workflows.qmd b/docs/figure_workflows.qmd index 0d6a7ec..11bf066 100644 --- a/docs/figure_workflows.qmd +++ b/docs/figure_workflows.qmd @@ -10,9 +10,16 @@ execute: ## Single region ```{python} -from docs._examples import REGION, quickstart_figure - -fig 
= quickstart_figure() +from plotnado import GenomicFigure +from plotnado.examples import REGION, signal, intervals +# signal() → DataFrame(chrom, start, end, value) — replace with a BigWig path/URL or DataFrame +# intervals() → DataFrame(chrom, start, end, name) — replace with a BED/BigBed path, URL, or DataFrame + +fig = GenomicFigure(width=11, track_height=1.25) +fig.scalebar() +fig.bigwig(signal(scale=1.15), title="Synthetic signal", style="fill", color="#1f77b4") +fig.bed(intervals(), title="Intervals", display="expanded", show_labels=True) +fig.axis() fig.plot(REGION) ``` diff --git a/docs/quickstart.qmd b/docs/quickstart.qmd index debbea2..30b9ee0 100644 --- a/docs/quickstart.qmd +++ b/docs/quickstart.qmd @@ -24,20 +24,33 @@ uv run plotnado --help This example is fully in-memory, so it renders during the docs build without external files. ```{python} -from docs._examples import REGION, quickstart_figure - -fig = quickstart_figure() +#| fig-cap: "Scale, axis, signal, and BED-like intervals from deterministic DataFrames." +from plotnado import GenomicFigure +from plotnado.examples import REGION, signal, intervals +# signal() returns a synthetic DataFrame(chrom, start, end, value) — in real use pass a BigWig path/URL or any DataFrame with those columns +# intervals() returns a synthetic DataFrame(chrom, start, end, name) — in real use pass a BED/BigBed path, URL, or DataFrame + +fig = GenomicFigure(width=11, track_height=1.25) +fig.scalebar() +fig.bigwig(signal(scale=1.15), title="Synthetic signal", style="fill", color="#1f77b4") +fig.bed(intervals(), title="Intervals", display="expanded", show_labels=True) +fig.axis() fig.plot(REGION) ``` -Caption: scale, axis, signal, and BED-like intervals from deterministic DataFrames. 
- ## Save the same figure ```python -from docs._examples import REGION, quickstart_figure - -fig = quickstart_figure() +from plotnado import GenomicFigure +from plotnado.examples import REGION, signal, intervals +# signal() → DataFrame(chrom, start, end, value); intervals() → DataFrame(chrom, start, end, name) +# replace with BigWig paths/URLs or real DataFrames + +fig = GenomicFigure(width=11, track_height=1.25) +fig.scalebar() +fig.bigwig(signal(scale=1.15), title="Synthetic signal", style="fill", color="#1f77b4") +fig.bed(intervals(), title="Intervals", display="expanded", show_labels=True) +fig.axis() fig.save("quickstart.png", region=REGION) ``` diff --git a/docs/quickstart_tracks.qmd b/docs/quickstart_tracks.qmd index 8a3d238..429f8aa 100644 --- a/docs/quickstart_tracks.qmd +++ b/docs/quickstart_tracks.qmd @@ -12,27 +12,30 @@ Use helper methods for most figures, `add_track()` when names come from configur ## Helper methods ```{python} +#| fig-cap: "Helper methods keep ordinary figure construction readable and chainable." from plotnado import GenomicFigure -from docs._examples import REGION, intervals, signal +from plotnado.examples import REGION, intervals, signal +# signal() → DataFrame(chrom, start, end, value) — replace with a BigWig path/URL or DataFrame +# intervals() → DataFrame(chrom, start, end, name) — replace with a BED/BigBed path, URL, or DataFrame fig = GenomicFigure(track_height=1.2) fig.autocolor("Set2") fig.scalebar() -fig.axis() fig.bigwig(signal(), title="Signal", style="fill", color_group="sample-a") fig.bed(intervals(), title="Intervals", display="expanded", show_labels=True, color_group="sample-a") +fig.axis() fig.plot(REGION) ``` -Caption: helper methods keep ordinary figure construction readable and chainable. - ## Alias entry point `add_track()` is useful when track names are read from YAML, TOML, or another runtime source. ```{python} +#| fig-cap: "Aliases map to the same track constructors used by helper methods." 
from plotnado import GenomicFigure -from docs._examples import REGION, signal +from plotnado.examples import REGION, signal +# signal() → DataFrame(chrom, start, end, value) — replace with a BigWig path/URL or DataFrame fig = GenomicFigure().autocolor("Dark2") fig.add_track("scalebar") @@ -42,15 +45,14 @@ fig.add_track("bigwig", data=signal(phase=0.8), title="Replicate B", alpha=0.75) fig.plot(REGION) ``` -Caption: aliases map to the same track constructors used by helper methods. - ## Explicit objects Use explicit classes when you need to pass track objects around before adding them. ```python from plotnado import BigWigTrack, GenomicFigure -from docs._examples import signal +from plotnado.examples import signal +# signal() → DataFrame(chrom, start, end, value) — replace with a BigWig path/URL or DataFrame track = BigWigTrack(data=signal(), title="Reusable signal", style="fill") fig = GenomicFigure().add_track(track) @@ -86,14 +88,27 @@ fig.bigwig( Treat the overlay as the panel that owns the shared scale. ```{python} -from docs._examples import REGION, overlay_comparison +#| fig-cap: "Ordinary signal tracks and the overlay share one `autoscale_group`." 
+from plotnado import GenomicFigure +from plotnado.examples import REGION, review_signal +# review_signal() → DataFrame(chrom, start, end, value) — replace with BigWig paths/URLs or DataFrames -fig = overlay_comparison() +fig = GenomicFigure(track_height=1.2) +fig.autoscale(True) +fig.highlight("chr1:1,032,000-1,046,000") +fig.highlight_style(color="#ffdd57", alpha=0.22) +fig.bigwig(review_signal(2.0), title="Control", autoscale_group="signal", color="#1f77b4") +fig.bigwig(review_signal(10.0, 1.2), title="Treatment", autoscale_group="signal", color="#d62728") +fig.overlay( + [review_signal(5.5, 2.0), review_signal(6.5, 2.8)], + title="Overlay", + autoscale_group="signal", + colors=["#2ca02c", "#9467bd"], + alpha=0.55, +) fig.plot(REGION) ``` -Caption: ordinary signal tracks and the overlay share one `autoscale_group`. - ## Option lookup ```python diff --git a/docs/recipes.qmd b/docs/recipes.qmd index ff5a6df..13e9848 100644 --- a/docs/recipes.qmd +++ b/docs/recipes.qmd @@ -12,20 +12,40 @@ Recipes combine several options into patterns that are useful in real figures. ## Compare styles before choosing one ```{python} -from docs._examples import REGION, style_comparison - -fig = style_comparison() +#| fig-cap: "Render style comparisons in the docs or notebook instead of relying on remembered defaults." 
+from plotnado import GenomicFigure +from plotnado.examples import REGION, signal +# signal() → DataFrame(chrom, start, end, value) — replace with a BigWig path/URL or DataFrame + +fig = GenomicFigure(track_height=1.15) +fig.scalebar() +fig.bigwig(signal(phase=0.0), title="fill", style="fill", color="#1f77b4") +fig.bigwig(signal(phase=0.8), title="fragment", style="fragment", color="#d62728") +fig.bigwig(signal(phase=1.6), title="scatter", style="scatter", color="#2ca02c", scatter_point_size=10) +fig.bigwig(signal(phase=2.4), title="std", style="std", color="#9467bd") fig.plot(REGION) ``` -Caption: render style comparisons in the docs or notebook instead of relying on remembered defaults. - ## Autoscale + overlay + highlight ```{python} -from docs._examples import REGION, overlay_comparison - -fig = overlay_comparison() +from plotnado import GenomicFigure +from plotnado.examples import REGION, review_signal +# review_signal() → DataFrame(chrom, start, end, value) — replace with BigWig paths/URLs or DataFrames + +fig = GenomicFigure(track_height=1.2) +fig.autoscale(True) +fig.highlight("chr1:1,032,000-1,046,000") +fig.highlight_style(color="#ffdd57", alpha=0.22) +fig.bigwig(review_signal(2.0), title="Control", autoscale_group="signal", color="#1f77b4") +fig.bigwig(review_signal(10.0, 1.2), title="Treatment", autoscale_group="signal", color="#d62728") +fig.overlay( + [review_signal(5.5, 2.0), review_signal(6.5, 2.8)], + title="Overlay", + autoscale_group="signal", + colors=["#2ca02c", "#9467bd"], + alpha=0.55, +) fig.plot(REGION) ``` @@ -35,7 +55,9 @@ Use this when one overlay panel should sit next to ordinary signal tracks withou ```{python} from plotnado import GenomicFigure -from docs._examples import REGION, intervals, signal +from plotnado.examples import REGION, intervals, signal +# signal() → DataFrame(chrom, start, end, value) — replace with a BigWig path/URL or DataFrame +# intervals() → DataFrame(chrom, start, end, name) — replace with a BED/BigBed path, 
URL, or DataFrame fig = GenomicFigure(track_height=1.05).autocolor("tab10") fig.bigwig(signal(phase=0.0), title="Condition A signal", color_group="A") @@ -51,18 +73,43 @@ Use the same `color_group` for tracks that represent the same sample, condition, ```{python} from plotnado import GenomicFigure -from docs._examples import REGION, intervals, links, narrowpeaks +from plotnado.examples import REGION, intervals, links, narrowpeaks +# intervals() → DataFrame(chrom, start, end, name) — replace with a BED/BigBed path, URL, or DataFrame +# narrowpeaks() → DataFrame(chrom, start, end, name, score, strand, signalValue, pValue, qValue, peak) — replace with a .narrowPeak path or DataFrame +# links() → DataFrame(chrom1, start1, end1, chrom2, start2, end2, score) — replace with a BEDPE path or DataFrame fig = GenomicFigure(track_height=1.1) -fig.axis() fig.bed(intervals(), title="Annotations", display="expanded", show_labels=True) fig.narrowpeak(narrowpeaks(), title="Peaks", color_by="signalValue", cmap="Oranges") fig.links(links(), title="Interactions", color_by_score=True, cmap="viridis") +fig.axis() fig.plot(REGION) ``` Use interval tracks for local annotations and `links()` for paired genomic anchors. +## Control signal resolution + +Use `bin_size` or `n_bins` to match bin width to the figure size and zoom level. Coarser bins reduce visual noise in wide regions; finer bins reveal peak shape at high zoom. + +```{python} +#| fig-cap: "Same signal at four resolutions. `bin_size` keeps physical bin width fixed in bp; `n_bins` keeps the count fixed regardless of region size." 
+from plotnado import GenomicFigure +from plotnado.examples import REGION, signal +# signal() → DataFrame(chrom, start, end, value) — replace with a BigWig path/URL or DataFrame + +fig = GenomicFigure(track_height=1.1) +fig.scalebar() +fig.bigwig(signal(), title="bin_size=5000", style="fill", color="#9467bd", bin_size=5000) +fig.bigwig(signal(), title="bin_size=1000", style="fill", color="#d62728", bin_size=1000) +fig.bigwig(signal(), title="n_bins=50", style="fill", color="#e67e00", n_bins=50) +fig.bigwig(signal(), title="native 200 bp", style="fill", color="#1f77b4") +fig.axis() +fig.plot(REGION) +``` + +For BigWig files the rebinning is applied after fetching the native intervals, so there is no extra file I/O cost. For DataFrames the same weighted-average logic applies. + ## Save reusable figure definitions ```python diff --git a/docs/reference.qmd b/docs/reference.qmd index 1093920..8623e93 100644 --- a/docs/reference.qmd +++ b/docs/reference.qmd @@ -55,9 +55,24 @@ fig.highlight_style(color="#ffdd57", alpha=0.22) The rendered example shows highlight, autoscale, and overlay behavior together. 
```{python} -from docs._examples import REGION, overlay_comparison +from plotnado import GenomicFigure +from plotnado.examples import REGION, review_signal +# review_signal() → DataFrame(chrom, start, end, value) — replace with BigWig paths/URLs or DataFrames -overlay_comparison().plot(REGION) +fig = GenomicFigure(track_height=1.2) +fig.autoscale(True) +fig.highlight("chr1:1,032,000-1,046,000") +fig.highlight_style(color="#ffdd57", alpha=0.22) +fig.bigwig(review_signal(2.0), title="Control", autoscale_group="signal", color="#1f77b4") +fig.bigwig(review_signal(10.0, 1.2), title="Treatment", autoscale_group="signal", color="#d62728") +fig.overlay( + [review_signal(5.5, 2.0), review_signal(6.5, 2.8)], + title="Overlay", + autoscale_group="signal", + colors=["#2ca02c", "#9467bd"], + alpha=0.55, +) +fig.plot(REGION) ``` ## Plotting and saving diff --git a/docs/track_aliases.qmd b/docs/track_aliases.qmd index eb44655..abc2296 100644 --- a/docs/track_aliases.qmd +++ b/docs/track_aliases.qmd @@ -12,8 +12,10 @@ execute: ## Alias usage ```{python} +#| fig-cap: "Alias-added tracks render the same way as helper-method tracks." from plotnado import GenomicFigure -from docs._examples import REGION, signal +from plotnado.examples import REGION, signal +# signal() → DataFrame(chrom, start, end, value) — replace with a BigWig path/URL or DataFrame fig = GenomicFigure().autocolor("Set2") fig.add_track("scalebar") @@ -23,8 +25,6 @@ fig.add_track("bigwig", data=signal(phase=0.9), title="Replicate B", style="fill fig.plot(REGION) ``` -Caption: alias-added tracks render the same way as helper-method tracks. - ## How kwargs are routed - Track constructor fields are passed directly. diff --git a/docs/track_catalog.qmd b/docs/track_catalog.qmd index 9a62cc6..a5c236a 100644 --- a/docs/track_catalog.qmd +++ b/docs/track_catalog.qmd @@ -12,9 +12,17 @@ Choose a track by the visual question you need to answer. 
The tables are compact ## Signal tracks ```{python} -from docs._examples import REGION, style_comparison - -style_comparison().plot(REGION) +from plotnado import GenomicFigure +from plotnado.examples import REGION, signal +# signal() → DataFrame(chrom, start, end, value) — replace with a BigWig path/URL or DataFrame + +fig = GenomicFigure(track_height=1.15) +fig.scalebar() +fig.bigwig(signal(phase=0.0), title="fill", style="fill", color="#1f77b4") +fig.bigwig(signal(phase=0.8), title="fragment", style="fragment", color="#d62728") +fig.bigwig(signal(phase=1.6), title="scatter", style="scatter", color="#2ca02c", scatter_point_size=10) +fig.bigwig(signal(phase=2.4), title="std", style="std", color="#9467bd") +fig.plot(REGION) ``` | Track / aliases | Use when | Input | @@ -26,17 +34,22 @@ style_comparison().plot(REGION) There is no separate `GenomicFigure.bedgraph()` method; bedGraph-like inputs use `bigwig`. +Pass `n_bins=N` or `bin_size=B` to any bigwig-family track to rebin to a fixed resolution. Bins are computed as weighted averages of overlapping source intervals and work for both BigWig files and DataFrames. 
+ ## Interval, peak, and link tracks ```{python} from plotnado import GenomicFigure -from docs._examples import REGION, intervals, links, narrowpeaks +from plotnado.examples import REGION, intervals, links, narrowpeaks +# intervals() → DataFrame(chrom, start, end, name) — replace with a BED/BigBed path, URL, or DataFrame +# narrowpeaks() → DataFrame(chrom, start, end, name, score, strand, signalValue, pValue, qValue, peak) — replace with a .narrowPeak path or DataFrame +# links() → DataFrame(chrom1, start1, end1, chrom2, start2, end2, score) — replace with a BEDPE path or DataFrame fig = GenomicFigure(track_height=1.1) -fig.axis() fig.bed(intervals(), title="BED", display="expanded", show_labels=True) fig.narrowpeak(narrowpeaks(), title="narrowPeak", color_by="signalValue", cmap="Oranges", show_summit=True) fig.links(links(), title="Links", color_by_score=True, cmap="viridis") +fig.axis() fig.plot(REGION) ``` diff --git a/index.qmd b/index.qmd index dddaa49..6b4ab2a 100644 --- a/index.qmd +++ b/index.qmd @@ -1,7 +1,6 @@ --- title: PlotNado page-layout: full -title-block-style: none jupyter: python3 execute: echo: false @@ -9,8 +8,6 @@ execute: message: false --- -# PlotNado - PlotNado builds genome browser-style figures from Python code or editable YAML templates. It is useful when you need reproducible plots for BigWig-like signals, intervals, peaks, links, genes, and optional matrix-style genomic data. @@ -23,14 +20,44 @@ or [Quick Start](docs/quickstart.qmd) if you already have the uv environment. 
## First figure ```{python} -from docs._examples import REGION, quickstart_figure +#| echo: true +from pathlib import Path +import plotnado +from plotnado import GenomicFigure, PlotStyle + +BLUEPRINT_MONOCYTE_BW = ( + "http://ftp.ebi.ac.uk/pub/databases/blueprint/data/homo_sapiens/GRCh38/venous_blood" + "/C000S5/CD14-positive_CD16-negative_classical_monocyte/ChIP-Seq/NCMLS" + "/C000S5H2.ERX173536.H3K27ac.bwa.GRCh38.20150529.bw" +) +BLUEPRINT_MONOCYTE_BB = ( + "http://ftp.ebi.ac.uk/pub/databases/blueprint/data/homo_sapiens/GRCh38/venous_blood" + "/C000S5/CD14-positive_CD16-negative_classical_monocyte/ChIP-Seq/NCMLS" + "/C000S5H2.ERX173536.H3K27ac.bwa.GRCh38.20150527.bb" +) +CANDIDATE_BED = str( + Path(plotnado.__file__).parent.parent / "examples" / "data" / "blueprint_monocyte_lyz_candidate_peaks.bed" +) -quickstart_figure().plot(REGION) +fig = GenomicFigure(width=11, track_height=0.7, theme="publication") +fig.scalebar() +fig.genes("hg38", height=0.55) +fig.bigwig( + BLUEPRINT_MONOCYTE_BW, + title="Monocyte H3K27ac", + style=PlotStyle.FRAGMENT, + color="#d9485f", + height=0.75, + label_on_track=True, + label_box_enabled=True, + plot_scale=True, +) +fig.bed(BLUEPRINT_MONOCYTE_BB, title="H3K27ac peaks", color="#f59e0b", height=0.35, show_labels=False) +fig.bed(CANDIDATE_BED, title="Candidate loci", color="#0f766e", show_labels=True, label_field="name", font_size=7) +fig.axis() +fig.plot("chr12:69,310,000-69,400,000") ``` -The example is rendered by Quarto from deterministic in-memory data. The same pattern -is used throughout the docs so code and output stay in sync. 
- ## Common routes | Need | Start here | @@ -52,9 +79,9 @@ from plotnado import GenomicFigure fig = ( GenomicFigure() .scalebar() - .axis() .bigwig(signal, title="Signal", style="fill") .bed(intervals, title="Intervals", display="expanded") + .axis() ) fig.save("plot.png", region="chr1:1,010,000-1,080,000") ``` @@ -72,15 +99,4 @@ uv run plotnado plot template.yaml --region chr1:1,000,000-1,100,000 --output pl Use this route when the figure definition should be reviewed or versioned as a human-editable YAML file. -## Visual options - -Most option guidance is shown where it matters. For example, signal style choices -are visible in the rendered comparison below. - -```{python} -from docs._examples import style_comparison - -style_comparison().plot(REGION) -``` - For compact runtime lookup, use [Compact Options](docs/option_lookup.qmd). diff --git a/plotnado/__init__.py b/plotnado/__init__.py index cf56edc..c950a78 100644 --- a/plotnado/__init__.py +++ b/plotnado/__init__.py @@ -6,6 +6,7 @@ """ from .figure import GenomicFigure +from .tracks.genes import register_genome from .igv import parse_igv_session, IgvSession from .hub import parse_ucsc_hub, UcscHubSession from .widgets import TrackVisibilityWidget @@ -14,6 +15,7 @@ from .render import TemplateCompiler, RenderPlan, ResolvedTrack from .tracks import ( # Enums + BedLabelPosition, CollectionStyle, DisplayMode, FontWeight, @@ -81,6 +83,7 @@ __all__ = [ "GenomicFigure", + "register_genome", "parse_igv_session", "IgvSession", "parse_ucsc_hub", @@ -97,6 +100,7 @@ "RenderPlan", "ResolvedTrack", # Enums + "BedLabelPosition", "DisplayMode", "CollectionStyle", "FontWeight", diff --git a/plotnado/_kwargs.py b/plotnado/_kwargs.py index 1973dee..a8b533b 100644 --- a/plotnado/_kwargs.py +++ b/plotnado/_kwargs.py @@ -47,6 +47,8 @@ class BigwigKwargs(TypedDict, total=False): smoothing_center: bool min_value: float | None max_value: float | None + n_bins: int | None + bin_size: int | None plot_title: bool plot_scale: bool 
label_on_track: bool @@ -247,6 +249,7 @@ class BedKwargs(TypedDict, total=False): font_size: int rect_linewidth: float draw_edges: bool + label_position: BedLabelPosition plot_title: bool plot_scale: bool label_on_track: bool @@ -515,6 +518,7 @@ class NarrowpeakKwargs(TypedDict, total=False): font_size: int rect_linewidth: float draw_edges: bool + label_position: BedLabelPosition color_by: NarrowPeakColorBy | None cmap: str min_score: float | None diff --git a/plotnado/data/blueprint_monocyte_lyz_candidate_peaks.bed b/plotnado/data/blueprint_monocyte_lyz_candidate_peaks.bed new file mode 100644 index 0000000..8aed684 --- /dev/null +++ b/plotnado/data/blueprint_monocyte_lyz_candidate_peaks.bed @@ -0,0 +1,4 @@ +chr12 69344737 69344992 LYZ_promoter_peak +chr12 69347375 69347611 LYZ_tss_peak +chr12 69352454 69352660 LYZ_gene_body_peak +chr12 69359719 69360538 LYZ_downstream_peak diff --git a/plotnado/examples.py b/plotnado/examples.py new file mode 100644 index 0000000..e72dccb --- /dev/null +++ b/plotnado/examples.py @@ -0,0 +1,275 @@ +""" +Synthetic and example data helpers for plotnado documentation and tutorials. + +These functions return small in-memory DataFrames that mimic real genomic +data formats. 
Swap them for real files or URLs in production: + + signal() → BigWig path / URL, or DataFrame(chrom, start, end, value) + intervals() → BED path / URL, or DataFrame(chrom, start, end, name) + narrowpeaks() → .narrowPeak path, or DataFrame with ENCODE narrowPeak columns + links() → BEDPE path, or DataFrame(chrom1, start1, end1, chrom2, start2, end2, score) +""" + +from __future__ import annotations + +import importlib.util +from collections.abc import Sequence +from pathlib import Path + +import numpy as np +import pandas as pd + +from plotnado import GenomicFigure, PlotStyle + +# --------------------------------------------------------------------------- +# Constants +# --------------------------------------------------------------------------- + +REGION = "chr1:1,010,000-1,080,000" +WIDE_REGION = "chr1:1,000,000-1,110,000" + +BLUEPRINT_MONOCYTE_BW = ( + "http://ftp.ebi.ac.uk/pub/databases/blueprint/data/homo_sapiens/GRCh38/venous_blood" + "/C000S5/CD14-positive_CD16-negative_classical_monocyte/ChIP-Seq/NCMLS" + "/C000S5H2.ERX173536.H3K27ac.bwa.GRCh38.20150529.bw" +) +BLUEPRINT_MONOCYTE_BB = ( + "http://ftp.ebi.ac.uk/pub/databases/blueprint/data/homo_sapiens/GRCh38/venous_blood" + "/C000S5/CD14-positive_CD16-negative_classical_monocyte/ChIP-Seq/NCMLS" + "/C000S5H2.ERX173536.H3K27ac.bwa.GRCh38.20150527.bb" +) +BLUEPRINT_REGION = "chr12:69,310,000-69,400,000" + +_CANDIDATE_BED = str(Path(__file__).parent / "data" / "blueprint_monocyte_lyz_candidate_peaks.bed") + +# --------------------------------------------------------------------------- +# Utilities +# --------------------------------------------------------------------------- + + +def has_module(module_name: str) -> bool: + return importlib.util.find_spec(module_name) is not None + + +# --------------------------------------------------------------------------- +# Synthetic signal generators +# --------------------------------------------------------------------------- + + +def _chip_signal( + bins: np.ndarray, 
+ peak_centers: Sequence[int], + peak_heights: Sequence[float], + peak_widths: Sequence[float], + noise_seed: int = 0, + noise_smooth: int = 7, +) -> np.ndarray: + """Sparse sharp Gaussian peaks on a smoothed exponential-noise background.""" + rng = np.random.default_rng(noise_seed) + n = len(bins) + noise = rng.exponential(0.25, n) + rng.exponential(0.05, n) + # Smooth noise so adjacent bins are correlated (mimics genomic read pileup) + if noise_smooth > 1: + kernel = np.ones(noise_smooth) / noise_smooth + noise = np.convolve(noise, kernel, mode="same") + values = np.clip(noise, 0, None) + for center, height, width in zip(peak_centers, peak_heights, peak_widths): + values += height * np.exp(-0.5 * ((bins - center) / width) ** 2) + return values + + +def signal( + start: int = 1_000_000, + end: int = 1_100_000, + step: int = 200, + phase: float = 0.0, + scale: float = 1.0, + baseline: float = 5.0, +) -> pd.DataFrame: + """ChIP-seq-like synthetic signal over chr1. + + Returns DataFrame(chrom, start, end, value). + Replace with a BigWig path/URL or your own DataFrame. + """ + bins = np.arange(start, end, step) + span = end - start + peak_centers = [ + int(start + span * (0.20 + phase * 0.04)), + int(start + span * (0.48 + phase * 0.03)), + int(start + span * (0.73 + phase * 0.02)), + ] + peak_heights = [h * scale * baseline / 7 for h in [8.0, 14.0, 6.0]] + peak_widths = [2_500, 3_000, 2_000] + values = _chip_signal(bins, peak_centers, peak_heights, peak_widths, noise_seed=int(phase * 100)) + return pd.DataFrame({"chrom": "chr1", "start": bins, "end": bins + step, "value": values}) + + +def review_signal(scale: float = 1.0, phase: float = 0.0) -> pd.DataFrame: + """Wider ChIP-seq-like signal for overlay/autoscale examples. + + Returns DataFrame(chrom, start, end, value). + Replace with a BigWig path/URL or your own DataFrame. 
+ """ + bins = np.arange(1_000_000, 1_120_000, 200) + span = 120_000 + peak_centers = [ + int(1_000_000 + span * (0.22 + phase * 0.03)), + int(1_000_000 + span * (0.55 + phase * 0.02)), + int(1_000_000 + span * (0.78 + phase * 0.04)), + ] + peak_heights = [h * scale for h in [12.0, 20.0, 8.0]] + peak_widths = [3_000, 4_000, 2_500] + values = _chip_signal(bins, peak_centers, peak_heights, peak_widths, noise_seed=int(scale * 7 + phase * 13)) + return pd.DataFrame({"chrom": "chr1", "start": bins, "end": bins + 200, "value": values}) + + +# --------------------------------------------------------------------------- +# Synthetic interval data +# --------------------------------------------------------------------------- + + +def intervals() -> pd.DataFrame: + """Four synthetic genomic intervals on chr1. + + Returns DataFrame(chrom, start, end, name). + Replace with a BED/BigBed path, URL, or your own DataFrame. + """ + return pd.DataFrame( + { + "chrom": ["chr1", "chr1", "chr1", "chr1"], + "start": [1_008_000, 1_020_000, 1_050_000, 1_066_000], + "end": [1_014_000, 1_032_000, 1_061_000, 1_074_000], + "name": ["enhancer_a", "enhancer_b", "promoter", "domain"], + } + ) + + +def narrowpeaks() -> pd.DataFrame: + """Three synthetic ENCODE narrowPeak rows on chr1. + + Returns DataFrame with columns: chrom, start, end, name, score, strand, + signalValue, pValue, qValue, peak. + Replace with a .narrowPeak path or your own DataFrame. + """ + return pd.DataFrame( + { + "chrom": ["chr1", "chr1", "chr1"], + "start": [1_012_000, 1_038_000, 1_060_000], + "end": [1_018_000, 1_047_000, 1_070_000], + "name": ["np1", "np2", "np3"], + "score": [300, 700, 500], + "strand": [".", ".", "."], + "signalValue": [12.0, 48.0, 30.0], + "pValue": [5.2, 12.3, 8.1], + "qValue": [4.1, 10.0, 6.2], + "peak": [1200, 1800, 2200], + } + ) + + +def links() -> pd.DataFrame: + """Three synthetic interaction links on chr1. + + Returns DataFrame(chrom1, start1, end1, chrom2, start2, end2, score). 
+ Replace with a BEDPE path or your own DataFrame. + """ + return pd.DataFrame( + { + "chrom1": ["chr1", "chr1", "chr1"], + "start1": [1_010_000, 1_022_000, 1_042_000], + "end1": [1_012_000, 1_024_000, 1_045_000], + "chrom2": ["chr1", "chr1", "chr1"], + "start2": [1_035_000, 1_054_000, 1_072_000], + "end2": [1_037_000, 1_056_000, 1_074_000], + "score": [2.2, 6.5, 9.8], + } + ) + + +# --------------------------------------------------------------------------- +# Pre-built figure helpers (used in docs; inline the code for notebooks) +# --------------------------------------------------------------------------- + + +def hero_figure() -> GenomicFigure: + """Real Blueprint monocyte H3K27ac figure used as the homepage hero plot.""" + fig = GenomicFigure(width=11, track_height=0.7, theme="publication") + fig.scalebar() + fig.genes("hg38", height=0.55) + fig.bigwig( + BLUEPRINT_MONOCYTE_BW, + title="Monocyte H3K27ac", + style=PlotStyle.FRAGMENT, + color="#d9485f", + label_on_track=True, + label_box_enabled=True, + label_box_alpha=0.92, + title_height=0.5, + scale_height=0.5, + plot_scale=True, + height=0.75, + ) + fig.bed( + BLUEPRINT_MONOCYTE_BB, + title="H3K27ac peaks", + color="#f59e0b", + draw_edges=False, + height=0.35, + label_on_track=True, + label_box_enabled=True, + label_box_alpha=0.92, + title_height=0.7, + show_labels=False, + ) + fig.bed( + _CANDIDATE_BED, + title="Candidate loci", + color="#0f766e", + draw_edges=True, + show_labels=True, + label_field="name", + font_size=7, + height=0.4, + label_on_track=True, + label_box_enabled=True, + label_box_alpha=0.92, + title_height=0.7, + ) + fig.axis() + return fig + + +def quickstart_figure() -> GenomicFigure: + fig = GenomicFigure(width=11, track_height=1.25) + fig.scalebar() + fig.bigwig(signal(scale=1.15), title="Synthetic signal", style="fill", color="#1f77b4") + fig.bed(intervals(), title="Intervals", display="expanded", show_labels=True) + fig.axis() + return fig + + +def style_comparison() -> GenomicFigure: 
+ fig = GenomicFigure(track_height=1.15) + fig.scalebar() + fig.bigwig(signal(phase=0.0), title="fill", style="fill", color="#1f77b4") + fig.bigwig(signal(phase=0.8), title="fragment", style="fragment", color="#d62728") + fig.bigwig(signal(phase=1.6), title="scatter", style="scatter", color="#2ca02c", scatter_point_size=10) + fig.bigwig(signal(phase=2.4), title="std", style="std", color="#9467bd") + return fig + + +def overlay_comparison() -> GenomicFigure: + fig = GenomicFigure(track_height=1.2) + fig.autoscale(True) + fig.highlight("chr1:1,032,000-1,046,000") + fig.highlight_style(color="#ffdd57", alpha=0.22) + fig.bigwig(review_signal(2.0), title="Control", autoscale_group="signal", color="#1f77b4") + fig.bigwig(review_signal(10.0, 1.2), title="Treatment", autoscale_group="signal", color="#d62728") + fig.overlay( + [review_signal(5.5, 2.0), review_signal(6.5, 2.8)], + title="Overlay", + autoscale_group="signal", + colors=["#2ca02c", "#9467bd"], + alpha=0.55, + ) + return fig diff --git a/plotnado/figure.pyi b/plotnado/figure.pyi index 52fde43..31b78f3 100644 --- a/plotnado/figure.pyi +++ b/plotnado/figure.pyi @@ -114,6 +114,8 @@ class GenomicFigure: smoothing_center: bool = True, min_value: float | None = None, max_value: float | None = None, + n_bins: int | None = None, + bin_size: int | None = None, plot_title: bool = True, plot_scale: bool = True, label_on_track: bool = False, @@ -206,7 +208,7 @@ class GenomicFigure: autoscale_group: str | None = None, color_group: str | None = None, show_chromosome: bool = False, - color: str = 'steelblue', + color: str = 'black', alpha: float = 1.0, linewidth: float = 1.0, font_size: int = 9, @@ -214,7 +216,7 @@ class GenomicFigure: use_human_readable_labels: bool = False, tick_height: float = 0.15, axis_linewidth: float = 1.1, - tick_color: str = '#333333', + tick_color: str = 'black', tick_linewidth: float = 0.9, chromosome_fontweight: FontWeight = FontWeight.BOLD, plot_title: bool = True, @@ -319,6 +321,7 @@ class 
GenomicFigure: font_size: int = 8, rect_linewidth: float = 0.7, draw_edges: bool = True, + label_position: BedLabelPosition = BedLabelPosition.ABOVE, plot_title: bool = True, plot_scale: bool = True, label_on_track: bool = False, @@ -595,6 +598,7 @@ class GenomicFigure: font_size: int = 8, rect_linewidth: float = 0.7, draw_edges: bool = True, + label_position: BedLabelPosition = BedLabelPosition.ABOVE, color_by: NarrowPeakColorBy | None = NarrowPeakColorBy.SIGNAL_VALUE, cmap: str = 'Oranges', min_score: float | None = None, diff --git a/plotnado/figure_methods.py b/plotnado/figure_methods.py index cd7b9b7..571ad59 100644 --- a/plotnado/figure_methods.py +++ b/plotnado/figure_methods.py @@ -78,11 +78,13 @@ def bigwig( baseline_alpha: float = ..., baseline_color: str = ..., baseline_linewidth: float = ..., + bin_size: int | None = ..., color: str = ..., fill: bool = ..., linewidth: float = ..., max_value: float | None = ..., min_value: float | None = ..., + n_bins: int | None = ..., scatter_point_size: float = ..., show_baseline: bool = ..., smoothing_center: bool = ..., @@ -379,6 +381,7 @@ def bed( font_size: int = ..., interval_height: float = ..., label_field: str = ..., + label_position: BedLabelPosition = ..., linewidth: float = ..., max_rows: int = ..., rect_linewidth: float = ..., @@ -839,6 +842,7 @@ def narrowpeak( font_size: int = ..., interval_height: float = ..., label_field: str = ..., + label_position: BedLabelPosition = ..., linewidth: float = ..., max_rows: int = ..., max_score: float | None = ..., diff --git a/plotnado/tracks/__init__.py b/plotnado/tracks/__init__.py index 6562c52..81adf9e 100644 --- a/plotnado/tracks/__init__.py +++ b/plotnado/tracks/__init__.py @@ -5,6 +5,7 @@ """ from .enums import ( + BedLabelPosition, CollectionStyle, DisplayMode, FontWeight, @@ -83,6 +84,7 @@ __all__ = [ # Enums + "BedLabelPosition", "DisplayMode", "CollectionStyle", "FontWeight", diff --git a/plotnado/tracks/axis.py b/plotnado/tracks/axis.py index 
6f5771b..50d8bf0 100644 --- a/plotnado/tracks/axis.py +++ b/plotnado/tracks/axis.py @@ -25,6 +25,7 @@ class GenomicAxisAesthetics(BaseAesthetics): num_ticks: Approximate number of ticks to display show_chromosome: Whether to show chromosome name """ + color: str = Field(default="black", description="Color for the axis baseline.") font_size: int = Field(default=9, description="Font size for tick and chromosome labels.") num_ticks: int = Field(default=5, description="Target number of tick marks across the region.") show_chromosome: bool = Field(default=True, description="Render chromosome name label near the axis.") @@ -34,7 +35,7 @@ class GenomicAxisAesthetics(BaseAesthetics): ) tick_height: float = Field(default=0.15, description="Tick length drawn downward from axis baseline.") axis_linewidth: float = Field(default=1.1, description="Line width of the horizontal axis baseline.") - tick_color: str = Field(default="#333333", description="Color for tick marks and tick labels.") + tick_color: str = Field(default="black", description="Color for tick marks and tick labels.") tick_linewidth: float = Field(default=0.9, description="Line width of tick marks.") chromosome_fontweight: FontWeight = Field( default=FontWeight.BOLD, diff --git a/plotnado/tracks/bed.py b/plotnado/tracks/bed.py index 087d34a..53499fc 100644 --- a/plotnado/tracks/bed.py +++ b/plotnado/tracks/bed.py @@ -13,7 +13,7 @@ from .region import GenomicRegion from .base import Track, TrackLabeller from .utils import clean_axis, read_bed_regions -from .enums import DisplayMode, TrackType +from .enums import BedLabelPosition, DisplayMode, TrackType from .aesthetics import BaseAesthetics from .registry import registry @@ -49,6 +49,10 @@ class BedAesthetics(BaseAesthetics): font_size: int = Field(default=8, description="Font size for interval labels.") rect_linewidth: float = Field(default=0.7, description="Border line width for interval rectangles.") draw_edges: bool = Field(default=True, description="Draw 
rectangle borders for intervals.") + label_position: BedLabelPosition = Field( + default=BedLabelPosition.ABOVE, + description="Where to place interval labels: 'above', 'inside', or 'below' the rectangle.", + ) @registry.register(TrackType.BED, aliases=["annotation", "unknown"]) @@ -152,6 +156,12 @@ def plot(self, ax: matplotlib.axes.Axes, gr: GenomicRegion) -> None: return row_scale = 1.0 / max(1, self.max_rows) + # In expanded mode cap rect height so rows don't overflow y=[0, 1] + effective_height = ( + self.interval_height + if self.display == DisplayMode.COLLAPSED + else min(self.interval_height, row_scale * 0.85) + ) row_last_positions: list[int] = [] for row in data.itertuples(): @@ -172,11 +182,17 @@ def plot(self, ax: matplotlib.axes.Axes, gr: GenomicRegion) -> None: else ((row_index + 0.5) * row_scale) ) + # Clip to region — intervals may extend beyond the viewed window + draw_start = max(float(start), float(gr.start)) + draw_end = min(float(end), float(gr.end)) + if draw_end <= draw_start: + continue + # Draw interval rect = matplotlib.patches.Rectangle( - (start, ypos - self.interval_height / 2), - end - start, - self.interval_height, + (draw_start, ypos - effective_height / 2), + draw_end - draw_start, + effective_height, linewidth=self.rect_linewidth if self.draw_edges else 0, edgecolor=self.edge_color if self.draw_edges else "none", facecolor=self.color, @@ -186,17 +202,27 @@ def plot(self, ax: matplotlib.axes.Axes, gr: GenomicRegion) -> None: # Draw label if enabled if self.show_labels and hasattr(row, self.label_field): - label = getattr(row, self.label_field) - # Position label above the peak, within track bounds - label_ypos = ypos + self.interval_height / 2 + 0.05 + label_text = str(getattr(row, self.label_field)) + label_xpos = float(start) + float(end) + label_xpos /= 2 # true midpoint — allowed to go off-screen + match self.label_position: + case BedLabelPosition.ABOVE: + label_ypos = ypos + effective_height / 2 + 0.03 + va = "bottom" + case 
BedLabelPosition.INSIDE: + label_ypos = ypos + va = "center" + case BedLabelPosition.BELOW: + label_ypos = ypos - effective_height / 2 - 0.03 + va = "top" ax.text( - (start + end) / 2, + label_xpos, label_ypos, - str(label), + label_text, ha="center", - va="bottom", + va=va, fontsize=self.font_size, - clip_on=True, # Clip text that extends outside axis + clip_on=True, ) ax.set_xlim(gr.start, gr.end) diff --git a/plotnado/tracks/bigwig.py b/plotnado/tracks/bigwig.py index 834fdb7..7dbdb41 100644 --- a/plotnado/tracks/bigwig.py +++ b/plotnado/tracks/bigwig.py @@ -67,6 +67,17 @@ class BigwigAesthetics(BaseAesthetics): description="Optional fixed upper y-limit; auto-derived when omitted.", ) + n_bins: int | None = Field( + default=None, + ge=1, + description="Divide the plotted region into this many equal bins. Overrides bin_size.", + ) + bin_size: int | None = Field( + default=None, + ge=1, + description="Bin width in base pairs. Ignored when n_bins is set.", + ) + @registry.register(TrackType.BIGWIG, aliases=["bw", "signal", "bedgraph"]) class BigWigTrack(Track): @@ -144,12 +155,54 @@ def _fetch_from_df(self, gr: GenomicRegion) -> BedgraphDataFrame: df.loc[mask, ["start", "end", "value", "chrom"]].copy() ) + def _resolve_n_bins(self, gr: GenomicRegion) -> int | None: + if self.n_bins is not None: + return int(self.n_bins) + if self.bin_size is not None: + return max(1, int((gr.end - gr.start) / self.bin_size)) + return None + + def _rebin(self, data: pd.DataFrame, gr: GenomicRegion, n_bins: int) -> pd.DataFrame: + """Weighted-average rebin of bedgraph data into n_bins equal-width bins over the region.""" + if data.empty: + return data + + edges = np.linspace(gr.start, gr.end, n_bins + 1) + bin_starts = edges[:-1] + bin_ends = edges[1:] + + src_s = data["start"].to_numpy(dtype=float) + src_e = data["end"].to_numpy(dtype=float) + src_v = data["value"].to_numpy(dtype=float) + valid = ~np.isnan(src_v) + src_s, src_e, src_v = src_s[valid], src_e[valid], src_v[valid] + + 
bin_values = np.full(n_bins, np.nan) + for i in range(n_bins): + overlap = np.maximum(0.0, np.minimum(src_e, bin_ends[i]) - np.maximum(src_s, bin_starts[i])) + w = overlap.sum() + if w > 0: + bin_values[i] = (src_v * overlap).sum() / w + + keep = ~np.isnan(bin_values) + return pd.DataFrame({ + "chrom": gr.chromosome, + "start": bin_starts[keep].astype(int), + "end": bin_ends[keep].astype(int), + "value": bin_values[keep], + }) + def fetch_data(self, gr: GenomicRegion) -> BedgraphDataFrame: """Fetch data for the given genomic region.""" if isinstance(self.data, pd.DataFrame): df = self._fetch_from_df(gr) else: df = self._fetch_from_disk(gr) + + n = self._resolve_n_bins(gr) + if n is not None: + df = self._rebin(df, gr, n) + return BedgraphDataFrame(df) def _apply_smoothing(self, data: pd.DataFrame) -> pd.DataFrame: diff --git a/plotnado/tracks/enums.py b/plotnado/tracks/enums.py index 2503433..4665bad 100644 --- a/plotnado/tracks/enums.py +++ b/plotnado/tracks/enums.py @@ -97,6 +97,14 @@ class BigWigDiffMethod(str, Enum): LOG2RATIO = "log2ratio" +class BedLabelPosition(str, Enum): + """Where to anchor interval label text relative to the rectangle.""" + + ABOVE = "above" + INSIDE = "inside" + BELOW = "below" + + class NarrowPeakColorBy(str, Enum): """Fields that can drive narrowPeak color mapping.""" diff --git a/plotnado/tracks/genes.py b/plotnado/tracks/genes.py index caa472a..b81d103 100644 --- a/plotnado/tracks/genes.py +++ b/plotnado/tracks/genes.py @@ -23,6 +23,24 @@ from .aesthetics import BaseAesthetics from .registry import registry +_user_genomes: dict[str, Path] = {} + + +def register_genome(name: str, path: str | Path) -> None: + """Register a BED12 or GTF file under a short genome name. + + Once registered, use it anywhere a bundled genome identifier is accepted:: + + import plotnado + plotnado.register_genome("mm10", "/data/mm10_refseq.bed") + fig.genes("mm10") # or genome: mm10 in YAML templates + + Args: + name: Short genome identifier (e.g. 
``"mm10"``, ``"hg19"``). + path: Path to a BED12 or GTF gene annotation file (plain or bgzip-compressed). + """ + _user_genomes[name] = Path(path) + @dataclass class LabelPlacement: @@ -183,6 +201,9 @@ def _enum_token(value: object) -> str: return raw.lower() def _fetch_genes_from_package(self, gr: GenomicRegion) -> pd.DataFrame: + if self.genome in _user_genomes: + return self._fetch_from_disk_bed12(gr, _user_genomes[self.genome]) + try: bed_prefix = importlib.resources.files("plotnado.data.gene_bed_files") mapping_path = bed_prefix / "genes.json" @@ -196,7 +217,7 @@ def _fetch_genes_from_package(self, gr: GenomicRegion) -> pd.DataFrame: if self.genome not in gene_files: raise ValueError( f"Genome {self.genome} not found in the genes database. " - f"Available genomes: {list(gene_files.keys())}" + f"Available genomes: {list(gene_files.keys()) + list(_user_genomes.keys())}" ) gene_file = bed_prefix / gene_files[self.genome] @@ -287,8 +308,8 @@ def _fetch_from_disk_bed12( df["block_count"] = df["block_starts"].apply(lambda values: len(values) or 1) return df - def _fetch_from_disk_gtf(self, gr: GenomicRegion) -> pd.DataFrame: - gtf_df = read_gtf_regions(str(self.data), gr.chromosome, gr.start, gr.end) + def _fetch_from_disk_gtf(self, gr: GenomicRegion, file_path: Path | None = None) -> pd.DataFrame: + gtf_df = read_gtf_regions(str(file_path or self.data), gr.chromosome, gr.start, gr.end) if gtf_df.empty: return pd.DataFrame( columns=[ @@ -357,6 +378,22 @@ def fetch_data(self, gr: GenomicRegion) -> pd.DataFrame: raise ValueError("Either data or genome must be provided") if self.data is None: + # Allow genome= to accept a file path directly (absolute or known extension) + if self.genome and self.genome not in _user_genomes: + p = Path(self.genome) + genome_lower = self.genome.lower() + is_path = ( + p.is_absolute() + or genome_lower.endswith((".bed", ".bed.gz", ".gtf", ".gtf.gz")) + ) + if is_path: + if genome_lower.endswith(".gtf") or genome_lower.endswith(".gtf.gz"): 
+ data = self._fetch_from_disk_gtf(gr, file_path=p) + else: + data = self._fetch_from_disk_bed12(gr, p) + if self.aesthetics.minimum_gene_length > 0 and not data.empty: + data = data.query(f"end - start >= {self.aesthetics.minimum_gene_length}") + return data data = self._fetch_genes_from_package(gr) elif isinstance(self.data, pd.DataFrame): data = self.data.copy() diff --git a/plotnado/tracks/peaks.py b/plotnado/tracks/peaks.py index 447140f..8277d83 100644 --- a/plotnado/tracks/peaks.py +++ b/plotnado/tracks/peaks.py @@ -88,6 +88,11 @@ def plot(self, ax: matplotlib.axes.Axes, gr: GenomicRegion) -> None: return row_scale = 1.0 / max(1, self.max_rows) + effective_height = ( + self.interval_height + if self.display == DisplayMode.COLLAPSED + else min(self.interval_height, row_scale * 0.85) + ) row_last_positions: list[int] = [] # Setup colormap if needed @@ -126,6 +131,12 @@ def plot(self, ax: matplotlib.axes.Axes, gr: GenomicRegion) -> None: else ((row_index + 0.5) * row_scale) ) + # Clip to region — intervals may extend beyond the viewed window + draw_start = max(float(start), float(gr.start)) + draw_end = min(float(end), float(gr.end)) + if draw_end <= draw_start: + continue + # Determine color current_color = self.color if cmap and norm and hasattr(row, self.color_by): @@ -134,9 +145,9 @@ def plot(self, ax: matplotlib.axes.Axes, gr: GenomicRegion) -> None: # Draw interval rect = matplotlib.patches.Rectangle( - (start, ypos - self.interval_height / 2), - end - start, - self.interval_height, + (draw_start, ypos - effective_height / 2), + draw_end - draw_start, + effective_height, linewidth=self.rect_linewidth if self.draw_edges else 0, edgecolor=self.edge_color if self.draw_edges else "none", facecolor=current_color, @@ -145,32 +156,31 @@ def plot(self, ax: matplotlib.axes.Axes, gr: GenomicRegion) -> None: ) ax.add_patch(rect) - # Draw summit if enabled + # Draw summit if enabled — skip if outside view if self.show_summit and hasattr(row, "peak") and row.peak != 
-1: - # peak is 0-based offset from start - summit_pos = start + row.peak - ax.plot( - [summit_pos, summit_pos], - [ - ypos - self.interval_height / 2, - ypos + self.interval_height / 2, - ], - color=self.summit_color, - linewidth=self.summit_width, - zorder=2, - ) + summit_pos = float(start) + float(row.peak) + if gr.start <= summit_pos <= gr.end: + ax.plot( + [summit_pos, summit_pos], + [ypos - effective_height / 2, ypos + effective_height / 2], + color=self.summit_color, + linewidth=self.summit_width, + zorder=2, + ) # Draw label if enabled if self.show_labels and hasattr(row, self.label_field): label = getattr(row, self.label_field) + label_xpos = max(draw_start, min(draw_end, (float(start) + float(end)) / 2)) ax.text( - (start + end) / 2, + label_xpos, ypos, str(label), ha="center", va="center", fontsize=self.font_size, zorder=3, + clip_on=True, ) ax.set_xlim(gr.start, gr.end) diff --git a/pyproject.toml b/pyproject.toml index 1abf425..8ab9c3a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -63,7 +63,7 @@ where = ["."] include = ["plotnado*"] [tool.setuptools.package-data] -plotnado = ["data/gene_bed_files/*", "py.typed"] +plotnado = ["data/gene_bed_files/*", "data/*.bed", "py.typed"] diff --git a/tests/test_genes.py b/tests/test_genes.py index e659839..c9d658b 100644 --- a/tests/test_genes.py +++ b/tests/test_genes.py @@ -5,8 +5,10 @@ import matplotlib.markers import matplotlib.pyplot as plt import pandas as pd +import pytest from plotnado.tracks import Genes, GenesAesthetics, GenomicRegion +from plotnado.tracks.genes import _user_genomes, register_genome def _chevron_calls(ax: MagicMock) -> list: @@ -751,3 +753,98 @@ def test_plot_genes_expanded_row_allocation_accounts_for_label_footprint( y_values = [call.args[1] for call in mock_ax.text.call_args_list] assert len(y_values) == 2 assert len(set(y_values)) == 2 + + +class TestRegisterGenome: + @pytest.fixture(autouse=True) + def _clean_registry(self): + """Restore _user_genomes to its pre-test state after 
each test.""" + snapshot = dict(_user_genomes) + yield + _user_genomes.clear() + _user_genomes.update(snapshot) + + @patch("plotnado.tracks.genes.read_bed_regions") + def test_register_and_use_genome(self, mock_read_bed, tmp_path, genomic_region): + bed_file = tmp_path / "custom.bed" + bed_file.write_text("") + mock_read_bed.return_value = pd.DataFrame( + { + "chrom": ["chr1"], + "start": [1100], + "end": [1300], + "name": ["gene1"], + "strand": ["+"], + "blockCount": [1], + "blockSizes": ["200"], + "blockStarts": ["0"], + } + ) + + register_genome("custom_asm", bed_file) + genes = Genes(genome="custom_asm") + result = genes.fetch_data(genomic_region) + + mock_read_bed.assert_called_once() + assert "geneid" in result.columns + + @patch("plotnado.tracks.genes.read_bed_regions") + def test_path_autodetect_absolute_bed(self, mock_read_bed, tmp_path, genomic_region): + bed_file = tmp_path / "mm10_genes.bed" + bed_file.write_text("") + mock_read_bed.return_value = pd.DataFrame( + { + "chrom": ["chr1"], + "start": [1100], + "end": [1300], + "name": ["gene1"], + "strand": ["+"], + "blockCount": [1], + "blockSizes": ["200"], + "blockStarts": ["0"], + } + ) + + genes = Genes(genome=str(bed_file)) + result = genes.fetch_data(genomic_region) + + mock_read_bed.assert_called_once() + assert "geneid" in result.columns + + @patch("plotnado.tracks.genes.read_gtf_regions") + def test_path_autodetect_gtf_extension(self, mock_read_gtf, tmp_path, genomic_region): + gtf_file = tmp_path / "custom.gtf" + gtf_file.write_text("") + mock_read_gtf.return_value = pd.DataFrame( + { + "Chromosome": ["chr1", "chr1"], + "Start": [1100, 1200], + "End": [1150, 1250], + "Feature": ["exon", "exon"], + "gene_id": ["g1", "g1"], + "Strand": ["+", "+"], + } + ) + + genes = Genes(genome=str(gtf_file)) + result = genes.fetch_data(genomic_region) + + mock_read_gtf.assert_called_once() + assert "geneid" in result.columns + + def test_unknown_genome_raises_valueerror(self, genomic_region): + genes = 
Genes(genome="nonexistent_assembly_xyz") + with pytest.raises(ValueError, match="not found in the genes database"): + genes.fetch_data(genomic_region) + + def test_error_message_includes_user_genomes(self, tmp_path, genomic_region): + register_genome("my_asm", tmp_path / "fake.bed") + genes = Genes(genome="wrong_name") + with pytest.raises(ValueError) as exc_info: + genes.fetch_data(genomic_region) + assert "my_asm" in str(exc_info.value) + + def test_register_genome_exported_from_package(self): + import plotnado + assert hasattr(plotnado, "register_genome") + assert callable(plotnado.register_genome) diff --git a/uv.lock b/uv.lock index d38f440..6b8d2dd 100644 --- a/uv.lock +++ b/uv.lock @@ -2786,7 +2786,7 @@ wheels = [ [[package]] name = "plotnado" -version = "0.4.dev14" +version = "0.4.dev15" source = { editable = "." } dependencies = [ { name = "loguru" }, From a61be4213689544be482c3f296f4b80f516254b6 Mon Sep 17 00:00:00 2001 From: alsmith151 Date: Wed, 27 May 2026 13:17:54 +0100 Subject: [PATCH 2/2] docs: update example plot command in aesthetics documentation --- docs/aesthetics.qmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/aesthetics.qmd b/docs/aesthetics.qmd index 5a9192e..11e3072 100644 --- a/docs/aesthetics.qmd +++ b/docs/aesthetics.qmd @@ -175,7 +175,7 @@ fig = GenomicFigure(theme="minimal") fig.scalebar() fig.bigwig(signal(), title="Minimal theme", style="fill", color="#1f77b4") fig.axis() -fig.plot("chr1:1,000,000-1,100,000", show=False) # Note: this is just for demonstration; the theme doesn't affect the plot command, but it does affect the default styling of the figure and tracks. +fig.plot("chr1:1,000,000-1,100,000", show=False) ```