Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/api_reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,12 @@ and
:members:
```

### Grid Overrides

```{eval-rst}
.. autopydantic_model:: mdio.GridOverrides
```

## Core Functionality

### Dimensions
Expand Down
29 changes: 28 additions & 1 deletion docs/guides/grid_overrides.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,30 @@ Grid overrides are transformations applied during SEG-Y import that modify how t

When importing SEG-Y data, MDIO maps trace header fields to dataset dimensions. However, real-world seismic data often has complexities that require additional processing. Grid overrides address these issues by transforming header values before indexing.

## Configuring grid overrides

Grid overrides are passed to {func}`mdio.segy_to_mdio` via the `grid_overrides` argument as an
{class}`mdio.GridOverrides` instance:

```python
from mdio import GridOverrides
from mdio import segy_to_mdio

segy_to_mdio(
...,
grid_overrides=GridOverrides(calculate_shot_index=True),
)
```

Both modern `snake_case` field names and the legacy `CamelCase` aliases are accepted, so
`GridOverrides(CalculateShotIndex=True)` is equivalent to the example above. Unknown keys
are rejected at construction with a `pydantic.ValidationError`.

```{deprecated} 1.2
Passing `grid_overrides` as a `dict` is deprecated. It still works, but a deprecation warning is
logged and `dict` support will be removed in a future release. Switch to `mdio.GridOverrides`.
```

## CalculateShotIndex

Calculates a dense `shot_index` dimension from sparse or interleaved `shot_point` values. Required for the `ObnReceiverGathers3D` template.
Expand Down Expand Up @@ -37,12 +61,15 @@ The override detects the geometry type and only applies the transformation when
**Usage:**

```python
from mdio import GridOverrides
from mdio import segy_to_mdio
from mdio.builder.template_registry import get_template

segy_to_mdio(
input_path="obn_data.sgy",
output_path="obn_data.mdio",
segy_spec=obn_spec,
mdio_template=get_template("ObnReceiverGathers3D"),
grid_overrides={"CalculateShotIndex": True},
grid_overrides=GridOverrides(calculate_shot_index=True),
)
```

Expand Down
3 changes: 2 additions & 1 deletion docs/guides/obn_data_import.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ A warning is logged when component is synthesized:
from segy.schema import HeaderField
from segy.standards import get_segy_standard

from mdio import GridOverrides
from mdio import segy_to_mdio
from mdio.builder.template_registry import get_template

Expand All @@ -91,7 +92,7 @@ segy_to_mdio(
output_path="obn_data.mdio",
segy_spec=obn_spec,
mdio_template=get_template("ObnReceiverGathers3D"),
grid_overrides={"CalculateShotIndex": True},
grid_overrides=GridOverrides(calculate_shot_index=True),
overwrite=True,
)
```
Expand Down
2 changes: 2 additions & 0 deletions src/mdio/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from mdio.converters import segy_to_mdio
from mdio.optimize.access_pattern import OptimizedAccessPatternConfig
from mdio.optimize.access_pattern import optimize_access_patterns
from mdio.segy.geometry import GridOverrides

try:
__version__ = metadata.version("multidimio")
Expand All @@ -19,6 +20,7 @@

__all__ = [
"__version__",
"GridOverrides",
"open_mdio",
"to_mdio",
"mdio_to_segy",
Expand Down
70 changes: 47 additions & 23 deletions src/mdio/converters/segy.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
from mdio.ingestion.segy.validation import _validate_spec_in_template
from mdio.segy import blocked_io
from mdio.segy.file import get_segy_file_info
from mdio.segy.geometry import GridOverrides
from mdio.segy.utilities import get_grid_plan

if TYPE_CHECKING:
Expand Down Expand Up @@ -128,7 +129,7 @@ def filtered_add_coordinate( # noqa: ANN202

def _update_template_from_grid_overrides(
template: AbstractDatasetTemplate,
grid_overrides: dict[str, Any] | None,
grid_overrides: GridOverrides | None,
segy_dimensions: list[Dimension],
full_chunk_shape: tuple[int, ...],
chunk_size: tuple[int, ...],
Expand Down Expand Up @@ -178,30 +179,29 @@ def _update_template_from_grid_overrides(

# If using NonBinned override, expose non-binned dims as logical coordinates on the template instance
# and patch _add_coordinates to skip adding them as 1D dimension coordinates
if grid_overrides and "NonBinned" in grid_overrides and "non_binned_dims" in grid_overrides:
non_binned_dims = tuple(grid_overrides["non_binned_dims"])
if non_binned_dims:
logger.debug(
"NonBinned grid override: exposing non-binned dims as coordinates: %s",
non_binned_dims,
)
# Append any missing names; keep existing order and avoid duplicates
existing = set(template.coordinate_names)
to_add = tuple(n for n in non_binned_dims if n not in existing)
if to_add:
template._logical_coord_names = template._logical_coord_names + to_add

# Patch _add_coordinates to skip adding non-binned dims as 1D dimension coordinates
# This prevents them from being added with wrong dimensions (e.g., just "trace")
# They will be added later by build_dataset with full spatial_dimension_names
_patch_add_coordinates_for_non_binned(template, set(non_binned_dims))
if grid_overrides is not None and grid_overrides.non_binned and grid_overrides.non_binned_dims:
non_binned_dims = tuple(grid_overrides.non_binned_dims)
logger.debug(
"NonBinned grid override: exposing non-binned dims as coordinates: %s",
non_binned_dims,
)
# Append any missing names; keep existing order and avoid duplicates
existing = set(template.coordinate_names)
to_add = tuple(n for n in non_binned_dims if n not in existing)
if to_add:
template._logical_coord_names = template._logical_coord_names + to_add

# Patch _add_coordinates to skip adding non-binned dims as 1D dimension coordinates
# This prevents them from being added with wrong dimensions (e.g., just "trace")
# They will be added later by build_dataset with full spatial_dimension_names
_patch_add_coordinates_for_non_binned(template, set(non_binned_dims))


def _scan_for_headers(
segy_file_kwargs: SegyFileArguments,
segy_file_info: SegyFileInfo,
template: AbstractDatasetTemplate,
grid_overrides: dict[str, Any] | None = None,
grid_overrides: GridOverrides | None = None,
) -> tuple[list[Dimension], SegyHeaderArray]:
"""Extract trace dimensions and index headers from the SEG-Y file.

Expand Down Expand Up @@ -346,13 +346,34 @@ def determine_target_size(var_type: str) -> int:
ds.variables[index].metadata.chunk_grid = chunk_grid


def _coerce_grid_overrides(
    grid_overrides: GridOverrides | dict[str, Any] | None,
) -> GridOverrides | None:
    """Normalize public ``grid_overrides`` input into a :class:`GridOverrides` model.

    The internal ingestion pipeline only accepts the typed model, so this is the one
    place where the deprecated ``dict`` form is still understood.

    Args:
        grid_overrides: ``None``, an already-typed :class:`GridOverrides` instance, or a
            legacy ``dict`` of override options.

    Returns:
        ``None`` when no overrides were supplied, the very same instance when a
        :class:`GridOverrides` was supplied, otherwise a new model built from the dict
        with :meth:`GridOverrides.model_validate`.

    Warns:
        DeprecationWarning: If ``grid_overrides`` is not a :class:`GridOverrides` instance.
            The same message is also written to the module logger.
    """
    if grid_overrides is None:
        return None

    if isinstance(grid_overrides, GridOverrides):
        return grid_overrides

    import warnings  # noqa: PLC0415 -- local import keeps this block self-contained

    message = (
        "Passing `grid_overrides` as a dict is deprecated and will be removed in a "
        "future release; pass a `mdio.GridOverrides` instance instead."
    )
    # stacklevel=3 attributes the warning to the caller of `segy_to_mdio`
    # (this helper -> segy_to_mdio -> user code) rather than to library internals.
    warnings.warn(message, DeprecationWarning, stacklevel=3)
    logger.warning(message)

    # Validation (including rejection of unknown keys) is delegated to the model.
    return GridOverrides.model_validate(grid_overrides)


def segy_to_mdio( # noqa PLR0913
segy_spec: SegySpec,
mdio_template: AbstractDatasetTemplate,
input_path: UPath | Path | str,
output_path: UPath | Path | str,
overwrite: bool = False,
grid_overrides: dict[str, Any] | None = None,
grid_overrides: GridOverrides | dict[str, Any] | None = None,
segy_header_overrides: SegyHeaderOverrides | None = None,
) -> None:
"""A function that converts a SEG-Y file to an MDIO v1 file.
Expand All @@ -365,12 +386,15 @@ def segy_to_mdio( # noqa PLR0913
input_path: The universal path of the input SEG-Y file.
output_path: The universal path for the output MDIO v1 file.
overwrite: Whether to overwrite the output file if it already exists. Defaults to False.
grid_overrides: Option to add grid overrides.
grid_overrides: Option to add grid overrides. Prefer a :class:`mdio.GridOverrides`
instance; ``dict`` is still accepted but emits a :class:`DeprecationWarning`.
segy_header_overrides: Option to override specific SEG-Y headers during ingestion.

Raises:
FileExistsError: If the output location already exists and overwrite is False.
"""
typed_grid_overrides = _coerce_grid_overrides(grid_overrides)

settings = MDIOSettings()

_validate_spec_in_template(segy_spec, mdio_template)
Expand All @@ -395,7 +419,7 @@ def segy_to_mdio( # noqa PLR0913
segy_file_kwargs,
segy_file_info,
template=mdio_template,
grid_overrides=grid_overrides,
grid_overrides=typed_grid_overrides,
)
grid = _build_and_check_grid(segy_dimensions, segy_file_info, segy_headers)

Expand All @@ -417,7 +441,7 @@ def segy_to_mdio( # noqa PLR0913
mdio_template = _update_template_units(mdio_template, spatial_unit)
mdio_ds: Dataset = mdio_template.build_dataset(name=mdio_template.name, sizes=grid.shape, header_dtype=header_dtype)

_add_grid_override_to_metadata(dataset=mdio_ds, grid_overrides=grid_overrides)
_add_grid_override_to_metadata(dataset=mdio_ds, grid_overrides=typed_grid_overrides)

# Dynamically chunk the variables based on their type
_chunk_variable(ds=mdio_ds, target_variable_name="trace_mask") # trace_mask is a Variable and not a Coordinate
Expand Down
6 changes: 3 additions & 3 deletions src/mdio/ingestion/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,16 @@
from __future__ import annotations

from typing import TYPE_CHECKING
from typing import Any

if TYPE_CHECKING:
from mdio.builder.schemas import Dataset
from mdio.segy.geometry import GridOverrides


def _add_grid_override_to_metadata(dataset: Dataset, grid_overrides: dict[str, Any] | None) -> None:
def _add_grid_override_to_metadata(dataset: Dataset, grid_overrides: GridOverrides | None) -> None:
"""Add grid override to Dataset metadata if needed."""
if dataset.metadata.attributes is None:
dataset.metadata.attributes = {}

if grid_overrides is not None:
dataset.metadata.attributes["gridOverrides"] = grid_overrides
dataset.metadata.attributes["gridOverrides"] = grid_overrides.to_legacy_dict()
59 changes: 59 additions & 0 deletions src/mdio/segy/geometry.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,13 @@
from abc import ABC
from abc import abstractmethod
from typing import TYPE_CHECKING
from typing import Any

import numpy as np
from numpy.lib import recfunctions as rfn
from pydantic import BaseModel
from pydantic import ConfigDict
from pydantic import Field

from mdio.ingestion.segy.header_analysis import ShotGunGeometryType
from mdio.ingestion.segy.header_analysis import StreamerShotGeometryType
Expand All @@ -31,6 +35,61 @@
logger = logging.getLogger(__name__)


class GridOverrides(BaseModel):
    """Validated set of grid override options used while ingesting SEG-Y data.

    Every boolean switch carries a legacy ``CamelCase`` alias; ``validate_by_name`` is
    enabled so a switch may be supplied under its field name as well. Keys that match
    no field are refused (``extra="forbid"``).
    """

    model_config = ConfigDict(extra="forbid", validate_by_name=True)

    # Override switches: all disabled unless explicitly requested.
    auto_channel_wrap: bool = Field(
        False,
        alias="AutoChannelWrap",
        description="Streamer: auto-detect channel-wrap geometry (Type A vs B).",
    )
    auto_shot_wrap: bool = Field(
        False,
        alias="AutoShotWrap",
        description="Streamer: derive dense shot_index from interleaved shot_point values.",
    )
    calculate_shot_index: bool = Field(
        False,
        alias="CalculateShotIndex",
        description="OBN: derive dense shot_index from sparse shot_point values per shot_line.",
    )
    non_binned: bool = Field(
        False,
        alias="NonBinned",
        description="Collapse selected dims into a single trace dimension without spatial binning.",
    )
    has_duplicates: bool = Field(
        False,
        alias="HasDuplicates",
        description="Add a trace dimension (chunksize 1) to disambiguate duplicate trace indices.",
    )

    # Extra parameters; these have no alias, so their dumped keys equal the field names.
    chunksize: int | None = Field(
        None,
        gt=0,
        description="Chunk size for the trace dimension when `non_binned` is True.",
    )
    non_binned_dims: list[str] | None = Field(
        None,
        description="Dimension names to collapse into the trace dimension when `non_binned` is True.",
    )

    def __bool__(self) -> bool:
        """Tell whether at least one override switch is turned on.

        Only the five boolean switches are consulted; ``chunksize`` and
        ``non_binned_dims`` do not influence the result.
        """
        switches = (
            self.auto_channel_wrap,
            self.auto_shot_wrap,
            self.calculate_shot_index,
            self.non_binned,
            self.has_duplicates,
        )
        return any(switches)

    def to_legacy_dict(self) -> dict[str, Any]:
        """Dump to the legacy ``CamelCase`` dict shape consumed by :class:`GridOverrider`.

        Aliases are used as keys where a field has one, and fields still holding their
        default value are left out.
        """
        legacy = self.model_dump(by_alias=True, exclude_defaults=True)
        return legacy


class GridOverrideCommand(ABC):
"""Abstract base class for grid override commands."""

Expand Down
Loading
Loading