Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,13 @@ native, format-agnostic metadata embedding for images. Core dependencies remain

### Added

- **New attribute dtypes** `date`, `time`, `duration` and `decimal` (pure stdlib):
`date`/`time` (`xsd:date`/`xsd:time`), `duration` as a `datetime.timedelta` parsed
from ISO 8601 (`xsd:duration`), and `decimal` as `decimal.Decimal` for exact numerics
(`xsd:decimal`). Round-trip through JSON and JSON-LD; lenient/`strict=` coercion as
for the existing dtypes.
- **New attribute dtypes** `date`, `time`, `duration`, `decimal`, `complex` and
`floatlist` (pure stdlib): `date`/`time` (`xsd:date`/`xsd:time`), `duration` as a
`datetime.timedelta` parsed from ISO 8601 (`xsd:duration`), `decimal` as
`decimal.Decimal` for exact numerics (`xsd:decimal`), `complex` numbers and
`floatlist` (typed `list[float]`, also from numpy arrays) — the latter two use the
custom datatype CURIEs `sdata:complex` / `sdata:floatlist` (no standard XSD type) for
a lossless JSON-LD round-trip. Lenient/`strict=` coercion as for the existing dtypes.
- **Native image metadata (RFC 0005).** New pure-Python, Pillow-free module
`sdata.imagemeta` embeds/reads sdata metadata **natively** into six containers with
one API (`detect_format`/`embed`/`extract`/`supported_formats`): **PNG** (`iTXt`),
Expand Down
10 changes: 6 additions & 4 deletions docs/usage/metadata-jsonld.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,12 @@ pip install "sdata[schema]" # jsonschema -> JSON-Schema validation
Every [`Attribute`][sdata.metadata.Attribute] carries
`name, value, unit, dtype, description, label, required, ontology`. Supported
`dtype` values are `str, int, float, bool, list, timestamp, bytes, json, uri,
date, time, duration, decimal`. Each maps to an XSD type for JSON-LD (e.g. `date`
→ `xsd:date`, `duration` → `xsd:duration` as ISO 8601 / `timedelta`, `decimal` →
`xsd:decimal` for exact numerics). Coercion is lenient by default; pass
`strict=True` to raise `sdata.dtypes.DtypeError` on invalid values.
date, time, duration, decimal, complex, floatlist`. Most map to an XSD type for
JSON-LD (e.g. `date` → `xsd:date`, `duration` → `xsd:duration` as ISO 8601 /
`timedelta`, `decimal` → `xsd:decimal` for exact numerics); `complex` and
`floatlist` (a typed `list[float]`) have no standard XSD type and instead use the
custom datatype CURIEs `sdata:complex` / `sdata:floatlist`. Coercion is lenient by
default; pass `strict=True` to raise `sdata.dtypes.DtypeError` on invalid values.

```python
import pandas as pd
Expand Down
50 changes: 48 additions & 2 deletions sdata/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,11 @@
* **Strikt opt-in** – ``strict=True`` wirft ``DtypeError`` statt still zu
degradieren.
* **Erweiterbar** – neben den 6 Alt-dtypes (str/int/float/bool/timestamp/list)
zusätzlich ``bytes`` (base64), ``json`` (dict/list), ``uri`` sowie ``date``,
``time``, ``duration`` (ISO 8601 / ``timedelta``) und ``decimal`` (exakt).
zusätzlich ``bytes`` (base64), ``json`` (dict/list), ``uri``, ``date``, ``time``,
``duration`` (ISO 8601 / ``timedelta``), ``decimal`` (exakt), ``complex`` sowie
``floatlist`` (typisierte Float-Liste). ``complex``/``floatlist`` haben keinen
Standard-XSD-Typ und nutzen daher eigene Datentyp-CURIEs (``sdata:complex`` /
``sdata:floatlist``).
"""
import base64
import binascii
Expand Down Expand Up @@ -235,6 +238,36 @@
raise DtypeError("decimal: {!r}".format(value)) from exp


def _c_complex(value, strict):
if value is None or value == "":
return None
if isinstance(value, complex):
return value
if isinstance(value, bool): # bool ist kein komplexer Wert
raise DtypeError("complex: {!r}".format(value))
try:
return complex(value.strip()) if isinstance(value, str) else complex(value)
except (ValueError, TypeError) as exp:
raise DtypeError("complex: {!r}".format(value)) from exp


def _c_floatlist(value, strict):

Check warning on line 254 in sdata/dtypes.py

View check run for this annotation

Codacy Production / Codacy Static Code Analysis

sdata/dtypes.py#L254

Method _c_floatlist has a cyclomatic complexity of 10 (limit is 8)
if value is None:
return []
if isinstance(value, str): # "" / "1,2,3" (Leer-Check skalar-sicher)
items = [s for s in (p.strip() for p in value.split(",")) if s]
elif isinstance(value, (list, tuple)):
items = value
elif hasattr(value, "tolist"): # numpy-Array & Co. -> Liste
items = value.tolist()
else:
raise DtypeError("floatlist: {!r}".format(value))
try:
return [float(x) for x in items]
except (ValueError, TypeError) as exp:
raise DtypeError("floatlist: {!r}".format(value)) from exp


# --- JSON-Serialisierung je dtype -------------------------------------------
def _ts_to_json(value):
return str(value.utc) if isinstance(value, TimeStamp) else value
Expand Down Expand Up @@ -280,6 +313,10 @@
return str(value) if isinstance(value, Decimal) else value


def _complex_to_json(value):
return str(value) if isinstance(value, complex) else value


class DtypeSpec:
"""Beschreibt einen dtype: Coercion, JSON-Repräsentation, Klasse, XSD-Typ."""

Expand Down Expand Up @@ -318,6 +355,10 @@
DtypeSpec("time", datetime.time, _c_time, "xsd:time", _time_to_json),
DtypeSpec("duration", datetime.timedelta, _c_duration, "xsd:duration", _duration_to_json),
DtypeSpec("decimal", Decimal, _c_decimal, "xsd:decimal", _decimal_to_json),
# komplexe Zahlen & typisierte Float-Listen haben keinen Standard-XSD-Typ
# -> eigener Datentyp-CURIE in der sdata-Namespace (verlustfreier JSON-LD-Roundtrip).
DtypeSpec("complex", complex, _c_complex, "sdata:complex", _complex_to_json),
DtypeSpec("floatlist", list, _c_floatlist, "sdata:floatlist"),
]:
register(_spec)

Expand All @@ -341,6 +382,7 @@
bytes: "bytes", dict: "json",
datetime.date: "date", datetime.time: "time",
datetime.timedelta: "duration", Decimal: "decimal",
complex: "complex",
}
XSD = {name: spec.xsd for name, spec in _REGISTRY.items()}

Expand All @@ -364,6 +406,8 @@
token = str(dtype).strip().lower()
if token in _REGISTRY:
return token
if token in ("list[float]", "float[]"): # Alias -> floatlist (vor 'list'/'float')
return "floatlist"
if "float" in token:
return "float"
if "int" in token:
Expand All @@ -390,6 +434,8 @@
return base64.b64encode(bytes(obj)).decode("ascii")
if isinstance(obj, Decimal):
return str(obj)
if isinstance(obj, complex):
return str(obj)
if isinstance(obj, datetime.timedelta):
return _duration_to_json(obj)
if isinstance(obj, datetime.date): # fängt auch datetime.datetime
Expand Down
3 changes: 2 additions & 1 deletion sdata/semantic.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
"xsd:base64Binary": "bytes", "xsd:anyURI": "uri",
"xsd:date": "date", "xsd:time": "time",
"xsd:duration": "duration", "xsd:decimal": "decimal",
"sdata:complex": "complex",
}


Expand Down Expand Up @@ -190,7 +191,7 @@ def _set_from_node(metadata, name, node):
raw, xsd = node, None
# dtype bestimmen: JSON-Typ hat Vorrang (list/json), sonst XSD-Rückabbildung
if isinstance(raw, list):
dtype = "list"
dtype = "floatlist" if xsd == "sdata:floatlist" else "list"
elif isinstance(raw, dict):
dtype = "json"
else:
Expand Down
72 changes: 72 additions & 0 deletions tests/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,78 @@
assert back.get("price").value == Decimal("19.99")


# --- complex / floatlist (neu) ---------------------------------------------
def test_complex_dtype():
    """``complex`` dtype: accepted inputs, empty values, lenient and strict rejects."""
    # (raw input, expected coerced value)
    accepted = [
        ("1+2j", complex(1, 2)),
        ("(1+2j)", complex(1, 2)),  # with parentheses
        (complex(3, -4), complex(3, -4)),
        (5, complex(5, 0)),
        (2.5, complex(2.5, 0)),
    ]
    for raw, expected in accepted:
        assert Attribute("c", raw, dtype="complex").value == expected

    # None / empty string -> None; an unparsable string is None in lenient mode
    for raw in (None, "", "nope"):
        assert Attribute("c", raw, dtype="complex").value is None

    # strict mode raises for unparsable strings and for bool
    for raw in ("nope", True):
        with pytest.raises(DtypeError):
            Attribute("c", raw, dtype="complex", strict=True)


def test_floatlist_dtype():
    """``floatlist`` dtype: accepted inputs, alias, lenient and strict rejects."""
    import numpy as np

    # (raw input, expected coerced value)
    accepted = [
        ("1.0, 2.5, 3", [1.0, 2.5, 3.0]),
        ([1, 2, 3], [1.0, 2.0, 3.0]),
        ((1.5, 2.5), [1.5, 2.5]),
        (np.array([1, 2, 3]), [1.0, 2.0, 3.0]),
        (None, []),
        ("", []),
    ]
    for raw, expected in accepted:
        assert Attribute("v", raw, dtype="floatlist").value == expected

    # dtype alias "list[float]"
    assert Attribute("v", [1, 2], dtype="list[float]").value == [1.0, 2.0]

    # non-castable elements / unsupported type -> None in lenient mode
    for raw in (["a", "b"], 5):
        assert Attribute("v", raw, dtype="floatlist").value is None

    # ... and DtypeError in strict mode
    for raw in (["a"], 5):
        with pytest.raises(DtypeError):
            Attribute("v", raw, dtype="floatlist", strict=True)


def test_complex_floatlist_resolve_xsd_json():
    """dtype resolution, datatype CURIEs and JSON encoding for complex/floatlist."""
    # (dtype token, expected canonical dtype name)
    resolutions = [
        (complex, "complex"),
        ("complex", "complex"),
        ("floatlist", "floatlist"),
        ("list[float]", "floatlist"),
        ("float[]", "floatlist"),
    ]
    for token, expected in resolutions:
        assert dtypes.resolve(token) == expected

    curies = dtypes.xsd_map()
    assert curies["complex"] == "sdata:complex"
    assert curies["floatlist"] == "sdata:floatlist"

    # to_json / json_default
    assert dtypes.get("complex").to_json(complex(1, 2)) == "(1+2j)"
    assert dtypes.get("complex").to_json(None) is None  # passthrough
    # passthrough (already JSON-native)
    assert dtypes.get("floatlist").to_json([1.0, 2.0]) == [1.0, 2.0]
    assert dtypes.json_default(complex(1, 2)) == "(1+2j)"


def test_complex_floatlist_json_roundtrip():
    """complex and floatlist values survive ``to_json`` -> ``from_json``."""
    original = Metadata()
    original.add("impedance", "50+3j", dtype="complex")
    original.add("spectrum", [1.0, 2.5, 3.0], dtype="floatlist")

    payload = original.to_json()
    reloaded = Metadata.from_json(payload)

    assert reloaded.get("impedance").value == complex(50, 3)
    assert reloaded.get("spectrum").value == [1.0, 2.5, 3.0]


def test_complex_floatlist_jsonld_roundtrip():
    """complex and floatlist keep their custom ``@type`` and value through JSON-LD."""
    from sdata import semantic

    source = Metadata(name="probe")
    source.add("impedance", "50+3j", dtype="complex")
    source.add("spectrum", [1.0, 2.5, 3.0], dtype="floatlist")

    document = semantic.to_jsonld(source)
    assert document["sdata:impedance"]["@type"] == "sdata:complex"
    assert document["sdata:spectrum"]["@type"] == "sdata:floatlist"

    restored = semantic.from_jsonld(document)
    assert restored.get("impedance").value == complex(50, 3)
    # floatlist, not a list of strings
    assert restored.get("spectrum").value == [1.0, 2.5, 3.0]


# --- dtype=class & Re-Cast --------------------------------------------------
def test_dtype_class_accepted():
assert Attribute("a", 1, dtype=int).dtype == "int"
Expand Down