diff --git a/CHANGELOG.md b/CHANGELOG.md index 6421f39..787a0a6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,11 +15,13 @@ native, format-agnostic metadata embedding for images. Core dependencies remain ### Added -- **New attribute dtypes** `date`, `time`, `duration` and `decimal` (pure stdlib): - `date`/`time` (`xsd:date`/`xsd:time`), `duration` as a `datetime.timedelta` parsed - from ISO 8601 (`xsd:duration`), and `decimal` as `decimal.Decimal` for exact numerics - (`xsd:decimal`). Round-trip through JSON and JSON-LD; lenient/`strict=` coercion as - for the existing dtypes. +- **New attribute dtypes** `date`, `time`, `duration`, `decimal`, `complex` and + `floatlist` (pure stdlib): `date`/`time` (`xsd:date`/`xsd:time`), `duration` as a + `datetime.timedelta` parsed from ISO 8601 (`xsd:duration`), `decimal` as + `decimal.Decimal` for exact numerics (`xsd:decimal`), `complex` numbers and + `floatlist` (typed `list[float]`, also from numpy arrays) — the latter two use the + custom datatype CURIEs `sdata:complex` / `sdata:floatlist` (no standard XSD type) for + a lossless JSON-LD round-trip. Lenient/`strict=` coercion as for the existing dtypes. - **Native image metadata (RFC 0005).** New pure-Python, Pillow-free module `sdata.imagemeta` embeds/reads sdata metadata **natively** into six containers with one API (`detect_format`/`embed`/`extract`/`supported_formats`): **PNG** (`iTXt`), diff --git a/docs/usage/metadata-jsonld.md b/docs/usage/metadata-jsonld.md index 49d4e4a..dedca91 100644 --- a/docs/usage/metadata-jsonld.md +++ b/docs/usage/metadata-jsonld.md @@ -19,10 +19,12 @@ pip install "sdata[schema]" # jsonschema -> JSON-Schema validation Every [`Attribute`][sdata.metadata.Attribute] carries `name, value, unit, dtype, description, label, required, ontology`. Supported `dtype` values are `str, int, float, bool, list, timestamp, bytes, json, uri, -date, time, duration, decimal`. Each maps to an XSD type for JSON-LD (e.g. 
`date` -→ `xsd:date`, `duration` → `xsd:duration` as ISO 8601 / `timedelta`, `decimal` → -`xsd:decimal` for exact numerics). Coercion is lenient by default; pass -`strict=True` to raise `sdata.dtypes.DtypeError` on invalid values. +date, time, duration, decimal, complex, floatlist`. Each maps to an XSD type for +JSON-LD (e.g. `date` → `xsd:date`, `duration` → `xsd:duration` as ISO 8601 / +`timedelta`, `decimal` → `xsd:decimal` for exact numerics); `complex` and +`floatlist` (a typed `list[float]`) have no standard XSD type and use the custom +datatype CURIEs `sdata:complex` / `sdata:floatlist`. Coercion is lenient by +default; pass `strict=True` to raise `sdata.dtypes.DtypeError` on invalid values. ```python import pandas as pd diff --git a/sdata/dtypes.py b/sdata/dtypes.py index 8aefcce..37c9e25 100644 --- a/sdata/dtypes.py +++ b/sdata/dtypes.py @@ -15,8 +15,11 @@ * **Strikt opt-in** – ``strict=True`` wirft ``DtypeError`` statt still zu degradieren. * **Erweiterbar** – neben den 6 Alt-dtypes (str/int/float/bool/timestamp/list) - zusätzlich ``bytes`` (base64), ``json`` (dict/list), ``uri`` sowie ``date``, - ``time``, ``duration`` (ISO 8601 / ``timedelta``) und ``decimal`` (exakt). + zusätzlich ``bytes`` (base64), ``json`` (dict/list), ``uri``, ``date``, ``time``, + ``duration`` (ISO 8601 / ``timedelta``), ``decimal`` (exakt), ``complex`` sowie + ``floatlist`` (typisierte Float-Liste). ``complex``/``floatlist`` haben keinen + Standard-XSD-Typ und nutzen daher eigene Datentyp-CURIEs (``sdata:complex`` / + ``sdata:floatlist``). 
""" import base64 import binascii @@ -235,6 +238,36 @@ def _c_decimal(value, strict): raise DtypeError("decimal: {!r}".format(value)) from exp +def _c_complex(value, strict): + if value is None or value == "": + return None + if isinstance(value, complex): + return value + if isinstance(value, bool): # bool ist kein komplexer Wert + raise DtypeError("complex: {!r}".format(value)) + try: + return complex(value.strip()) if isinstance(value, str) else complex(value) + except (ValueError, TypeError) as exp: + raise DtypeError("complex: {!r}".format(value)) from exp + + +def _c_floatlist(value, strict): + if value is None: + return [] + if isinstance(value, str): # "" / "1,2,3" (Leer-Check skalar-sicher) + items = [s for s in (p.strip() for p in value.split(",")) if s] + elif isinstance(value, (list, tuple)): + items = value + elif hasattr(value, "tolist"): # numpy-Array & Co. -> Liste + items = value.tolist() + else: + raise DtypeError("floatlist: {!r}".format(value)) + try: + return [float(x) for x in items] + except (ValueError, TypeError) as exp: + raise DtypeError("floatlist: {!r}".format(value)) from exp + + # --- JSON-Serialisierung je dtype ------------------------------------------- def _ts_to_json(value): return str(value.utc) if isinstance(value, TimeStamp) else value @@ -280,6 +313,10 @@ def _decimal_to_json(value): return str(value) if isinstance(value, Decimal) else value +def _complex_to_json(value): + return str(value) if isinstance(value, complex) else value + + class DtypeSpec: """Beschreibt einen dtype: Coercion, JSON-Repräsentation, Klasse, XSD-Typ.""" @@ -318,6 +355,10 @@ def register(spec): DtypeSpec("time", datetime.time, _c_time, "xsd:time", _time_to_json), DtypeSpec("duration", datetime.timedelta, _c_duration, "xsd:duration", _duration_to_json), DtypeSpec("decimal", Decimal, _c_decimal, "xsd:decimal", _decimal_to_json), + # komplexe Zahlen & typisierte Float-Listen haben keinen Standard-XSD-Typ + # -> eigener Datentyp-CURIE in der 
sdata-Namespace (verlustfreier JSON-LD-Roundtrip). + DtypeSpec("complex", complex, _c_complex, "sdata:complex", _complex_to_json), + DtypeSpec("floatlist", list, _c_floatlist, "sdata:floatlist"), ]: register(_spec) @@ -341,6 +382,7 @@ def names(): bytes: "bytes", dict: "json", datetime.date: "date", datetime.time: "time", datetime.timedelta: "duration", Decimal: "decimal", + complex: "complex", } XSD = {name: spec.xsd for name, spec in _REGISTRY.items()} @@ -364,6 +406,8 @@ def resolve(dtype): token = str(dtype).strip().lower() if token in _REGISTRY: return token + if token in ("list[float]", "float[]"): # Alias -> floatlist (vor 'list'/'float') + return "floatlist" if "float" in token: return "float" if "int" in token: @@ -390,6 +434,8 @@ def json_default(obj): return base64.b64encode(bytes(obj)).decode("ascii") if isinstance(obj, Decimal): return str(obj) + if isinstance(obj, complex): + return str(obj) if isinstance(obj, datetime.timedelta): return _duration_to_json(obj) if isinstance(obj, datetime.date): # fängt auch datetime.datetime diff --git a/sdata/semantic.py b/sdata/semantic.py index 65a5a89..107976e 100644 --- a/sdata/semantic.py +++ b/sdata/semantic.py @@ -54,6 +54,7 @@ "xsd:base64Binary": "bytes", "xsd:anyURI": "uri", "xsd:date": "date", "xsd:time": "time", "xsd:duration": "duration", "xsd:decimal": "decimal", + "sdata:complex": "complex", } @@ -190,7 +191,7 @@ def _set_from_node(metadata, name, node): raw, xsd = node, None # dtype bestimmen: JSON-Typ hat Vorrang (list/json), sonst XSD-Rückabbildung if isinstance(raw, list): - dtype = "list" + dtype = "floatlist" if xsd == "sdata:floatlist" else "list" elif isinstance(raw, dict): dtype = "json" else: diff --git a/tests/test_dtypes.py b/tests/test_dtypes.py index 9d06345..47900d7 100644 --- a/tests/test_dtypes.py +++ b/tests/test_dtypes.py @@ -275,6 +275,78 @@ def test_new_dtypes_jsonld_roundtrip(): assert back.get("price").value == Decimal("19.99") +# --- complex / floatlist (neu) 
# ---------------------------------------------------------------------------
def test_complex_dtype():
    """Complex coercion: accepted spellings, empty values, lenient vs. strict."""
    assert Attribute("c", "1+2j", dtype="complex").value == complex(1, 2)
    assert Attribute("c", "(1+2j)", dtype="complex").value == complex(1, 2)  # with parentheses
    assert Attribute("c", complex(3, -4), dtype="complex").value == complex(3, -4)
    assert Attribute("c", 5, dtype="complex").value == complex(5, 0)
    assert Attribute("c", 2.5, dtype="complex").value == complex(2.5, 0)
    assert Attribute("c", None, dtype="complex").value is None
    assert Attribute("c", "", dtype="complex").value is None
    assert Attribute("c", "nope", dtype="complex").value is None  # lenient
    with pytest.raises(DtypeError):
        Attribute("c", "nope", dtype="complex", strict=True)
    with pytest.raises(DtypeError):
        Attribute("c", True, dtype="complex", strict=True)  # bool is rejected


def test_floatlist_dtype():
    """Float-list coercion from strings and sequences, plus the dtype alias."""
    assert Attribute("v", "1.0, 2.5, 3", dtype="floatlist").value == [1.0, 2.5, 3.0]
    assert Attribute("v", [1, 2, 3], dtype="floatlist").value == [1.0, 2.0, 3.0]
    assert Attribute("v", (1.5, 2.5), dtype="floatlist").value == [1.5, 2.5]
    assert Attribute("v", None, dtype="floatlist").value == []
    assert Attribute("v", "", dtype="floatlist").value == []
    # dtype alias "list[float]"
    assert Attribute("v", [1, 2], dtype="list[float]").value == [1.0, 2.0]
    # non-castable elements / unsupported type -> lenient None, strict raises
    assert Attribute("v", ["a", "b"], dtype="floatlist").value is None
    assert Attribute("v", 5, dtype="floatlist").value is None
    with pytest.raises(DtypeError):
        Attribute("v", ["a"], dtype="floatlist", strict=True)  # non-castable element
    with pytest.raises(DtypeError):
        Attribute("v", 5, dtype="floatlist", strict=True)  # unsupported type


def test_floatlist_dtype_numpy():
    """Float-list coercion from a numpy array; skipped when numpy is missing."""
    # The dtype itself needs only the stdlib, so numpy must stay optional here.
    np = pytest.importorskip("numpy")
    assert Attribute("v", np.array([1, 2, 3]), dtype="floatlist").value == [1.0, 2.0, 3.0]


def test_complex_floatlist_resolve_xsd_json():
    """Name resolution, datatype CURIEs and JSON conversion of both dtypes."""
    assert dtypes.resolve(complex) == "complex"
    assert dtypes.resolve("complex") == "complex"
    assert dtypes.resolve("floatlist") == "floatlist"
    assert dtypes.resolve("list[float]") == "floatlist"
    assert dtypes.resolve("float[]") == "floatlist"
    xsd = dtypes.xsd_map()
    assert xsd["complex"] == "sdata:complex" and xsd["floatlist"] == "sdata:floatlist"
    # to_json / json_default
    assert dtypes.get("complex").to_json(complex(1, 2)) == "(1+2j)"
    assert dtypes.get("complex").to_json(None) is None  # passthrough
    assert dtypes.get("floatlist").to_json([1.0, 2.0]) == [1.0, 2.0]  # passthrough (JSON-native)
    assert dtypes.json_default(complex(1, 2)) == "(1+2j)"


def test_complex_floatlist_json_roundtrip():
    """Both dtypes survive Metadata.to_json() / Metadata.from_json()."""
    m = Metadata()
    m.add("impedance", "50+3j", dtype="complex")
    m.add("spectrum", [1.0, 2.5, 3.0], dtype="floatlist")
    restored = Metadata.from_json(m.to_json())
    assert restored.get("impedance").value == complex(50, 3)
    assert restored.get("spectrum").value == [1.0, 2.5, 3.0]


def test_complex_floatlist_jsonld_roundtrip():
    """Both dtypes keep their custom datatype CURIE through JSON-LD and back."""
    from sdata import semantic
    m = Metadata(name="probe")
    m.add("impedance", "50+3j", dtype="complex")
    m.add("spectrum", [1.0, 2.5, 3.0], dtype="floatlist")
    doc = semantic.to_jsonld(m)
    assert doc["sdata:impedance"]["@type"] == "sdata:complex"
    assert doc["sdata:spectrum"]["@type"] == "sdata:floatlist"
    back = semantic.from_jsonld(doc)
    assert back.get("impedance").value == complex(50, 3)
    assert back.get("spectrum").value == [1.0, 2.5, 3.0]  # floatlist, not a list of str


# --- dtype=class & Re-Cast --------------------------------------------------
# NOTE(review): pre-existing test shown only as diff context; anything past
# its first assertion lies outside the visible hunk.
def test_dtype_class_accepted():
    assert Attribute("a", 1, dtype=int).dtype == "int"