diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 1d841eb..ef68845 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -15,7 +15,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v2 with: - python-version: "3.8" + python-version: "3.10" - name: Install dependencies run: | diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index d58eb62..090fce9 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -41,7 +41,13 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.9", "3.10", "3.11", "3.12"] + python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"] + numpy-version: ["1.26.*", "2.*"] + exclude: + - python-version: "3.13" + numpy-version: "1.26.*" + - python-version: "3.14" + numpy-version: "1.26.*" steps: - name: Checkout @@ -56,7 +62,8 @@ jobs: - name: Install dependencies run: | pip install -r requirements.txt - pip install -e ".[dev]" + pip install -e ".[dev,robotics]" + pip install "numpy==${{ matrix.numpy-version }}" - name: Run pytest run: pytest diff --git a/pyproject.toml b/pyproject.toml index 5978ed3..499a58a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,23 +6,23 @@ build-backend = "setuptools.build_meta" name = "fastlabel" description = "The official Python SDK for FastLabel API, the Data Platform for AI" readme = "README.md" -requires-python = ">=3.8" +requires-python = ">=3.10" authors = [ - { name = "eisuke-ueta", email = "eisuke.ueta@fastlabel.ai" } + { name = "fastlabel", email = "dev@fastlabel.ai" } ] dependencies = [ "requests>=2.4.2,<3.0", - "numpy>=1.26.0,<2.0.0", + "numpy>=1.26.0,<3.0.0", "geojson>=2.0.0,<4.0", "xmltodict==0.12.0", - "Pillow>=10.0.0,<11.0.0", + "Pillow>=11.0.0,<13.0.0", "opencv-python>=4.10.0,<5.0.0" ] dynamic = ["version"] [project.optional-dependencies] -robotics = ["pandas>=2.0.0", "pyarrow>=14.0.0"] +robotics = ["pandas>=2.2.2", "pyarrow>=18.0.0"] dev = ["pytest>=7.0.0"] [tool.setuptools] diff --git a/requirements.txt b/requirements.txt index 76513bc..50c3bdd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ requests>=2.4.2,<3.0 -numpy>=1.26.0,<2.0.0 +numpy>=1.26.0,<3.0.0 geojson>=2.0.0,<4.0 xmltodict==0.12.0 -Pillow>=10.0.0,<11.0.0 +Pillow>=11.0.0,<13.0.0 opencv-python>=4.10.0,<5.0.0 \ No newline at end of file diff --git a/tests/test_lerobot_v3_parquet.py b/tests/test_lerobot_v3_parquet.py new file mode 100644 index 0000000..417808a --- /dev/null +++ b/tests/test_lerobot_v3_parquet.py @@ -0,0 +1,116 @@ +"""Tests for v3 pandas/pyarrow code paths. + +Covers _build_episode_map, get_episode_indices, _convert_episode_frames, and +check_dependencies so that pandas/pyarrow major-version bumps surface +breakage in CI. +""" +import pytest + +pd = pytest.importorskip("pandas") +pa = pytest.importorskip("pyarrow") + +from fastlabel.lerobot import common, v3 # noqa: E402 + + +def _write_parquet(path, rows): + df = pd.DataFrame(rows) + df.to_parquet(path) + + +@pytest.fixture +def v3_dataset(tmp_path): + """Create a minimal v3 layout with two chunks and two episodes per file.""" + data_dir = tmp_path / "data" + chunk0 = data_dir / "chunk-000" + chunk0.mkdir(parents=True) + + rows = [ + { + "episode_index": ep, + "frame_index": f, + "timestamp": float(f) * 0.1, + "observation.state": [0.1 * f, 0.2 * f], + "action": [1.0, 2.0], + } + for ep in (0, 1) + for f in range(3) + ] + _write_parquet(chunk0 / "file-000.parquet", rows) + + chunk1 = data_dir / "chunk-001" + chunk1.mkdir(parents=True) + rows = [ + { + "episode_index": 2, + "frame_index": f, + "timestamp": float(f) * 0.1, + "observation.state": [0.0, 0.0], + "action": [0.0, 0.0], + } + for f in range(2) + ] + _write_parquet(chunk1 / "file-000.parquet", rows) + + return tmp_path + + +class TestBuildEpisodeMap: + def test_returns_offsets_per_episode(self, v3_dataset): + result = v3._build_episode_map(v3_dataset) + + assert set(result.keys()) == {0, 1, 2} + assert result[0] == { + "chunk": "chunk-000", + "file_stem": "file-000", + "frame_offset": 0, + "length": 3, + } + assert result[1] == { + "chunk": "chunk-000", + "file_stem": "file-000", + "frame_offset": 3, + "length": 3, + } + assert result[2] == { + "chunk": "chunk-001", + "file_stem": "file-000", + "frame_offset": 0, + "length": 2, + } + + def test_get_episode_indices_sorted(self, v3_dataset): + assert v3.get_episode_indices(v3_dataset) == [0, 1, 2] + + +class TestConvertEpisodeFrames: + def test_extracts_frame_dicts(self, v3_dataset): + frames = v3._convert_episode_frames( + v3_dataset, episode_index=1, chunk="chunk-000", file_stem="file-000" + ) + + assert len(frames) == 3 + for i, frame in enumerate(frames): + assert frame["frame_index"] == i + assert frame["timestamp"] == pytest.approx(i * 0.1) + assert frame["action"] == [1.0, 2.0] + assert isinstance(frame["observation.state"], list) + + def test_missing_required_columns_returns_empty(self, tmp_path): + chunk = tmp_path / "data" / "chunk-000" + chunk.mkdir(parents=True) + _write_parquet( + chunk / "file-000.parquet", + [{"episode_index": 0, "frame_index": 0}], + ) + + assert ( + v3._convert_episode_frames( + tmp_path, episode_index=0, chunk="chunk-000", file_stem="file-000" + ) + == [] + ) + + +class TestCheckDependencies: + def test_returns_when_available(self): + common.check_dependencies() diff --git a/tests/test_pillow_exports.py b/tests/test_pillow_exports.py new file mode 100644 index 0000000..b0f35bc --- /dev/null +++ b/tests/test_pillow_exports.py @@ -0,0 +1,167 @@ +"""Smoke tests for Pillow-using code paths. + +These exercise the Image.open/new/fromarray/composite, convert, putpalette, +save, ImageDraw, and ImageColor calls inside fastlabel/__init__.py so that +Pillow major-version bumps surface API breakage in CI. +""" +import os + +import numpy as np +import pytest +from PIL import Image + +import fastlabel +from fastlabel import const + + +@pytest.fixture +def client(monkeypatch): + monkeypatch.setenv("FASTLABEL_ACCESS_TOKEN", "dummy-token") + return fastlabel.Client() + + +def _bbox_task(name="task1.png", w=64, h=48): + return { + "name": name, + "width": w, + "height": h, + "annotations": [ + { + "type": "bbox", + "value": "cat", + "color": "#ff0000", + "points": [10, 10, 40, 30], + } + ], + } + + +def _polygon_task(name="task2.png", w=64, h=48): + return { + "name": name, + "width": w, + "height": h, + "annotations": [ + { + "type": "polygon", + "value": "dog", + "color": "#00ff00", + "points": [5, 5, 50, 5, 50, 40, 5, 40], + } + ], + } + + +def _segmentation_task(name="task3.png", w=64, h=48): + return { + "name": name, + "width": w, + "height": h, + "annotations": [ + { + "type": "segmentation", + "value": "bird", + "color": "#0000ff", + "points": [[[5, 5, 50, 5, 50, 40, 5, 40]]], + } + ], + } + + +class TestExportIndexColorImage: + """Covers Image.new, Image.fromarray, convert('P'), putpalette, save.""" + + def _call(self, client, task, output_dir, **kwargs): + client._Client__export_index_color_image( + task=task, + output_dir=str(output_dir), + pallete=const.COLOR_PALETTE, + **kwargs, + ) + + def _assert_indexed_png(self, path): + assert os.path.exists(path) + with Image.open(path) as img: + assert img.mode == "P" + assert img.getpalette() is not None + assert img.size == (64, 48) + + def test_bbox_instance(self, client, tmp_path): + task = _bbox_task() + self._call(client, task, tmp_path, is_instance_segmentation=True) + self._assert_indexed_png(tmp_path / "task1.png") + + def test_polygon_semantic(self, client, tmp_path): + task = _polygon_task() + self._call( + client, + task, + tmp_path, + is_instance_segmentation=False, + classes=["dog"], + ) + self._assert_indexed_png(tmp_path / "task2.png") + + def test_segmentation_instance(self, client, tmp_path): + task = _segmentation_task() + self._call(client, task, tmp_path, is_instance_segmentation=True) + self._assert_indexed_png(tmp_path / "task3.png") + + +class TestCreateImageWithAnnotation: + """Covers Image.open, ImageDraw.Draw, ImageColor.getcolor, Image.composite.""" + + def _make_source_image(self, path, w=64, h=48): + arr = np.full((h, w, 3), 200, dtype=np.uint8) + Image.fromarray(arr).save(path) + + def _call(self, client, img_path, task, output_dir): + client._Client__create_image_with_annotation( + [str(img_path), task, str(output_dir)] + ) + + def test_bbox(self, client, tmp_path): + src = tmp_path / "src.png" + self._make_source_image(src) + out_dir = tmp_path / "out" + out_dir.mkdir() + task = _bbox_task(name="src.png") + self._call(client, src, task, out_dir) + result = out_dir / "src.png" + assert result.exists() + with Image.open(result) as img: + assert img.size == (64, 48) + + def test_polygon(self, client, tmp_path): + src = tmp_path / "p.png" + self._make_source_image(src) + out_dir = tmp_path / "out" + out_dir.mkdir() + task = _polygon_task(name="p.png") + self._call(client, src, task, out_dir) + assert (out_dir / "p.png").exists() + + def test_segmentation_triggers_composite(self, client, tmp_path): + src = tmp_path / "s.png" + self._make_source_image(src) + out_dir = tmp_path / "out" + out_dir.mkdir() + task = _segmentation_task(name="s.png") + self._call(client, src, task, out_dir) + result = out_dir / "s.png" + assert result.exists() + with Image.open(result) as img: + assert img.mode in ("RGB", "RGBA") + + def test_segmentation_jpeg_converts_rgb(self, client, tmp_path): + src = tmp_path / "s.jpg" + arr = np.full((48, 64, 3), 200, dtype=np.uint8) + Image.fromarray(arr).save(src, format="JPEG") + out_dir = tmp_path / "out" + out_dir.mkdir() + task = _segmentation_task(name="s.jpg") + self._call(client, src, task, out_dir) + result = out_dir / "s.jpg" + assert result.exists() + with Image.open(result) as img: + assert img.mode == "RGB"