From 30b2996eec6bee7e7d7233ee47545663ecc18fbc Mon Sep 17 00:00:00 2001 From: Ivar Stangeby Date: Sun, 28 Jun 2026 17:58:32 +0200 Subject: [PATCH 1/7] add hypothesis as a dev dependency --- pyproject.toml | 1 + uv.lock | 24 ++++++++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index cdb8fd8..7429b7f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,6 +55,7 @@ dev = [ "requests-mock>=1.12.1", "ruff>=0.5.0", "pyright>=1.1.0", + "hypothesis>=6.0", ] [project.scripts] diff --git a/uv.lock b/uv.lock index 73b4764..5c518f8 100644 --- a/uv.lock +++ b/uv.lock @@ -233,6 +233,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e4/d3/5268aeabf2ad82658c4e2ff3a060648d0f02f3926cb53247c0e4d0dab49e/griffelib-2.1.0-py3-none-any.whl", hash = "sha256:cc7b3d2d2865ad0b909fcc38086e3f554b5ea7acbaa7bbb7ecaa3f5dfb7d9f00", size = 142560, upload-time = "2026-06-19T12:05:38.742Z" }, ] +[[package]] +name = "hypothesis" +version = "6.155.7" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "exceptiongroup", marker = "python_full_version < '3.11'" }, + { name = "sortedcontainers" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f2/55/983b6bc1b6b343a5ff6020388f9d0680ab477be59a731517e6c4a0387100/hypothesis-6.155.7.tar.gz", hash = "sha256:d8d6091753d0669db3c90c5e5b346cb37c72f3dd9378c8413acb1fd5da63f7ea", size = 478291, upload-time = "2026-06-21T05:54:31.573Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/01/f8/c151e196d4f397ed9436a071e52666c70a2f021138dea828b0a461e245db/hypothesis-6.155.7-py3-none-any.whl", hash = "sha256:9f634bdb1f9e9b8ab6ba09431cf2deedb750c96978125a6fb3c5a0f6c6db4131", size = 544762, upload-time = "2026-06-21T05:54:29.506Z" }, +] + [[package]] name = "identify" version = "2.6.19" @@ -972,6 +985,7 @@ dependencies = [ [package.optional-dependencies] dev = [ + { name = "hypothesis" }, { name = "mkdocs" }, { name = "mkdocs-glightbox" }, { name = "mkdocs-material" }, @@ -991,6 +1005,7 @@ dev = [ [package.metadata] requires-dist = [ + { name = "hypothesis", marker = "extra == 'dev'", specifier = ">=6.0" }, { name = "mkdocs", extras = ["python"], marker = "extra == 'dev'", specifier = ">=1.6.0" }, { name = "mkdocs-glightbox", marker = "extra == 'dev'", specifier = ">=0.4.0" }, { name = "mkdocs-material", marker = "extra == 'dev'", specifier = ">=9.5.27" }, @@ -1589,6 +1604,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" }, ] +[[package]] +name = "sortedcontainers" +version = "2.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e8/c4/ba2f8066cceb6f23394729afe52f3bf7adec04bf9ed2c820b39e19299111/sortedcontainers-2.4.0.tar.gz", hash = "sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88", size = 30594, upload-time = "2021-05-16T22:03:42.897Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/32/46/9cb0e58b2deb7f82b84065f37f3bffeb12413f947f9388e4cac22c4621ce/sortedcontainers-2.4.0-py2.py3-none-any.whl", hash = "sha256:a163dcaede0f1c021485e957a39245190e74249897e2ae4b2aa38595db237ee0", size = 29575, upload-time = "2021-05-16T22:03:41.177Z" }, +] + [[package]] name = "threadpoolctl" version = "3.6.0" From 7c33726d04fc80da8cfc9182880306ea121e1a00 Mon Sep 17 00:00:00 2001 From: Ivar Stangeby Date: Sun, 28 Jun 2026 17:59:39 +0200 Subject: [PATCH 2/7] add invariant tests --- tests/integration/test_invariants.py | 132 +++++++++++++++++++++++++++ 1 file changed, 132 insertions(+) create mode 100644 tests/integration/test_invariants.py diff --git a/tests/integration/test_invariants.py b/tests/integration/test_invariants.py new file mode 100644 index 0000000..3bc8c4e --- /dev/null +++ b/tests/integration/test_invariants.py @@ -0,0 +1,132 @@ +""" +These property tests correspond to the "Guarantees" section documented on ``extract_colors``. + +The tests are parametrized over ``available_methods()`` so a newly registered +extractor is covered automatically. A Hypothesis property test fuzzes arbitrary +small images to catch degenerate inputs. +""" + +import numpy as np +import pytest +from hypothesis import HealthCheck, given, settings +from hypothesis import strategies as st +from hypothesis.extra.numpy import arrays +from PIL import Image + +from pylette import NoValidPixelsError, Palette, extract_colors +from pylette.src.extractors import available_methods + +METHODS = available_methods() + +# Degenerate images make KMeans/OKLab emit sklearn ConvergenceWarnings; that is +# expected here and not what these tests are about. +pytestmark = pytest.mark.filterwarnings("ignore::UserWarning") + + +def _assert_palette_invariants(palette: Palette, palette_size: int) -> None: + # len(palette) <= palette_size + assert len(palette) <= palette_size + if len(palette) == 0: + return + # sum(frequencies) ~= 1.0 + assert sum(palette.frequencies) == pytest.approx(1.0) + for color in palette.colors: + # every channel in-gamut, plain Python ints + assert all(isinstance(channel, int) and 0 <= channel <= 255 for channel in color.rgb) + + +@pytest.mark.parametrize("mode", METHODS) +@pytest.mark.parametrize("palette_size", [1, 3, 5]) +@pytest.mark.parametrize("resize", [True, False]) +def test_solid_image_is_handled(mode: str, palette_size: int, resize: bool) -> None: + img = Image.new("RGB", (8, 8), (12, 200, 75)) + palette = extract_colors(img, palette_size=palette_size, mode=mode, resize=resize) + _assert_palette_invariants(palette, palette_size) + assert len(palette) >= 1 + + +@pytest.mark.parametrize("mode", METHODS) +def test_one_by_one_image_is_handled(mode: str) -> None: + img = Image.fromarray(np.array([[[10, 20, 30]]], dtype=np.uint8), "RGB") + palette = extract_colors(img, palette_size=5, mode=mode, resize=False) + _assert_palette_invariants(palette, 5) + assert len(palette) >= 1 + + +@pytest.mark.parametrize("mode", METHODS) +def test_palette_size_exceeds_distinct_colors(mode: str) -> None: + arr = np.array([[[0, 0, 0], [255, 255, 255]], [[255, 0, 0], [0, 0, 255]]], dtype=np.uint8) + img = Image.fromarray(arr, "RGB") + palette = extract_colors(img, palette_size=10, mode=mode, resize=False) + _assert_palette_invariants(palette, 10) + + +@pytest.mark.parametrize("mode", METHODS) +def test_partial_alpha_mask_is_handled(mode: str) -> None: + arr = np.zeros((16, 16, 4), dtype=np.uint8) + arr[..., :3] = np.random.default_rng(0).integers(0, 256, (16, 16, 3)) + arr[::2, :, 3] = 255 # half opaque, half transparent + img = Image.fromarray(arr, "RGBA") + palette = extract_colors(img, palette_size=5, mode=mode, resize=False, alpha_mask_threshold=0) + _assert_palette_invariants(palette, 5) + + +@pytest.mark.parametrize("mode", METHODS) +def test_total_alpha_mask_raises_typed_error(mode: str) -> None: + arr = np.zeros((16, 16, 4), dtype=np.uint8) # alpha = 0 everywhere + img = Image.fromarray(arr, "RGBA") + with pytest.raises(NoValidPixelsError): + extract_colors(img, palette_size=5, mode=mode, resize=False, alpha_mask_threshold=0) + + +@pytest.mark.parametrize("mode", METHODS) +def test_deterministic_under_fixed_random_state(mode: str) -> None: + arr = np.random.default_rng(7).integers(0, 256, (20, 20, 3), dtype=np.uint8) + img = Image.fromarray(arr, "RGB") + a = extract_colors(img, palette_size=5, mode=mode) + b = extract_colors(img, palette_size=5, mode=mode) + assert [c.rgb for c in a.colors] == [c.rgb for c in b.colors] + assert [c.frequency for c in a.colors] == [c.frequency for c in b.colors] + + +@pytest.mark.parametrize("mode", METHODS) +@pytest.mark.parametrize( + "sort_mode, key, reverse", + [ + ("luminance", lambda c: c.luminance, False), + ("frequency", lambda c: c.frequency, True), + ], +) +def test_sort_order_is_stable_and_idempotent(mode, sort_mode, key, reverse) -> None: # type: ignore[no-untyped-def] + arr = np.random.default_rng(3).integers(0, 256, (24, 24, 3), dtype=np.uint8) + img = Image.fromarray(arr, "RGB") + palette = extract_colors(img, palette_size=6, mode=mode, sort_mode=sort_mode) + colors = palette.colors + # The returned palette is already in sort order, and re-sorting is a no-op. + resorted = sorted(colors, key=key, reverse=reverse) + assert [c.rgb for c in resorted] == [c.rgb for c in colors] + + +_image_arrays = arrays( + dtype=np.uint8, + shape=st.tuples(st.integers(1, 12), st.integers(1, 12), st.sampled_from([3, 4])), +) + + +@settings(max_examples=40, deadline=None, suppress_health_check=[HealthCheck.too_slow]) +@given( + arr=_image_arrays, + palette_size=st.integers(1, 8), + mode=st.sampled_from(METHODS), + sort_mode=st.sampled_from([None, "luminance", "frequency"]), + resize=st.booleans(), +) +def test_property_invariants_hold_for_arbitrary_images(arr, palette_size, mode, sort_mode, resize) -> None: # type: ignore[no-untyped-def] + mode_str = "RGB" if arr.shape[-1] == 3 else "RGBA" + img = Image.fromarray(arr, mode_str) + try: + palette = extract_colors(img, palette_size=palette_size, mode=mode, sort_mode=sort_mode, resize=resize) + except NoValidPixelsError: + # A fully alpha-masked image is an expected, typed failure (P4). + return + _assert_palette_invariants(palette, palette_size) From 74bec01113acf0eecabb885bb1c785634671c4dd Mon Sep 17 00:00:00 2001 From: Ivar Stangeby Date: Sun, 28 Jun 2026 18:00:04 +0200 Subject: [PATCH 3/7] break early if box cannot be split in median cut --- pylette/src/extractors/median_cut.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/pylette/src/extractors/median_cut.py b/pylette/src/extractors/median_cut.py index 7ebf1e0..6006e4e 100644 --- a/pylette/src/extractors/median_cut.py +++ b/pylette/src/extractors/median_cut.py @@ -141,6 +141,12 @@ def extract(self, arr: NDArray[NP_T], palette_size: int) -> list[Color]: valid_pixel_count = arr.shape[0] boxes = [ColorBox(arr)] while len(boxes) < palette_size: - largest_box_idx = np.argmax([box.size for box in boxes]) + # Only boxes with at least 2 pixels can be split; a 1-pixel box would + # produce an empty box. Stop once nothing is splittable (e.g. there + # are fewer distinct pixels than the requested palette size). + splittable = [i for i, box in enumerate(boxes) if box.pixel_count >= 2] + if not splittable: + break + largest_box_idx = splittable[int(np.argmax([boxes[i].size for i in splittable]))] boxes = boxes[:largest_box_idx] + boxes[largest_box_idx].split() + boxes[largest_box_idx + 1 :] return [Color(tuple(map(int, box.average)), box.pixel_count / valid_pixel_count) for box in boxes] From 18fd458a7d976b1cb5dabba935e52d6392648d92 Mon Sep 17 00:00:00 2001 From: Ivar Stangeby Date: Sun, 28 Jun 2026 18:00:33 +0200 Subject: [PATCH 4/7] clamp number of colors to the number of pixels to avoid degenerate cases --- pylette/src/extractors/k_means.py | 7 +++++-- pylette/src/extractors/oklab.py | 9 ++++++--- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/pylette/src/extractors/k_means.py b/pylette/src/extractors/k_means.py index 5350b57..e15ac56 100644 --- a/pylette/src/extractors/k_means.py +++ b/pylette/src/extractors/k_means.py @@ -27,8 +27,11 @@ def extract(self, arr: NDArray[NP_T], palette_size: int) -> list[Color]: from sklearn.cluster import KMeans - arr = np.squeeze(arr) - model = KMeans(n_clusters=palette_size, n_init="auto", init="k-means++", random_state=2024) + arr = self._reshape_array(arr) + # Never request more clusters than there are pixels (degenerate inputs + # like a 1x1 image); KMeans requires n_clusters <= n_samples. + n_colors = min(palette_size, arr.shape[0]) + model = KMeans(n_clusters=n_colors, n_init="auto", init="k-means++", random_state=2024) labels = model.fit_predict(arr) palette = np.array(model.cluster_centers_, dtype=int) color_count = np.bincount(labels) diff --git a/pylette/src/extractors/oklab.py b/pylette/src/extractors/oklab.py index c10fd41..be24b04 100644 --- a/pylette/src/extractors/oklab.py +++ b/pylette/src/extractors/oklab.py @@ -110,7 +110,10 @@ def extract(self, arr: NDArray[NP_T], palette_size: int) -> list[Color]: # sRGB8 -> OKLab lab = linear_srgb_to_oklab(srgb_to_linear(rgb8 / 255.0)) - model = KMeans(n_clusters=palette_size, n_init="auto", init="k-means++", random_state=2024) + # Never request more clusters than there are pixels (degenerate inputs + # like a 1x1 image); KMeans requires n_clusters <= n_samples. + n_clusters = min(palette_size, len(pixels)) + model = KMeans(n_clusters=n_clusters, n_init="auto", init="k-means++", random_state=2024) labels = model.fit_predict(lab) centers_lab = np.asarray(model.cluster_centers_) @@ -118,11 +121,11 @@ def extract(self, arr: NDArray[NP_T], palette_size: int) -> list[Color]: # Color stores full precision; out-of-gamut values are clamped. centers_srgb = np.clip(linear_to_srgb(oklab_to_linear_srgb(centers_lab)), 0.0, 1.0) - counts = np.bincount(labels, minlength=palette_size) + counts = np.bincount(labels, minlength=n_clusters) total = float(counts.sum()) colors: list[Color] = [] - for i in range(palette_size): + for i in range(n_clusters): if counts[i] == 0: continue mean_alpha = float(alpha[labels == i].mean()) / 255.0 From 955985ce2c8d5ccc9c8bc8dfc2f1b9419dd2cfe4 Mon Sep 17 00:00:00 2001 From: Ivar Stangeby Date: Sun, 28 Jun 2026 18:00:48 +0200 Subject: [PATCH 5/7] add a note on frequencies in Palette --- pylette/src/palette.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pylette/src/palette.py b/pylette/src/palette.py index 740d19c..8125fd3 100644 --- a/pylette/src/palette.py +++ b/pylette/src/palette.py @@ -23,6 +23,10 @@ def __init__(self, colors: list[Color], metadata: PaletteMetaData | None = None) Parameters: colors (list[Color]): A list of Color objects. + + Note: + For a palette produced by :func:`~pylette.extract_colors`, + ``frequencies`` are the per-color relative weights and sum to ``1.0``. """ self.colors = colors From 60c6d09da47bec5b1f42ba4ac74aa89de0ee321b Mon Sep 17 00:00:00 2001 From: Ivar Stangeby Date: Sun, 28 Jun 2026 18:01:11 +0200 Subject: [PATCH 6/7] Add a list of invariants that extract_colors respect --- pylette/src/color_extraction.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/pylette/src/color_extraction.py b/pylette/src/color_extraction.py index 01be348..c735f33 100644 --- a/pylette/src/color_extraction.py +++ b/pylette/src/color_extraction.py @@ -170,6 +170,29 @@ def extract_colors( Returns: Palette: A palette of the extracted colors. + Guarantees: + The returned palette satisfies these invariants for every extraction + method (pinned by the property suite in ``tests/integration/test_invariants.py``): + + - ``len(palette) <= palette_size``. Fewer colors are returned when the + image has fewer distinct colors than requested. + - The color frequencies sum to ``1.0``. + - Every channel of every ``Color.rgb`` is an ``int`` in ``[0, 255]``. + - Extraction is deterministic: the same image and arguments always + produce the same palette. + - Colors are ordered by ``sort_mode`` — ascending ``luminance`` or, by + default, descending ``frequency`` — and that ordering is stable. + - Degenerate inputs (a solid color, a 1x1 image, ``palette_size`` + greater than the number of distinct colors, a partial alpha mask) are + handled without error. The one expected failure is an image with no + pixels left to sample (e.g. a fully alpha-masked image), which raises + :class:`~pylette.NoValidPixelsError`. + + Raises: + InvalidImageError: If the image cannot be loaded or its type is unsupported. + NoValidPixelsError: If no pixels remain after alpha masking. + UnknownExtractionMethodError: If ``mode`` is not a known extraction method. + Examples: Colors can be extracted from a variety of sources, including local files, byte streams, URLs, and numpy arrays. From 84328942dcb4ae6ca8519179a4b7fb4daf1c3aec Mon Sep 17 00:00:00 2001 From: Ivar Stangeby Date: Sun, 28 Jun 2026 18:02:06 +0200 Subject: [PATCH 7/7] update changelog --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0255405..a901d42 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -75,6 +75,9 @@ rather than message. Each subclass also derives from `ValueError`, so existing ### Fixed +- **Degenerate inputs no longer crash extraction**: a 1x1 image, a `palette_size` + larger than the number of distinct colors, or any case with fewer pixels than + the requested palette size previously raised raw sklearn/numpy `ValueError`s. - **Reshape bug with alpha masking**: Extractors reshaped the pixel array to `(height * width, n_channels)`, but after alpha masking the valid-pixel count can be smaller than `height * width`. This caused the reshape to either raise