diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index aabc9ca..1e343d9 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -3,19 +3,22 @@ name: Unit Testing on: [pull_request] jobs: - unittest: - runs-on: ubuntu-latest + unittest: + runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - with: - fetch-depth: 1 + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 1 - - name: Install uv - uses: astral-sh/setup-uv@v5 + - name: Install Rust + uses: dtolnay/rust-toolchain@stable - - name: Set up Python - run: uv sync + - name: Install uv + uses: astral-sh/setup-uv@v5 - - name: Run Unit Tests - run: uv run pytest -n auto + - name: Set up Python + run: uv sync + + - name: Run Unit Tests + run: uv run pytest -n auto diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml new file mode 100644 index 0000000..ca356d0 --- /dev/null +++ b/.github/workflows/rust.yml @@ -0,0 +1,29 @@ +name: Rust CI + +on: [pull_request] + +jobs: + rust-ci: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 1 + + - name: Install Rust toolchain + uses: dtolnay/rust-toolchain@stable + with: + components: rustfmt, clippy + + - name: Rust cache + uses: Swatinem/rust-cache@v2 + + - name: Check formatting + run: cargo fmt --all -- --check + + - name: Clippy + run: cargo clippy --all-targets --all-features -- -D warnings + + - name: Tests + run: cargo test --all-features diff --git a/.gitignore b/.gitignore index a250447..29ac4fc 100644 --- a/.gitignore +++ b/.gitignore @@ -15,4 +15,8 @@ assets/ .vscode/ .hypothesis/ prof/ -.coverage \ No newline at end of file +.coverage + +# Rust build artifacts +target/ +Cargo.lock diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b63d5d9..595f951 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -19,3 +19,25 @@ repos: - id: ruff args: [--fix] - id: ruff-format + + - repo: local + hooks: + - id: cargo-fmt + name: cargo fmt + entry: cargo fmt --all + language: system + types: [rust] + pass_filenames: false + - id: cargo-clippy + name: cargo clippy + entry: cargo clippy --all-targets --all-features -- -D warnings + language: system + types: [rust] + pass_filenames: false + - id: cargo-test + name: cargo test + entry: cargo test --all-features + language: system + types: [rust] + pass_filenames: false + stages: [pre-push] diff --git a/AGENTS.md b/AGENTS.md index 54931df..1cf33c5 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -13,20 +13,29 @@ Future metrics (e.g., audio coverage, state-graph coverage) will follow the same ``` . +├── Cargo.toml # Rust manifest (maturin builds the extension) +├── pyproject.toml # Project metadata, maturin build backend ├── src/ │ ├── main.py # CLI entry point (Typer) +│ ├── lib.rs # PyO3 module entry point (Rust) +│ ├── bktree.rs # BK-tree with POPCNT Hamming distance +│ ├── unionfind.rs # Flat Vec-based union-find +│ ├── monitor.rs # CoverageTracker (BK-tree + UnionFind combined) │ └── gamecov/ │ ├── __init__.py # Public API re-exports +│ ├── _gamecov_core.pyi # Type stub for Rust extension │ ├── cov_base.py # Abstract protocols: CoverageItem, Coverage, CoverageMonitor │ ├── frame.py # Frame dataclass (PIL Image wrapper with average-hash) │ ├── dedup.py # Deduplication algorithms (pHash, SSIM [deprecated]) -│ ├── frame_cov.py # FrameCoverage, FrameMonitor, BKFrameMonitor, BK-tree, UnionFind +│ ├── frame_cov.py # FrameCoverage, FrameMonitor, BKFrameMonitor, RustBKFrameMonitor, BK-tree, UnionFind │ ├── loader.py # MP4 loading: bulk, lazy (generator), last-n │ ├── writer.py # MP4 writing: imageio and OpenCV backends │ ├── stitch.py # Panorama stitching of unique frames │ ├── generator.py # Hypothesis strategies for property-based testing │ ├── env.py # Runtime config (RADIUS env var) │ └── py.typed # PEP 561 marker +├── rust-tests/ +│ └── prop_tests.rs # Rust proptest property-based tests ├── tests/ │ ├── test_generators.py # Frame/FrameList generation strategies │ ├── test_dedup.py # Dedup monotonicity properties @@ -35,19 +44,34 @@ Future metrics (e.g., audio coverage, state-graph coverage) will follow the same │ ├── test_load_write_assets.py# Differential tests across loaders on real videos │ ├── test_monotone.py # Coverage monotonicity (FrameMonitor & BKFrameMonitor) │ ├── test_BK_frame_monitor.py # Differential: FrameMonitor vs BKFrameMonitor +│ ├── test_rust_frame_monitor.py # Differential & monotonicity: BKFrameMonitor vs RustBKFrameMonitor │ └── test_monotone_smb.py # Real-world monotonicity on SMB dataset +├── benchmarks/ +│ ├── conftest.py # Session-scoped fixtures (pre-generated FrameCoverage) +│ └── test_bench_monitor.py # Python vs Rust monitor throughput benchmarks ├── assets/ │ ├── videos/ # Small sample MP4s for integration tests │ └── smb/ # Super Smash Bros recordings for stress tests ├── docs/ │ └── design.md # Architecture and design documentation -├── pyproject.toml # Project metadata, dependencies, tool configs -├── .pre-commit-config.yaml # Pre-commit hooks -├── .github/workflows/ # CI: pytest, mypy, ruff, pylint +├── rustfmt.toml # Rust formatting config +├── .pre-commit-config.yaml # Pre-commit hooks (Python + Rust) +├── .github/workflows/ # CI: pytest, mypy, ruff, pylint, rust (fmt/clippy/test) ├── AGENTS.md # This file └── README.md # Human-facing documentation ``` +### Rust extension (gamecov-core) + +The Rust extension is built as part of the package via maturin. The compiled +module is installed as `gamecov._gamecov_core` and provides high-performance +replacements for the BK-tree, union-find, and coverage tracker. + +Build the package (includes Rust compilation): `uv sync` or `pip install .` +Run Rust tests independently: `cargo test` +Check Rust formatting: `cargo fmt --all -- --check` +Run Rust linting: `cargo clippy --all-targets --all-features -- -D warnings` + ## Design See [docs/design.md](docs/design.md) for the coverage framework architecture, frame coverage pipeline, BK-tree optimization, and loading strategies. @@ -59,7 +83,7 @@ See [docs/design.md](docs/design.md) for the coverage framework architecture, fr | `cov_base.py` | `CoverageItem`, `Coverage[T]`, `CoverageMonitor[T]` protocols/ABC | | `frame.py` | `Frame` dataclass (PIL Image + average-hash) | | `dedup.py` | `is_dup()`, `dedup_unique_frames()`, `dedup_unique_hashes()`, `ssim_dedup()` [deprecated] | -| `frame_cov.py` | `FrameCoverage`, `FrameMonitor`, `BKFrameMonitor`, `get_frame_cov()`, `_UnionFind`, `_BKTree` | +| `frame_cov.py` | `FrameCoverage`, `FrameMonitor`, `BKFrameMonitor`, `RustBKFrameMonitor`, `get_frame_cov()`, `_UnionFind`, `_BKTree` | | `loader.py` | `load_mp4()`, `load_mp4_lazy()`, `load_mp4_last_n()` | | `writer.py` | `write_mp4()`, `write_mp4_cv2()` | | `stitch.py` | `stitch_images()` (panorama via AffineStitcher) | @@ -80,7 +104,8 @@ See [docs/design.md](docs/design.md) for the coverage framework architecture, fr | `opencv-python` | Color conversion, video writing, image processing | | `scikit-image` | SSIM metric (deprecated path) | | `stitching` | Panorama stitching via OpenCV features | -| `numpy`, `numba` | Numerical arrays, optional JIT acceleration | +| `numpy` | Numerical arrays | +| `gamecov._gamecov_core` | Built-in Rust extension: BK-tree, union-find, coverage tracker (PyO3/maturin) | | `returns` | Functional `Result` type for error handling | | `deprecated` | `@deprecated` decorator | | `typer-slim` | CLI framework | @@ -93,6 +118,7 @@ See [docs/design.md](docs/design.md) for the coverage framework architecture, fr | `mypy` | Static type checking (strict mode, returns plugin) | | `ruff` | Linting and formatting | | `pre-commit` | Pre-commit hook runner | +| `pytest-benchmark` | Performance benchmarking (Python vs Rust) | | `pytest-xdist` | Parallel test execution (`-n auto`) | | `pytest-cov` | Coverage reporting | | `pytest-profiling` | Performance profiling | @@ -110,6 +136,7 @@ uv run pytest -n auto - **Integration** (real assets): `test_load_n.py`, `test_load_write_assets.py` - **Monotonicity**: `test_monotone.py` (random data), `test_monotone_smb.py` (real SMB recordings) - **Differential**: `test_BK_frame_monitor.py` (FrameMonitor vs BKFrameMonitor produce identical results) +- **Rust backend**: `test_rust_frame_monitor.py` (differential Python vs Rust, order-independence, monotonicity) Some tests require assets in `assets/videos/` or `assets/smb/` and will skip if missing. @@ -118,6 +145,24 @@ Some tests require assets in `assets/videos/` or `assets/smb/` and will skip if - `RADIUS` — Hamming distance threshold (default `5`). - `N_MAX` — Maximum number of recordings to process in monotonicity tests (default `100`). +## Benchmarks + +```bash +# Run benchmarks (disabled by default during normal test runs) +uv run pytest benchmarks/ --benchmark-enable + +# Group output by backend for side-by-side comparison +uv run pytest benchmarks/ --benchmark-enable --benchmark-group-by=param:backend + +# Save results for later comparison +uv run pytest benchmarks/ --benchmark-enable --benchmark-save=baseline +uv run pytest benchmarks/ --benchmark-enable --benchmark-compare=baseline +``` + +Benchmarks live in `benchmarks/` and are excluded from the normal test suite. +They compare `BKFrameMonitor` (Python) vs `RustBKFrameMonitor` (Rust) throughput +at the monitor level (`add_cov`/`is_seen` operations). + ## Development - Before start working, refresh your knowledge from contents in `.agents` first. @@ -129,7 +174,8 @@ Some tests require assets in `assets/videos/` or `assets/smb/` and will skip if Local variables' types are optional as long as the types can be easily inferred. - Use f-strings for string interpolation. - Use `TypedDict`, `Literal`, `Protocol`, and `TypeVar` from `typing` module when appropriate. -- Always run `mypy`, and `ruff` to ensure code quality after updating code in `src/`. +- Always run `mypy` and `ruff` to ensure code quality after updating Python code in `src/`. +- Always run `cargo fmt`, `cargo clippy -- -D warnings`, and `cargo test` after updating Rust code in `src/`. - Never commit changes or create PRs. Suggest commit messages to the human developer for review after your changes to the codebase. - Always use `typer` to handle CLI commands. diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..cbd1a7b --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,23 @@ +[package] +name = "gamecov-core" +version = "0.2.0" +edition = "2021" +description = "Rust-accelerated core for gamecov frame coverage monitoring" + +[lib] +name = "gamecov_core" +crate-type = ["cdylib", "rlib"] + +[dependencies] +pyo3 = { version = "0.23", features = ["extension-module"] } + +[dev-dependencies] +proptest = "1" + +[[test]] +name = "prop_tests" +path = "rust-tests/prop_tests.rs" + +[profile.release] +lto = "fat" +codegen-units = 1 diff --git a/README.md b/README.md index db0fa4c..c1f975d 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ Two frames are considered duplicates if the Hamming distance between their perce ## Installation -Requires Python >= 3.11. +Requires Python >= 3.11 and a [Rust toolchain](https://rustup.rs/) (for building from source). ### As a package @@ -41,6 +41,8 @@ Or with pip: pip install git+https://github.com/SecurityLab-UCD/gamecov.git ``` +This builds both the Python package and the embedded Rust extension (`gamecov._gamecov_core`) in a single step. + ### For development Clone the repo and sync dependencies: @@ -79,6 +81,24 @@ print(f"Total unique frames: {len(monitor.item_seen)}") print(f"Unique paths: {len(monitor.path_seen)}") ``` +### Rust-accelerated monitor + +`RustBKFrameMonitor` provides the same interface as `BKFrameMonitor`, +backed by an embedded Rust extension for significantly higher throughput: + +```python +from gamecov import FrameCoverage, RustBKFrameMonitor + +monitor = RustBKFrameMonitor() # same API as BKFrameMonitor + +for recording in recordings: + cov = FrameCoverage(recording) + if not monitor.is_seen(cov): + monitor.add_cov(cov) + +print(f"Coverage components: {monitor.coverage_count}") +``` + ### CLI ```bash @@ -90,8 +110,12 @@ uv run python src/main.py --input-mp4-path path/to/video.mp4 --confidence-thresh ### Prerequisites +- Python >= 3.11 +- [Rust toolchain](https://rustup.rs/) (stable) +- [uv](https://docs.astral.sh/uv/) + ```bash -# Install dependencies +# Install dependencies (builds the Rust extension automatically) uv sync # Install pre-commit hooks @@ -101,11 +125,24 @@ uv run pre-commit install ### Running Tests ```bash -# Run all tests in parallel +# Run all Python tests in parallel uv run pytest -n auto # Run with coverage uv run pytest -n auto --cov=gamecov + +# Run Rust unit and property tests +cargo test +``` + +### Benchmarks + +```bash +# Compare Python vs Rust monitor throughput +uv run pytest benchmarks/ --benchmark-enable + +# Side-by-side grouped by backend +uv run pytest benchmarks/ --benchmark-enable --benchmark-group-by=param:backend ``` ### Code Quality @@ -121,3 +158,29 @@ uv run ruff check src/ ### CI GitHub Actions runs four checks on every PR: `pytest`, `mypy`, `ruff`, and `pylint`. +The CI workflow installs the Rust toolchain before building. + +## Performance: Rust vs Python Backend + +The embedded Rust extension (`RustBKFrameMonitor`) provides significant speedups +over the pure-Python `BKFrameMonitor` for the core `add_cov`/`is_seen` monitor operations. +The advantage grows with workload size as the BK-tree and union-find structures scale. + +Benchmark results (mean time per iteration, lower is better): + +| Recordings | Python (ms) | Rust (ms) | Speedup | +| ---------- | ----------- | --------- | ------- | +| 10 | 4.04 | 2.31 | 1.75x | +| 50 | 42.95 | 15.00 | 2.86x | +| 200 | 424.36 | 111.03 | 3.82x | +| 500 | 2,349.74 | 549.40 | 4.28x | + +The Rust backend achieves **1.8x -- 4.3x** speedup, +with larger gains at higher workloads where BK-tree traversal and union-find operations dominate. +Each recording contains randomly generated `FrameCoverage` objects with perceptual hashes. + +Reproduce these results with: + +```bash +uv run pytest benchmarks/ --benchmark-enable --benchmark-group-by=param:backend +``` diff --git a/benchmarks/__init__.py b/benchmarks/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/benchmarks/conftest.py b/benchmarks/conftest.py new file mode 100644 index 0000000..cfada86 --- /dev/null +++ b/benchmarks/conftest.py @@ -0,0 +1,72 @@ +"""Shared fixtures for benchmark suite. + +Pre-generates FrameCoverage objects so the benchmark loop only measures +monitor operations (add_cov / is_seen), not video I/O or hashing. +""" + +from __future__ import annotations + +import os +import tempfile +from collections.abc import Generator + +import numpy as np +import pytest + +from gamecov import FrameCoverage +from gamecov.frame import Frame +from gamecov.writer import write_mp4 + +SEED: int = 42 +FRAME_HEIGHT: int = 128 +FRAME_WIDTH: int = 128 +FRAMES_PER_RECORDING: int = 20 + + +def _generate_coverages( + n_recordings: int, + seed: int = SEED, +) -> Generator[list[FrameCoverage], None, None]: + """Generate n_recordings FrameCoverage objects from deterministic random frames.""" + rng = np.random.default_rng(seed) + coverages: list[FrameCoverage] = [] + temp_files: list[str] = [] + + for _ in range(n_recordings): + frames = [ + Frame.fromarray( + rng.integers( + 0, 256, size=(FRAME_HEIGHT, FRAME_WIDTH, 3), dtype=np.uint8 + ) + ) + for _ in range(FRAMES_PER_RECORDING) + ] + with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp: + write_mp4(frames, tmp.name) + temp_files.append(tmp.name) + coverages.append(FrameCoverage(tmp.name)) + + yield coverages + + for f in temp_files: + os.remove(f) + + +@pytest.fixture(scope="session") +def coverages_10() -> Generator[list[FrameCoverage], None, None]: + yield from _generate_coverages(10) + + +@pytest.fixture(scope="session") +def coverages_50() -> Generator[list[FrameCoverage], None, None]: + yield from _generate_coverages(50) + + +@pytest.fixture(scope="session") +def coverages_200() -> Generator[list[FrameCoverage], None, None]: + yield from _generate_coverages(200) + + +@pytest.fixture(scope="session") +def coverages_500() -> Generator[list[FrameCoverage], None, None]: + yield from _generate_coverages(500) diff --git a/benchmarks/test_bench_monitor.py b/benchmarks/test_bench_monitor.py new file mode 100644 index 0000000..b40696b --- /dev/null +++ b/benchmarks/test_bench_monitor.py @@ -0,0 +1,89 @@ +"""Benchmark: BKFrameMonitor (Python) vs RustBKFrameMonitor (Rust). + +Measures add_cov throughput at the monitor level. + +Run with: + uv run pytest benchmarks/ --benchmark-enable + uv run pytest benchmarks/ --benchmark-enable --benchmark-group-by=param:backend +""" + +from __future__ import annotations + +from typing import Callable, Union + +import pytest +from pytest_benchmark.fixture import BenchmarkFixture + +from gamecov import BKFrameMonitor, FrameCoverage +from gamecov.frame_cov import RustBKFrameMonitor + +MonitorFactory = Callable[[], Union[BKFrameMonitor, RustBKFrameMonitor]] + + +def _run_monitor( + factory: MonitorFactory, + coverages: list[FrameCoverage], +) -> int: + """Feed all coverages into a fresh monitor and return coverage_count.""" + monitor = factory() + for cov in coverages: + if not monitor.is_seen(cov): + monitor.add_cov(cov) + return monitor.coverage_count + + +@pytest.mark.parametrize( + "backend", + [ + pytest.param("python", id="python"), + pytest.param("rust", id="rust"), + ], +) +class TestMonitorBenchmark: + """Parametrized benchmark comparing Python and Rust backends.""" + + @staticmethod + def _factory(backend: str) -> MonitorFactory: + if backend == "python": + return BKFrameMonitor + return RustBKFrameMonitor + + def test_add_cov_10( + self, + benchmark: BenchmarkFixture, + backend: str, + coverages_10: list[FrameCoverage], + ) -> None: + """Benchmark add_cov with 10 recordings.""" + result = benchmark(_run_monitor, self._factory(backend), coverages_10) + assert result > 0 + + def test_add_cov_50( + self, + benchmark: BenchmarkFixture, + backend: str, + coverages_50: list[FrameCoverage], + ) -> None: + """Benchmark add_cov with 50 recordings.""" + result = benchmark(_run_monitor, self._factory(backend), coverages_50) + assert result > 0 + + def test_add_cov_200( + self, + benchmark: BenchmarkFixture, + backend: str, + coverages_200: list[FrameCoverage], + ) -> None: + """Benchmark add_cov with 200 recordings.""" + result = benchmark(_run_monitor, self._factory(backend), coverages_200) + assert result > 0 + + def test_add_cov_500( + self, + benchmark: BenchmarkFixture, + backend: str, + coverages_500: list[FrameCoverage], + ) -> None: + """Benchmark add_cov with 500 recordings.""" + result = benchmark(_run_monitor, self._factory(backend), coverages_500) + assert result > 0 diff --git a/pyproject.toml b/pyproject.toml index fadb637..64ea7d0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "gamecov" -version = "0.1.6" +version = "0.2.0" description = "Coverage monitoring for directed game play-testing (Game-Fuzz)" readme = "README.md" authors = [{ name = "Yifeng He", email = "yfhe.prsn@gmail.com" }] @@ -11,7 +11,6 @@ dependencies = [ "hypothesis>=6.136.7", "imagehash>=4.3.2", "imageio[ffmpeg]>=2.37.0", - "numba>=0.61.2", "numpy>=2.0.0", "opencv-python>=4.11.0.86", "pillow>=11.3.0", @@ -24,8 +23,13 @@ dependencies = [ ] [build-system] -requires = ["hatchling"] -build-backend = "hatchling.build" +requires = ["maturin>=1.9,<2.0"] +build-backend = "maturin" + +[tool.maturin] +python-source = "src" +module-name = "gamecov._gamecov_core" +features = ["pyo3/extension-module"] [tool.pylint.messages_control] disable = [ @@ -68,13 +72,15 @@ warn_unreachable = true warn_unused_ignores = false plugins = ["returns.contrib.mypy.returns_plugin"] -# [tool.pytest.ini_options] -# pytest_plugins = ['pytest_profiling'] +[tool.pytest.ini_options] +testpaths = ["tests"] +addopts = "--benchmark-disable" [dependency-groups] dev = [ "mypy>=1.15.0", "pre-commit>=4.5.1", + "pytest-benchmark>=5.1.0", "pytest-cov>=6.2.1", "pytest-profiling>=1.8.1", "pytest-xdist>=3.8.0", diff --git a/rust-tests/prop_tests.rs b/rust-tests/prop_tests.rs new file mode 100644 index 0000000..d292191 --- /dev/null +++ b/rust-tests/prop_tests.rs @@ -0,0 +1,176 @@ +use gamecov_core::bktree::{hamming, BKTreeInner}; +use gamecov_core::monitor::CoverageTrackerInner; +use gamecov_core::unionfind::UnionFindInner; +use proptest::prelude::*; + +// ── BK-tree properties ─────────────────────────────────────────────────── + +proptest! { + #[test] + fn bktree_find_self(x in any::()) { + let mut tree = BKTreeInner::new(); + tree.add(x); + // x should always be within distance 0 of itself + assert!(tree.any_within(x, 0)); + let results = tree.find_all_within(x, 0); + assert!(results.contains(&x)); + } + + #[test] + fn bktree_no_false_negatives( + values in prop::collection::vec(any::(), 1..50), + radius in 0u32..10, + ) { + let mut tree = BKTreeInner::new(); + for &v in &values { + tree.add(v); + } + // Every value in the tree must be found by find_all_within on itself + for &v in &values { + let results = tree.find_all_within(v, radius); + assert!(results.contains(&v), "Tree must find the value itself"); + } + } + + #[test] + fn bktree_results_within_radius( + values in prop::collection::vec(any::(), 1..50), + query in any::(), + radius in 0u32..20, + ) { + let mut tree = BKTreeInner::new(); + for &v in &values { + tree.add(v); + } + let results = tree.find_all_within(query, radius); + // All returned values must actually be within radius + for &r in &results { + assert!( + hamming(query, r) <= radius, + "Result {} has distance {} from query {}, exceeds radius {}", + r, hamming(query, r), query, radius + ); + } + } + + #[test] + fn bktree_completeness( + values in prop::collection::vec(any::(), 1..30), + query in any::(), + radius in 0u32..10, + ) { + let mut tree = BKTreeInner::new(); + for &v in &values { + tree.add(v); + } + let results = tree.find_all_within(query, radius); + + // Brute-force: every value within radius must appear in results + let mut expected: Vec = values.iter() + .copied() + .filter(|&v| hamming(query, v) <= radius) + .collect(); + expected.sort(); + expected.dedup(); + + let mut got = results.clone(); + got.sort(); + got.dedup(); + + assert_eq!(got, expected, "BK-tree must return exactly the brute-force results"); + } +} + +// ── UnionFind properties ───────────────────────────────────────────────── + +proptest! { + #[test] + fn uf_component_count_nonnegative( + values in prop::collection::vec(any::(), 0..50), + unions in prop::collection::vec((any::(), any::()), 0..30), + ) { + let mut uf = UnionFindInner::new(); + let deduped: Vec = { + let mut s = std::collections::HashSet::new(); + values.into_iter().filter(|v| s.insert(*v)).collect() + }; + for &v in &deduped { + uf.make_set(v); + } + if !deduped.is_empty() { + for (ia, ib) in &unions { + let a = deduped[ia.index(deduped.len())]; + let b = deduped[ib.index(deduped.len())]; + uf.union(a, b); + } + } + assert!(uf.component_count() <= deduped.len()); + if !deduped.is_empty() { + assert!(uf.component_count() >= 1); + } + } + + #[test] + fn uf_union_is_symmetric(a in any::(), b in any::()) { + prop_assume!(a != b); + let mut uf1 = UnionFindInner::new(); + uf1.make_set(a); + uf1.make_set(b); + uf1.union(a, b); + + let mut uf2 = UnionFindInner::new(); + uf2.make_set(a); + uf2.make_set(b); + uf2.union(b, a); + + assert_eq!(uf1.find(a), uf1.find(b)); + assert_eq!(uf2.find(a), uf2.find(b)); + assert_eq!(uf1.component_count(), uf2.component_count()); + } +} + +// ── CoverageTracker properties ─────────────────────────────────────────── + +proptest! { + #[test] + fn tracker_total_unique_monotone( + hashes in prop::collection::vec(any::(), 1..100), + radius in 1u32..10, + ) { + let mut tracker = CoverageTrackerInner::new(radius); + let mut prev_unique = 0usize; + for &h in &hashes { + tracker.add_hash(h); + assert!( + tracker.total_unique() >= prev_unique, + "total_unique must be monotonically non-decreasing" + ); + prev_unique = tracker.total_unique(); + } + } + + #[test] + fn tracker_coverage_leq_unique( + hashes in prop::collection::vec(any::(), 1..100), + radius in 1u32..10, + ) { + let mut tracker = CoverageTrackerInner::new(radius); + for &h in &hashes { + tracker.add_hash(h); + } + assert!(tracker.coverage_count() <= tracker.total_unique()); + } + + #[test] + fn tracker_reset_clears_state( + hashes in prop::collection::vec(any::(), 1..50), + ) { + let mut tracker = CoverageTrackerInner::new(5); + for &h in &hashes { + tracker.add_hash(h); + } + tracker.reset(); + assert_eq!(tracker.coverage_count(), 0); + assert_eq!(tracker.total_unique(), 0); + } +} diff --git a/rustfmt.toml b/rustfmt.toml new file mode 100644 index 0000000..4d07e88 --- /dev/null +++ b/rustfmt.toml @@ -0,0 +1,4 @@ +max_width = 120 +fn_params_layout = "Tall" +reorder_imports = true +use_field_init_shorthand = true diff --git a/src/bktree.rs b/src/bktree.rs new file mode 100644 index 0000000..510aeab --- /dev/null +++ b/src/bktree.rs @@ -0,0 +1,174 @@ +use std::collections::HashMap; + +/// A node in the BK-tree arena. +struct BKNode { + val: u64, + children: HashMap, +} + +/// BK-tree for Hamming-distance nearest-neighbour queries on u64 hashes. +/// +/// Nodes are stored in a flat Vec (arena allocation) for cache friendliness. +pub struct BKTreeInner { + nodes: Vec, +} + +#[inline(always)] +pub fn hamming(a: u64, b: u64) -> u32 { + (a ^ b).count_ones() +} + +impl Default for BKTreeInner { + fn default() -> Self { + Self::new() + } +} + +impl BKTreeInner { + pub fn new() -> Self { + Self { nodes: Vec::new() } + } + + /// Insert a hash value. Returns false if exact duplicate (distance 0). + pub fn add(&mut self, x: u64) -> bool { + if self.nodes.is_empty() { + self.nodes.push(BKNode { + val: x, + children: HashMap::new(), + }); + return true; + } + + let mut idx = 0; + loop { + let d = hamming(x, self.nodes[idx].val); + if d == 0 { + return false; // exact duplicate + } + if let Some(&child_idx) = self.nodes[idx].children.get(&d) { + idx = child_idx; + } else { + let new_idx = self.nodes.len(); + self.nodes.push(BKNode { + val: x, + children: HashMap::new(), + }); + self.nodes[idx].children.insert(d, new_idx); + return true; + } + } + } + + /// Check if any value in the tree is within Hamming distance `radius` of `x`. + pub fn any_within(&self, x: u64, radius: u32) -> bool { + if self.nodes.is_empty() { + return false; + } + + let mut stack = vec![0usize]; + while let Some(idx) = stack.pop() { + let node = &self.nodes[idx]; + let d = hamming(x, node.val); + if d <= radius { + return true; + } + let lo = d.saturating_sub(radius); + let hi = d + radius; + for (&dd, &child_idx) in &node.children { + if dd >= lo && dd <= hi { + stack.push(child_idx); + } + } + } + false + } + + /// Return all values within Hamming distance `radius` of `x`. + pub fn find_all_within(&self, x: u64, radius: u32) -> Vec { + if self.nodes.is_empty() { + return Vec::new(); + } + + let mut results = Vec::new(); + let mut stack = vec![0usize]; + while let Some(idx) = stack.pop() { + let node = &self.nodes[idx]; + let d = hamming(x, node.val); + if d <= radius { + results.push(node.val); + } + let lo = d.saturating_sub(radius); + let hi = d + radius; + for (&dd, &child_idx) in &node.children { + if dd >= lo && dd <= hi { + stack.push(child_idx); + } + } + } + results + } + + pub fn len(&self) -> usize { + self.nodes.len() + } + + pub fn is_empty(&self) -> bool { + self.nodes.is_empty() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_empty_tree() { + let tree = BKTreeInner::new(); + assert!(tree.is_empty()); + assert!(!tree.any_within(42, 5)); + assert!(tree.find_all_within(42, 5).is_empty()); + } + + #[test] + fn test_add_and_exact_duplicate() { + let mut tree = BKTreeInner::new(); + assert!(tree.add(100)); + assert!(!tree.add(100)); // exact duplicate + assert_eq!(tree.len(), 1); + } + + #[test] + fn test_any_within() { + let mut tree = BKTreeInner::new(); + // 0b0000 and 0b0011 have Hamming distance 2 + tree.add(0b0000); + assert!(tree.any_within(0b0011, 2)); + assert!(tree.any_within(0b0011, 3)); + assert!(!tree.any_within(0b0011, 1)); + } + + #[test] + fn test_find_all_within() { + let mut tree = BKTreeInner::new(); + tree.add(0b0000); + tree.add(0b0001); // distance 1 from 0b0000 + tree.add(0b0011); // distance 2 from 0b0000 + tree.add(0b0111); // distance 3 from 0b0000 + tree.add(0b1111); // distance 4 from 0b0000 + + let results = tree.find_all_within(0b0000, 2); + assert_eq!(results.len(), 3); // 0b0000, 0b0001, 0b0011 + assert!(results.contains(&0b0000)); + assert!(results.contains(&0b0001)); + assert!(results.contains(&0b0011)); + } + + #[test] + fn test_hamming_distance() { + assert_eq!(hamming(0, 0), 0); + assert_eq!(hamming(0b1111, 0b0000), 4); + assert_eq!(hamming(0b1010, 0b0101), 4); + assert_eq!(hamming(0b1100, 0b1010), 2); + assert_eq!(hamming(u64::MAX, 0), 64); + } +} diff --git a/src/gamecov/__init__.py b/src/gamecov/__init__.py index abc9cd9..49e2298 100644 --- a/src/gamecov/__init__.py +++ b/src/gamecov/__init__.py @@ -1,7 +1,13 @@ from .cov_base import Coverage, CoverageItem, CoverageMonitor from .dedup import dedup_unique_frames from .frame import Frame, HashMethod -from .frame_cov import BKFrameMonitor, FrameCoverage, FrameMonitor, get_frame_cov +from .frame_cov import ( + BKFrameMonitor, + FrameCoverage, + FrameMonitor, + RustBKFrameMonitor, + get_frame_cov, +) from .loader import load_mp4, load_mp4_lazy from .stitch import stitch_images @@ -19,4 +25,5 @@ "Coverage", "CoverageMonitor", "BKFrameMonitor", + "RustBKFrameMonitor", ] diff --git a/src/gamecov/_gamecov_core.cpython-311-x86_64-linux-gnu.so b/src/gamecov/_gamecov_core.cpython-311-x86_64-linux-gnu.so new file mode 100755 index 0000000..948d55a Binary files /dev/null and b/src/gamecov/_gamecov_core.cpython-311-x86_64-linux-gnu.so differ diff --git a/src/gamecov/_gamecov_core.pyi b/src/gamecov/_gamecov_core.pyi new file mode 100644 index 0000000..cfbcd26 --- /dev/null +++ b/src/gamecov/_gamecov_core.pyi @@ -0,0 +1,23 @@ +class BKTree: + def __init__(self) -> None: ... + def add(self, x: int) -> bool: ... + def any_within(self, x: int, radius: int) -> bool: ... + def find_all_within(self, x: int, radius: int) -> list[int]: ... + def __len__(self) -> int: ... + +class UnionFind: + def __init__(self) -> None: ... + def make_set(self, x: int) -> None: ... + def find(self, x: int) -> int: ... + def union(self, a: int, b: int) -> None: ... + @property + def component_count(self) -> int: ... + +class CoverageTracker: + def __init__(self, radius: int) -> None: ... + def add_hash(self, x: int) -> bool: ... + @property + def coverage_count(self) -> int: ... + @property + def total_unique(self) -> int: ... + def reset(self) -> None: ... diff --git a/src/gamecov/frame_cov.py b/src/gamecov/frame_cov.py index 8a2bcca..0c13849 100644 --- a/src/gamecov/frame_cov.py +++ b/src/gamecov/frame_cov.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import hashlib from dataclasses import dataclass, field from typing import Iterable @@ -13,6 +15,15 @@ from .loader import load_mp4_lazy +def _imagehash_to_u64(img_hash: ImageHash) -> int: + """Convert an ImageHash to a u64 integer for the Rust backend.""" + hash_bytes = np.packbits( + np.asarray(img_hash.hash, dtype=np.uint8), + bitorder="big", + ).tobytes() + return int.from_bytes(hash_bytes, "big") + + def _trace_and_unique( frames: Iterable[Frame], threshold: int = RADIUS, @@ -243,14 +254,11 @@ def add_cov(self, cov: Coverage[ImageHash]) -> None: """ self.path_seen.add(cov.path_id) for img_hash in cov.coverage: - hash_bytes = np.packbits( - np.asarray(img_hash.hash, dtype=np.uint8), - bitorder="big", - ).tobytes() - if hash_bytes in self._exact_bytes: + x = _imagehash_to_u64(img_hash) + x_bytes = x.to_bytes(8, "big") + if x_bytes in self._exact_bytes: continue - x = int.from_bytes(hash_bytes, "big") neighbors = self._bktree.find_all_within(x, self.radius) self._uf.make_set(x) @@ -258,7 +266,7 @@ def add_cov(self, cov: Coverage[ImageHash]) -> None: self._uf.union(x, nb) self._bktree.add(x) - self._exact_bytes.add(hash_bytes) + self._exact_bytes.add(x_bytes) self.item_seen.add(img_hash) @property @@ -272,3 +280,55 @@ def reset(self) -> None: self._bktree = _BKTree() self._exact_bytes.clear() self._uf = _UnionFind() + + +class RustBKFrameMonitor(FrameMonitor): + """Rust-accelerated BKFrameMonitor using gamecov-core. + + Behaviorally identical to :class:`BKFrameMonitor` but delegates the + BK-tree, union-find, and coverage tracking to a compiled Rust extension + for significantly higher throughput. + + Requires the ``gamecov-core`` package to be installed. + """ + + def __init__(self, radius: int = RADIUS): + try: + from gamecov import _gamecov_core + except ImportError as exc: + raise ImportError( + "gamecov Rust extension not available. " + "Reinstall gamecov from source with a Rust toolchain." + ) from exc + super().__init__() + self._tracker: _gamecov_core.CoverageTracker = _gamecov_core.CoverageTracker( + radius + ) + self._exact: set[int] = set() + self.radius = radius + + def add_cov(self, cov: Coverage[ImageHash]) -> None: + """Add coverage using Rust-accelerated data structures.""" + self.path_seen.add(cov.path_id) + for img_hash in cov.coverage: + x = _imagehash_to_u64(img_hash) + if x in self._exact: + continue + + self._tracker.add_hash(x) + self._exact.add(x) + self.item_seen.add(img_hash) + + @property + def coverage_count(self) -> int: + """Order-independent coverage from Rust implementation.""" + count: int = self._tracker.coverage_count + return count + + def reset(self) -> None: + """Reset all monitor state.""" + super().reset() + from gamecov import _gamecov_core + + self._tracker = _gamecov_core.CoverageTracker(self.radius) + self._exact.clear() diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..456f929 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,127 @@ +use pyo3::prelude::*; + +pub mod bktree; +pub mod monitor; +pub mod unionfind; + +use bktree::BKTreeInner; +use monitor::CoverageTrackerInner; +use unionfind::UnionFindInner; + +// ── Python wrappers ─────────────────────────────────────────────────────── + +/// BK-tree for Hamming-distance queries on 64-bit perceptual hashes. +#[pyclass] +struct BKTree { + inner: BKTreeInner, +} + +#[pymethods] +impl BKTree { + #[new] + fn new() -> Self { + Self { + inner: BKTreeInner::new(), + } + } + + /// Insert a hash. Returns True if new, False if exact duplicate. + fn add(&mut self, x: u64) -> bool { + self.inner.add(x) + } + + /// Check if any stored hash is within Hamming distance `radius` of `x`. + fn any_within(&self, x: u64, radius: u32) -> bool { + self.inner.any_within(x, radius) + } + + /// Return all stored hashes within Hamming distance `radius` of `x`. + fn find_all_within(&self, x: u64, radius: u32) -> Vec { + self.inner.find_all_within(x, radius) + } + + fn __len__(&self) -> usize { + self.inner.len() + } +} + +/// Disjoint-set (union-find) over u64 keys. +#[pyclass] +struct UnionFind { + inner: UnionFindInner, +} + +#[pymethods] +impl UnionFind { + #[new] + fn new() -> Self { + Self { + inner: UnionFindInner::new(), + } + } + + fn make_set(&mut self, x: u64) { + self.inner.make_set(x) + } + + fn find(&mut self, x: u64) -> u64 { + self.inner.find(x) + } + + #[pyo3(name = "union")] + fn union_sets(&mut self, a: u64, b: u64) { + self.inner.union(a, b) + } + + #[getter] + fn component_count(&self) -> usize { + self.inner.component_count() + } +} + +/// Combined BK-tree + union-find coverage tracker. +/// +/// Drop-in replacement for the hot path of Python's BKFrameMonitor. +#[pyclass] +struct CoverageTracker { + inner: CoverageTrackerInner, +} + +#[pymethods] +impl CoverageTracker { + #[new] + fn new(radius: u32) -> Self { + Self { + inner: CoverageTrackerInner::new(radius), + } + } + + /// Insert a hash. Returns True if the hash was new. + fn add_hash(&mut self, x: u64) -> bool { + self.inner.add_hash(x) + } + + #[getter] + fn coverage_count(&self) -> usize { + self.inner.coverage_count() + } + + #[getter] + fn total_unique(&self) -> usize { + self.inner.total_unique() + } + + fn reset(&mut self) { + self.inner.reset() + } +} + +/// gamecov_core — Rust-accelerated core for gamecov frame coverage monitoring. +#[pymodule] +#[pyo3(name = "_gamecov_core")] +fn gamecov_core(m: &Bound<'_, PyModule>) -> PyResult<()> { + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + Ok(()) +} diff --git a/src/monitor.rs b/src/monitor.rs new file mode 100644 index 0000000..56a7c5a --- /dev/null +++ b/src/monitor.rs @@ -0,0 +1,131 @@ +use std::collections::HashSet; + +use crate::bktree::BKTreeInner; +use crate::unionfind::UnionFindInner; + +/// Combined BK-tree + UnionFind coverage tracker. +/// +/// Mirrors the logic of Python's `BKFrameMonitor.add_cov()`: +/// each new hash is inserted into the BK-tree, all neighbours within +/// `radius` are found, and the hash is unioned with each neighbour. +/// Coverage is measured as the number of connected components. +pub struct CoverageTrackerInner { + bktree: BKTreeInner, + uf: UnionFindInner, + exact: HashSet, + radius: u32, +} + +impl CoverageTrackerInner { + pub fn new(radius: u32) -> Self { + Self { + bktree: BKTreeInner::new(), + uf: UnionFindInner::new(), + exact: HashSet::new(), + radius, + } + } + + /// Insert a hash. Returns true if the hash was new (not an exact duplicate). + pub fn add_hash(&mut self, x: u64) -> bool { + if !self.exact.insert(x) { + return false; // exact duplicate + } + + let neighbors = self.bktree.find_all_within(x, self.radius); + + self.uf.make_set(x); + for nb in &neighbors { + self.uf.union(x, *nb); + } + + self.bktree.add(x); + true + } + + pub fn coverage_count(&self) -> usize { + self.uf.component_count() + } + + pub fn total_unique(&self) -> usize { + self.exact.len() + } + + pub fn reset(&mut self) { + self.bktree = BKTreeInner::new(); + self.uf = UnionFindInner::new(); + self.exact.clear(); + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_empty_tracker() { + let tracker = CoverageTrackerInner::new(5); + assert_eq!(tracker.coverage_count(), 0); + assert_eq!(tracker.total_unique(), 0); + } + + #[test] + fn test_single_hash() { + let mut tracker = CoverageTrackerInner::new(5); + assert!(tracker.add_hash(42)); + assert_eq!(tracker.coverage_count(), 1); + assert_eq!(tracker.total_unique(), 1); + } + + #[test] + fn test_exact_duplicate() { + let mut tracker = CoverageTrackerInner::new(5); + assert!(tracker.add_hash(42)); + assert!(!tracker.add_hash(42)); // duplicate + assert_eq!(tracker.total_unique(), 1); + assert_eq!(tracker.coverage_count(), 1); + } + + #[test] + fn test_nearby_hashes_merge() { + let mut tracker = CoverageTrackerInner::new(2); + // 0b0000 and 0b0001 have Hamming distance 1 (<= radius 2) + tracker.add_hash(0b0000); + tracker.add_hash(0b0001); + assert_eq!(tracker.total_unique(), 2); + assert_eq!(tracker.coverage_count(), 1); // merged into one component + } + + #[test] + fn test_distant_hashes_separate() { + let mut tracker = CoverageTrackerInner::new(1); + // 0b0000 and 0b0111 have Hamming distance 3 (> radius 1) + tracker.add_hash(0b0000); + tracker.add_hash(0b0111); + assert_eq!(tracker.total_unique(), 2); + assert_eq!(tracker.coverage_count(), 2); // separate components + } + + #[test] + fn test_bridging_reduces_components() { + let mut tracker = CoverageTrackerInner::new(1); + // A: 0b0000, B: 0b0011 (distance 2 from A, separate) + // C: 0b0001 (distance 1 from A, distance 1 from B -> bridges them) + tracker.add_hash(0b0000); + tracker.add_hash(0b0011); + assert_eq!(tracker.coverage_count(), 2); + + tracker.add_hash(0b0001); // bridges A and B + assert_eq!(tracker.coverage_count(), 1); + } + + #[test] + fn test_reset() { + let mut tracker = CoverageTrackerInner::new(5); + tracker.add_hash(1); + tracker.add_hash(2); + tracker.reset(); + assert_eq!(tracker.coverage_count(), 0); + assert_eq!(tracker.total_unique(), 0); + } +} diff --git a/src/unionfind.rs b/src/unionfind.rs new file mode 100644 index 0000000..321d0be --- /dev/null +++ b/src/unionfind.rs @@ -0,0 +1,138 @@ +use std::collections::HashMap; + +/// Disjoint-set (union-find) with path compression and union by rank. +/// +/// Maps arbitrary u64 hash values to internal indices for flat-array storage. +pub struct UnionFindInner { + /// Map from external u64 key to internal index. + key_to_idx: HashMap, + /// Map from internal index back to external u64 key. + idx_to_key: Vec, + parent: Vec, + rank: Vec, + count: usize, +} + +impl Default for UnionFindInner { + fn default() -> Self { + Self::new() + } +} + +impl UnionFindInner { + pub fn new() -> Self { + Self { + key_to_idx: HashMap::new(), + idx_to_key: Vec::new(), + parent: Vec::new(), + rank: Vec::new(), + count: 0, + } + } + + /// Register a new element. No-op if already present. + pub fn make_set(&mut self, x: u64) { + if self.key_to_idx.contains_key(&x) { + return; + } + let idx = self.parent.len(); + self.key_to_idx.insert(x, idx); + self.idx_to_key.push(x); + self.parent.push(idx); + self.rank.push(0); + self.count += 1; + } + + /// Find the representative of x (with path splitting). + pub fn find(&mut self, x: u64) -> u64 { + let idx = self.key_to_idx[&x]; + let root = self.find_idx(idx); + self.idx_to_key[root] + } + + fn find_idx(&mut self, mut idx: usize) -> usize { + while self.parent[idx] != idx { + // path splitting: point to grandparent + self.parent[idx] = self.parent[self.parent[idx]]; + idx = self.parent[idx]; + } + idx + } + + /// Union the sets containing a and b. + pub fn union(&mut self, a: u64, b: u64) { + let ia = self.key_to_idx[&a]; + let ib = self.key_to_idx[&b]; + let mut ra = self.find_idx(ia); + let mut rb = self.find_idx(ib); + if ra == rb { + return; + } + if self.rank[ra] < self.rank[rb] { + std::mem::swap(&mut ra, &mut rb); + } + self.parent[rb] = ra; + if self.rank[ra] == self.rank[rb] { + self.rank[ra] += 1; + } + self.count -= 1; + } + + pub fn component_count(&self) -> usize { + self.count + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_empty() { + let uf = UnionFindInner::new(); + assert_eq!(uf.component_count(), 0); + } + + #[test] + fn test_make_set() { + let mut uf = UnionFindInner::new(); + uf.make_set(10); + uf.make_set(20); + uf.make_set(10); // duplicate, no-op + assert_eq!(uf.component_count(), 2); + } + + #[test] + fn test_find_self() { + let mut uf = UnionFindInner::new(); + uf.make_set(42); + assert_eq!(uf.find(42), 42); + } + + #[test] + fn test_union() { + let mut uf = UnionFindInner::new(); + uf.make_set(1); + uf.make_set(2); + uf.make_set(3); + assert_eq!(uf.component_count(), 3); + + uf.union(1, 2); + assert_eq!(uf.component_count(), 2); + assert_eq!(uf.find(1), uf.find(2)); + + uf.union(2, 3); + assert_eq!(uf.component_count(), 1); + assert_eq!(uf.find(1), uf.find(3)); + } + + #[test] + fn test_union_idempotent() { + let mut uf = UnionFindInner::new(); + uf.make_set(1); + uf.make_set(2); + uf.union(1, 2); + uf.union(1, 2); // no-op + assert_eq!(uf.component_count(), 1); + } +} diff --git a/tests/test_rust_frame_monitor.py b/tests/test_rust_frame_monitor.py new file mode 100644 index 0000000..0e3af24 --- /dev/null +++ b/tests/test_rust_frame_monitor.py @@ -0,0 +1,133 @@ +"""Tests for RustBKFrameMonitor: differential correctness and monotonicity.""" +import os +import random +import tempfile + +import pytest +from hypothesis import given, settings, strategies as st, HealthCheck + +pytest.importorskip("gamecov._gamecov_core") + +from gamecov import FrameCoverage, BKFrameMonitor +from gamecov.frame_cov import RustBKFrameMonitor +import gamecov.generator as cg +from gamecov.writer import write_mp4 + +N_MAX = int(os.getenv("N_MAX", 100)) + + +@settings( + deadline=None, + suppress_health_check=(HealthCheck.data_too_large, HealthCheck.too_slow), +) +@given(data=st.data(), n=st.integers(min_value=1, max_value=30)) +def test_differential_python_vs_rust(data, n): + """BKFrameMonitor and RustBKFrameMonitor must produce identical results.""" + created_files: list[str] = [] + covs: list[FrameCoverage] = [] + + for _ in range(n): + frames = data.draw(cg.frames_lists) + with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp_f: + output_path = tmp_f.name + created_files.append(output_path) + write_mp4(frames, output_path) + covs.append(FrameCoverage(output_path)) + + py_monitor = BKFrameMonitor() + rust_monitor = RustBKFrameMonitor() + + for cov in covs: + if not py_monitor.is_seen(cov): + py_monitor.add_cov(cov) + if not rust_monitor.is_seen(cov): + rust_monitor.add_cov(cov) + + assert py_monitor.coverage_count == rust_monitor.coverage_count, ( + f"coverage_count mismatch: Python={py_monitor.coverage_count} " + f"Rust={rust_monitor.coverage_count}" + ) + assert len(py_monitor.item_seen) == len(rust_monitor.item_seen), ( + f"item_seen count mismatch: Python={len(py_monitor.item_seen)} " + f"Rust={len(rust_monitor.item_seen)}" + ) + + for f in created_files: + os.remove(f) + + +@settings( + deadline=None, + suppress_health_check=(HealthCheck.data_too_large, HealthCheck.too_slow), +) +@given(data=st.data(), n=st.integers(min_value=1, max_value=30)) +def test_rust_order_independent_coverage(data, n): + """RustBKFrameMonitor.coverage_count must be order-independent.""" + created_files: list[str] = [] + covs: list[FrameCoverage] = [] + + for _ in range(n): + frames = data.draw(cg.frames_lists) + with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp_f: + output_path = tmp_f.name + created_files.append(output_path) + write_mp4(frames, output_path) + covs.append(FrameCoverage(output_path)) + + # Original order + monitor_a = RustBKFrameMonitor() + for cov in covs: + if not monitor_a.is_seen(cov): + monitor_a.add_cov(cov) + + # Reversed order + monitor_b = RustBKFrameMonitor() + for cov in reversed(covs): + if not monitor_b.is_seen(cov): + monitor_b.add_cov(cov) + + # Shuffled order + shuffled = list(covs) + random.shuffle(shuffled) + monitor_c = RustBKFrameMonitor() + for cov in shuffled: + if not monitor_c.is_seen(cov): + monitor_c.add_cov(cov) + + assert monitor_a.coverage_count == monitor_b.coverage_count + assert monitor_a.coverage_count == monitor_c.coverage_count + assert len(monitor_a.item_seen) == len(monitor_b.item_seen) + assert len(monitor_a.item_seen) == len(monitor_c.item_seen) + + for f in created_files: + os.remove(f) + + +@settings( + deadline=None, + suppress_health_check=(HealthCheck.data_too_large, HealthCheck.too_slow), +) +@given(data=st.data(), n=st.integers(min_value=1, max_value=N_MAX)) +def test_rust_monotone(data, n): + """len(item_seen) must be monotonically non-decreasing for RustBKFrameMonitor.""" + monitor = RustBKFrameMonitor() + prev_item_count = 0 + created_files = [] + for _ in range(n): + frames = data.draw(cg.frames_lists) + + with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp_f: + output_path = tmp_f.name + created_files.append(output_path) + write_mp4(frames, output_path) + cov = FrameCoverage(output_path) + if not monitor.is_seen(cov): + monitor.add_cov(cov) + + assert len(monitor.item_seen) >= prev_item_count, ( + "item_seen count should not decrease" + ) + prev_item_count = len(monitor.item_seen) + + for f in created_files: + os.remove(f) diff --git a/uv.lock b/uv.lock index 0526a09..b82d9bf 100644 --- a/uv.lock +++ b/uv.lock @@ -213,7 +213,6 @@ dependencies = [ { name = "hypothesis" }, { name = "imagehash" }, { name = "imageio", extra = ["ffmpeg"] }, - { name = "numba" }, { name = "numpy" }, { name = "opencv-python" }, { name = "pillow" }, @@ -229,6 +228,7 @@ dependencies = [ dev = [ { name = "mypy" }, { name = "pre-commit" }, + { name = "pytest-benchmark" }, { name = "pytest-cov" }, { name = "pytest-profiling" }, { name = "pytest-xdist" }, @@ -242,7 +242,6 @@ requires-dist = [ { name = "hypothesis", specifier = ">=6.136.7" }, { name = "imagehash", specifier = ">=4.3.2" }, { name = "imageio", extras = ["ffmpeg"], specifier = ">=2.37.0" }, - { name = "numba", specifier = ">=0.61.2" }, { name = "numpy", specifier = ">=2.0.0" }, { name = "opencv-python", specifier = ">=4.11.0.86" }, { name = "pillow", specifier = ">=11.3.0" }, @@ -258,6 +257,7 @@ requires-dist = [ dev = [ { name = "mypy", specifier = ">=1.15.0" }, { name = "pre-commit", specifier = ">=4.5.1" }, + { name = "pytest-benchmark", specifier = ">=5.1.0" }, { name = "pytest-cov", specifier = ">=6.2.1" }, { name = "pytest-profiling", specifier = ">=1.8.1" }, { name = "pytest-xdist", specifier = ">=3.8.0" }, @@ -691,6 +691,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/50/1b/6921afe68c74868b4c9fa424dad3be35b095e16687989ebbb50ce4fceb7c/psutil-7.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:4cf3d4eb1aa9b348dec30105c55cd9b7d4629285735a102beb4441e38db90553", size = 244885 }, ] +[[package]] +name = "py-cpuinfo" +version = "9.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/37/a8/d832f7293ebb21690860d2e01d8115e5ff6f2ae8bbdc953f0eb0fa4bd2c7/py-cpuinfo-9.0.0.tar.gz", hash = "sha256:3cdbbf3fac90dc6f118bfd64384f309edeadd902d7c8fb17f02ffa1fc3f49690", size = 104716 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e0/a9/023730ba63db1e494a271cb018dcd361bd2c917ba7004c3e49d5daf795a2/py_cpuinfo-9.0.0-py3-none-any.whl", hash = "sha256:859625bc251f64e21f077d099d4162689c762b5d6a4c3c97553d56241c9674d5", size = 22335 }, +] + [[package]] name = "pygments" version = "2.19.2" @@ -716,6 +725,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/29/16/c8a903f4c4dffe7a12843191437d7cd8e32751d5de349d45d3fe69544e87/pytest-8.4.1-py3-none-any.whl", hash = "sha256:539c70ba6fcead8e78eebbf1115e8b589e7565830d7d006a8723f19ac8a0afb7", size = 365474 }, ] +[[package]] +name = "pytest-benchmark" +version = "5.2.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "py-cpuinfo" }, + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/24/34/9f732b76456d64faffbef6232f1f9dbec7a7c4999ff46282fa418bd1af66/pytest_benchmark-5.2.3.tar.gz", hash = "sha256:deb7317998a23c650fd4ff76e1230066a76cb45dcece0aca5607143c619e7779", size = 341340 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/33/29/e756e715a48959f1c0045342088d7ca9762a2f509b945f362a316e9412b7/pytest_benchmark-5.2.3-py3-none-any.whl", hash = "sha256:bc839726ad20e99aaa0d11a127445457b4219bdb9e80a1afc4b51da7f96b0803", size = 45255 }, +] + [[package]] name = "pytest-cov" version = "6.2.1"