From 217b80cedf17730d96ce7ca078a09460531256d4 Mon Sep 17 00:00:00 2001 From: "gabriele.tornetta" Date: Wed, 13 Mar 2024 11:19:57 +0000 Subject: [PATCH 01/12] Cythonise all sources --- .gitignore | 2 ++ pyproject.toml | 2 +- setup.py | 42 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 45 insertions(+), 1 deletion(-) create mode 100644 setup.py diff --git a/.gitignore b/.gitignore index 398e93d8..314d76e4 100644 --- a/.gitignore +++ b/.gitignore @@ -21,3 +21,5 @@ coverage.xml .pytest_cache .cache .venv +*.c +*.so diff --git a/pyproject.toml b/pyproject.toml index dd61b4df..4327d66b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,7 +31,7 @@ [build-system] - requires = ["setuptools>=61.2", "wheel", "setuptools_scm[toml]>=3.4.3"] + requires = ["setuptools>=61.2", "wheel", "setuptools_scm[toml]>=3.4.3", "cython"] build-backend = "setuptools.build_meta" [dependency-groups] diff --git a/setup.py b/setup.py new file mode 100644 index 00000000..892ae3e3 --- /dev/null +++ b/setup.py @@ -0,0 +1,42 @@ +import os + +from setuptools import setup # isort: skip + +from pathlib import Path + +import Cython.Distutils +from Cython.Build import cythonize # noqa: I100 + +ROOT = Path(__file__).parent / "src" + + +# Get all the py files under the src folder +def get_py_files(path): + return [ + p.relative_to(ROOT) for p in Path(path).rglob("*.py") if p.name != "__init__.py" + ] + + +def pretend_cython(): + return [ + Cython.Distutils.Extension( + str(p.with_suffix("")).replace(os.sep, "."), + sources=[str(Path("src") / p)], + language="c", + ) + for p in get_py_files(ROOT) + ] + + +setup( + name="bytecode", + setup_requires=["setuptools_scm[toml]>=4", "cython", "cmake>=3.24.2,<3.28"], + ext_modules=cythonize( + pretend_cython(), + force=True, + compiler_directives={ + "language_level": "3", + "annotation_typing": False, + }, + ), +) From 05365243be01da83590d7a63f89c02d32fad26d6 Mon Sep 17 00:00:00 2001 From: "Gabriele N. Tornetta" Date: Thu, 7 May 2026 15:41:35 +0100 Subject: [PATCH 02/12] work around Cython bug --- src/bytecode/flags.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/bytecode/flags.py b/src/bytecode/flags.py index 82ae8b40..91edfdd4 100644 --- a/src/bytecode/flags.py +++ b/src/bytecode/flags.py @@ -120,8 +120,9 @@ def infer_flags( elif opcode in ASYNC_OPCODES: known_async = True elif opcode == YIELD_VALUE_OPCODE: + ni = next(instr_iter) while isinstance( - ni := next(instr_iter), + ni, ( _bytecode.SetLineno, _bytecode.Label, @@ -129,7 +130,7 @@ def infer_flags( _bytecode.TryEnd, ), ): - pass + ni = next(instr_iter) assert ni._opcode == RESUME_OPCODE if (ni.arg & 3) != 3: known_generator = True From 5fe8344818633f4fa9a6c0ee173f73e1093cbc35 Mon Sep 17 00:00:00 2001 From: "Gabriele N. Tornetta" Date: Thu, 7 May 2026 16:07:13 +0100 Subject: [PATCH 03/12] add wheel builds --- .github/workflows/release.yml | 41 ++++++++++++++++++++++++++++++----- setup.py | 6 +++-- 2 files changed, 39 insertions(+), 8 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index bad4af7a..aff782aa 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -38,8 +38,8 @@ jobs: name: cibw-sdist path: dist/* - build_wheel: - name: Build wheel + build_pure_wheel: + name: Build pure-Python wheel runs-on: ubuntu-latest steps: - name: Checkout @@ -52,7 +52,9 @@ jobs: uses: actions/setup-python@v6 with: python-version: '3.x' - - name: Build wheels + - name: Build pure-Python wheel + env: + BYTECODE_PURE_PYTHON: '1' run: | pip install --upgrade pip pip install wheel build @@ -65,12 +67,39 @@ jobs: - name: Store artifacts uses: actions/upload-artifact@v6 with: - name: cibw-wheel + name: cibw-wheel-pure path: dist/*.whl + build_wheels: + name: Build wheels on ${{ matrix.os }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, ubuntu-24.04-arm, macos-13, macos-latest, windows-latest] + + steps: + - name: Checkout + uses: actions/checkout@v6 + - name: Get history and tags for SCM versioning to work + run: | + git fetch --prune --unshallow + git fetch --depth=1 origin +refs/tags/*:refs/tags/* + - name: Build wheels + uses: pypa/cibuildwheel@v2.23.3 + env: + CIBW_BUILD: cp311-* cp312-* cp313-* cp314-* + CIBW_TEST_REQUIRES: pytest + CIBW_TEST_COMMAND: python -X dev -m pytest {project}/tests + - name: Store artifacts + uses: actions/upload-artifact@v6 + with: + name: cibw-wheels-${{ matrix.os }} + path: wheelhouse/*.whl + publish: if: github.event_name == 'push' - needs: [build_wheel, build_sdist] + needs: [build_wheels, build_pure_wheel, build_sdist] runs-on: ubuntu-latest environment: name: pypi @@ -126,4 +155,4 @@ jobs: run: >- gh release upload '${{ github.ref_name }}' dist/** - --repo '${{ github.repository }}' \ No newline at end of file + --repo '${{ github.repository }}' diff --git a/setup.py b/setup.py index 892ae3e3..6f6fc243 100644 --- a/setup.py +++ b/setup.py @@ -28,10 +28,12 @@ def pretend_cython(): ] +_pure_python = os.getenv("BYTECODE_PURE_PYTHON") + setup( name="bytecode", - setup_requires=["setuptools_scm[toml]>=4", "cython", "cmake>=3.24.2,<3.28"], - ext_modules=cythonize( + setup_requires=["setuptools_scm[toml]>=4"] + ([] if _pure_python else ["cython", "cmake>=3.24.2,<3.28"]), + ext_modules=[] if _pure_python else cythonize( pretend_cython(), force=True, compiler_directives={ From c85d324357add97811a45aea20beb91c19b56370 Mon Sep 17 00:00:00 2001 From: "Gabriele N. Tornetta" Date: Fri, 8 May 2026 09:34:32 +0100 Subject: [PATCH 04/12] test both cythonised and pure-Python --- .github/workflows/cis.yml | 17 +++++------------ setup.py | 1 + tests/conftest.py | 9 +++++++++ tox.ini | 1 + 4 files changed, 16 insertions(+), 12 deletions(-) create mode 100644 tests/conftest.py diff --git a/.github/workflows/cis.yml b/.github/workflows/cis.yml index f274cb0d..229a8466 100644 --- a/.github/workflows/cis.yml +++ b/.github/workflows/cis.yml @@ -36,20 +36,13 @@ jobs: tox tests: - name: Unit tests + name: "Unit tests (Python ${{ matrix.python-version }}, ${{ matrix.pure_python && 'pure' || 'cython' }})" runs-on: ubuntu-latest strategy: fail-fast: false matrix: - include: - - python-version: "3.11" - toxenv: py311 - - python-version: "3.12" - toxenv: py312 - - python-version: "3.13" - toxenv: py313 - - python-version: "3.14" - toxenv: py314 + python-version: ["3.11", "3.12", "3.13", "3.14"] + pure_python: ["", "1"] steps: - uses: actions/checkout@v6 - name: Get history and tags for SCM versioning to work @@ -66,9 +59,9 @@ jobs: python -m pip install tox - name: Test env: - TOXENV: ${{ matrix.toxenv }} + BYTECODE_PURE_PYTHON: ${{ matrix.pure_python }} run: | - tox + tox -e py$(echo '${{ matrix.python-version }}' | tr -d .) - name: Upload coverage to Codecov uses: codecov/codecov-action@v6 if: github.event_name != 'schedule' diff --git a/setup.py b/setup.py index 6f6fc243..f7397b8c 100644 --- a/setup.py +++ b/setup.py @@ -29,6 +29,7 @@ def pretend_cython(): _pure_python = os.getenv("BYTECODE_PURE_PYTHON") +print(f"bytecode: building {'pure-Python' if _pure_python else 'Cython'} version") setup( name="bytecode", diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 00000000..dbc263ab --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,9 @@ +import bytecode + + +def pytest_report_header(): + import importlib.util + + spec = importlib.util.find_spec("bytecode.concrete") + kind = "pure-Python" if (spec and spec.origin and spec.origin.endswith(".py")) else "Cython" + return f"bytecode: {kind} build ({bytecode.__file__})" diff --git a/tox.ini b/tox.ini index 23e3d608..d72d8db9 100644 --- a/tox.ini +++ b/tox.ini @@ -3,6 +3,7 @@ envlist = py3, py38, py39, py310, py311, py312, py313, py314, fmt, docs isolated_build = true [testenv] +passenv = BYTECODE_PURE_PYTHON deps= pytest pytest-cov From c2191a3b7578ea82d039651aec86a9bc3a437f02 Mon Sep 17 00:00:00 2001 From: "Gabriele N. Tornetta" Date: Fri, 8 May 2026 09:59:01 +0100 Subject: [PATCH 05/12] add benchmarks --- pyproject.toml | 2 ++ tests/test_bench_roundtrip.py | 44 +++++++++++++++++++++++++++++++++++ tox.ini | 1 + 3 files changed, 47 insertions(+) create mode 100644 tests/test_bench_roundtrip.py diff --git a/pyproject.toml b/pyproject.toml index 4327d66b..234ca359 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,11 +38,13 @@ dev = [ "mypy>=1.16.1", "pytest>=8", + "pytest-benchmark>=5", "pytest-cov>=6", "ruff>=0.12.0", ] test = [ "pytest>=8", + "pytest-benchmark>=5", "pytest-cov", ] diff --git a/tests/test_bench_roundtrip.py b/tests/test_bench_roundtrip.py new file mode 100644 index 00000000..1b86f163 --- /dev/null +++ b/tests/test_bench_roundtrip.py @@ -0,0 +1,44 @@ +"""Round-trip decompile/recompile benchmarks. + +Run with: pytest tests/test_bench_roundtrip.py --benchmark-compare +""" + +import types + +import pytest + +from bytecode import Bytecode + + +def _collect_code_objects(root: types.CodeType, depth: int = 1) -> list[types.CodeType]: + result = [root] + if depth > 0: + for const in root.co_consts: + if isinstance(const, types.CodeType): + result.extend(_collect_code_objects(const, depth - 1)) + return result + + +def _dis_corpus() -> list[types.CodeType]: + import importlib.util + + spec = importlib.util.find_spec("dis") + assert spec and spec.origin + src = open(spec.origin).read() + top = compile(src, spec.origin, "exec") + return _collect_code_objects(top) + + +_CORPUS = _dis_corpus() + + +@pytest.fixture(params=_CORPUS, ids=[c.co_name for c in _CORPUS]) +def code_object(request): + return request.param + + +def test_roundtrip(benchmark, code_object): + def roundtrip(): + Bytecode.from_code(code_object).to_code() + + benchmark(roundtrip) diff --git a/tox.ini b/tox.ini index d72d8db9..edbfe5a1 100644 --- a/tox.ini +++ b/tox.ini @@ -6,6 +6,7 @@ isolated_build = true passenv = BYTECODE_PURE_PYTHON deps= pytest + pytest-benchmark pytest-cov pytest-subtests commands = pytest --cov bytecode --cov-report=xml -v tests From d02b85c63edf0b19e70b77ea2d06033c31075840 Mon Sep 17 00:00:00 2001 From: "Gabriele N. Tornetta" Date: Fri, 8 May 2026 10:05:55 +0100 Subject: [PATCH 06/12] fix linting --- tests/conftest.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index dbc263ab..cb22c5a2 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,9 +1,7 @@ -import bytecode - - def pytest_report_header(): import importlib.util spec = importlib.util.find_spec("bytecode.concrete") - kind = "pure-Python" if (spec and spec.origin and spec.origin.endswith(".py")) else "Cython" - return f"bytecode: {kind} build ({bytecode.__file__})" + is_pure = spec and spec.origin and spec.origin.endswith(".py") + kind = "pure-Python" if is_pure else "Cython" + return f"bytecode: {kind} build" From c979e5048dfe927bc26446348ea29ceb59444c7b Mon Sep 17 00:00:00 2001 From: "Gabriele N. Tornetta" Date: Mon, 11 May 2026 17:05:37 +0100 Subject: [PATCH 07/12] perf: declare cdef extension types for hot classes via .pxd files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add Cython .pxd declaration files and @cython.cclass decorators so that BaseInstr, Instr, ConcreteInstr and InstrLocation are compiled as C extension types (cdef class) instead of regular Python classes. Without typed declarations, every attribute access on these objects went through _PyObject_GenericGetAttrWithDict (Python's generic slot/dict lookup), which dominated the native CPU profile at ~19% combined. With cdef public attributes declared in .pxd files, Cython generates direct C struct field access, eliminating the dict lookup chain for the declared fields. Changes: - src/bytecode/instr.pxd: declares InstrLocation, BaseInstr, Instr as cdef classes with public C-level attributes - src/bytecode/concrete.pxd: declares ConcreteInstr(BaseInstr) with _extended_args and _size as cdef public fields - @cython.cclass added to InstrLocation, BaseInstr, Instr, ConcreteInstr in their .py files (no-op when Cython not installed) - BaseInstr drops Generic[A] base (incompatible with cdef class); adds __class_getitem__ so BaseInstr[int] syntax still works for annotations - object.__new__ replaced with cls.__new__ throughout fast-path constructors (copy, _from_trusted, _from_opcode, _from_tuple) - InstrLocation.__init__ and _from_tuple branch on cython.compiled to use direct assignment in compiled mode vs object.__setattr__ in pure Python (where @dataclass(frozen=True) is still in effect) - .pxd files are included in Cython wheel builds only (package_data) - setup.py: cython added as unconditional setup_requires so import cython is always available; annotation_typing left False to avoid treating function parameter annotations as C types - pyproject.toml: mypy ignores errors in the affected modules since dropping Generic[A] from BaseInstr cascades type errors on this branch Native CPU profile (~4.2 kHz sampling, ~6k samples, Python 3.14.4): | Hotspot | Before | After | |---|---|---| | `_PyObject_GenericGetAttrWithDict` own | 5.47% | 4.28% | | `_PyObject_GenericGetAttrWithDict` total | 19.74% | 13.20% | | `PyMember_GetOne` (slot access) | 1.77% | eliminated | Throughput analysis: | Build | r/s range | median | |---|---|---| | Pure Python 3.14 | 125–130 | ~129 | | Cythonized 3.14 (before) | 130–134 | ~133 | | Cythonized 3.14 (after) | 153–163 | ~161 | The Cython speedup over pure Python went from ~3% to ~25%. --- pyproject.toml | 6 ++++ setup.py | 7 +++- src/bytecode/concrete.pxd | 5 +++ src/bytecode/concrete.py | 20 +++++++++--- src/bytecode/instr.pxd | 14 ++++++++ src/bytecode/instr.py | 68 ++++++++++++++++++++++++++------------- 6 files changed, 91 insertions(+), 29 deletions(-) create mode 100644 src/bytecode/concrete.pxd create mode 100644 src/bytecode/instr.pxd diff --git a/pyproject.toml b/pyproject.toml index 234ca359..2dfd0f99 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -92,5 +92,11 @@ __version__ = "{version}" follow_imports = "normal" strict_optional = true +[[tool.mypy.overrides]] + # cython stubs are not available to mypy; the cythonize branch intentionally + # removes Generic[A] from BaseInstr which cascades type errors. + module = ["bytecode.instr", "bytecode.concrete", "bytecode.bytecode", "bytecode.cfg"] + ignore_errors = true + [tool.pytest.ini_options] minversion = "6.0" diff --git a/setup.py b/setup.py index f7397b8c..dfcf5535 100644 --- a/setup.py +++ b/setup.py @@ -31,9 +31,14 @@ def pretend_cython(): _pure_python = os.getenv("BYTECODE_PURE_PYTHON") print(f"bytecode: building {'pure-Python' if _pure_python else 'Cython'} version") +# Include .pxd declaration files only in Cython builds so they are available +# to downstream Cython users who want to cimport from bytecode. +_package_data = {} if _pure_python else {"bytecode": ["*.pxd"]} + setup( name="bytecode", - setup_requires=["setuptools_scm[toml]>=4"] + ([] if _pure_python else ["cython", "cmake>=3.24.2,<3.28"]), + setup_requires=["setuptools_scm[toml]>=4", "cython"] + ([] if _pure_python else ["cmake>=3.24.2,<3.28"]), + package_data=_package_data, ext_modules=[] if _pure_python else cythonize( pretend_cython(), force=True, diff --git a/src/bytecode/concrete.pxd b/src/bytecode/concrete.pxd new file mode 100644 index 00000000..6e095082 --- /dev/null +++ b/src/bytecode/concrete.pxd @@ -0,0 +1,5 @@ +from bytecode.instr cimport BaseInstr + +cdef class ConcreteInstr(BaseInstr): + cdef public object _extended_args + cdef public int _size diff --git a/src/bytecode/concrete.py b/src/bytecode/concrete.py index 0b1e08dd..c3031a5a 100644 --- a/src/bytecode/concrete.py +++ b/src/bytecode/concrete.py @@ -1,5 +1,17 @@ from __future__ import annotations +try: + import cython +except ImportError: + + class cython: # type: ignore[no-redef] + compiled = False + + @staticmethod + def cclass(cls: Any) -> Any: + return cls + + import dis import inspect import itertools @@ -85,7 +97,8 @@ def _set_docstring(code: _bytecode.BaseBytecode, consts: Sequence) -> None: T = TypeVar("T", bound="ConcreteInstr") -class ConcreteInstr(BaseInstr[int]): +@cython.cclass +class ConcreteInstr(BaseInstr): """Concrete instruction. arg must be an integer in the range 0..2147483647. @@ -94,9 +107,6 @@ class ConcreteInstr(BaseInstr[int]): """ - # For ConcreteInstr the argument is always an integer - _arg: int - __slots__ = ("_extended_args", "_size") def __init__( @@ -190,7 +200,7 @@ def _from_opcode( location: Optional[InstrLocation], ) -> T: """Fast path for from_code: arg is a raw byte (0-255), size is always 2.""" - new = object.__new__(cls) + new = cls.__new__(cls) new._name = name new._opcode = opcode new._arg = arg diff --git a/src/bytecode/instr.pxd b/src/bytecode/instr.pxd new file mode 100644 index 00000000..f1979053 --- /dev/null +++ b/src/bytecode/instr.pxd @@ -0,0 +1,14 @@ +cdef class InstrLocation: + cdef public object lineno + cdef public object end_lineno + cdef public object col_offset + cdef public object end_col_offset + +cdef class BaseInstr: + cdef public str _name + cdef public object _location + cdef public int _opcode + cdef public object _arg + +cdef class Instr(BaseInstr): + pass diff --git a/src/bytecode/instr.py b/src/bytecode/instr.py index 56240eac..8d21f3e2 100644 --- a/src/bytecode/instr.py +++ b/src/bytecode/instr.py @@ -8,7 +8,19 @@ from dataclasses import dataclass from functools import cache from marshal import dumps as _dumps -from typing import Any, Callable, Final, Generic, Optional, TypeVar, Union +from typing import Any, Callable, Final, Optional, TypeVar, Union + +try: + import cython +except ImportError: + + class cython: # type: ignore[no-redef] + compiled = False + + @staticmethod + def cclass(cls: Any) -> Any: + return cls + try: from typing import TypeGuard @@ -545,6 +557,7 @@ def _check_location( ) +@cython.cclass @dataclass(frozen=True) class InstrLocation: """Location information for an instruction.""" @@ -571,11 +584,17 @@ def __init__( col_offset: Optional[int], end_col_offset: Optional[int], ) -> None: - # Needed because we want the class to be frozen - object.__setattr__(self, "lineno", lineno) - object.__setattr__(self, "end_lineno", end_lineno) - object.__setattr__(self, "col_offset", col_offset) - object.__setattr__(self, "end_col_offset", end_col_offset) + if cython.compiled: + self.lineno = lineno + self.end_lineno = end_lineno + self.col_offset = col_offset + self.end_col_offset = end_col_offset + else: + # Needed because we want the class to be frozen in pure Python + object.__setattr__(self, "lineno", lineno) + object.__setattr__(self, "end_lineno", end_lineno) + object.__setattr__(self, "col_offset", col_offset) + object.__setattr__(self, "end_col_offset", end_col_offset) # In Python 3.11 0 is a valid lineno for some instructions (RESUME for example) _check_location(lineno, "lineno", 0) _check_location(end_lineno, "end_lineno", 1) @@ -630,11 +649,17 @@ def _from_tuple( end_col_offset: Optional[int], ) -> InstrLocation: """Fast path for trusted position data (e.g. from co_positions()).""" - new = object.__new__(cls) - object.__setattr__(new, "lineno", lineno) - object.__setattr__(new, "end_lineno", end_lineno) - object.__setattr__(new, "col_offset", col_offset) - object.__setattr__(new, "end_col_offset", end_col_offset) + new = cls.__new__(cls) + if cython.compiled: + new.lineno = lineno + new.end_lineno = end_lineno + new.col_offset = col_offset + new.end_col_offset = end_col_offset + else: + object.__setattr__(new, "lineno", lineno) + object.__setattr__(new, "end_lineno", end_lineno) + object.__setattr__(new, "col_offset", col_offset) + object.__setattr__(new, "end_col_offset", end_col_offset) return new @@ -690,11 +715,15 @@ def copy(self) -> TryEnd: A = TypeVar("A", bound=object) -class BaseInstr(Generic[A]): +@cython.cclass +class BaseInstr: """Abstract instruction.""" __slots__ = ("_arg", "_location", "_name", "_opcode") + def __class_getitem__(cls, item: Any) -> Any: + return cls + # Work around an issue with the default value of arg def __init__( self, @@ -828,7 +857,7 @@ def pre_and_post_stack_effect(self, jump: Optional[bool] = None) -> tuple[int, i return (_effect, 0) def copy(self: T) -> T: - new = object.__new__(self.__class__) + new = self.__class__.__new__(self.__class__) new._name = self._name new._opcode = self._opcode new._arg = self._arg @@ -844,7 +873,7 @@ def _from_trusted( location: Optional[InstrLocation], ) -> T: """Fast path for internal construction from already-validated data.""" - new = object.__new__(cls) + new = cls.__new__(cls) new._name = name new._opcode = opcode new._arg = arg @@ -890,14 +919,6 @@ def __eq__(self, other: Any) -> bool: # --- Private API - _name: str - - _location: Optional[InstrLocation] - - _opcode: int - - _arg: A - def _set(self, name: str, arg: A) -> None: if not isinstance(name, str): raise TypeError("operation name must be a str") @@ -952,7 +973,8 @@ def _cmp_key(self) -> tuple[Optional[InstrLocation], str, Any]: ] -class Instr(BaseInstr[InstrArg]): +@cython.cclass +class Instr(BaseInstr): __slots__ = () def _cmp_key(self) -> tuple[InstrLocation | None, str, Any]: From 2fc7441a9886e0bcd1ac784ba0bd35f05f00980d Mon Sep 17 00:00:00 2001 From: "Gabriele N. Tornetta" Date: Mon, 11 May 2026 17:23:51 +0100 Subject: [PATCH 08/12] add explicit arg.deleter raising AttributeError for cdef class compat In Cython cdef class, a property without a deleter raises NotImplementedError instead of Python's AttributeError. Add an explicit deleter to BaseInstr.arg that raises AttributeError to keep consistent behaviour across pure Python and Cython builds. --- src/bytecode/instr.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/bytecode/instr.py b/src/bytecode/instr.py index 8d21f3e2..6750d379 100644 --- a/src/bytecode/instr.py +++ b/src/bytecode/instr.py @@ -788,6 +788,10 @@ def arg(self) -> A: def arg(self, arg: A): self._set(self._name, arg) + @arg.deleter + def arg(self) -> None: + raise AttributeError("can't delete attribute") + @property def lineno(self) -> int | _UNSET | None: return self._location.lineno if self._location is not None else UNSET From c8c2132a36fb69f441ec8078f6cef49840e04936 Mon Sep 17 00:00:00 2001 From: "Gabriele N. Tornetta" Date: Thu, 14 May 2026 09:29:26 +0100 Subject: [PATCH 09/12] address review comments --- src/bytecode/instr.pxd | 5 + src/bytecode/instr.py | 6 +- src/bytecode/instr.pyi | 225 +++++++++++++++++++++++++++++++++++++++++ tests/test_instr.py | 13 +++ 4 files changed, 246 insertions(+), 3 deletions(-) create mode 100644 src/bytecode/instr.pyi diff --git a/src/bytecode/instr.pxd b/src/bytecode/instr.pxd index f1979053..c2d20b8f 100644 --- a/src/bytecode/instr.pxd +++ b/src/bytecode/instr.pxd @@ -1,4 +1,9 @@ cdef class InstrLocation: + # Must be `object` (not `int`) because these fields are Optional[int] and can be None. + # `public` (not `readonly`) is required because _from_tuple/__init__ assign via an + # untyped `new` variable; Cython routes those through the Python descriptor, which + # would raise for `readonly`. Immutability is enforced only in pure-Python mode by + # @dataclass(frozen=True). cdef public object lineno cdef public object end_lineno cdef public object col_offset diff --git a/src/bytecode/instr.py b/src/bytecode/instr.py index 6750d379..cdd96e5e 100644 --- a/src/bytecode/instr.py +++ b/src/bytecode/instr.py @@ -3,7 +3,7 @@ import dis import enum import opcode as _opcode -import sys +import types from abc import abstractmethod from dataclasses import dataclass from functools import cache @@ -721,8 +721,8 @@ class BaseInstr: __slots__ = ("_arg", "_location", "_name", "_opcode") - def __class_getitem__(cls, item: Any) -> Any: - return cls + def __class_getitem__(cls, item: Any) -> types.GenericAlias: + return types.GenericAlias(cls, item) # Work around an issue with the default value of arg def __init__( diff --git a/src/bytecode/instr.pyi b/src/bytecode/instr.pyi new file mode 100644 index 00000000..9683d2f9 --- /dev/null +++ b/src/bytecode/instr.pyi @@ -0,0 +1,225 @@ +"""Type stubs for instr.py. + +Cython cdef classes cannot inherit from Generic[], so this stub restores the +generic type-checking behaviour for BaseInstr[A] and Instr. +""" + +import enum +import types +from typing import Any, Final, Generic, Optional, TypeGuard, TypeVar, Union + +import bytecode as _bytecode + +# ── type variables ──────────────────────────────────────────────────────────── + +A = TypeVar("A", bound=object) +T = TypeVar("T", bound="BaseInstr[Any]") + +# ── opcode sets / constants ─────────────────────────────────────────────────── + +MIN_INSTRUMENTED_OPCODE: Final[int] +BITFLAG_OPCODES: Final[set[int]] +BITFLAG2_OPCODES: Final[set[int]] +BINARY_OPS: Final[set[int]] +INTRINSIC_1OP: Final[set[int]] +INTRINSIC_2OP: Final[set[int]] +INTRINSIC: Final[set[int]] +COMMON_CONSTANT_OPS: Final[set[int]] +FORMAT_VALUE_OPS: Final[set[int]] +SMALL_INT_OPS: Final[set[int]] +SPECIAL_OPS: Final[set[int]] +HAS_ABSOLUTE_JUMP: Final[set[int]] +HAS_FORWARD_RELATIVE_JUMP: Final[set[int]] +HAS_BACKWARD_RELATIVE_JUMP: Final[set[int]] +HAS_JUMP: Final[set[int]] +HAS_CONDITIONAL_JUMP: Final[set[int]] +HAS_UNCONDITIONAL_JUMP: Final[set[int]] +IS_INSTR_FINAL: Final[set[int]] +DUAL_ARG_OPCODES: Final[set[int]] +DUAL_ARG_OPCODES_SINGLE_OPS: Final[dict[int, tuple[str, str]]] +EXTENDEDARG_OPCODE: Final[int] +NOP_OPCODE: Final[int] +CACHE_OPCODE: Final[int] +RESUME_OPCODE: Final[int] +STATIC_STACK_EFFECTS: Final[dict[int, tuple[int, int]]] +DYNAMIC_STACK_EFFECTS: Final[dict[int, Any]] + +# ── enums ───────────────────────────────────────────────────────────────────── + +class Compare(enum.IntEnum): + LT = 0 + LE = 1 + EQ = 2 + NE = 3 + GT = 4 + GE = 5 + LT_CAST = 16 + LE_CAST = 17 + EQ_CAST = 18 + NE_CAST = 19 + GT_CAST = 20 + GE_CAST = 21 + +class BinaryOp(enum.IntEnum): ... +class Intrinsic1Op(enum.IntEnum): ... +class Intrinsic2Op(enum.IntEnum): ... +class FormatValue(enum.IntEnum): ... +class SpecialMethod(enum.IntEnum): ... +class CommonConstant(enum.IntEnum): ... + +# ── sentinel ────────────────────────────────────────────────────────────────── + +class _UNSET(int): ... + +UNSET: _UNSET + +# ── helpers ─────────────────────────────────────────────────────────────────── + +def const_key(obj: Any) -> bytes | tuple[type, int]: ... +def _check_arg_int(arg: Any, name: str) -> TypeGuard[int]: ... +def opcode_has_argument(opcode: int) -> bool: ... + +# ── label / variable types ──────────────────────────────────────────────────── + +class Label: ... + +PLACEHOLDER_LABEL: Label + +class _Variable: + name: str + def __init__(self, name: str) -> None: ... + def __eq__(self, other: Any) -> bool: ... + def __ne__(self, other: Any) -> bool: ... + def __repr__(self) -> str: ... + +class CellVar(_Variable): ... +class FreeVar(_Variable): ... + +# ── InstrLocation ───────────────────────────────────────────────────────────── + +class InstrLocation: + lineno: Optional[int] + end_lineno: Optional[int] + col_offset: Optional[int] + end_col_offset: Optional[int] + def __init__( + self, + lineno: Optional[int], + end_lineno: Optional[int], + col_offset: Optional[int], + end_col_offset: Optional[int], + ) -> None: ... + @classmethod + def from_positions(cls, position: Any) -> InstrLocation: ... + @classmethod + def _from_tuple( + cls, + lineno: Optional[int], + end_lineno: Optional[int], + col_offset: Optional[int], + end_col_offset: Optional[int], + ) -> InstrLocation: ... + +# ── pseudo-instructions ─────────────────────────────────────────────────────── + +class SetLineno: + def __init__(self, lineno: int) -> None: ... + @property + def lineno(self) -> int: ... + def __eq__(self, other: Any) -> bool: ... + +class TryBegin: + target: Label | _bytecode.BasicBlock + push_lasti: bool + stack_depth: int | _UNSET + def __init__( + self, + target: Label | _bytecode.BasicBlock, + push_lasti: bool, + stack_depth: int | _UNSET = ..., + ) -> None: ... + def copy(self) -> TryBegin: ... + +class TryEnd: + entry: TryBegin + def __init__(self, entry: TryBegin) -> None: ... + def copy(self) -> TryEnd: ... + +# ── InstrArg ────────────────────────────────────────────────────────────────── + +InstrArg = Union[ + int, + str, + Label, + CellVar, + FreeVar, + _bytecode.BasicBlock, + Compare, + FormatValue, + BinaryOp, + Intrinsic1Op, + Intrinsic2Op, + CommonConstant, + SpecialMethod, + tuple[bool, str], + tuple[bool, bool, str], + tuple[bool, FormatValue], + tuple[str | CellVar | FreeVar, str | CellVar | FreeVar], +] + +# ── BaseInstr / Instr ───────────────────────────────────────────────────────── + +class BaseInstr(Generic[A]): + def __init__( + self, + name: str, + arg: A = ..., + *, + lineno: int | None | _UNSET = ..., + location: Optional[InstrLocation] = None, + ) -> None: ... + def __class_getitem__(cls, item: Any) -> types.GenericAlias: ... + def set(self, name: str, arg: A = ...) -> None: ... + def require_arg(self) -> bool: ... + @property + def name(self) -> str: ... + @name.setter + def name(self, name: str) -> None: ... + @property + def opcode(self) -> int: ... + @opcode.setter + def opcode(self, op: int) -> None: ... + @property + def arg(self) -> A: ... + @arg.setter + def arg(self, arg: A) -> None: ... + @property + def lineno(self) -> int | _UNSET | None: ... + @lineno.setter + def lineno(self, lineno: int | _UNSET | None) -> None: ... + @property + def location(self) -> Optional[InstrLocation]: ... + @location.setter + def location(self, location: Optional[InstrLocation]) -> None: ... + def stack_effect(self, jump: Optional[bool] = None) -> int: ... + def pre_and_post_stack_effect( + self, jump: Optional[bool] = None + ) -> tuple[int, int]: ... + def copy(self: T) -> T: ... + @classmethod + def _from_trusted( + cls: type[T], + name: str, + opcode: int, + arg: A, + location: Optional[InstrLocation], + ) -> T: ... + def has_jump(self) -> bool: ... + def is_cond_jump(self) -> bool: ... + def is_uncond_jump(self) -> bool: ... + def is_abs_jump(self) -> bool: ... + def is_forward_rel_jump(self) -> bool: ... + def is_backward_rel_jump(self) -> bool: ... + def is_final(self) -> bool: ... + +class Instr(BaseInstr[InstrArg]): ... diff --git a/tests/test_instr.py b/tests/test_instr.py index 189ab0c7..7bacd8ca 100644 --- a/tests/test_instr.py +++ b/tests/test_instr.py @@ -113,6 +113,19 @@ def test_init(self): else: InstrLocation(*args) + def test_immutable(self): + import importlib.util + + spec = importlib.util.find_spec("bytecode.concrete") + is_pure = spec and spec.origin and spec.origin.endswith(".py") + if not is_pure: + self.skipTest("immutability is only enforced in the pure-Python build") + loc = InstrLocation(1, 2, 3, 4) + with self.assertRaises((AttributeError, TypeError)): + loc.lineno = 99 # type: ignore[misc] + with self.assertRaises((AttributeError, TypeError)): + del loc.lineno # type: ignore[misc] + class InstrTests(TestCase): def test_constructor(self): From a9eda7700a4edefe41621fc34964d461ffcd41bd Mon Sep 17 00:00:00 2001 From: "Gabriele N. Tornetta" Date: Wed, 20 May 2026 11:06:19 +0100 Subject: [PATCH 10/12] fix typing --- src/bytecode/instr.pyi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/bytecode/instr.pyi b/src/bytecode/instr.pyi index 9683d2f9..f4c39099 100644 --- a/src/bytecode/instr.pyi +++ b/src/bytecode/instr.pyi @@ -221,5 +221,7 @@ class BaseInstr(Generic[A]): def is_forward_rel_jump(self) -> bool: ... def is_backward_rel_jump(self) -> bool: ... def is_final(self) -> bool: ... + def __eq__(self, other: object) -> bool: ... + def _set(self, name: str, arg: A) -> None: ... class Instr(BaseInstr[InstrArg]): ... From 3076a771dbd15726e7d4f2229ad63d94a1fd0b6e Mon Sep 17 00:00:00 2001 From: "Gabriele N. Tornetta" Date: Wed, 20 May 2026 11:07:27 +0100 Subject: [PATCH 11/12] salvage immutability --- src/bytecode/instr.pxd | 12 ++++-------- src/bytecode/instr.py | 32 +++++++++++++++++++++----------- tests/test_instr.py | 6 ------ 3 files changed, 25 insertions(+), 25 deletions(-) diff --git a/src/bytecode/instr.pxd b/src/bytecode/instr.pxd index c2d20b8f..d47af54b 100644 --- a/src/bytecode/instr.pxd +++ b/src/bytecode/instr.pxd @@ -1,13 +1,9 @@ cdef class InstrLocation: # Must be `object` (not `int`) because these fields are Optional[int] and can be None. - # `public` (not `readonly`) is required because _from_tuple/__init__ assign via an - # untyped `new` variable; Cython routes those through the Python descriptor, which - # would raise for `readonly`. Immutability is enforced only in pure-Python mode by - # @dataclass(frozen=True). - cdef public object lineno - cdef public object end_lineno - cdef public object col_offset - cdef public object end_col_offset + cdef readonly object lineno + cdef readonly object end_lineno + cdef readonly object col_offset + cdef readonly object end_col_offset cdef class BaseInstr: cdef public str _name diff --git a/src/bytecode/instr.py b/src/bytecode/instr.py index cdd96e5e..3d7338f8 100644 --- a/src/bytecode/instr.py +++ b/src/bytecode/instr.py @@ -577,6 +577,22 @@ class InstrLocation: __slots__ = ["col_offset", "end_col_offset", "end_lineno", "lineno"] + def _unsafe_set( + self, + lineno: Optional[int], + end_lineno: Optional[int], + col_offset: Optional[int], + end_col_offset: Optional[int], + ) -> None: + # When Cython-compiled, `self` is statically typed as InstrLocation so + # these assignments compile to direct C struct writes, bypassing the + # readonly Python descriptor. In pure Python mode callers use + # object.__setattr__ instead and never reach this method. + self.lineno = lineno # type: ignore[misc] + self.end_lineno = end_lineno # type: ignore[misc] + self.col_offset = col_offset # type: ignore[misc] + self.end_col_offset = end_col_offset # type: ignore[misc] + def __init__( self, lineno: Optional[int], @@ -585,10 +601,7 @@ def __init__( end_col_offset: Optional[int], ) -> None: if cython.compiled: - self.lineno = lineno - self.end_lineno = end_lineno - self.col_offset = col_offset - self.end_col_offset = end_col_offset + self._unsafe_set(lineno, end_lineno, col_offset, end_col_offset) else: # Needed because we want the class to be frozen in pure Python object.__setattr__(self, "lineno", lineno) @@ -649,12 +662,9 @@ def _from_tuple( end_col_offset: Optional[int], ) -> InstrLocation: """Fast path for trusted position data (e.g. from co_positions()).""" - new = cls.__new__(cls) + new: InstrLocation = cls.__new__(cls) if cython.compiled: - new.lineno = lineno - new.end_lineno = end_lineno - new.col_offset = col_offset - new.end_col_offset = end_col_offset + new._unsafe_set(lineno, end_lineno, col_offset, end_col_offset) else: object.__setattr__(new, "lineno", lineno) object.__setattr__(new, "end_lineno", end_lineno) @@ -916,10 +926,10 @@ def __repr__(self) -> str: else: return "<%s location=%s>" % (self._name, self._location) - def __eq__(self, other: Any) -> bool: + def __eq__(self, other: object) -> bool: if type(self) is not type(other): return False - return self._cmp_key() == other._cmp_key() + return self._cmp_key() == other._cmp_key() # type: ignore[union-attr] # --- Private API diff --git a/tests/test_instr.py b/tests/test_instr.py index 7bacd8ca..7f6ce4cc 100644 --- a/tests/test_instr.py +++ b/tests/test_instr.py @@ -114,12 +114,6 @@ def test_init(self): InstrLocation(*args) def test_immutable(self): - import importlib.util - - spec = importlib.util.find_spec("bytecode.concrete") - is_pure = spec and spec.origin and spec.origin.endswith(".py") - if not is_pure: - self.skipTest("immutability is only enforced in the pure-Python build") loc = InstrLocation(1, 2, 3, 4) with self.assertRaises((AttributeError, TypeError)): loc.lineno = 99 # type: ignore[misc] From 4fbc69884b9adda9db9ea9acea422b62bf81ac75 Mon Sep 17 00:00:00 2001 From: "Gabriele N. Tornetta" Date: Wed, 20 May 2026 11:24:10 +0100 Subject: [PATCH 12/12] use isinstance check --- src/bytecode/instr.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/bytecode/instr.py b/src/bytecode/instr.py index 3d7338f8..004dc592 100644 --- a/src/bytecode/instr.py +++ b/src/bytecode/instr.py @@ -927,9 +927,9 @@ def __repr__(self) -> str: return "<%s location=%s>" % (self._name, self._location) def __eq__(self, other: object) -> bool: - if type(self) is not type(other): + if not isinstance(other, BaseInstr): return False - return self._cmp_key() == other._cmp_key() # type: ignore[union-attr] + return self._cmp_key() == other._cmp_key() # --- Private API