From 037d1a8ceedec5eb72223b76a23a39150e8dffd8 Mon Sep 17 00:00:00 2001 From: Anuj Bharambe Date: Fri, 8 May 2026 23:23:54 +0530 Subject: [PATCH 1/3] gh-149564: Propagate call-site hotness to callees via dynamic exits --- Lib/test/test_capi/test_opt.py | 35 +++++++++++++++++++ ...05-08-00-00-00.gh-issue-149564.c37SnPN.rst | 2 ++ Python/bytecodes.c | 16 ++++++--- Python/executor_cases.c.h | 35 ++++++++++++++----- 4 files changed, 76 insertions(+), 12 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2026-05-08-00-00-00.gh-issue-149564.c37SnPN.rst diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 2f606c2c6eba2d..8e525ed0020ecf 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -6152,6 +6152,41 @@ def __exit__(self, e, v, t): ... f1() """), PYTHON_JIT="1") + def test_dynamic_exit_boosts_resume(self): + # gh-149564: When a hot loop calls many distinct exec()-generated + # functions, _DYNAMIC_EXIT should boost the callee's RESUME counter + # so it gets traced sooner. + script_helper.assert_python_ok("-s", "-c", textwrap.dedent(f""" + import _opcode + + ns = {{}} + for i in range(20): + exec(f"def fn_{{i}}(x): return x + {{i}}", ns) + + fns = [ns[f'fn_{{i}}'] for i in range(20)] + + # Hot loop calling many exec'd functions triggers dynamic exits + for _ in range({TIER2_THRESHOLD + 100}): + for fn in fns: + fn(42) + + # At least some callees should have gotten their own executors + # thanks to the counter boost on dynamic exit + count = 0 + for fn in fns: + code = fn.__code__ + co_code = code.co_code + for i in range(0, len(co_code), 2): + try: + _opcode.get_executor(code, i) + count += 1 + break + except ValueError: + pass + + assert count > 0, f"Expected at least one callee to get an executor, got {{count}}" + """), PYTHON_JIT="1") + def global_identity(x): return x diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-05-08-00-00-00.gh-issue-149564.c37SnPN.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-05-08-00-00-00.gh-issue-149564.c37SnPN.rst new file mode 100644 index 00000000000000..1da7bc6375d15e --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-05-08-00-00-00.gh-issue-149564.c37SnPN.rst @@ -0,0 +1,2 @@ +The JIT now traces ``exec()``-generated functions called from hot loops by +propagating call-site hotness to callees via dynamic exits. diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 3bd489122da9d4..94a8970ba5c86f 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -6190,9 +6190,9 @@ dummy_func( } tier2 op(_DYNAMIC_EXIT, (exit_p/4 --)) { + _Py_CODEUNIT *target = frame->instr_ptr; #if defined(Py_DEBUG) && !defined(_Py_JIT) _PyExitData *exit = (_PyExitData *)exit_p; - _Py_CODEUNIT *target = frame->instr_ptr; OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); if (frame->lltrace >= 3) { printf("DYNAMIC EXIT: [UOp "); @@ -6203,9 +6203,13 @@ dummy_func( _PyOpcode_OpName[target->op.code]); } #endif - // Disabled for now (gh-139109) as it slows down dynamic code tremendously. - // Compile and jump to the cold dynamic executors in the future. - GOTO_TIER_ONE(frame->instr_ptr); + // gh-149564: Propagate call-site hotness to callees. + // If we're landing on a callee's RESUME, boost its counter so it + // gets traced sooner (it was called from a hot trace). + if (target->op.code == RESUME_CHECK_JIT) { + target[1].counter = trigger_backoff_counter(); + } + GOTO_TIER_ONE(target); } tier2 op(_CHECK_VALIDITY, (--)) { @@ -6330,6 +6334,10 @@ dummy_func( SYNC_SP(); // TODO (gh-139109): This should be similar to _COLD_EXIT in the future. _Py_CODEUNIT *target = frame->instr_ptr; + // gh-149564: Propagate call-site hotness to callees. + if (target->op.code == RESUME_CHECK_JIT) { + target[1].counter = trigger_backoff_counter(); + } GOTO_TIER_ONE(target); Py_UNREACHABLE(); } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index b6a2821db3007e..c12e3cad3b6adb 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -22746,9 +22746,9 @@ CHECK_CURRENT_CACHED_VALUES(0); assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE()); PyObject *exit_p = (PyObject *)CURRENT_OPERAND0_64(); + _Py_CODEUNIT *target = frame->instr_ptr; #if defined(Py_DEBUG) && !defined(_Py_JIT) _PyExitData *exit = (_PyExitData *)exit_p; - _Py_CODEUNIT *target = frame->instr_ptr; OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); if (frame->lltrace >= 3) { _PyFrame_SetStackPointer(frame, stack_pointer); @@ -22761,8 +22761,12 @@ stack_pointer = _PyFrame_GetStackPointer(frame); } #endif + + if (target->op.code == RESUME_CHECK_JIT) { + target[1].counter = trigger_backoff_counter(); + } SET_CURRENT_CACHED_VALUES(0); - GOTO_TIER_ONE(frame->instr_ptr); + GOTO_TIER_ONE(target); } case _DYNAMIC_EXIT_r10: { @@ -22770,9 +22774,9 @@ assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE()); _PyStackRef _stack_item_0 = _tos_cache0; PyObject *exit_p = (PyObject *)CURRENT_OPERAND0_64(); + _Py_CODEUNIT *target = frame->instr_ptr; #if defined(Py_DEBUG) && !defined(_Py_JIT) _PyExitData *exit = (_PyExitData *)exit_p; - _Py_CODEUNIT *target = frame->instr_ptr; OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); if (frame->lltrace >= 3) { stack_pointer[0] = _stack_item_0; @@ -22789,11 +22793,15 @@ stack_pointer += -1; } #endif + + if (target->op.code == RESUME_CHECK_JIT) { + target[1].counter = trigger_backoff_counter(); + } SET_CURRENT_CACHED_VALUES(0); stack_pointer[0] = _stack_item_0; stack_pointer += 1; ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); - GOTO_TIER_ONE(frame->instr_ptr); + GOTO_TIER_ONE(target); } case _DYNAMIC_EXIT_r20: { @@ -22802,9 +22810,9 @@ _PyStackRef _stack_item_0 = _tos_cache0; _PyStackRef _stack_item_1 = _tos_cache1; PyObject *exit_p = (PyObject *)CURRENT_OPERAND0_64(); + _Py_CODEUNIT *target = frame->instr_ptr; #if defined(Py_DEBUG) && !defined(_Py_JIT) _PyExitData *exit = (_PyExitData *)exit_p; - _Py_CODEUNIT *target = frame->instr_ptr; OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); if (frame->lltrace >= 3) { stack_pointer[0] = _stack_item_0; @@ -22822,12 +22830,16 @@ stack_pointer += -2; } #endif + + if (target->op.code == RESUME_CHECK_JIT) { + target[1].counter = trigger_backoff_counter(); + } SET_CURRENT_CACHED_VALUES(0); stack_pointer[0] = _stack_item_0; stack_pointer[1] = _stack_item_1; stack_pointer += 2; ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); - GOTO_TIER_ONE(frame->instr_ptr); + GOTO_TIER_ONE(target); } case _DYNAMIC_EXIT_r30: { @@ -22837,9 +22849,9 @@ _PyStackRef _stack_item_1 = _tos_cache1; _PyStackRef _stack_item_2 = _tos_cache2; PyObject *exit_p = (PyObject *)CURRENT_OPERAND0_64(); + _Py_CODEUNIT *target = frame->instr_ptr; #if defined(Py_DEBUG) && !defined(_Py_JIT) _PyExitData *exit = (_PyExitData *)exit_p; - _Py_CODEUNIT *target = frame->instr_ptr; OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); if (frame->lltrace >= 3) { stack_pointer[0] = _stack_item_0; @@ -22858,13 +22870,17 @@ stack_pointer += -3; } #endif + + if (target->op.code == RESUME_CHECK_JIT) { + target[1].counter = trigger_backoff_counter(); + } SET_CURRENT_CACHED_VALUES(0); stack_pointer[0] = _stack_item_0; stack_pointer[1] = _stack_item_1; stack_pointer[2] = _stack_item_2; stack_pointer += 3; ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); - GOTO_TIER_ONE(frame->instr_ptr); + GOTO_TIER_ONE(target); } case _CHECK_VALIDITY_r00: { @@ -23685,6 +23701,9 @@ CHECK_CURRENT_CACHED_VALUES(0); assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE()); _Py_CODEUNIT *target = frame->instr_ptr; + if (target->op.code == RESUME_CHECK_JIT) { + target[1].counter = trigger_backoff_counter(); + } SET_CURRENT_CACHED_VALUES(0); GOTO_TIER_ONE(target); } From da5d05481aba35e1f7a8cd6935816308153da56c Mon Sep 17 00:00:00 2001 From: Anuj Bharambe Date: Sat, 9 May 2026 08:17:59 +0530 Subject: [PATCH 2/3] gh-149564: Add _COLD_EXIT callee RESUME boost --- Lib/test/test_capi/test_opt.py | 4 ++-- Python/bytecodes.c | 18 ++++++++++++++++++ Python/executor_cases.c.h | 18 ++++++++++++++++++ 3 files changed, 38 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 8e525ed0020ecf..b206fad0966019 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -6154,8 +6154,8 @@ def __exit__(self, e, v, t): ... def test_dynamic_exit_boosts_resume(self): # gh-149564: When a hot loop calls many distinct exec()-generated - # functions, _DYNAMIC_EXIT should boost the callee's RESUME counter - # so it gets traced sooner. + # functions, _COLD_EXIT should boost the callee's RESUME counter + # so it gets traced sooner (propagate call-site hotness to callees). script_helper.assert_python_ok("-s", "-c", textwrap.dedent(f""" import _opcode diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 94a8970ba5c86f..8c29399a19c266 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -6308,6 +6308,24 @@ dummy_func( } else { SYNC_SP(); + // gh-149564: Propagate call-site hotness to callees. + // If exiting to a CALL instruction, boost the callee's RESUME + // counter so it gets traced sooner (called from a hot trace). + { + uint8_t target_deopt = _PyOpcode_Deopt[target->op.code]; + if (target_deopt == CALL || target_deopt == CALL_KW) { + int oparg = target->op.arg; + _PyStackRef callable_ref = stack_pointer[-(oparg + 2)]; + PyObject *callable = PyStackRef_AsPyObjectBorrow(callable_ref); + if (PyFunction_Check(callable)) { + PyCodeObject *code = (PyCodeObject *)((PyFunctionObject *)callable)->func_code; + _Py_CODEUNIT *resume_instr = _PyCode_CODE(code); + if (resume_instr->op.code == RESUME_CHECK_JIT) { + resume_instr[1].counter = trigger_backoff_counter(); + } + } + } + } if (!backoff_counter_triggers(temperature)) { exit->temperature = advance_backoff_counter(temperature); GOTO_TIER_ONE(target); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index c12e3cad3b6adb..6f96fc524dc23d 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -23679,6 +23679,24 @@ TIER2_TO_TIER2(exit->executor); } else { + // gh-149564: Propagate call-site hotness to callees. + // If exiting to a CALL instruction, boost the callee's RESUME + // counter so it gets traced sooner (called from a hot trace). + { + uint8_t target_deopt = _PyOpcode_Deopt[target->op.code]; + if (target_deopt == CALL || target_deopt == CALL_KW) { + int oparg = target->op.arg; + _PyStackRef callable_ref = stack_pointer[-(oparg + 2)]; + PyObject *callable = PyStackRef_AsPyObjectBorrow(callable_ref); + if (PyFunction_Check(callable)) { + PyCodeObject *code = (PyCodeObject *)((PyFunctionObject *)callable)->func_code; + _Py_CODEUNIT *resume_instr = _PyCode_CODE(code); + if (resume_instr->op.code == RESUME_CHECK_JIT) { + resume_instr[1].counter = trigger_backoff_counter(); + } + } + } + } if (!backoff_counter_triggers(temperature)) { exit->temperature = advance_backoff_counter(temperature); SET_CURRENT_CACHED_VALUES(0); From 8933f44d5cb10d1333ae6b076dfc48f5fa8819d8 Mon Sep 17 00:00:00 2001 From: Anuj Bharambe Date: Sat, 9 May 2026 10:14:07 +0530 Subject: [PATCH 3/3] gh-149564: Fix _COLD_EXIT boost - rename oparg, drop CALL_KW --- Python/bytecodes.c | 21 +++++++++------------ Python/executor_cases.c.h | 21 +++++++++------------ 2 files changed, 18 insertions(+), 24 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 8c29399a19c266..efc98ab15c756c 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -6311,18 +6311,15 @@ dummy_func( // gh-149564: Propagate call-site hotness to callees. // If exiting to a CALL instruction, boost the callee's RESUME // counter so it gets traced sooner (called from a hot trace). - { - uint8_t target_deopt = _PyOpcode_Deopt[target->op.code]; - if (target_deopt == CALL || target_deopt == CALL_KW) { - int oparg = target->op.arg; - _PyStackRef callable_ref = stack_pointer[-(oparg + 2)]; - PyObject *callable = PyStackRef_AsPyObjectBorrow(callable_ref); - if (PyFunction_Check(callable)) { - PyCodeObject *code = (PyCodeObject *)((PyFunctionObject *)callable)->func_code; - _Py_CODEUNIT *resume_instr = _PyCode_CODE(code); - if (resume_instr->op.code == RESUME_CHECK_JIT) { - resume_instr[1].counter = trigger_backoff_counter(); - } + if (_PyOpcode_Deopt[target->op.code] == CALL) { + int call_oparg = target->op.arg; + _PyStackRef callable_ref = stack_pointer[-(call_oparg + 2)]; + PyObject *callable = PyStackRef_AsPyObjectBorrow(callable_ref); + if (PyFunction_Check(callable)) { + PyCodeObject *code = (PyCodeObject *)((PyFunctionObject *)callable)->func_code; + _Py_CODEUNIT *resume_instr = _PyCode_CODE(code); + if (resume_instr->op.code == RESUME_CHECK_JIT) { + resume_instr[1].counter = trigger_backoff_counter(); } } } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 6f96fc524dc23d..c396c5dc68de22 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -23682,18 +23682,15 @@ // gh-149564: Propagate call-site hotness to callees. // If exiting to a CALL instruction, boost the callee's RESUME // counter so it gets traced sooner (called from a hot trace). - { - uint8_t target_deopt = _PyOpcode_Deopt[target->op.code]; - if (target_deopt == CALL || target_deopt == CALL_KW) { - int oparg = target->op.arg; - _PyStackRef callable_ref = stack_pointer[-(oparg + 2)]; - PyObject *callable = PyStackRef_AsPyObjectBorrow(callable_ref); - if (PyFunction_Check(callable)) { - PyCodeObject *code = (PyCodeObject *)((PyFunctionObject *)callable)->func_code; - _Py_CODEUNIT *resume_instr = _PyCode_CODE(code); - if (resume_instr->op.code == RESUME_CHECK_JIT) { - resume_instr[1].counter = trigger_backoff_counter(); - } + if (_PyOpcode_Deopt[target->op.code] == CALL) { + int call_oparg = target->op.arg; + _PyStackRef callable_ref = stack_pointer[-(call_oparg + 2)]; + PyObject *callable = PyStackRef_AsPyObjectBorrow(callable_ref); + if (PyFunction_Check(callable)) { + PyCodeObject *code = (PyCodeObject *)((PyFunctionObject *)callable)->func_code; + _Py_CODEUNIT *resume_instr = _PyCode_CODE(code); + if (resume_instr->op.code == RESUME_CHECK_JIT) { + resume_instr[1].counter = trigger_backoff_counter(); } } }