diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index ce6324d0a8e0b9..80c11b753be7e6 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1094,7 +1094,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [BINARY_OP_ADD_FLOAT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG }, [BINARY_OP_ADD_INT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG }, [BINARY_OP_ADD_UNICODE] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG }, - [BINARY_OP_EXTEND] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [BINARY_OP_EXTEND] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [BINARY_OP_INPLACE_ADD_UNICODE] = { true, INSTR_FMT_IXC0000, HAS_LOCAL_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [BINARY_OP_MULTIPLY_FLOAT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG }, [BINARY_OP_MULTIPLY_INT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG }, @@ -1347,7 +1347,7 @@ _PyOpcode_macro_expansion[256] = { [BINARY_OP_ADD_FLOAT] = { .nuops = 5, .uops = { { _GUARD_TOS_FLOAT, OPARG_SIMPLE, 0 }, { _GUARD_NOS_FLOAT, OPARG_SIMPLE, 0 }, { _BINARY_OP_ADD_FLOAT, OPARG_SIMPLE, 5 }, { _POP_TOP_FLOAT, OPARG_SIMPLE, 5 }, { _POP_TOP_FLOAT, OPARG_SIMPLE, 5 } } }, [BINARY_OP_ADD_INT] = { .nuops = 5, .uops = { { _GUARD_TOS_INT, OPARG_SIMPLE, 0 }, { _GUARD_NOS_INT, OPARG_SIMPLE, 0 }, { _BINARY_OP_ADD_INT, OPARG_SIMPLE, 5 }, { _POP_TOP_INT, OPARG_SIMPLE, 5 }, { _POP_TOP_INT, OPARG_SIMPLE, 5 } } }, [BINARY_OP_ADD_UNICODE] = { .nuops = 5, .uops = { { _GUARD_TOS_UNICODE, OPARG_SIMPLE, 0 }, { _GUARD_NOS_UNICODE, OPARG_SIMPLE, 0 }, { _BINARY_OP_ADD_UNICODE, OPARG_SIMPLE, 5 }, { _POP_TOP_UNICODE, OPARG_SIMPLE, 5 }, { _POP_TOP_UNICODE, OPARG_SIMPLE, 5 } } }, - [BINARY_OP_EXTEND] = { .nuops = 2, .uops = { { _GUARD_BINARY_OP_EXTEND, 4, 1 }, { _BINARY_OP_EXTEND, 4, 1 } } }, + [BINARY_OP_EXTEND] = { .nuops = 4, .uops = { { _GUARD_BINARY_OP_EXTEND, 4, 1 }, { _BINARY_OP_EXTEND, 4, 1 }, { _POP_TOP, OPARG_SIMPLE, 5 }, { _POP_TOP, OPARG_SIMPLE, 5 } } }, [BINARY_OP_INPLACE_ADD_UNICODE] = { .nuops = 3, .uops = { { _GUARD_TOS_UNICODE, OPARG_SIMPLE, 0 }, { _GUARD_NOS_UNICODE, OPARG_SIMPLE, 0 }, { _BINARY_OP_INPLACE_ADD_UNICODE, OPARG_SIMPLE, 5 } } }, [BINARY_OP_MULTIPLY_FLOAT] = { .nuops = 5, .uops = { { _GUARD_TOS_FLOAT, OPARG_SIMPLE, 0 }, { _GUARD_NOS_FLOAT, OPARG_SIMPLE, 0 }, { _BINARY_OP_MULTIPLY_FLOAT, OPARG_SIMPLE, 5 }, { _POP_TOP_FLOAT, OPARG_SIMPLE, 5 }, { _POP_TOP_FLOAT, OPARG_SIMPLE, 5 } } }, [BINARY_OP_MULTIPLY_INT] = { .nuops = 5, .uops = { { _GUARD_TOS_INT, OPARG_SIMPLE, 0 }, { _GUARD_NOS_INT, OPARG_SIMPLE, 0 }, { _BINARY_OP_MULTIPLY_INT, OPARG_SIMPLE, 5 }, { _POP_TOP_INT, OPARG_SIMPLE, 5 }, { _POP_TOP_INT, OPARG_SIMPLE, 5 } } }, diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index 8fd7cef3368e13..d23b447fb518f8 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -379,7 +379,7 @@ extern "C" { #define _BINARY_OP_ADD_UNICODE_r03 576 #define _BINARY_OP_ADD_UNICODE_r13 577 #define _BINARY_OP_ADD_UNICODE_r23 578 -#define _BINARY_OP_EXTEND_r21 579 +#define _BINARY_OP_EXTEND_r23 579 #define _BINARY_OP_INPLACE_ADD_UNICODE_r21 580 #define _BINARY_OP_MULTIPLY_FLOAT_r03 581 #define _BINARY_OP_MULTIPLY_FLOAT_r13 582 diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 6398448d5faece..4b8b9fc235d77f 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -115,7 +115,7 @@ const uint32_t _PyUop_Flags[MAX_UOP_ID+1] = { [_BINARY_OP_ADD_UNICODE] = HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_PURE_FLAG, [_BINARY_OP_INPLACE_ADD_UNICODE] = HAS_LOCAL_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_GUARD_BINARY_OP_EXTEND] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, - [_BINARY_OP_EXTEND] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_BINARY_OP_EXTEND] = HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, [_BINARY_SLICE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_STORE_SLICE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_BINARY_OP_SUBSCR_LIST_INT] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, @@ -1101,7 +1101,7 @@ const _PyUopCachingInfo _PyUop_Caching[MAX_UOP_ID+1] = { .entries = { { -1, -1, -1 }, { -1, -1, -1 }, - { 1, 2, _BINARY_OP_EXTEND_r21 }, + { 3, 2, _BINARY_OP_EXTEND_r23 }, { -1, -1, -1 }, }, }, @@ -3568,7 +3568,7 @@ const uint16_t _PyUop_Uncached[MAX_UOP_REGS_ID+1] = { [_BINARY_OP_ADD_UNICODE_r23] = _BINARY_OP_ADD_UNICODE, [_BINARY_OP_INPLACE_ADD_UNICODE_r21] = _BINARY_OP_INPLACE_ADD_UNICODE, [_GUARD_BINARY_OP_EXTEND_r22] = _GUARD_BINARY_OP_EXTEND, - [_BINARY_OP_EXTEND_r21] = _BINARY_OP_EXTEND, + [_BINARY_OP_EXTEND_r23] = _BINARY_OP_EXTEND, [_BINARY_SLICE_r31] = _BINARY_SLICE, [_STORE_SLICE_r30] = _STORE_SLICE, [_BINARY_OP_SUBSCR_LIST_INT_r23] = _BINARY_OP_SUBSCR_LIST_INT, @@ -4097,7 +4097,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_REGS_ID+1] = { [_BINARY_OP_ADD_UNICODE_r13] = "_BINARY_OP_ADD_UNICODE_r13", [_BINARY_OP_ADD_UNICODE_r23] = "_BINARY_OP_ADD_UNICODE_r23", [_BINARY_OP_EXTEND] = "_BINARY_OP_EXTEND", - [_BINARY_OP_EXTEND_r21] = "_BINARY_OP_EXTEND_r21", + [_BINARY_OP_EXTEND_r23] = "_BINARY_OP_EXTEND_r23", [_BINARY_OP_INPLACE_ADD_UNICODE] = "_BINARY_OP_INPLACE_ADD_UNICODE", [_BINARY_OP_INPLACE_ADD_UNICODE_r21] = "_BINARY_OP_INPLACE_ADD_UNICODE_r21", [_BINARY_OP_MULTIPLY_FLOAT] = "_BINARY_OP_MULTIPLY_FLOAT", diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 79c7f530b8ae89..00dbbb65776058 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -2880,6 +2880,23 @@ def testfunc(n): self.assertIn("_POP_TOP_NOP", uops) self.assertLessEqual(count_ops(ex, "_POP_TOP"), 2) + def test_binary_op_extend_float_long_add_refcount_elimination(self): + def testfunc(n): + a = 1.5 + b = 2 + res = 0.0 + for _ in range(n): + res = a + b + return res + + res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) + self.assertEqual(res, 3.5) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + self.assertIn("_BINARY_OP_EXTEND", uops) + self.assertIn("_POP_TOP_NOP", uops) + self.assertLessEqual(count_ops(ex, "_POP_TOP"), 2) + def test_remove_guard_for_slice_list(self): def f(n): for i in range(n): diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-01-19-01-26-12.gh-issue-144005.Z3O33m.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-01-19-01-26-12.gh-issue-144005.Z3O33m.rst new file mode 100644 index 00000000000000..b3582197f45dda --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-01-19-01-26-12.gh-issue-144005.Z3O33m.rst @@ -0,0 +1 @@ +Eliminate redundant refcounting from ``BINARY_OP_EXTEND``. diff --git a/Modules/_testinternalcapi/test_cases.c.h b/Modules/_testinternalcapi/test_cases.c.h index c02d236fc3e8ac..a7d589dbe7b274 100644 --- a/Modules/_testinternalcapi/test_cases.c.h +++ b/Modules/_testinternalcapi/test_cases.c.h @@ -317,6 +317,9 @@ _PyStackRef left; _PyStackRef right; _PyStackRef res; + _PyStackRef l; + _PyStackRef r; + _PyStackRef value; /* Skip 1 cache entry */ // _GUARD_BINARY_OP_EXTEND { @@ -348,25 +351,32 @@ STAT_INC(BINARY_OP, hit); _PyFrame_SetStackPointer(frame, stack_pointer); PyObject *res_o = d->action(left_o, right_o); - _PyStackRef tmp = right; - right = PyStackRef_NULL; - stack_pointer[-1] = right; - PyStackRef_CLOSE(tmp); - tmp = left; - left = PyStackRef_NULL; - stack_pointer[-2] = left; - PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -2; - ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (res_o == NULL) { JUMP_TO_LABEL(error); } res = PyStackRef_FromPyObjectSteal(res_o); + l = left; + r = right; + } + // _POP_TOP + { + value = r; + stack_pointer[-2] = res; + stack_pointer[-1] = l; + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_XCLOSE(value); + stack_pointer = _PyFrame_GetStackPointer(frame); + } + // _POP_TOP + { + value = l; + stack_pointer += -1; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_XCLOSE(value); + stack_pointer = _PyFrame_GetStackPointer(frame); } - stack_pointer[0] = res; - stack_pointer += 1; - ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 5f2461df8e672c..f7551bfbf3c2db 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -829,7 +829,7 @@ dummy_func( DEOPT_IF(!res); } - op(_BINARY_OP_EXTEND, (descr/4, left, right -- res)) { + op(_BINARY_OP_EXTEND, (descr/4, left, right -- res, l, r)) { PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); assert(INLINE_CACHE_ENTRIES_BINARY_OP == 5); @@ -838,13 +838,18 @@ dummy_func( STAT_INC(BINARY_OP, hit); PyObject *res_o = d->action(left_o, right_o); - DECREF_INPUTS(); - ERROR_IF(res_o == NULL); + if (res_o == NULL) { + ERROR_NO_POP(); + } res = PyStackRef_FromPyObjectSteal(res_o); + l = left; + r = right; + DEAD(left); + DEAD(right); } macro(BINARY_OP_EXTEND) = - unused/1 + _GUARD_BINARY_OP_EXTEND + rewind/-4 + _BINARY_OP_EXTEND; + unused/1 + _GUARD_BINARY_OP_EXTEND + rewind/-4 + _BINARY_OP_EXTEND + POP_TOP + POP_TOP; macro(BINARY_OP_INPLACE_ADD_UNICODE) = _GUARD_TOS_UNICODE + _GUARD_NOS_UNICODE + unused/5 + _BINARY_OP_INPLACE_ADD_UNICODE; diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 8f9b62b0bab6ab..56d0b192edba7a 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -5143,12 +5143,14 @@ break; } - case _BINARY_OP_EXTEND_r21: { + case _BINARY_OP_EXTEND_r23: { CHECK_CURRENT_CACHED_VALUES(2); assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE()); _PyStackRef right; _PyStackRef left; _PyStackRef res; + _PyStackRef l; + _PyStackRef r; _PyStackRef _stack_item_0 = _tos_cache0; _PyStackRef _stack_item_1 = _tos_cache1; right = _stack_item_1; @@ -5165,26 +5167,20 @@ ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyObject *res_o = d->action(left_o, right_o); - _PyStackRef tmp = right; - right = PyStackRef_NULL; - stack_pointer[-1] = right; - PyStackRef_CLOSE(tmp); - tmp = left; - left = PyStackRef_NULL; - stack_pointer[-2] = left; - PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -2; - ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (res_o == NULL) { SET_CURRENT_CACHED_VALUES(0); JUMP_TO_ERROR(); } res = PyStackRef_FromPyObjectSteal(res_o); + l = left; + r = right; + _tos_cache2 = r; + _tos_cache1 = l; _tos_cache0 = res; - _tos_cache1 = PyStackRef_ZERO_BITS; - _tos_cache2 = PyStackRef_ZERO_BITS; - SET_CURRENT_CACHED_VALUES(1); + SET_CURRENT_CACHED_VALUES(3); + stack_pointer += -2; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE()); break; } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 194fbe4f268cb4..9df6b2f70f96df 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -317,6 +317,9 @@ _PyStackRef left; _PyStackRef right; _PyStackRef res; + _PyStackRef l; + _PyStackRef r; + _PyStackRef value; /* Skip 1 cache entry */ // _GUARD_BINARY_OP_EXTEND { @@ -348,25 +351,32 @@ STAT_INC(BINARY_OP, hit); _PyFrame_SetStackPointer(frame, stack_pointer); PyObject *res_o = d->action(left_o, right_o); - _PyStackRef tmp = right; - right = PyStackRef_NULL; - stack_pointer[-1] = right; - PyStackRef_CLOSE(tmp); - tmp = left; - left = PyStackRef_NULL; - stack_pointer[-2] = left; - PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -2; - ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (res_o == NULL) { JUMP_TO_LABEL(error); } res = PyStackRef_FromPyObjectSteal(res_o); + l = left; + r = right; + } + // _POP_TOP + { + value = r; + stack_pointer[-2] = res; + stack_pointer[-1] = l; + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_XCLOSE(value); + stack_pointer = _PyFrame_GetStackPointer(frame); + } + // _POP_TOP + { + value = l; + stack_pointer += -1; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_XCLOSE(value); + stack_pointer = _PyFrame_GetStackPointer(frame); } - stack_pointer[0] = res; - stack_pointer += 1; - ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 876ba7c6de7482..9ad915ea01a54b 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -310,6 +310,12 @@ dummy_func(void) { r = right; } + op(_BINARY_OP_EXTEND, (left, right -- res, l, r)) { + res = sym_new_not_null(ctx); + l = left; + r = right; + } + op(_BINARY_OP_INPLACE_ADD_UNICODE, (left, right -- res)) { if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { assert(PyUnicode_CheckExact(sym_get_const(ctx, left))); diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 012fe16bfd9096..d5b9a4159d7341 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -882,11 +882,22 @@ } case _BINARY_OP_EXTEND: { + JitOptRef right; + JitOptRef left; JitOptRef res; + JitOptRef l; + JitOptRef r; + right = stack_pointer[-1]; + left = stack_pointer[-2]; + PyObject *descr = (PyObject *)this_instr->operand0; res = sym_new_not_null(ctx); - CHECK_STACK_BOUNDS(-1); + l = left; + r = right; + CHECK_STACK_BOUNDS(1); stack_pointer[-2] = res; - stack_pointer += -1; + stack_pointer[-1] = l; + stack_pointer[0] = r; + stack_pointer += 1; ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; }