From dfd99d78ebd4991e10f4e326ac63aef12aced7d5 Mon Sep 17 00:00:00 2001 From: cocolato Date: Tue, 20 Jan 2026 22:09:42 +0800 Subject: [PATCH 1/4] Add slots object property tracking for the Tier 2 JIT optimizer --- Include/internal/pycore_optimizer.h | 3 + Include/internal/pycore_optimizer_types.h | 16 +++ Python/optimizer_analysis.c | 3 + Python/optimizer_bytecodes.c | 9 +- Python/optimizer_cases.c.h | 6 +- Python/optimizer_symbols.c | 119 +++++++++++++++++++++- 6 files changed, 147 insertions(+), 9 deletions(-) diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 0592221f15226e..35dcbf765f0d03 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -205,6 +205,9 @@ extern JitOptRef _Py_uop_sym_new_truthiness(JitOptContext *ctx, JitOptRef value, extern bool _Py_uop_sym_is_compact_int(JitOptRef sym); extern JitOptRef _Py_uop_sym_new_compact_int(JitOptContext *ctx); extern void _Py_uop_sym_set_compact_int(JitOptContext *ctx, JitOptRef sym); +extern JitOptRef _Py_uop_sym_new_slots_object(JitOptContext *ctx, unsigned int type_version); +extern JitOptRef _Py_uop_sym_slots_getattr(JitOptContext *ctx, JitOptRef ref, uint16_t slot_index); +extern void _Py_uop_sym_slots_setattr(JitOptContext *ctx, JitOptRef ref, uint16_t slot_index, JitOptRef value); extern void _Py_uop_abstractcontext_init(JitOptContext *ctx); extern void _Py_uop_abstractcontext_fini(JitOptContext *ctx); diff --git a/Include/internal/pycore_optimizer_types.h b/Include/internal/pycore_optimizer_types.h index 6501ce869c1425..41902efbacdd84 100644 --- a/Include/internal/pycore_optimizer_types.h +++ b/Include/internal/pycore_optimizer_types.h @@ -40,6 +40,7 @@ typedef enum _JitSymType { JIT_SYM_TUPLE_TAG = 8, JIT_SYM_TRUTHINESS_TAG = 9, JIT_SYM_COMPACT_INT = 10, + JIT_SYM_SLOTS_TAG = 11, } JitSymType; typedef struct _jit_opt_known_class { @@ -76,6 +77,20 @@ typedef struct { uint8_t tag; } JitOptCompactInt; +#define 
MAX_SYMBOLIC_SLOTS_SIZE 4 + +typedef struct { + uint16_t slot_index; + uint16_t symbol; +} JitOptSlotMapping; + +typedef struct _jit_opt_slots { + uint8_t tag; + uint8_t num_slots; + uint32_t type_version; + JitOptSlotMapping slots[MAX_SYMBOLIC_SLOTS_SIZE]; +} JitOptSlotsObject; + typedef union _jit_opt_symbol { uint8_t tag; JitOptKnownClass cls; @@ -84,6 +99,7 @@ typedef union _jit_opt_symbol { JitOptTuple tuple; JitOptTruthiness truthiness; JitOptCompactInt compact; + JitOptSlotsObject slots; } JitOptSymbol; // This mimics the _PyStackRef API diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index d635ebabf9007a..40360a05b914c1 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -247,6 +247,9 @@ add_op(JitOptContext *ctx, _PyUOpInstruction *this_instr, #define sym_is_compact_int _Py_uop_sym_is_compact_int #define sym_new_compact_int _Py_uop_sym_new_compact_int #define sym_new_truthiness _Py_uop_sym_new_truthiness +#define sym_new_slots_object _Py_uop_sym_new_slots_object +#define sym_slots_getattr _Py_uop_sym_slots_getattr +#define sym_slots_setattr _Py_uop_sym_slots_setattr #define JUMP_TO_LABEL(label) goto label; diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 876ba7c6de7482..2c1de7db42d446 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -38,6 +38,9 @@ typedef struct _Py_UOpsAbstractFrame _Py_UOpsAbstractFrame; #define sym_new_compact_int _Py_uop_sym_new_compact_int #define sym_is_compact_int _Py_uop_sym_is_compact_int #define sym_new_truthiness _Py_uop_sym_new_truthiness +#define sym_new_slots_object _Py_uop_sym_new_slots_object +#define sym_slots_getattr _Py_uop_sym_slots_getattr +#define sym_slots_setattr _Py_uop_sym_slots_setattr extern int optimize_to_bool( @@ -123,8 +126,7 @@ dummy_func(void) { } op(_STORE_ATTR_SLOT, (index/1, value, owner -- o)) { - (void)index; - (void)value; + sym_slots_setattr(ctx, owner, (uint16_t)index, value); o = owner; } 
@@ -709,8 +711,7 @@ dummy_func(void) { } op(_LOAD_ATTR_SLOT, (index/1, owner -- attr, o)) { - attr = sym_new_not_null(ctx); - (void)index; + attr = sym_slots_getattr(ctx, owner, (uint16_t)index); o = owner; } diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 012fe16bfd9096..ababa04e604bbc 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -1919,8 +1919,7 @@ JitOptRef o; owner = stack_pointer[-1]; uint16_t index = (uint16_t)this_instr->operand0; - attr = sym_new_not_null(ctx); - (void)index; + attr = sym_slots_getattr(ctx, owner, (uint16_t)index); o = owner; CHECK_STACK_BOUNDS(1); stack_pointer[-1] = attr; @@ -2027,8 +2026,7 @@ owner = stack_pointer[-1]; value = stack_pointer[-2]; uint16_t index = (uint16_t)this_instr->operand0; - (void)index; - (void)value; + sym_slots_setattr(ctx, owner, (uint16_t)index, value); o = owner; CHECK_STACK_BOUNDS(-1); stack_pointer[-2] = o; diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c index 5f5086d33b5c4c..2587a16907457d 100644 --- a/Python/optimizer_symbols.c +++ b/Python/optimizer_symbols.c @@ -113,6 +113,9 @@ _PyUOpSymPrint(JitOptRef ref) case JIT_SYM_COMPACT_INT: printf("<compact_int at %p>", (void *)sym); break; + case JIT_SYM_SLOTS_TAG: + printf("<slots[%d] v%u at %p>", sym->slots.num_slots, sym->slots.type_version, (void *)sym); + break; default: printf("<tag=%d at %p>", sym->tag, (void *)sym); break; @@ -319,6 +322,11 @@ _Py_uop_sym_set_type(JitOptContext *ctx, JitOptRef ref, PyTypeObject *typ) sym_set_bottom(ctx, sym); } return; + case JIT_SYM_SLOTS_TAG: + if (typ->tp_version_tag != sym->slots.type_version) { + sym_set_bottom(ctx, sym); + } + return; } } @@ -382,6 +390,12 @@ _Py_uop_sym_set_type_version(JitOptContext *ctx, JitOptRef ref, unsigned int ver return false; } return true; + case JIT_SYM_SLOTS_TAG: + if (version != sym->slots.type_version) { + sym_set_bottom(ctx, sym); + return false; + } + return true; } Py_UNREACHABLE(); } @@ -474,6 +488,9 @@ _Py_uop_sym_set_const(JitOptContext *ctx, JitOptRef 
ref, PyObject *const_val) sym_set_bottom(ctx, sym); } return; + case JIT_SYM_SLOTS_TAG: + sym_set_bottom(ctx, sym); + return; } } @@ -593,7 +610,8 @@ _Py_uop_sym_get_type(JitOptRef ref) return &PyBool_Type; case JIT_SYM_COMPACT_INT: return &PyLong_Type; - + case JIT_SYM_SLOTS_TAG: + return _PyType_LookupByVersion(sym->slots.type_version); } Py_UNREACHABLE(); } @@ -621,6 +639,8 @@ _Py_uop_sym_get_type_version(JitOptRef ref) return PyBool_Type.tp_version_tag; case JIT_SYM_COMPACT_INT: return PyLong_Type.tp_version_tag; + case JIT_SYM_SLOTS_TAG: + return sym->slots.type_version; } Py_UNREACHABLE(); } @@ -655,6 +675,7 @@ _Py_uop_sym_truthiness(JitOptContext *ctx, JitOptRef ref) case JIT_SYM_NON_NULL_TAG: case JIT_SYM_UNKNOWN_TAG: case JIT_SYM_COMPACT_INT: + case JIT_SYM_SLOTS_TAG: return -1; case JIT_SYM_KNOWN_CLASS_TAG: /* TODO : @@ -811,6 +832,7 @@ _Py_uop_sym_set_compact_int(JitOptContext *ctx, JitOptRef ref) return; case JIT_SYM_TUPLE_TAG: case JIT_SYM_TRUTHINESS_TAG: + case JIT_SYM_SLOTS_TAG: sym_set_bottom(ctx, sym); return; case JIT_SYM_BOTTOM_TAG: @@ -859,6 +881,74 @@ _Py_uop_sym_new_compact_int(JitOptContext *ctx) return PyJitRef_Wrap(sym); } +JitOptRef +_Py_uop_sym_new_slots_object(JitOptContext *ctx, unsigned int type_version) +{ + JitOptSymbol *res = sym_new(ctx); + if (res == NULL) { + return out_of_space_ref(ctx); + } + res->tag = JIT_SYM_SLOTS_TAG; + res->slots.num_slots = 0; + res->slots.type_version = type_version; + return PyJitRef_Wrap(res); +} + +JitOptRef +_Py_uop_sym_slots_getattr(JitOptContext *ctx, JitOptRef ref, uint16_t slot_index) +{ + JitOptSymbol *sym = PyJitRef_Unwrap(ref); + + if (sym->tag == JIT_SYM_SLOTS_TAG) { + // Linear search through the mapping array + for (int i = 0; i < sym->slots.num_slots; i++) { + if (sym->slots.slots[i].slot_index == slot_index) { + return PyJitRef_Wrap(allocation_base(ctx) + sym->slots.slots[i].symbol); + } + } + } + + // Not found, return not_null + return _Py_uop_sym_new_not_null(ctx); +} + +void 
+_Py_uop_sym_slots_setattr(JitOptContext *ctx, JitOptRef ref, uint16_t slot_index, JitOptRef value) +{ + JitOptSymbol *sym = PyJitRef_Unwrap(ref); + + if (sym->tag == JIT_SYM_TYPE_VERSION_TAG) { + uint32_t version = sym->version.version; + sym->tag = JIT_SYM_SLOTS_TAG; + sym->slots.type_version = version; + sym->slots.num_slots = 0; + } + else if (sym->tag == JIT_SYM_KNOWN_CLASS_TAG) { + uint32_t version = sym->cls.version; + sym->tag = JIT_SYM_SLOTS_TAG; + sym->slots.type_version = version; + sym->slots.num_slots = 0; + } + else if (sym->tag != JIT_SYM_SLOTS_TAG) { + return; + } + + if (sym->slots.num_slots >= MAX_SYMBOLIC_SLOTS_SIZE) { + return; + } + + for (int i = 0; i < sym->slots.num_slots; i++) { + if (sym->slots.slots[i].slot_index == slot_index) { + sym->slots.slots[i].symbol = (uint16_t)(PyJitRef_Unwrap(value) - allocation_base(ctx)); + return; + } + } + + int idx = sym->slots.num_slots++; + sym->slots.slots[idx].slot_index = slot_index; + sym->slots.slots[idx].symbol = (uint16_t)(PyJitRef_Unwrap(value) - allocation_base(ctx)); +} + // 0 on success, -1 on error. 
_Py_UOpsAbstractFrame * _Py_uop_frame_new( @@ -1186,6 +1276,33 @@ _Py_uop_symbols_test(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(ignored)) TEST_PREDICATE(_Py_uop_sym_matches_type(ref_int, &PyLong_Type), "43 is not an int"); TEST_PREDICATE(_Py_uop_sym_get_const(ctx, ref_int) == val_43, "43 isn't 43"); + JitOptRef slots_obj = _Py_uop_sym_new_slots_object(ctx, 42); + TEST_PREDICATE(!_Py_uop_sym_is_null(slots_obj), "slots object is NULL"); + TEST_PREDICATE(_Py_uop_sym_is_not_null(slots_obj), "slots object is not not-null"); + TEST_PREDICATE(_Py_uop_sym_get_type_version(slots_obj) == 42, + "slots object has wrong type version"); + + JitOptRef slot_val = _Py_uop_sym_new_const(ctx, val_42); + _Py_uop_sym_slots_setattr(ctx, slots_obj, 0, slot_val); + JitOptRef retrieved = _Py_uop_sym_slots_getattr(ctx, slots_obj, 0); + TEST_PREDICATE(_Py_uop_sym_get_const(ctx, retrieved) == val_42, + "slots getattr(0) didn't return val_42"); + + JitOptRef missing = _Py_uop_sym_slots_getattr(ctx, slots_obj, 99); + TEST_PREDICATE(_Py_uop_sym_is_not_null(missing), "missing slot is not not-null"); + TEST_PREDICATE(!_Py_uop_sym_is_const(ctx, missing), "missing slot is const"); + + JitOptRef slot_val2 = _Py_uop_sym_new_const(ctx, val_43); + _Py_uop_sym_slots_setattr(ctx, slots_obj, 0, slot_val2); + retrieved = _Py_uop_sym_slots_getattr(ctx, slots_obj, 0); + TEST_PREDICATE(_Py_uop_sym_get_const(ctx, retrieved) == val_43, + "slots getattr(0) didn't return val_43 after update"); + + JitOptRef slots_obj2 = _Py_uop_sym_new_slots_object(ctx, 42); + _Py_uop_sym_set_type_version(ctx, slots_obj2, 43); + TEST_PREDICATE(_Py_uop_sym_is_bottom(slots_obj2), + "slots object with wrong type version isn't bottom"); + _Py_uop_abstractcontext_fini(ctx); Py_DECREF(val_42); Py_DECREF(val_43); From 7bb2c20e65b98ecb10f18caff327fa36f60e0181 Mon Sep 17 00:00:00 2001 From: cocolato Date: Thu, 22 Jan 2026 01:28:00 +0800 Subject: [PATCH 2/4] use s_arena to save slots mapping --- 
Include/internal/pycore_optimizer_types.h | 17 +++++-- Python/optimizer_symbols.c | 56 ++++++++++++++++++----- 2 files changed, 58 insertions(+), 15 deletions(-) diff --git a/Include/internal/pycore_optimizer_types.h b/Include/internal/pycore_optimizer_types.h index 41902efbacdd84..c583927d8993d1 100644 --- a/Include/internal/pycore_optimizer_types.h +++ b/Include/internal/pycore_optimizer_types.h @@ -16,6 +16,10 @@ extern "C" { #define TY_ARENA_SIZE (UOP_MAX_TRACE_LENGTH * 5) +// Maximum slots per object tracked symbolically +#define MAX_SYMBOLIC_SLOTS_SIZE 16 +#define SLOTS_ARENA_SIZE (MAX_SYMBOLIC_SLOTS_SIZE * 100) + // Need extras for root frame and for overflow frame (see TRACE_STACK_PUSH()) #define MAX_ABSTRACT_FRAME_DEPTH (16) @@ -77,8 +81,6 @@ typedef struct { uint8_t tag; } JitOptCompactInt; -#define MAX_SYMBOLIC_SLOTS_SIZE 4 - typedef struct { uint16_t slot_index; uint16_t symbol; @@ -88,7 +90,7 @@ typedef struct _jit_opt_slots { uint8_t tag; uint8_t num_slots; uint32_t type_version; - JitOptSlotMapping slots[MAX_SYMBOLIC_SLOTS_SIZE]; + JitOptSlotMapping *slots; } JitOptSlotsObject; typedef union _jit_opt_symbol { @@ -128,6 +130,12 @@ typedef struct ty_arena { JitOptSymbol arena[TY_ARENA_SIZE]; } ty_arena; +typedef struct slots_arena { + int slots_curr_number; + int slots_max_number; + JitOptSlotMapping arena[SLOTS_ARENA_SIZE]; +} slots_arena; + typedef struct _JitOptContext { char done; char out_of_space; @@ -142,6 +150,9 @@ typedef struct _JitOptContext { // Arena for the symbolic types. ty_arena t_arena; + // Arena for slots mappings. 
+ slots_arena s_arena; + JitOptRef *n_consumed; JitOptRef *limit; JitOptRef locals_and_stack[MAX_ABSTRACT_INTERP_SIZE]; diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c index 2587a16907457d..336f420759a422 100644 --- a/Python/optimizer_symbols.c +++ b/Python/optimizer_symbols.c @@ -114,7 +114,12 @@ _PyUOpSymPrint(JitOptRef ref) printf("<compact_int at %p>", (void *)sym); break; case JIT_SYM_SLOTS_TAG: - printf("<slots[%d] v%u at %p>", sym->slots.num_slots, sym->slots.type_version, (void *)sym); + PyTypeObject *slots_type = _PyType_LookupByVersion(sym->slots.type_version); + if (slots_type) { + printf("<%s slots[%d] v%u at %p>", slots_type->tp_name, sym->slots.num_slots, sym->slots.type_version, (void *)sym); + } else { + printf("<slots[%d] v%u at %p>", sym->slots.num_slots, sym->slots.type_version, (void *)sym); + } break; default: printf("<tag=%d at %p>", sym->tag, (void *)sym); @@ -890,6 +895,7 @@ _Py_uop_sym_new_slots_object(JitOptContext *ctx, unsigned int type_version) } res->tag = JIT_SYM_SLOTS_TAG; res->slots.num_slots = 0; + res->slots.slots = NULL; res->slots.type_version = type_version; return PyJitRef_Wrap(res); } @@ -899,8 +905,7 @@ _Py_uop_sym_slots_getattr(JitOptContext *ctx, JitOptRef ref, uint16_t slot_index { JitOptSymbol *sym = PyJitRef_Unwrap(ref); - if (sym->tag == JIT_SYM_SLOTS_TAG) { - // Linear search through the mapping array + if (sym->tag == JIT_SYM_SLOTS_TAG && sym->slots.slots != NULL) { for (int i = 0; i < sym->slots.num_slots; i++) { if (sym->slots.slots[i].slot_index == slot_index) { return PyJitRef_Wrap(allocation_base(ctx) + sym->slots.slots[i].symbol); @@ -908,10 +913,20 @@ _Py_uop_sym_slots_getattr(JitOptContext *ctx, JitOptRef ref, uint16_t slot_index } } - // Not found, return not_null return _Py_uop_sym_new_not_null(ctx); } +static JitOptSlotMapping * +slots_arena_alloc(JitOptContext *ctx) +{ + if (ctx->s_arena.slots_curr_number + MAX_SYMBOLIC_SLOTS_SIZE > ctx->s_arena.slots_max_number) { + return NULL; + } + JitOptSlotMapping *slots = 
&ctx->s_arena.arena[ctx->s_arena.slots_curr_number]; + ctx->s_arena.slots_curr_number += MAX_SYMBOLIC_SLOTS_SIZE; + return slots; +} + void _Py_uop_sym_slots_setattr(JitOptContext *ctx, JitOptRef ref, uint16_t slot_index, JitOptRef value) { @@ -922,31 +937,44 @@ _Py_uop_sym_slots_setattr(JitOptContext *ctx, JitOptRef ref, uint16_t slot_index sym->tag = JIT_SYM_SLOTS_TAG; sym->slots.type_version = version; sym->slots.num_slots = 0; + sym->slots.slots = slots_arena_alloc(ctx); + if (sym->slots.slots == NULL) { + return; + } } else if (sym->tag == JIT_SYM_KNOWN_CLASS_TAG) { uint32_t version = sym->cls.version; sym->tag = JIT_SYM_SLOTS_TAG; sym->slots.type_version = version; sym->slots.num_slots = 0; + sym->slots.slots = slots_arena_alloc(ctx); + if (sym->slots.slots == NULL) { + return; + } } else if (sym->tag != JIT_SYM_SLOTS_TAG) { return; } - - if (sym->slots.num_slots >= MAX_SYMBOLIC_SLOTS_SIZE) { - return; + // Check if have arena space allocated + if (sym->slots.slots == NULL) { + sym->slots.slots = slots_arena_alloc(ctx); + if (sym->slots.slots == NULL) { + return; + } } - + // Check if the slot already exists for (int i = 0; i < sym->slots.num_slots; i++) { if (sym->slots.slots[i].slot_index == slot_index) { sym->slots.slots[i].symbol = (uint16_t)(PyJitRef_Unwrap(value) - allocation_base(ctx)); return; } } - - int idx = sym->slots.num_slots++; - sym->slots.slots[idx].slot_index = slot_index; - sym->slots.slots[idx].symbol = (uint16_t)(PyJitRef_Unwrap(value) - allocation_base(ctx)); + // Add new mapping if there's space + if (sym->slots.num_slots < MAX_SYMBOLIC_SLOTS_SIZE) { + int idx = sym->slots.num_slots++; + sym->slots.slots[idx].slot_index = slot_index; + sym->slots.slots[idx].symbol = (uint16_t)(PyJitRef_Unwrap(value) - allocation_base(ctx)); + } } // 0 on success, -1 on error. 
@@ -1038,6 +1066,10 @@ _Py_uop_abstractcontext_init(JitOptContext *ctx) ctx->t_arena.ty_curr_number = 0; ctx->t_arena.ty_max_number = TY_ARENA_SIZE; + // Setup the arena for slot mappings. + ctx->s_arena.slots_curr_number = 0; + ctx->s_arena.slots_max_number = SLOTS_ARENA_SIZE; + // Frame setup ctx->curr_frame_depth = 0; From 20920fc08779a395308e246037db8fa462355b1d Mon Sep 17 00:00:00 2001 From: cocolato Date: Thu, 22 Jan 2026 02:17:07 +0800 Subject: [PATCH 3/4] fix switch syntax error --- Python/optimizer_symbols.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c index 336f420759a422..6f19d1fdd7c56d 100644 --- a/Python/optimizer_symbols.c +++ b/Python/optimizer_symbols.c @@ -113,7 +113,7 @@ _PyUOpSymPrint(JitOptRef ref) case JIT_SYM_COMPACT_INT: printf("<compact_int at %p>", (void *)sym); break; - case JIT_SYM_SLOTS_TAG: + case JIT_SYM_SLOTS_TAG: { PyTypeObject *slots_type = _PyType_LookupByVersion(sym->slots.type_version); if (slots_type) { printf("<%s slots[%d] v%u at %p>", slots_type->tp_name, sym->slots.num_slots, sym->slots.type_version, (void *)sym); @@ -121,6 +121,7 @@ _PyUOpSymPrint(JitOptRef ref) printf("<slots[%d] v%u at %p>", sym->slots.num_slots, sym->slots.type_version, (void *)sym); } break; + } default: printf("<tag=%d at %p>", sym->tag, (void *)sym); break; From 5b4bd5f038694e33954240f8e1836d45b451bd7d Mon Sep 17 00:00:00 2001 From: cocolato Date: Thu, 22 Jan 2026 23:39:25 +0800 Subject: [PATCH 4/4] fix arena struct --- Include/internal/pycore_optimizer.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 77ea5e770264cb..624ce0f27d018b 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -37,6 +37,9 @@ typedef struct _JitOptContext { // Arena for the symbolic types. ty_arena t_arena; + // Arena for the slots mappings. 
+ slots_arena s_arena; + JitOptRef *n_consumed; JitOptRef *limit; JitOptRef locals_and_stack[MAX_ABSTRACT_INTERP_SIZE];