diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h
index 2ee518fb82f301..624ce0f27d018b 100644
--- a/Include/internal/pycore_optimizer.h
+++ b/Include/internal/pycore_optimizer.h
@@ -37,6 +37,9 @@ typedef struct _JitOptContext {
     // Arena for the symbolic types.
     ty_arena t_arena;
 
+    // Arena for the slots mappings.
+    slots_arena s_arena;
+
     JitOptRef *n_consumed;
     JitOptRef *limit;
     JitOptRef locals_and_stack[MAX_ABSTRACT_INTERP_SIZE];
@@ -295,6 +298,9 @@ extern JitOptRef _Py_uop_sym_new_truthiness(JitOptContext *ctx, JitOptRef value,
 extern bool _Py_uop_sym_is_compact_int(JitOptRef sym);
 extern JitOptRef _Py_uop_sym_new_compact_int(JitOptContext *ctx);
 extern void _Py_uop_sym_set_compact_int(JitOptContext *ctx, JitOptRef sym);
+extern JitOptRef _Py_uop_sym_new_slots_object(JitOptContext *ctx, unsigned int type_version);
+extern JitOptRef _Py_uop_sym_slots_getattr(JitOptContext *ctx, JitOptRef ref, uint16_t slot_index);
+extern void _Py_uop_sym_slots_setattr(JitOptContext *ctx, JitOptRef ref, uint16_t slot_index, JitOptRef value);
 extern JitOptRef _Py_uop_sym_new_predicate(JitOptContext *ctx, JitOptRef lhs_ref, JitOptRef rhs_ref, JitOptPredicateKind kind);
 extern void _Py_uop_sym_apply_predicate_narrowing(JitOptContext *ctx, JitOptRef sym, bool branch_is_true);
 
diff --git a/Include/internal/pycore_optimizer_types.h b/Include/internal/pycore_optimizer_types.h
index a879ca26ce7b63..96abb5db0ab839 100644
--- a/Include/internal/pycore_optimizer_types.h
+++ b/Include/internal/pycore_optimizer_types.h
@@ -16,6 +16,10 @@ extern "C" {
 
 #define TY_ARENA_SIZE (UOP_MAX_TRACE_LENGTH * 5)
 
+// Maximum slots per object tracked symbolically
+#define MAX_SYMBOLIC_SLOTS_SIZE 16
+#define SLOTS_ARENA_SIZE (MAX_SYMBOLIC_SLOTS_SIZE * 100)
+
 // Need extras for root frame and for overflow frame (see TRACE_STACK_PUSH())
 #define MAX_ABSTRACT_FRAME_DEPTH (16)
 
@@ -41,6 +45,7 @@ typedef enum _JitSymType {
     JIT_SYM_TRUTHINESS_TAG = 9,
     JIT_SYM_COMPACT_INT = 10,
     JIT_SYM_PREDICATE_TAG = 11,
+    JIT_SYM_SLOTS_TAG = 12,
 } JitSymType;
 
 typedef struct _jit_opt_known_class {
@@ -89,6 +94,22 @@ typedef struct {
     uint8_t tag;
 } JitOptCompactInt;
 
+// One tracked slot: the slot's index and the symbol stored in it, kept as
+// an offset from allocation_base(ctx).
+typedef struct {
+    uint16_t slot_index;
+    uint16_t symbol;
+} JitOptSlotMapping;
+
+// Symbol for an object with a known type version whose slots are tracked.
+// `slots` holds `num_slots` mappings in use; it is NULL until storage is needed.
+typedef struct _jit_opt_slots {
+    uint8_t tag;
+    uint8_t num_slots;
+    uint32_t type_version;
+    JitOptSlotMapping *slots;
+} JitOptSlotsObject;
+
 typedef union _jit_opt_symbol {
     uint8_t tag;
     JitOptKnownClass cls;
@@ -97,6 +118,7 @@ typedef union _jit_opt_symbol {
     JitOptTuple tuple;
     JitOptTruthiness truthiness;
     JitOptCompactInt compact;
+    JitOptSlotsObject slots;
     JitOptPredicate predicate;
 } JitOptSymbol;
 
@@ -126,6 +148,13 @@ typedef struct ty_arena {
     JitOptSymbol arena[TY_ARENA_SIZE];
 } ty_arena;
 
+// Storage for slot mappings: slots_curr_number is the index of the next
+// unused entry in `arena`, slots_max_number its capacity.
+typedef struct slots_arena {
+    int slots_curr_number;
+    int slots_max_number;
+    JitOptSlotMapping arena[SLOTS_ARENA_SIZE];
+} slots_arena;
 
 #ifdef __cplusplus
 }
diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c
index c6a1ae60a317fa..fea6300b192094 100644
--- a/Python/optimizer_analysis.c
+++ b/Python/optimizer_analysis.c
@@ -247,6 +247,9 @@ add_op(JitOptContext *ctx, _PyUOpInstruction *this_instr,
 #define sym_is_compact_int _Py_uop_sym_is_compact_int
 #define sym_new_compact_int _Py_uop_sym_new_compact_int
 #define sym_new_truthiness _Py_uop_sym_new_truthiness
+#define sym_new_slots_object _Py_uop_sym_new_slots_object
+#define sym_slots_getattr _Py_uop_sym_slots_getattr
+#define sym_slots_setattr _Py_uop_sym_slots_setattr
 #define sym_new_predicate _Py_uop_sym_new_predicate
 #define sym_apply_predicate_narrowing _Py_uop_sym_apply_predicate_narrowing
 
diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c
index 1584e731d1b2d4..f6cbb4807ebb6d 100644
--- a/Python/optimizer_bytecodes.c
+++ b/Python/optimizer_bytecodes.c
@@ -38,6 +38,9 @@ typedef struct _Py_UOpsAbstractFrame _Py_UOpsAbstractFrame;
 #define sym_new_compact_int _Py_uop_sym_new_compact_int
 #define sym_is_compact_int _Py_uop_sym_is_compact_int
 #define sym_new_truthiness _Py_uop_sym_new_truthiness
+#define sym_new_slots_object _Py_uop_sym_new_slots_object
+#define sym_slots_getattr _Py_uop_sym_slots_getattr
+#define sym_slots_setattr _Py_uop_sym_slots_setattr
 #define sym_new_predicate _Py_uop_sym_new_predicate
 #define sym_apply_predicate_narrowing _Py_uop_sym_apply_predicate_narrowing
 
@@ -125,8 +128,7 @@ dummy_func(void) {
     }
 
     op(_STORE_ATTR_SLOT, (index/1, value, owner -- o)) {
-        (void)index;
-        (void)value;
+        sym_slots_setattr(ctx, owner, (uint16_t)index, value);
         o = owner;
     }
 
@@ -710,8 +712,7 @@ dummy_func(void) {
     }
 
     op(_LOAD_ATTR_SLOT, (index/1, owner -- attr, o)) {
-        attr = sym_new_not_null(ctx);
-        (void)index;
+        attr = sym_slots_getattr(ctx, owner, (uint16_t)index);
         o = owner;
     }
 
diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h
index 341805d51e24cd..56cf7d847ab18c 100644
--- a/Python/optimizer_cases.c.h
+++ b/Python/optimizer_cases.c.h
@@ -1919,8 +1919,7 @@
             JitOptRef o;
             owner = stack_pointer[-1];
             uint16_t index = (uint16_t)this_instr->operand0;
-            attr = sym_new_not_null(ctx);
-            (void)index;
+            attr = sym_slots_getattr(ctx, owner, (uint16_t)index);
             o = owner;
             CHECK_STACK_BOUNDS(1);
             stack_pointer[-1] = attr;
@@ -2027,8 +2026,7 @@
             owner = stack_pointer[-1];
             value = stack_pointer[-2];
             uint16_t index = (uint16_t)this_instr->operand0;
-            (void)index;
-            (void)value;
+            sym_slots_setattr(ctx, owner, (uint16_t)index, value);
             o = owner;
             CHECK_STACK_BOUNDS(-1);
             stack_pointer[-2] = o;
diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c
index a9640aaa5072c5..6d21b0f20af9ec 100644
--- a/Python/optimizer_symbols.c
+++ b/Python/optimizer_symbols.c
@@ -113,6 +113,15 @@ _PyUOpSymPrint(JitOptRef ref)
         case JIT_SYM_COMPACT_INT:
             printf("<compact_int at %p>", (void *)sym);
             break;
+        case JIT_SYM_SLOTS_TAG: {
+            PyTypeObject *slots_type = _PyType_LookupByVersion(sym->slots.type_version);
+            if (slots_type) {
+                printf("<%s slots[%d] v%u at %p>", slots_type->tp_name, sym->slots.num_slots, sym->slots.type_version, (void *)sym);
+            } else {
+                printf("<slots[%d] v%u at %p>", sym->slots.num_slots, sym->slots.type_version, (void *)sym);
+            }
+            break;
+        }
         default:
             printf("<tag=%d at %p>", sym->tag, (void *)sym);
             break;
@@ -320,6 +329,11 @@ _Py_uop_sym_set_type(JitOptContext *ctx, JitOptRef ref, PyTypeObject *typ)
                 sym_set_bottom(ctx, sym);
             }
             return;
+        case JIT_SYM_SLOTS_TAG:
+            if (typ->tp_version_tag != sym->slots.type_version) {
+                sym_set_bottom(ctx, sym);
+            }
+            return;
     }
 }
 
@@ -384,6 +398,12 @@ _Py_uop_sym_set_type_version(JitOptContext *ctx, JitOptRef ref, unsigned int ver
                 return false;
             }
             return true;
+        case JIT_SYM_SLOTS_TAG:
+            if (version != sym->slots.type_version) {
+                sym_set_bottom(ctx, sym);
+                return false;
+            }
+            return true;
     }
     Py_UNREACHABLE();
 }
@@ -483,6 +503,9 @@ _Py_uop_sym_set_const(JitOptContext *ctx, JitOptRef ref, PyObject *const_val)
                 sym_set_bottom(ctx, sym);
             }
             return;
+        case JIT_SYM_SLOTS_TAG:
+            sym_set_bottom(ctx, sym);
+            return;
     }
 }
 
@@ -603,7 +626,8 @@ _Py_uop_sym_get_type(JitOptRef ref)
             return &PyBool_Type;
         case JIT_SYM_COMPACT_INT:
             return &PyLong_Type;
-
+        case JIT_SYM_SLOTS_TAG:
+            return _PyType_LookupByVersion(sym->slots.type_version);
     }
     Py_UNREACHABLE();
 }
@@ -632,6 +656,8 @@ _Py_uop_sym_get_type_version(JitOptRef ref)
             return PyBool_Type.tp_version_tag;
         case JIT_SYM_COMPACT_INT:
             return PyLong_Type.tp_version_tag;
+        case JIT_SYM_SLOTS_TAG:
+            return sym->slots.type_version;
     }
     Py_UNREACHABLE();
 }
@@ -666,6 +692,7 @@ _Py_uop_sym_truthiness(JitOptContext *ctx, JitOptRef ref)
         case JIT_SYM_NON_NULL_TAG:
         case JIT_SYM_UNKNOWN_TAG:
         case JIT_SYM_COMPACT_INT:
+        case JIT_SYM_SLOTS_TAG:
             return -1;
         case JIT_SYM_KNOWN_CLASS_TAG:
             /* TODO :
@@ -823,6 +850,7 @@ _Py_uop_sym_set_compact_int(JitOptContext *ctx, JitOptRef ref)
         case JIT_SYM_TUPLE_TAG:
         case JIT_SYM_PREDICATE_TAG:
         case JIT_SYM_TRUTHINESS_TAG:
+        case JIT_SYM_SLOTS_TAG:
             sym_set_bottom(ctx, sym);
             return;
         case JIT_SYM_BOTTOM_TAG:
@@ -935,6 +963,97 @@ _Py_uop_sym_new_compact_int(JitOptContext *ctx)
     return PyJitRef_Wrap(sym);
 }
 
+// Create a symbol for an object with the given type version whose slot
+// contents can be tracked.  Mapping storage is only reserved on first store.
+JitOptRef
+_Py_uop_sym_new_slots_object(JitOptContext *ctx, unsigned int type_version)
+{
+    JitOptSymbol *res = sym_new(ctx);
+    if (res == NULL) {
+        return out_of_space_ref(ctx);
+    }
+    res->tag = JIT_SYM_SLOTS_TAG;
+    res->slots.num_slots = 0;
+    res->slots.slots = NULL;
+    res->slots.type_version = type_version;
+    return PyJitRef_Wrap(res);
+}
+
+// Return the symbol last recorded for `slot_index` on `ref`, or a fresh
+// not-null symbol when `ref` has no mapping for that slot.
+JitOptRef
+_Py_uop_sym_slots_getattr(JitOptContext *ctx, JitOptRef ref, uint16_t slot_index)
+{
+    JitOptSymbol *sym = PyJitRef_Unwrap(ref);
+    if (sym->tag == JIT_SYM_SLOTS_TAG && sym->slots.slots != NULL) {
+        for (int i = 0; i < sym->slots.num_slots; i++) {
+            if (sym->slots.slots[i].slot_index == slot_index) {
+                return PyJitRef_Wrap(allocation_base(ctx) + sym->slots.slots[i].symbol);
+            }
+        }
+    }
+    return _Py_uop_sym_new_not_null(ctx);
+}
+
+// Hand out MAX_SYMBOLIC_SLOTS_SIZE consecutive mappings, or NULL when full.
+static JitOptSlotMapping *
+slots_arena_alloc(JitOptContext *ctx)
+{
+    if (ctx->s_arena.slots_curr_number + MAX_SYMBOLIC_SLOTS_SIZE > ctx->s_arena.slots_max_number) {
+        return NULL;
+    }
+    JitOptSlotMapping *slots = &ctx->s_arena.arena[ctx->s_arena.slots_curr_number];
+    ctx->s_arena.slots_curr_number += MAX_SYMBOLIC_SLOTS_SIZE;
+    return slots;
+}
+
+// Record that `value` was stored into slot `slot_index` of `ref`.
+// Symbols carrying only a type version or known class are promoted to slots
+// symbols first; other kinds of symbol, and stores that do not fit in the
+// mapping storage, are left untracked.
+void
+_Py_uop_sym_slots_setattr(JitOptContext *ctx, JitOptRef ref, uint16_t slot_index, JitOptRef value)
+{
+    JitOptSymbol *sym = PyJitRef_Unwrap(ref);
+    if (sym->tag == JIT_SYM_TYPE_VERSION_TAG || sym->tag == JIT_SYM_KNOWN_CLASS_TAG) {
+        uint32_t version = (sym->tag == JIT_SYM_TYPE_VERSION_TAG)
+            ? sym->version.version : sym->cls.version;
+        if (version == 0) {
+            // A slots symbol is identified by its version alone, so a zero
+            // (presumably "unknown") version must not be promoted.
+            return;
+        }
+        sym->tag = JIT_SYM_SLOTS_TAG;
+        sym->slots.type_version = version;
+        sym->slots.num_slots = 0;
+        sym->slots.slots = NULL;
+    }
+    else if (sym->tag != JIT_SYM_SLOTS_TAG) {
+        return;
+    }
+    // Mapping storage is reserved lazily, on the first tracked store.
+    if (sym->slots.slots == NULL) {
+        sym->slots.slots = slots_arena_alloc(ctx);
+        if (sym->slots.slots == NULL) {
+            return;
+        }
+    }
+    uint16_t value_index = (uint16_t)(PyJitRef_Unwrap(value) - allocation_base(ctx));
+    // Overwrite the existing mapping when this slot is already tracked.
+    for (int i = 0; i < sym->slots.num_slots; i++) {
+        if (sym->slots.slots[i].slot_index == slot_index) {
+            sym->slots.slots[i].symbol = value_index;
+            return;
+        }
+    }
+    // Otherwise append a mapping, unless the per-object table is full.
+    if (sym->slots.num_slots < MAX_SYMBOLIC_SLOTS_SIZE) {
+        int idx = sym->slots.num_slots++;
+        sym->slots.slots[idx].slot_index = slot_index;
+        sym->slots.slots[idx].symbol = value_index;
+    }
+}
+
 // 0 on success, -1 on error.
 _Py_UOpsAbstractFrame *
 _Py_uop_frame_new(
@@ -1024,6 +1143,10 @@ _Py_uop_abstractcontext_init(JitOptContext *ctx)
     ctx->t_arena.ty_curr_number = 0;
     ctx->t_arena.ty_max_number = TY_ARENA_SIZE;
 
+    // Setup the arena for slot mappings.
+    ctx->s_arena.slots_curr_number = 0;
+    ctx->s_arena.slots_max_number = SLOTS_ARENA_SIZE;
+
     // Frame setup
     ctx->curr_frame_depth = 0;
 
@@ -1341,6 +1464,33 @@ _Py_uop_symbols_test(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(ignored))
     TEST_PREDICATE(_Py_uop_sym_matches_type(ref_int, &PyLong_Type), "43 is not an int");
     TEST_PREDICATE(_Py_uop_sym_get_const(ctx, ref_int) == val_43, "43 isn't 43");
 
+    JitOptRef slots_obj = _Py_uop_sym_new_slots_object(ctx, 42);
+    TEST_PREDICATE(!_Py_uop_sym_is_null(slots_obj), "slots object is NULL");
+    TEST_PREDICATE(_Py_uop_sym_is_not_null(slots_obj), "slots object is not not-null");
+    TEST_PREDICATE(_Py_uop_sym_get_type_version(slots_obj) == 42,
+                   "slots object has wrong type version");
+
+    JitOptRef slot_val = _Py_uop_sym_new_const(ctx, val_42);
+    _Py_uop_sym_slots_setattr(ctx, slots_obj, 0, slot_val);
+    JitOptRef retrieved = _Py_uop_sym_slots_getattr(ctx, slots_obj, 0);
+    TEST_PREDICATE(_Py_uop_sym_get_const(ctx, retrieved) == val_42,
+                   "slots getattr(0) didn't return val_42");
+
+    JitOptRef missing = _Py_uop_sym_slots_getattr(ctx, slots_obj, 99);
+    TEST_PREDICATE(_Py_uop_sym_is_not_null(missing), "missing slot is not not-null");
+    TEST_PREDICATE(!_Py_uop_sym_is_const(ctx, missing), "missing slot is const");
+
+    JitOptRef slot_val2 = _Py_uop_sym_new_const(ctx, val_43);
+    _Py_uop_sym_slots_setattr(ctx, slots_obj, 0, slot_val2);
+    retrieved = _Py_uop_sym_slots_getattr(ctx, slots_obj, 0);
+    TEST_PREDICATE(_Py_uop_sym_get_const(ctx, retrieved) == val_43,
+                   "slots getattr(0) didn't return val_43 after update");
+
+    JitOptRef slots_obj2 = _Py_uop_sym_new_slots_object(ctx, 42);
+    _Py_uop_sym_set_type_version(ctx, slots_obj2, 43);
+    TEST_PREDICATE(_Py_uop_sym_is_bottom(slots_obj2),
+                   "slots object with wrong type version isn't bottom");
+
     _Py_uop_abstractcontext_fini(ctx);
     Py_DECREF(val_42);
     Py_DECREF(val_43);