// Copyright (c) Facebook, Inc. and its affiliates. (http://www.facebook.com)
#include "bytecode.h"

#include "ic.h"
#include "runtime.h"

namespace py {

const char* const kBytecodeNames[] = {
#define NAME(name, value, handler) #name,
    FOREACH_BYTECODE(NAME)
#undef NAME
};

BytecodeOp nextBytecodeOp(const MutableBytes& bytecode, word* index) {
  word i = *index;
  Bytecode bc = rewrittenBytecodeOpAt(bytecode, i);
  int32_t arg = rewrittenBytecodeArgAt(bytecode, i);
  uint16_t cache = rewrittenBytecodeCacheAt(bytecode, i);
  i++;
  while (bc == Bytecode::EXTENDED_ARG) {
    bc = rewrittenBytecodeOpAt(bytecode, i);
    arg = (arg << kBitsPerByte) | rewrittenBytecodeArgAt(bytecode, i);
    i++;
  }
  DCHECK(i - *index <= 8, "EXTENDED_ARG-encoded arg must fit in int32_t");
  *index = i;
  return BytecodeOp{bc, arg, cache};
}

const word kOpcodeOffset = 0;
const word kArgOffset = 1;
const word kCacheOffset = 2;

word bytecodeLength(const Bytes& bytecode) {
  return bytecode.length() / kCompilerCodeUnitSize;
}

Bytecode bytecodeOpAt(const Bytes& bytecode, word index) {
  return static_cast<Bytecode>(
      bytecode.byteAt(index * kCompilerCodeUnitSize + kOpcodeOffset));
}

byte bytecodeArgAt(const Bytes& bytecode, word index) {
  return bytecode.byteAt(index * kCompilerCodeUnitSize + kArgOffset);
}

word rewrittenBytecodeLength(const MutableBytes& bytecode) {
  return bytecode.length() / kCodeUnitSize;
}

Bytecode rewrittenBytecodeOpAt(const MutableBytes& bytecode, word index) {
  return static_cast<Bytecode>(
      bytecode.byteAt(index * kCodeUnitSize + kOpcodeOffset));
}

void rewrittenBytecodeOpAtPut(const MutableBytes& bytecode, word index,
                              Bytecode op) {
  bytecode.byteAtPut(index * kCodeUnitSize + kOpcodeOffset,
                     static_cast<byte>(op));
}

byte rewrittenBytecodeArgAt(const MutableBytes& bytecode, word index) {
  return bytecode.byteAt(index * kCodeUnitSize + kArgOffset);
}

void rewrittenBytecodeArgAtPut(const MutableBytes& bytecode, word index,
                               byte arg) {
  bytecode.byteAtPut(index * kCodeUnitSize + kArgOffset, arg);
}

uint16_t rewrittenBytecodeCacheAt(const MutableBytes& bytecode, word index) {
  return bytecode.uint16At(index * kCodeUnitSize + kCacheOffset);
}

void rewrittenBytecodeCacheAtPut(const MutableBytes& bytecode, word index,
                                 uint16_t cache) {
  bytecode.uint16AtPut(index * kCodeUnitSize + kCacheOffset, cache);
}
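
// Illustration only (not part of the original source): the offsets above
// suggest that compiler bytecode uses two-byte (opcode, oparg) code units,
// while rewritten bytecode uses wider units that also carry a 16-bit
// inline-cache index (presumably kCodeUnitSize == 4, given kCacheOffset == 2
// and the uint16 cache slot):
//
//   compiler unit i:   [opcode][oparg]
//   rewritten unit i:  [opcode][oparg][ cache index ]
//
// Reading one rewritten unit with the accessors above would look like:
//
//   Bytecode op = rewrittenBytecodeOpAt(code, i);        // byte 0
//   byte arg = rewrittenBytecodeArgAt(code, i);          // byte 1
//   uint16_t cache = rewrittenBytecodeCacheAt(code, i);  // bytes 2-3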

int8_t opargFromObject(RawObject object) {
  DCHECK(!object.isHeapObject(), "Heap objects are disallowed");
  return static_cast<int8_t>(object.raw());
}

struct RewrittenOp {
  Bytecode bc;
  int32_t arg;
  bool needs_inline_cache;
};

static RewrittenOp rewriteOperation(const Function& function, BytecodeOp op,
                                    bool use_load_fast_reverse_unchecked) {
  auto cached_binop = [](Interpreter::BinaryOp bin_op) {
    return RewrittenOp{BINARY_OP_ANAMORPHIC, static_cast<int32_t>(bin_op),
                       true};
  };
  auto cached_inplace = [](Interpreter::BinaryOp bin_op) {
    return RewrittenOp{INPLACE_OP_ANAMORPHIC, static_cast<int32_t>(bin_op),
                       true};
  };
  switch (op.bc) {
    case BINARY_ADD:
      return cached_binop(Interpreter::BinaryOp::ADD);
    case BINARY_AND:
      return cached_binop(Interpreter::BinaryOp::AND);
    case BINARY_FLOOR_DIVIDE:
      return cached_binop(Interpreter::BinaryOp::FLOORDIV);
    case BINARY_LSHIFT:
      return cached_binop(Interpreter::BinaryOp::LSHIFT);
    case BINARY_MATRIX_MULTIPLY:
      return cached_binop(Interpreter::BinaryOp::MATMUL);
    case BINARY_MODULO:
      return cached_binop(Interpreter::BinaryOp::MOD);
    case BINARY_MULTIPLY:
      return cached_binop(Interpreter::BinaryOp::MUL);
    case BINARY_OR:
      return cached_binop(Interpreter::BinaryOp::OR);
    case BINARY_POWER:
      return cached_binop(Interpreter::BinaryOp::POW);
    case BINARY_RSHIFT:
      return cached_binop(Interpreter::BinaryOp::RSHIFT);
    case BINARY_SUBSCR:
      return RewrittenOp{BINARY_SUBSCR_ANAMORPHIC, op.arg, true};
    case BINARY_SUBTRACT:
      return cached_binop(Interpreter::BinaryOp::SUB);
    case BINARY_TRUE_DIVIDE:
      return cached_binop(Interpreter::BinaryOp::TRUEDIV);
    case BINARY_XOR:
      return cached_binop(Interpreter::BinaryOp::XOR);
    case COMPARE_OP:
      switch (op.arg) {
        case CompareOp::LT:
        case CompareOp::LE:
        case CompareOp::EQ:
        case CompareOp::NE:
        case CompareOp::GT:
        case CompareOp::GE:
          return RewrittenOp{COMPARE_OP_ANAMORPHIC, op.arg, true};
        case CompareOp::IN:
          return RewrittenOp{COMPARE_IN_ANAMORPHIC, 0, true};
        // TODO(T61327107): Implement COMPARE_NOT_IN.
        case CompareOp::IS:
          return RewrittenOp{COMPARE_IS, 0, false};
        case CompareOp::IS_NOT:
          return RewrittenOp{COMPARE_IS_NOT, 0, false};
      }
      break;
    case CALL_FUNCTION:
      return RewrittenOp{CALL_FUNCTION_ANAMORPHIC, op.arg, true};
    case FOR_ITER:
      return RewrittenOp{FOR_ITER_ANAMORPHIC, op.arg, true};
    case INPLACE_ADD:
      return cached_inplace(Interpreter::BinaryOp::ADD);
    case INPLACE_AND:
      return cached_inplace(Interpreter::BinaryOp::AND);
    case INPLACE_FLOOR_DIVIDE:
      return cached_inplace(Interpreter::BinaryOp::FLOORDIV);
    case INPLACE_LSHIFT:
      return cached_inplace(Interpreter::BinaryOp::LSHIFT);
    case INPLACE_MATRIX_MULTIPLY:
      return cached_inplace(Interpreter::BinaryOp::MATMUL);
    case INPLACE_MODULO:
      return cached_inplace(Interpreter::BinaryOp::MOD);
    case INPLACE_MULTIPLY:
      return cached_inplace(Interpreter::BinaryOp::MUL);
    case INPLACE_OR:
      return cached_inplace(Interpreter::BinaryOp::OR);
    case INPLACE_POWER:
      return cached_inplace(Interpreter::BinaryOp::POW);
    case INPLACE_RSHIFT:
      return cached_inplace(Interpreter::BinaryOp::RSHIFT);
    case INPLACE_SUBTRACT:
      return cached_inplace(Interpreter::BinaryOp::SUB);
    case INPLACE_TRUE_DIVIDE:
      return cached_inplace(Interpreter::BinaryOp::TRUEDIV);
    case INPLACE_XOR:
      return cached_inplace(Interpreter::BinaryOp::XOR);
    case LOAD_ATTR:
      return RewrittenOp{LOAD_ATTR_ANAMORPHIC, op.arg, true};
    case LOAD_FAST: {
      CHECK(op.arg < Code::cast(function.code()).nlocals(),
            "unexpected local number");
      word total_locals = function.totalLocals();
      // Check if the original opcode uses an extended arg
      if (op.arg > kMaxByte) {
        break;
      }
      int32_t reverse_arg = total_locals - op.arg - 1;
      // Check that the new value fits in a byte
      if (reverse_arg > kMaxByte) {
        break;
      }
      // TODO(T66255738): Use a more complete static analysis to capture all
      // bound local variables other than just arguments.
      return RewrittenOp{
          // Arguments are always bound.
          (op.arg < function.totalArgs() && use_load_fast_reverse_unchecked)
              ? LOAD_FAST_REVERSE_UNCHECKED
              : LOAD_FAST_REVERSE,
          reverse_arg, false};
    }
    case LOAD_METHOD:
      return RewrittenOp{LOAD_METHOD_ANAMORPHIC, op.arg, true};
    case STORE_ATTR:
      return RewrittenOp{STORE_ATTR_ANAMORPHIC, op.arg, true};
    case STORE_FAST: {
      CHECK(op.arg < Code::cast(function.code()).nlocals(),
            "unexpected local number");
      // Check if the original opcode uses an extended arg
      if (op.arg > kMaxByte) {
        break;
      }
      word total_locals = function.totalLocals();
      int32_t reverse_arg = total_locals - op.arg - 1;
      // Check that the new value fits in a byte
      if (reverse_arg > kMaxByte) {
        break;
      }
      return RewrittenOp{STORE_FAST_REVERSE, reverse_arg, false};
    }
    case STORE_SUBSCR:
      return RewrittenOp{STORE_SUBSCR_ANAMORPHIC, op.arg, true};
    case LOAD_CONST: {
      RawObject arg_obj =
          Tuple::cast(Code::cast(function.code()).consts()).at(op.arg);
      if (!arg_obj.isHeapObject()) {
        if (arg_obj.isBool()) {
          // We encode true/false not as 1/0 but as 0x80/0 to save an x86
          // assembly instruction; moving the value to the 2nd byte can be done
          // with a multiplication by 2 as part of an address expression rather
          // than needing a separate shift by 8 in the 1/0 variant.
          return RewrittenOp{LOAD_BOOL, Bool::cast(arg_obj).value() ? 0x80 : 0,
                             false};
        }
        // This condition is true only if the object fits in a byte. Some
        // immediate values of SmallInt and SmallStr do not satisfy this
        // condition.
        if (arg_obj == objectFromOparg(opargFromObject(arg_obj))) {
          return RewrittenOp{LOAD_IMMEDIATE, opargFromObject(arg_obj), false};
        }
      }
    } break;
    case BINARY_OP_ANAMORPHIC:
    case COMPARE_OP_ANAMORPHIC:
    case FOR_ITER_ANAMORPHIC:
    case INPLACE_OP_ANAMORPHIC:
    case LOAD_ATTR_ANAMORPHIC:
    case LOAD_FAST_REVERSE:
    case LOAD_METHOD_ANAMORPHIC:
    case STORE_ATTR_ANAMORPHIC:
    case STORE_FAST_REVERSE:
      UNREACHABLE("should not have cached opcode in input");
    default:
      break;
  }
  return RewrittenOp{UNUSED_BYTECODE_0, 0, false};
}
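
// Worked example (illustration, not from the original source): in a function
// with totalLocals() == 4, LOAD_FAST with oparg 1 becomes LOAD_FAST_REVERSE
// (or LOAD_FAST_REVERSE_UNCHECKED when the local is a bound argument and the
// code has no DELETE_FAST) with oparg 4 - 1 - 1 == 2 and needs no cache,
// whereas LOAD_ATTR becomes LOAD_ATTR_ANAMORPHIC with needs_inline_cache set,
// so rewriteBytecode() below assigns it the next free inline-cache index.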

RawObject expandBytecode(Thread* thread, const Bytes& bytecode) {
  // Bytecode comes in as (OP, ARG) pairs. Bytecode goes out as (OP, ARG,
  // CACHE, CACHE) four-tuples.
  HandleScope scope(thread);
  word num_opcodes = bytecodeLength(bytecode);
  MutableBytes result(&scope, thread->runtime()->newMutableBytesUninitialized(
                                  num_opcodes * kCodeUnitSize));
  for (word i = 0; i < num_opcodes; i++) {
    rewrittenBytecodeOpAtPut(result, i, bytecodeOpAt(bytecode, i));
    rewrittenBytecodeArgAtPut(result, i, bytecodeArgAt(bytecode, i));
    rewrittenBytecodeCacheAtPut(result, i, 0);
  }
  return *result;
}

static const word kMaxCaches = 65536;
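
// Cache-layout note (illustration, not part of the original source): the
// caches tuple allocated below reserves its first num_global_caches entries
// (num_global_caches * kIcPointersPerEntry pointer slots, initialized to
// None) for global-name caches; every rewritten opcode that needs an inline
// cache is then assigned the next entry index, starting at num_global_caches.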

void rewriteBytecode(Thread* thread, const Function& function) {
  HandleScope scope(thread);
  Runtime* runtime = thread->runtime();
  // Add cache entries for global variables.
  // TODO(T58223091): This is going to over-allocate somewhat in order to
  // simplify the indexing arithmetic. Not all names are used for globals,
  // some are used for attributes. This is good enough for now.
  word names_length =
      Tuple::cast(Code::cast(function.code()).names()).length();
  word num_global_caches =
      Utils::roundUpDiv(names_length, kIcPointersPerEntry);
  if (!function.hasOptimizedOrNewlocals()) {
    if (num_global_caches > 0) {
      MutableTuple caches(&scope, runtime->newMutableTuple(
                                      num_global_caches * kIcPointersPerEntry));
      caches.fill(NoneType::object());
      function.setCaches(*caches);
    }
    return;
  }

  MutableBytes bytecode(&scope, function.rewrittenBytecode());
  word num_opcodes = rewrittenBytecodeLength(bytecode);
  bool use_load_fast_reverse_unchecked = true;
  // Scan bytecode to figure out how many caches we need and if we can use
  // LOAD_FAST_REVERSE_UNCHECKED.
  word num_caches = num_global_caches;
  for (word i = 0; i < num_opcodes;) {
    BytecodeOp op = nextBytecodeOp(bytecode, &i);
    if (op.bc == DELETE_FAST) {
      use_load_fast_reverse_unchecked = false;
      continue;
    }
    RewrittenOp rewritten = rewriteOperation(function, op, false);
    if (rewritten.needs_inline_cache) {
      num_caches++;
    }
  }

  if (num_caches > kMaxCaches) {
    // Populate global variable caches unconditionally since the interpreter
    // assumes their existence.
    if (num_global_caches > 0) {
      MutableTuple caches(&scope, runtime->newMutableTuple(
                                      num_global_caches * kIcPointersPerEntry));
      caches.fill(NoneType::object());
      function.setCaches(*caches);
    }
    return;
  }

  word cache = num_global_caches;
  for (word i = 0; i < num_opcodes;) {
    BytecodeOp op = nextBytecodeOp(bytecode, &i);
    word previous_index = i - 1;
    RewrittenOp rewritten =
        rewriteOperation(function, op, use_load_fast_reverse_unchecked);
    if (rewritten.bc == UNUSED_BYTECODE_0) continue;
    if (rewritten.needs_inline_cache) {
      rewrittenBytecodeOpAtPut(bytecode, previous_index, rewritten.bc);
      rewrittenBytecodeArgAtPut(bytecode, previous_index,
                                static_cast<byte>(rewritten.arg));
      rewrittenBytecodeCacheAtPut(bytecode, previous_index, cache);
      cache++;
    } else if (rewritten.arg != op.arg || rewritten.bc != op.bc) {
      rewrittenBytecodeOpAtPut(bytecode, previous_index, rewritten.bc);
      rewrittenBytecodeArgAtPut(bytecode, previous_index,
                                static_cast<byte>(rewritten.arg));
    }
  }
  DCHECK(cache == num_caches, "cache size mismatch");

  if (cache > 0) {
    MutableTuple caches(&scope,
                        runtime->newMutableTuple(cache * kIcPointersPerEntry));
    caches.fill(NoneType::object());
    function.setCaches(*caches);
  }
}

}  // namespace py