LIRGenerator::TranslatedBlock LIRGenerator::TranslateOneBasicBlock()

in Jit/lir/generator.cpp [648:2882]


LIRGenerator::TranslatedBlock LIRGenerator::TranslateOneBasicBlock(
    const hir::BasicBlock* hir_bb) {
  BasicBlockBuilder bbb(env_, lir_func_);

  for (auto& i : *hir_bb) {
    auto opcode = i.opcode();
    bbb.setCurrentInstr(&i);
    switch (opcode) {
      case Opcode::kLoadArg: {
        auto instr = static_cast<const LoadArg*>(&i);
        JIT_DCHECK(
            instr->arg_idx() < env_->arg_locations.size(),
            "Inconsistent number of args");
        PhyLocation phy_loc = env_->arg_locations[instr->arg_idx()];
        if (phy_loc.is_memory()) {
          bbb.AppendCode(
              "Load {}, __asm_extra_args, {}",
              instr->dst(),
              -(phy_loc + 1) * kPointerSize);
        } else {
          bbb.AppendCode("LoadArg {} {}", instr->dst(), instr->arg_idx());
        }
        break;
      }
      case Opcode::kLoadCurrentFunc: {
        bbb.AppendCode("Move {}, __asm_func", i.GetOutput());
        break;
      }
      case Opcode::kMakeCell: {
        auto instr = static_cast<const MakeCell*>(&i);
        bbb.AppendCode(
            "Call {}, {:#x}, {}",
            instr->dst(),
            reinterpret_cast<uint64_t>(PyCell_New),
            instr->GetOperand(0));
        break;
      }
      case Opcode::kStealCellItem:
      case Opcode::kLoadCellItem: {
        bbb.AppendCode(
            "Load {}, {}, {}",
            i.GetOutput(),
            i.GetOperand(0),
            offsetof(PyCellObject, ob_ref));
        break;
      }
      case Opcode::kSetCellItem: {
        auto instr = static_cast<const SetCellItem*>(&i);
        bbb.AppendCode(
            "Store {}, {}, {}",
            instr->GetOperand(1),
            instr->GetOperand(0),
            offsetof(PyCellObject, ob_ref));
        break;
      }
      case Opcode::kLoadConst: {
        auto instr = static_cast<const LoadConst*>(&i);
        Type ty = instr->type();
        if (ty <= TCDouble) {
          auto tmp_name = GetSafeTempName();
          double_t spec_value = ty.doubleSpec();
          auto v = bit_cast<uint64_t>(spec_value);
          // This loads the bits of the double into memory
          bbb.AppendCode("Move {}:{}, {:#x}", tmp_name, TCUInt64, v);
          // This moves the value into a floating point register
          bbb.AppendCode(
              "Move {}:{}, {}",
              instr->dst()->name(),
              ty.unspecialized(),
              tmp_name);
        } else {
          intptr_t spec_value = ty.hasIntSpec()
              ? ty.intSpec()
              : reinterpret_cast<intptr_t>(ty.asObject());
          bbb.AppendCode(
              "Move {}:{}, {:#x}",
              instr->dst()->name(),
              ty.unspecialized(),
              spec_value);
        }
        break;
      }
      case Opcode::kLoadVarObjectSize: {
        const size_t kSizeOffset = offsetof(PyVarObject, ob_size);
        bbb.AppendCode(
            "Load {}, {}, {}", i.GetOutput(), i.GetOperand(0), kSizeOffset);
        break;
      }
      case Opcode::kLoadFunctionIndirect: {
        // format will pass this down as a constant
        auto instr = static_cast<const LoadFunctionIndirect*>(&i);
        bbb.AppendCode(
            "Call {} {:#x}, {}, {}",
            instr->dst(),
            reinterpret_cast<uint64_t>(JITRT_LoadFunctionIndirect),
            reinterpret_cast<uint64_t>(instr->funcptr()),
            reinterpret_cast<uint64_t>(instr->descr()));
        break;
      }
      case Opcode::kIntConvert: {
        auto instr = static_cast<const IntConvert*>(&i);
        if (instr->type() <= TCUnsigned) {
          bbb.AppendCode("ConvertUnsigned {}, {}", instr->dst(), instr->src());
        } else {
          JIT_CHECK(
              instr->type() <= TCSigned,
              "Unexpected IntConvert type %s",
              instr->type());
          bbb.AppendCode("Convert {}, {}", instr->dst(), instr->src());
        }
        break;
      }
      case Opcode::kIntBinaryOp: {
        auto instr = static_cast<const IntBinaryOp*>(&i);
        std::string op;
        std::string convert;
        std::string extra_arg = "";
        uint64_t helper = 0;
        switch (instr->op()) {
          case BinaryOpKind::kAdd:
            op = "Add";
            break;
          case BinaryOpKind::kAnd:
            op = "And";
            break;
          case BinaryOpKind::kSubtract:
            op = "Sub";
            break;
          case BinaryOpKind::kXor:
            op = "Xor";
            break;
          case BinaryOpKind::kOr:
            op = "Or";
            break;
          case BinaryOpKind::kMultiply:
            op = "Mul";
            break;
          case BinaryOpKind::kLShift:
            switch (bytes_from_cint_type(instr->GetOperand(0)->type())) {
              case 1:
              case 2:
                convert = "Convert";
              case 3:
                helper = reinterpret_cast<uint64_t>(JITRT_ShiftLeft32);
                break;
              case 4:
                helper = reinterpret_cast<uint64_t>(JITRT_ShiftLeft64);
                break;
            }
            break;
          case BinaryOpKind::kRShift:
            switch (bytes_from_cint_type(instr->GetOperand(0)->type())) {
              case 1:
              case 2:
                convert = "Convert";
              case 3:
                helper = reinterpret_cast<uint64_t>(JITRT_ShiftRight32);
                break;
              case 4:
                helper = reinterpret_cast<uint64_t>(JITRT_ShiftRight64);
                break;
            }
            break;
          case BinaryOpKind::kRShiftUnsigned:
            switch (bytes_from_cint_type(instr->GetOperand(0)->type())) {
              case 1:
              case 2:
                convert = "ConvertUnsigned";
              case 3:
                helper = reinterpret_cast<uint64_t>(JITRT_ShiftRightUnsigned32);
                break;
              case 4:
                helper = reinterpret_cast<uint64_t>(JITRT_ShiftRightUnsigned64);
                break;
            }
            break;
          case BinaryOpKind::kFloorDivide:
            op = "Div";
            extra_arg = "0, ";
            break;
          case BinaryOpKind::kFloorDivideUnsigned:
            op = "DivUn";
            extra_arg = "0, ";
            break;
          case BinaryOpKind::kModulo:
            switch (bytes_from_cint_type(instr->GetOperand(0)->type())) {
              case 1:
              case 2:
                convert = "Convert";
              case 3:
                helper = reinterpret_cast<uint64_t>(JITRT_Mod32);
                break;
              case 4:
                helper = reinterpret_cast<uint64_t>(JITRT_Mod64);
                break;
            }
            break;
          case BinaryOpKind::kModuloUnsigned:
            switch (bytes_from_cint_type(instr->GetOperand(0)->type())) {
              case 1:
              case 2:
                convert = "ConvertUnsigned";
              case 3:
                helper = reinterpret_cast<uint64_t>(JITRT_ModUnsigned32);
                break;
              case 4:
                helper = reinterpret_cast<uint64_t>(JITRT_ModUnsigned64);
                break;
            }
            break;
          case BinaryOpKind::kPower:
            switch (bytes_from_cint_type(instr->GetOperand(0)->type())) {
              case 1:
              case 2:
                convert = "Convert";
              case 3:
                helper = reinterpret_cast<uint64_t>(JITRT_Power32);
                break;
              case 4:
                helper = reinterpret_cast<uint64_t>(JITRT_Power64);
                break;
            };
            break;
          case BinaryOpKind::kPowerUnsigned:
            switch (bytes_from_cint_type(instr->GetOperand(0)->type())) {
              case 1:
              case 2:
                convert = "ConvertUnsigned";
              case 3:
                helper = reinterpret_cast<uint64_t>(JITRT_PowerUnsigned32);
                break;
              case 4:
                helper = reinterpret_cast<uint64_t>(JITRT_PowerUnsigned64);
                break;
            };
            break;
          default:
            JIT_CHECK(false, "not implemented");
            break;
        }
        if (helper != 0) {
          std::string left = instr->left()->name();
          std::string right = instr->right()->name();
          if (convert != "") {
            std::string ltmp = GetSafeTempName();
            std::string rtmp = GetSafeTempName();
            std::string type = convert == "Convert" ? "CInt32" : "CUInt32";
            bbb.AppendCode("{} {}:{}, {}", convert, ltmp, type, left);
            bbb.AppendCode("{} {}:{}, {}", convert, rtmp, type, right);
            left = ltmp;
            right = rtmp;
          }
          bbb.AppendCode(
              "Call {} {:#x}, {}, {}", instr->dst(), helper, left, right);
        } else {
          bbb.AppendCode(
              "{} {}, {} {}, {}",
              op,
              instr->dst(),
              extra_arg,
              instr->left(),
              instr->right());
        }

        break;
      }
      case Opcode::kDoubleBinaryOp: {
        auto instr = static_cast<const DoubleBinaryOp*>(&i);
        std::string op;
        uint64_t helper = 0;

        switch (instr->op()) {
          case BinaryOpKind::kAdd: {
            op = "Fadd";
            break;
          }
          case BinaryOpKind::kSubtract: {
            op = "Fsub";
            break;
          }
          case BinaryOpKind::kMultiply: {
            op = "Fmul";
            break;
          }
          case BinaryOpKind::kTrueDivide: {
            op = "Fdiv";
            break;
          }
          case BinaryOpKind::kPower: {
            helper = reinterpret_cast<uint64_t>(JITRT_PowerDouble);
            break;
          }
          default: {
            JIT_CHECK(false, "Invalid operation for DoubleBinaryOp");
            break;
          }
        }

        if (helper != 0) {
          bbb.AppendCode(
              "Call {} {:#x}, {}, {}",
              instr->dst(),
              helper,
              instr->left()->name(),
              instr->right()->name());
          break;
        }
        // Our formatter for Registers tries to be clever with constant values,
        // and this backfires in certain situations (it converts registers to
        // immediates). We have to manually format the name and type here to
        // work around that.
        auto codestr = fmt::format(
            "{} {}, {}:{}, {}:{}",
            op,
            instr->dst(),
            instr->left()->name(),
            instr->left()->type().unspecialized(),
            instr->right()->name(),
            instr->right()->type().unspecialized());
        bbb.AppendCode(codestr);
        break;
      }
      case Opcode::kPrimitiveCompare: {
        auto instr = static_cast<const PrimitiveCompare*>(&i);
        std::string op;
        switch (instr->op()) {
          case PrimitiveCompareOp::kEqual:
            op = "Equal";
            break;
          case PrimitiveCompareOp::kNotEqual:
            op = "NotEqual";
            break;
          case PrimitiveCompareOp::kGreaterThanUnsigned:
            op = "GreaterThanUnsigned";
            break;
          case PrimitiveCompareOp::kGreaterThan:
            op = "GreaterThanSigned";
            break;
          case PrimitiveCompareOp::kLessThanUnsigned:
            op = "LessThanUnsigned";
            break;
          case PrimitiveCompareOp::kLessThan:
            op = "LessThanSigned";
            break;
          case PrimitiveCompareOp::kGreaterThanEqualUnsigned:
            op = "GreaterThanEqualUnsigned";
            break;
          case PrimitiveCompareOp::kGreaterThanEqual:
            op = "GreaterThanEqualSigned";
            break;
          case PrimitiveCompareOp::kLessThanEqualUnsigned:
            op = "LessThanEqualUnsigned";
            break;
          case PrimitiveCompareOp::kLessThanEqual:
            op = "LessThanEqualSigned";
            break;
          default:
            JIT_CHECK(false, "not implemented %d", (int)instr->op());
            break;
        }

        if (instr->left()->type() <= TCDouble ||
            instr->right()->type() <= TCDouble) {
          // Manually format the code string, otherwise registers with literal
          // values end up being treated as immediates, and there's no way to
          // load immediates in an XMM register.
          auto codestr = fmt::format(
              "{} {}, {}:{}, {}:{}",
              op,
              instr->dst(),
              instr->left()->name(),
              instr->left()->type().unspecialized(),
              instr->right()->name(),
              instr->right()->type().unspecialized());
          bbb.AppendCode(codestr);
        } else {
          bbb.AppendCode(
              "{} {} {} {}", op, instr->dst(), instr->left(), instr->right());
        }
        break;
      }
      case Opcode::kPrimitiveBox: {
        auto instr = static_cast<const PrimitiveBox*>(&i);
        std::string src = instr->value()->name();
        Type src_type = instr->value()->type();
        Type box_type = instr->type();
        std::string tmp = GetSafeTempName();
        uint64_t func = 0;

        if (box_type <= TCEnum) {
          JIT_DCHECK(
              src_type <= TCInt64,
              "unboxed enums are represented as int64 in the JIT");
          bbb.AppendCode(
              "Call {}, {:#x}, {}:{}, {:#x}:CUInt64",
              instr->GetOutput(),
              reinterpret_cast<uint64_t>(JITRT_BoxEnum),
              src,
              src_type,
              reinterpret_cast<uint64_t>(box_type.typeSpec()));
          break;
        }

        if (src_type == TNullptr) {
          // special case for an uninitialized variable, we'll
          // load zero
          bbb.AppendCode(
              "Call {}, {:#x}, 0",
              instr->GetOutput(),
              reinterpret_cast<uint64_t>(JITRT_BoxI64));
          break;
        } else if (src_type <= (TCUInt64 | TNullptr)) {
          func = reinterpret_cast<uint64_t>(JITRT_BoxU64);
        } else if (src_type <= (TCInt64 | TNullptr)) {
          func = reinterpret_cast<uint64_t>(JITRT_BoxI64);
        } else if (src_type <= (TCUInt32 | TNullptr)) {
          func = reinterpret_cast<uint64_t>(JITRT_BoxU32);
        } else if (src_type <= (TCInt32 | TNullptr)) {
          func = reinterpret_cast<uint64_t>(JITRT_BoxI32);
        } else if (src_type <= (TCDouble)) {
          func = reinterpret_cast<uint64_t>(JITRT_BoxDouble);
        } else if (src_type <= (TCBool | TCUInt8 | TCUInt16 | TNullptr)) {
          bbb.AppendCode(
              "ConvertUnsigned {}:CUInt32, {}:{}", tmp, src, src_type);
          src = tmp;
          func = reinterpret_cast<uint64_t>(
              (src_type <= TCBool) ? JITRT_BoxBool : JITRT_BoxU32);
          src_type = TCUInt32;
        } else if (src_type <= (TCInt8 | TCInt16 | TNullptr)) {
          bbb.AppendCode("Convert {}:CInt32, {}:{}", tmp, src, src_type);
          src = tmp;
          src_type = TCInt32;
          func = reinterpret_cast<uint64_t>(JITRT_BoxI32);
        }

        JIT_CHECK(
            func != 0, "unknown box type %s", src_type.toString().c_str());

        bbb.AppendCode(
            "Call {}, {:#x}, {}:{}", instr->GetOutput(), func, src, src_type);

        break;
      }

      case Opcode::kIsNegativeAndErrOccurred: {
        auto instr = static_cast<const IsNegativeAndErrOccurred*>(&i);
        std::string src_name = instr->reg()->name();
        Type src_type = instr->reg()->type();
        uint64_t func = 0;

        // Because a failed unbox to unsigned smuggles the bit pattern for a
        // signed -1 in the unsigned value, we can likewise just treat unsigned
        // as signed for purposes of checking for -1 here.
        if (src_type <= (TCInt64 | TCUInt64)) {
          func = reinterpret_cast<uint64_t>(JITRT_IsNegativeAndErrOccurred_64);
        } else {
          func = reinterpret_cast<uint64_t>(JITRT_IsNegativeAndErrOccurred_32);
          // We do have to widen to at least 32 bits due to calling convention
          // always passing a minimum of 32 bits.
          if (src_type <= (TCBool | TCInt8 | TCUInt8 | TCInt16 | TCUInt16)) {
            std::string tmp_name = GetSafeTempName();
            bbb.AppendCode(
                "Convert {}:CInt32, {}:{}", tmp_name, src_name, src_type);
            src_name = tmp_name;
            src_type = TCInt32;
          }
        }
        bbb.AppendCode(
            "Call {}, {:#x}, {}:{}", instr->dst(), func, src_name, src_type);
        break;
      }

      case Opcode::kPrimitiveUnbox: {
        auto instr = static_cast<const PrimitiveUnbox*>(&i);
        Type ty = instr->type();
        uint64_t func = 0;
        if (ty <= TCBool) {
          uint64_t true_addr = reinterpret_cast<uint64_t>(Py_True);
          bbb.AppendCode(
              "Equal {} {} {:#x}", instr->dst(), instr->value(), true_addr);
        } else if (ty <= TCDouble) {
          // For doubles, we can directly load the offset into the destination.
          bbb.AppendCode(
              "Load {}, {}, {}",
              instr->dst(),
              instr->value(),
              offsetof(PyFloatObject, ob_fval));
        } else if (ty <= TCUInt64) {
          func = reinterpret_cast<uint64_t>(JITRT_UnboxU64);
        } else if (ty <= TCUInt32) {
          func = reinterpret_cast<uint64_t>(JITRT_UnboxU32);
        } else if (ty <= TCUInt16) {
          func = reinterpret_cast<uint64_t>(JITRT_UnboxU16);
        } else if (ty <= TCUInt8) {
          func = reinterpret_cast<uint64_t>(JITRT_UnboxU8);
        } else if (ty <= TCInt64) {
          func = reinterpret_cast<uint64_t>(JITRT_UnboxI64);
        } else if (ty <= TCInt32) {
          func = reinterpret_cast<uint64_t>(JITRT_UnboxI32);
        } else if (ty <= TCInt16) {
          func = reinterpret_cast<uint64_t>(JITRT_UnboxI16);
        } else if (ty <= TCInt8) {
          func = reinterpret_cast<uint64_t>(JITRT_UnboxI8);
        } else if (ty <= TCEnum) {
          func = reinterpret_cast<uint64_t>(JITRT_UnboxEnum);
        } else {
          JIT_CHECK(false, "Cannot unbox type %s", ty.toString().c_str());
        }

        if (func) {
          bbb.AppendCode(
              "Call {}, {:#x}, {}", instr->dst(), func, instr->value());
        }

        break;
      }
      case Opcode::kPrimitiveUnaryOp: {
        auto instr = static_cast<const PrimitiveUnaryOp*>(&i);
        switch (instr->op()) {
          case PrimitiveUnaryOpKind::kNegateInt:
            bbb.AppendCode("Negate {}, {}", instr->GetOutput(), instr->value());
            break;
          case PrimitiveUnaryOpKind::kInvertInt:
            bbb.AppendCode("Invert {}, {}", instr->GetOutput(), instr->value());
            break;
          case PrimitiveUnaryOpKind::kNotInt:
            bbb.AppendCode(
                "Equal {}, {}, 0", instr->GetOutput(), instr->value());
            break;
          default:
            JIT_CHECK(false, "not implemented unary op %d", (int)instr->op());
            break;
        }
        break;
      }
      case Opcode::kReturn: {
        // TODO support constant operand to Return
        Register* reg = i.GetOperand(0);
        bbb.AppendCode(
            "Return {}:{}", reg->name(), reg->type().unspecialized());
        break;
      }
      case Opcode::kSetCurrentAwaiter: {
        bbb.AppendCode(
            "Invoke {:#x}, {}, __asm_tstate",
            reinterpret_cast<int64_t>(JITRT_SetCurrentAwaiter),
            i.GetOperand(0));
        break;
      }
      case Opcode::kYieldValue: {
        auto instr = static_cast<const YieldValue*>(&i);
        std::stringstream ss;
        ss << "YieldValue " << instr->dst()->name() << ", __asm_tstate,"
           << instr->reg()->name();
        append_yield_live_regs(ss, instr);
        bbb.AppendCode(ss.str());
        break;
      }
      case Opcode::kInitialYield: {
        auto instr = static_cast<const InitialYield*>(&i);
        std::stringstream ss;
        ss << "YieldInitial " << instr->dst()->name() << ", __asm_tstate, ";
        append_yield_live_regs(ss, instr);
        bbb.AppendCode(ss.str());
        break;
      }
      case Opcode::kYieldAndYieldFrom:
      case Opcode::kYieldFrom: {
        std::stringstream ss;
        ss << (opcode == Opcode::kYieldFrom ? "YieldFrom "
                                            : "YieldFromSkipInitialSend ")
           << i.GetOutput()->name() << ", __asm_tstate, "
           << i.GetOperand(0)->name() << ", " << i.GetOperand(1)->name();
        append_yield_live_regs(ss, static_cast<const YieldBase*>(&i));
        bbb.AppendCode(ss.str());
        break;
      }
      case Opcode::kAssign: {
        auto instr = static_cast<const Assign*>(&i);
        bbb.AppendCode("Assign {}, {}", instr->dst(), instr->reg());
        break;
      }
      case Opcode::kCondBranch:
      case Opcode::kCondBranchIterNotDone: {
        auto instr = static_cast<const CondBranchBase*>(&i);

        auto cond = instr->GetOperand(0);
        auto tmp = cond->name();

        if (instr->opcode() == Opcode::kCondBranchIterNotDone) {
          tmp = GetSafeTempName();
          auto iter_done_addr =
              reinterpret_cast<uint64_t>(&jit::g_iterDoneSentinel);
          bbb.AppendCode("Sub {}, {}, {}", tmp, cond, iter_done_addr);
        }

        bbb.AppendCode(
            "CondBranch {}, {}, {}",
            tmp,
            instr->true_bb()->id,
            instr->false_bb()->id);
        break;
      }
      case Opcode::kCondBranchCheckType: {
        auto& instr = static_cast<const CondBranchCheckType&>(i);
        auto type = instr.type();
        auto eq_res_var = GetSafeTempName();
        if (type.isExact()) {
          auto type_var = GetSafeTempName();
          bbb.AppendCode(
              "Load {}, {}, {}",
              type_var,
              instr.reg(),
              offsetof(PyObject, ob_type));
          bbb.AppendCode(
              "Equal {}, {}, {:#x}",
              eq_res_var,
              type_var,
              reinterpret_cast<uint64_t>(instr.type().uniquePyType()));
        } else {
          emitSubclassCheck(bbb, eq_res_var, instr.GetOperand(0), type);
        }
        bbb.AppendCode(
            "CondBranch {}, {}, {}",
            eq_res_var,
            instr.true_bb()->id,
            instr.false_bb()->id);
        break;
      }
      case Opcode::kDeleteAttr: {
        std::string tmp = GetSafeTempName();
        auto instr = static_cast<const DeleteAttr*>(&i);
        PyCodeObject* code = instr->frameState()->code;
        PyObject* name = PyTuple_GET_ITEM(code->co_names, instr->name_idx());
        bbb.AppendCode(
            "Call {}:CInt32, {}, {}, {}, 0",
            tmp,
            reinterpret_cast<uint64_t>(PyObject_SetAttr),
            instr->GetOperand(0),
            reinterpret_cast<uint64_t>(name));
        bbb.AppendCode(MakeGuard("NotNegative", *instr, tmp));
        break;
      }
      case Opcode::kLoadAttr: {
        auto instr = static_cast<const LoadAttr*>(&i);
        std::string tmp_id = GetSafeTempName();
        auto func = reinterpret_cast<uint64_t>(&jit::LoadAttrCache::invoke);
        auto cache = env_->code_rt->AllocateLoadAttrCache();
        PyCodeObject* code = instr->frameState()->code;
        PyObject* name = PyTuple_GET_ITEM(code->co_names, instr->name_idx());

        bbb.AppendCode(
            "Move {0}, {1:#x}\n"
            "Call {2}, {3:#x}, {4:#x}, {5}, {0}",
            tmp_id,
            reinterpret_cast<uint64_t>(name),
            instr->dst(),
            func,
            reinterpret_cast<uint64_t>(cache),
            instr->GetOperand(0));
        break;
      }
      case Opcode::kLoadAttrSpecial: {
        auto instr = static_cast<const LoadAttrSpecial*>(&i);
        bbb.AppendCode(
            "Call {}, {}, __asm_tstate, {}, {}",
            instr->GetOutput(),
            reinterpret_cast<intptr_t>(special_lookup),
            instr->GetOperand(0),
            reinterpret_cast<intptr_t>(instr->id()));
        break;
      }
      case Opcode::kLoadTypeAttrCacheItem: {
        auto instr = static_cast<const LoadTypeAttrCacheItem*>(&i);
        auto cache = env_->code_rt->getLoadTypeAttrCache(instr->cache_id());
        auto addr =
            reinterpret_cast<uint64_t>(&(cache->items[instr->item_idx()]));
        bbb.AppendCode("Load {}, {:#x}", instr->GetOutput(), addr);
        break;
      }
      case Opcode::kFillTypeAttrCache: {
        auto instr = static_cast<const FillTypeAttrCache*>(&i);
        auto cache = reinterpret_cast<uint64_t>(
            env_->code_rt->getLoadTypeAttrCache(instr->cache_id()));
        auto func = reinterpret_cast<uint64_t>(&jit::LoadTypeAttrCache::invoke);
        std::string tmp_id = GetSafeTempName();
        PyCodeObject* code = instr->frameState()->code;
        PyObject* name = PyTuple_GET_ITEM(code->co_names, instr->name_idx());
        bbb.AppendCode(
            "Move {}, {:#x}", tmp_id, reinterpret_cast<uint64_t>(name));
        bbb.AppendCode(
            "Call {}, {:#x}, {:#x}, {}, {}",
            instr->GetOutput(),
            func,
            cache,
            instr->receiver(),
            tmp_id);
        break;
      }
      case Opcode::kLoadMethod: {
        auto instr = static_cast<const LoadMethod*>(&i);

        std::string tmp_id = GetSafeTempName();
        PyCodeObject* code = instr->frameState()->code;
        PyObject* name = PyTuple_GET_ITEM(code->co_names, instr->name_idx());

        bbb.AppendCode(
            "Move {}, {:#x}", tmp_id, reinterpret_cast<uint64_t>(name));

        if (env_->optimizable_load_call_methods_.count(instr)) {
          auto func = reinterpret_cast<uint64_t>(JITRT_GetMethod);
          auto cache_entry = env_->code_rt->AllocateLoadMethodCache();
          bbb.AppendCode(
              "Call {}, {:#x}, {}, {}, {:#x}\n",
              instr->dst(),
              func,
              instr->receiver(),
              tmp_id,
              reinterpret_cast<uint64_t>(cache_entry));
        } else {
          auto func = reinterpret_cast<uint64_t>(PyObject_GetAttr);
          bbb.AppendCode(
              "Call {}, {:#x}, {}, {}\n",
              instr->dst(),
              func,
              instr->receiver(),
              tmp_id);
        }
        break;
      }
      case Opcode::kLoadMethodSuper: {
        auto instr = static_cast<const LoadMethodSuper*>(&i);
        std::string tmp_id = GetSafeTempName();
        PyCodeObject* code = instr->frameState()->code;
        PyObject* name = PyTuple_GET_ITEM(code->co_names, instr->name_idx());
        bbb.AppendCode(
            "Move {}, {:#x}", tmp_id, reinterpret_cast<uint64_t>(name));

        if (env_->optimizable_load_call_methods_.count(instr)) {
          auto func = reinterpret_cast<uint64_t>(JITRT_GetMethodFromSuper);
          bbb.AppendCode(
              "Call {}, {:#x}, {}, {}, {}, {}, {}\n",
              instr->dst(),
              func,
              instr->global_super(),
              instr->type(),
              instr->receiver(),
              tmp_id,
              instr->no_args_in_super_call() ? 1 : 0);
        } else {
          auto func = reinterpret_cast<uint64_t>(JITRT_GetAttrFromSuper);
          bbb.AppendCode(
              "Call {}, {:#x}, {}, {}, {}, {}, {}\n",
              instr->dst(),
              func,
              instr->global_super(),
              instr->type(),
              instr->receiver(),
              tmp_id,
              instr->no_args_in_super_call() ? 1 : 0);
        }
        break;
      }
      case Opcode::kLoadAttrSuper: {
        auto instr = static_cast<const LoadAttrSuper*>(&i);
        std::string tmp_id = GetSafeTempName();
        PyCodeObject* code = instr->frameState()->code;
        PyObject* name = PyTuple_GET_ITEM(code->co_names, instr->name_idx());

        bbb.AppendCode(
            "Move {}, {:#x}", tmp_id, reinterpret_cast<uint64_t>(name));

        auto func = reinterpret_cast<uint64_t>(JITRT_GetAttrFromSuper);
        bbb.AppendCode(
            "Call {}, {:#x}, {}, {}, {}, {}, {}\n",
            instr->dst(),
            func,
            instr->global_super(),
            instr->type(),
            instr->receiver(),
            tmp_id,
            instr->no_args_in_super_call() ? 1 : 0);

        break;
      }
      case Opcode::kBinaryOp: {
        auto bin_op = static_cast<const BinaryOp*>(&i);

        // NB: This needs to be in the order that the values appear in the
        // BinaryOpKind enum
        static const uint64_t helpers[] = {
            reinterpret_cast<uint64_t>(PyNumber_Add),
            reinterpret_cast<uint64_t>(PyNumber_And),
            reinterpret_cast<uint64_t>(PyNumber_FloorDivide),
            reinterpret_cast<uint64_t>(PyNumber_Lshift),
            reinterpret_cast<uint64_t>(PyNumber_MatrixMultiply),
            reinterpret_cast<uint64_t>(PyNumber_Remainder),
            reinterpret_cast<uint64_t>(PyNumber_Multiply),
            reinterpret_cast<uint64_t>(PyNumber_Or),
            reinterpret_cast<uint64_t>(PyNumber_Power),
            reinterpret_cast<uint64_t>(PyNumber_Rshift),
            reinterpret_cast<uint64_t>(PyObject_GetItem),
            reinterpret_cast<uint64_t>(PyNumber_Subtract),
            reinterpret_cast<uint64_t>(PyNumber_TrueDivide),
            reinterpret_cast<uint64_t>(PyNumber_Xor),
        };
        JIT_CHECK(
            static_cast<unsigned long>(bin_op->op()) < sizeof(helpers),
            "unsupported binop");
        auto op_kind = static_cast<int>(bin_op->op());

        if (bin_op->op() != BinaryOpKind::kPower) {
          bbb.AppendCode(
              "Call {}, {:#x}, {}, {}",
              bin_op->dst(),
              helpers[op_kind],
              bin_op->left(),
              bin_op->right());
        } else {
          bbb.AppendCode(
              "Call {}, {:#x}, {}, {}, {:#x}",
              bin_op->dst(),
              helpers[op_kind],
              bin_op->left(),
              bin_op->right(),
              reinterpret_cast<uint64_t>(Py_None));
        }
        break;
      }
      case Opcode::kLongBinaryOp: {
        auto instr = static_cast<const LongBinaryOp*>(&i);

        // NB: This needs to be in the order that the values appear in the
        // BinaryOpKind enum
        static const uint64_t helpers[] = {
            reinterpret_cast<uint64_t>(PyLong_Type.tp_as_number->nb_add),
            reinterpret_cast<uint64_t>(PyLong_Type.tp_as_number->nb_and),
            reinterpret_cast<uint64_t>(
                PyLong_Type.tp_as_number->nb_floor_divide),
            reinterpret_cast<uint64_t>(PyLong_Type.tp_as_number->nb_lshift),
            0, // unsupported: matrix multiply
            reinterpret_cast<uint64_t>(PyLong_Type.tp_as_number->nb_remainder),
            reinterpret_cast<uint64_t>(PyLong_Type.tp_as_number->nb_multiply),
            reinterpret_cast<uint64_t>(PyLong_Type.tp_as_number->nb_or),
            reinterpret_cast<uint64_t>(PyLong_Type.tp_as_number->nb_power),
            reinterpret_cast<uint64_t>(PyLong_Type.tp_as_number->nb_rshift),
            0, // unsupported: getitem
            reinterpret_cast<uint64_t>(PyLong_Type.tp_as_number->nb_subtract),
            reinterpret_cast<uint64_t>(
                PyLong_Type.tp_as_number->nb_true_divide),
            reinterpret_cast<uint64_t>(PyLong_Type.tp_as_number->nb_xor),
        };
        auto op_kind = static_cast<unsigned long>(instr->op());
        JIT_CHECK(op_kind < sizeof(helpers), "unsupported binop");
        uint64_t helper = helpers[op_kind];
        JIT_CHECK(helper != 0, "unsupported binop");
        if (instr->op() == BinaryOpKind::kPower) {
          bbb.AppendCode(
              "Call {}, {:#x}, {}, {}, {:#x}",
              instr->dst(),
              helper,
              instr->left(),
              instr->right(),
              reinterpret_cast<uint64_t>(Py_None));
        } else {
          bbb.AppendCode(
              "Call {}, {:#x}, {}, {}",
              instr->dst(),
              helper,
              instr->left(),
              instr->right());
        }
        break;
      }
      case Opcode::kUnaryOp: {
        auto unary_op = static_cast<const UnaryOp*>(&i);

        // NB: This needs to be in the order that the values appear in the
        // UnaryOpKind enum
        static const uint64_t helpers[] = {
            reinterpret_cast<uint64_t>(JITRT_UnaryNot),
            reinterpret_cast<uint64_t>(PyNumber_Negative),
            reinterpret_cast<uint64_t>(PyNumber_Positive),
            reinterpret_cast<uint64_t>(PyNumber_Invert),
        };
        JIT_CHECK(
            static_cast<unsigned long>(unary_op->op()) < sizeof(helpers),
            "unsupported unaryop");

        auto op_kind = static_cast<int>(unary_op->op());
        bbb.AppendCode(
            "Call {}, {:#x}, {}",
            unary_op->dst(),
            helpers[op_kind],
            unary_op->operand());
        break;
      }
      case Opcode::kIsSubtype: {
        auto instr = static_cast<const IsSubtype*>(&i);
        bbb.AppendCode(
            "Call {}, {:#x}, {}, {}",
            instr->dst(),
            reinterpret_cast<uint64_t>(PyType_IsSubtype),
            instr->GetOperand(0),
            instr->GetOperand(1));
        break;
      }
      case Opcode::kIsInstance: {
        auto instr = static_cast<const IsInstance*>(&i);
        bbb.AppendCode(
            "Call {}, {:#x}, {}, {}",
            instr->dst(),
            reinterpret_cast<uint64_t>(PyObject_IsInstance),
            instr->GetOperand(0),
            instr->GetOperand(1));
        break;
      }
      case Opcode::kCompare: {
        auto instr = static_cast<const Compare*>(&i);

        bbb.AppendCode(
            "Call {}, {:#x}, __asm_tstate, {}, {}, {}",
            instr->dst(),
            reinterpret_cast<uint64_t>(cmp_outcome),
            static_cast<int>(instr->op()),
            instr->left(),
            instr->right());
        break;
      }
      case Opcode::kLongCompare: {
        auto instr = static_cast<const LongCompare*>(&i);

        bbb.AppendCode(
            "Call {}, {:#x}, {}, {}, {}",
            instr->dst(),
            reinterpret_cast<uint64_t>(PyLong_Type.tp_richcompare),
            instr->left(),
            instr->right(),
            static_cast<int>(instr->op()));
        break;
      }
      case Opcode::kCompareBool: {
        auto instr = static_cast<const CompareBool*>(&i);

        if (instr->op() == CompareOp::kIn) {
          if (instr->right()->type() <= TUnicodeExact) {
            bbb.AppendCode(
                "Call {}, {:#x}, {}, {}",
                instr->dst(),
                reinterpret_cast<uint64_t>(PyUnicode_Contains),
                instr->right(),
                instr->left());
          } else {
            bbb.AppendCode(
                "Call {}, {:#x}, {}, {}",
                instr->dst(),
                reinterpret_cast<uint64_t>(PySequence_Contains),
                instr->right(),
                instr->left());
          }
        } else if (instr->op() == CompareOp::kNotIn) {
          bbb.AppendCode(
              "Call {}, {:#x}, {}, {}",
              instr->dst(),
              reinterpret_cast<uint64_t>(JITRT_NotContains),
              instr->right(),
              instr->left());
        } else if (
            (instr->op() == CompareOp::kEqual ||
             instr->op() == CompareOp::kNotEqual) &&
            (instr->left()->type() <= TUnicodeExact ||
             instr->right()->type() <= TUnicodeExact)) {
          bbb.AppendCode(
              "Call {}, {:#x}, {}, {}, {}",
              instr->dst(),
              reinterpret_cast<uint64_t>(JITRT_UnicodeEquals),
              instr->left(),
              instr->right(),
              static_cast<int>(instr->op()));
        } else if (
            (instr->op() == CompareOp::kEqual ||
             instr->op() == CompareOp::kNotEqual) &&
            (isTypeWithReasonablePointerEq(instr->left()->type()) ||
             isTypeWithReasonablePointerEq(instr->right()->type()))) {
          bbb.AppendCode(
              "Call {}, {:#x}, {}, {}, {}",
              instr->dst(),
              reinterpret_cast<uint64_t>(PyObject_RichCompareBool),
              instr->left(),
              instr->right(),
              static_cast<int>(instr->op()));
        } else {
          bbb.AppendCode(
              "Call {}, {:#x}, {}, {}, {}",
              instr->dst(),
              reinterpret_cast<uint64_t>(JITRT_RichCompareBool),
              instr->left(),
              instr->right(),
              static_cast<int>(instr->op()));
        }
        break;
      }
      case Opcode::kIncref: {
        MakeIncref(bbb, i, false);
        break;
      }
      case Opcode::kXIncref: {
        MakeIncref(bbb, i, true);
        break;
      }
      case Opcode::kDecref: {
        MakeDecref(bbb, i, false);
        break;
      }
      case Opcode::kXDecref: {
        MakeDecref(bbb, i, true);
        break;
      }
      case Opcode::kBatchDecref: {
        auto instr = static_cast<const BatchDecref*>(&i);

        std::stringstream ss;
        ss << "BatchDecref ";
        auto nargs = instr->NumOperands();
        for (size_t i = 0; i < nargs; i++) {
          ss << (i == 0 ? "" : ", ") << *instr->GetOperand(i);
        }
        bbb.AppendCode(ss.str());
        break;
      }
      case Opcode::kDeopt: {
        bbb.AppendCode(
            MakeGuard("AlwaysFail", static_cast<const DeoptBase&>(i)));
        break;
      }
      case Opcode::kDeoptPatchpoint: {
        const auto& instr = static_cast<const DeoptPatchpoint&>(i);
        auto deopt_meta = jit::DeoptMetadata::fromInstr(
            instr, env_->optimizable_load_call_methods_, env_->code_rt);
        auto id = env_->rt->addDeoptMetadata(std::move(deopt_meta));
        std::stringstream ss;
        ss << "DeoptPatchpoint " << static_cast<void*>(instr.patcher()) << ", "
           << id;
        auto& regstates = instr.live_regs();
        for (const auto& reg_state : regstates) {
          ss << ", " << *reg_state.reg;
        }
        bbb.AppendCode(ss.str());
        break;
      }
      case Opcode::kRaiseAwaitableError: {
        const auto& instr = static_cast<const RaiseAwaitableError&>(i);
        bbb.AppendCode(
            "Invoke {}, __asm_tstate, {}, {}",
            reinterpret_cast<intptr_t>(format_awaitable_error),
            instr.GetOperand(0),
            instr.with_opcode());
        bbb.AppendCode(MakeGuard("AlwaysFail", instr));
        break;
      }
      case Opcode::kCheckExc:
      case Opcode::kCheckField:
      case Opcode::kCheckFreevar:
      case Opcode::kCheckNeg:
      case Opcode::kCheckVar:
      case Opcode::kGuard:
      case Opcode::kGuardIs: {
        const auto& instr = static_cast<const DeoptBase&>(i);
        std::string kind = "NotZero";
        if (instr.IsCheckNeg()) {
          kind = "NotNegative";
        } else if (instr.IsGuardIs()) {
          kind = "Is";
        }
        bbb.AppendCode(MakeGuard(kind, instr, instr.GetOperand(0)->name()));
        break;
      }
      case Opcode::kGuardType: {
        const auto& instr = static_cast<const DeoptBase&>(i);
        bbb.AppendCode(
            MakeGuard("HasType", instr, instr.GetOperand(0)->name()));
        break;
      }
      case Opcode::kRefineType: {
        break;
      }
      case Opcode::kLoadGlobalCached: {
        ThreadedCompileSerialize guard;
        auto instr = static_cast<const LoadGlobalCached*>(&i);
        PyObject* globals = instr->globals();
        env_->code_rt->addReference(globals);
        PyObject* name =
            PyTuple_GET_ITEM(instr->code()->co_names, instr->name_idx());
        auto cache = env_->rt->findGlobalCache(globals, name);
        bbb.AppendCode(
            "Load {}, {:#x}",
            instr->GetOutput(),
            reinterpret_cast<uint64_t>(cache.valuePtr()));
        break;
      }
      case Opcode::kLoadGlobal: {
        auto instr = static_cast<const LoadGlobal*>(&i);
        PyObject* builtins = instr->frameState()->builtins;
        env_->code_rt->addReference(builtins);
        PyObject* globals = instr->frameState()->globals;
        env_->code_rt->addReference(globals);
        PyObject* name = PyTuple_GET_ITEM(
            instr->frameState()->code->co_names, instr->name_idx());
        bbb.AppendCode(
            "Call {}, {:#x}, {}, {}, {}",
            instr->GetOutput(),
            reinterpret_cast<uint64_t>(JITRT_LoadGlobal),
            globals,
            builtins,
            name);
        break;
      }
      case Opcode::kStoreAttr: {
        auto instr = static_cast<const StoreAttr*>(&i);

        std::string tmp_id = GetSafeTempName();

        PyCodeObject* code = instr->frameState()->code;
        auto ob_item =
            reinterpret_cast<PyTupleObject*>(code->co_names)->ob_item;
        StoreAttrCache* cache = env_->code_rt->allocateStoreAttrCache();
        bbb.AppendCode(
            "Call {}, {:#x}, {:#x}, {}, {:#x}, {}",
            instr->dst(),
            reinterpret_cast<uint64_t>(&jit::StoreAttrCache::invoke),
            reinterpret_cast<uint64_t>(cache),
            instr->GetOperand(0),
            reinterpret_cast<uint64_t>(ob_item[instr->name_idx()]),
            instr->GetOperand(1));

        break;
      }
      case Opcode::kVectorCall: {
        auto& instr = static_cast<const VectorCallBase&>(i);
        if (TranslateSpecializedCall(bbb, instr)) {
          break;
        }
        size_t flags = instr.isAwaited() ? _Py_AWAITED_CALL_MARKER : 0;
        emitVectorCall(bbb, instr, flags, false);
        break;
      }
      case Opcode::kVectorCallKW: {
        auto& instr = static_cast<const VectorCallBase&>(i);
        size_t flags = instr.isAwaited() ? _Py_AWAITED_CALL_MARKER : 0;
        emitVectorCall(bbb, instr, flags, true);
        break;
      }
      case Opcode::kVectorCallStatic: {
        auto& instr = static_cast<const VectorCallBase&>(i);
        if (TranslateSpecializedCall(bbb, instr)) {
          break;
        }
        size_t flags = _Py_VECTORCALL_INVOKED_STATICALLY |
            (instr.isAwaited() ? _Py_AWAITED_CALL_MARKER : 0);
        emitVectorCall(bbb, instr, flags, false);
        break;
      }
      case Opcode::kCallCFunc: {
        auto& instr = static_cast<const CallCFunc&>(i);

        std::stringstream ss;
        ss << "Call " << *instr.dst() << ", " << instr.funcAddr();
        for (size_t i = 0; i < instr.NumOperands(); i++) {
          ss << ", " << *instr.GetOperand(i);
        }

        bbb.AppendCode(ss.str());
        break;
      }
      case Opcode::kCallEx: {
        auto& instr = static_cast<const CallEx&>(i);
        auto rt_helper = instr.isAwaited() ? JITRT_CallFunctionExAwaited
                                           : JITRT_CallFunctionEx;
        bbb.AppendCode(
            "Call {}, {:#x}, {}, {}, 0",
            instr.dst()->name(),
            reinterpret_cast<uint64_t>(rt_helper),
            instr.func()->name(),
            instr.pargs()->name());
        break;
      }
      case Opcode::kCallExKw: {
        auto& instr = static_cast<const CallExKw&>(i);
        auto rt_helper = instr.isAwaited() ? JITRT_CallFunctionExAwaited
                                           : JITRT_CallFunctionEx;
        bbb.AppendCode(
            "Call {}, {:#x}, {}, {}, {}",
            instr.dst()->name(),
            reinterpret_cast<uint64_t>(rt_helper),
            instr.func()->name(),
            instr.pargs()->name(),
            instr.kwargs()->name());
        break;
      }
      case Opcode::kCallMethod: {
        auto instr = static_cast<const CallMethod*>(&i);

        std::string s = fmt::format("Vectorcall {}", *instr->dst());
        size_t flags = instr->isAwaited() ? _Py_AWAITED_CALL_MARKER : 0;
        if (env_->optimizable_load_call_methods_.count(instr)) {
          format_to(
              s,
              ", {}, {}, {}, {}",
              reinterpret_cast<uint64_t>(JITRT_CallMethod),
              flags,
              *instr->func(),
              *instr->self());
        } else {
          format_to(
              s,
              ", {}, {}, {}",
              reinterpret_cast<uint64_t>(_PyObject_Vectorcall),
              flags,
              *instr->func());
        }
        for (size_t i = 0, nargs = instr->NumArgs(); i < nargs; i++) {
          format_to(s, ", {}", *instr->arg(i));
        }

        bbb.AppendCode(s + ", 0"); /* kwnames */
        break;
      }

      case Opcode::kCallStatic: {
        auto instr = static_cast<const CallStatic*>(&i);
        auto nargs = instr->NumOperands();

        std::stringstream ss;
        ss << "Call " << instr->dst()->name() << ", "
           << reinterpret_cast<uint64_t>(instr->addr());

        for (size_t i = 0; i < nargs; i++) {
          Type src_type = instr->GetOperand(i)->type();
          if (src_type <= (TCBool | TCUInt8 | TCUInt16)) {
            std::string tmp = GetSafeTempName();
            bbb.AppendCode(
                "ConvertUnsigned {}:CUInt64, {}", tmp, instr->GetOperand(i));
            ss << ", " << tmp << ":CUInt64";
          } else if (src_type <= (TCInt8 | TCInt16)) {
            std::string tmp = GetSafeTempName();
            bbb.AppendCode("Convert {}:CInt64, {}", tmp, instr->GetOperand(i));
            ss << ", " << tmp << ":CInt64";
          } else {
            ss << ", " << instr->GetOperand(i)->name();
          }
        }

        bbb.AppendCode(ss.str());
        break;
      }
      case Opcode::kCallStaticRetVoid: {
        auto instr = static_cast<const CallStaticRetVoid*>(&i);
        auto nargs = instr->NumOperands();

        std::stringstream ss;
        ss << "Invoke " << reinterpret_cast<uint64_t>(instr->addr());

        for (size_t i = 0; i < nargs; i++) {
          ss << ", " << instr->GetOperand(i)->name();
        }

        bbb.AppendCode(ss.str());
        break;
      }
      case Opcode::kInvokeStaticFunction: {
        ThreadedCompileSerialize guard;

        auto instr = static_cast<const InvokeStaticFunction*>(&i);
        auto nargs = instr->NumOperands();
        PyFunctionObject* func = instr->func();

        std::stringstream ss;
        JIT_CHECK(
            !usesRuntimeFunc(func->func_code),
            "Can't statically invoke given function: %s",
            PyUnicode_AsUTF8(func->func_qualname));
        if (_PyJIT_IsCompiled((PyObject*)func)) {
          ss << fmt::format(
              "Call {}, {}",
              instr->dst(),
              reinterpret_cast<uint64_t>(
                  JITRT_GET_STATIC_ENTRY(func->vectorcall)));
        } else {
          void** indir = env_->rt->findFunctionEntryCache(func);
          env_->function_indirections.emplace(func, indir);
          std::string tmp_id = GetSafeTempName();
          bbb.AppendCode(
              "Load {}, {:#x}", tmp_id, reinterpret_cast<uint64_t>(indir));
          ss << "Call " << instr->dst()->name() << ", " << tmp_id;
        }

        for (size_t i = 0; i < nargs; i++) {
          ss << ", " << instr->GetOperand(i)->name();
        }

        bbb.AppendCode(ss.str());

        // functions that return primitives will signal error via edx/xmm1
        std::string err_indicator;
        Type ret_type = instr->ret_type();
        if (ret_type <= TCDouble) {
          err_indicator = "reg:xmm1";
        } else if (ret_type <= TPrimitive) {
          err_indicator = "reg:edx";
        } else {
          err_indicator = instr->GetOutput()->name();
        }
        bbb.AppendCode(MakeGuard(
            "NotZero", static_cast<const DeoptBase&>(i), err_indicator));
        break;
      }

      case Opcode::kInvokeMethod: {
        auto instr = static_cast<const InvokeMethod*>(&i);

        std::stringstream ss;
        size_t flags = instr->isAwaited() ? _Py_AWAITED_CALL_MARKER : 0;
        if (instr->isClassmethod()) {
          ss << "Vectorcall " << *instr->dst() << ", "
             << reinterpret_cast<uint64_t>(JITRT_InvokeClassMethod) << ", "
             << flags << ", " << instr->slot();
        } else {
          ss << "Vectorcall " << *instr->dst() << ", "
             << reinterpret_cast<uint64_t>(JITRT_InvokeMethod) << ", " << flags
             << ", " << instr->slot();
        }

        auto nargs = instr->NumOperands();
        for (size_t i = 0; i < nargs; i++) {
          ss << ", " << *instr->GetOperand(i);
        }

        ss << ", 0"; /* kwnames */

        bbb.AppendCode(ss.str());
        break;
      }

      case Opcode::kLoadField: {
        auto instr = static_cast<const LoadField*>(&i);
        bbb.AppendCode(
            "Load {}, {}, {}",
            instr->GetOutput(),
            instr->receiver(),
            instr->offset());
        break;
      }

      case Opcode::kLoadFieldAddress: {
        auto instr = static_cast<const LoadFieldAddress*>(&i);
        bbb.AppendCode(
            "Lea {}, {}, {}",
            instr->GetOutput(),
            instr->object(),
            instr->offset());
        break;
      }

      case Opcode::kStoreField: {
        auto instr = static_cast<const StoreField*>(&i);
        bbb.AppendCode(
            "Store {}, {}, {:#x}",
            instr->value(),
            instr->receiver(),
            instr->offset());
        break;
      }

      case Opcode::kCast: {
        uint64_t func;
        auto instr = static_cast<const Cast*>(&i);
        bool pass_type = true;
        if (instr->iserror()) {
          if (instr->pytype() == &PyFloat_Type) {
            pass_type = false;
            func = reinterpret_cast<uint64_t>(
                instr->optional() ? JITRT_CastToFloatOptional
                                  : JITRT_CastToFloat);
          } else if (instr->exact()) {
            func = reinterpret_cast<uint64_t>(
                instr->optional() ? JITRT_CastOptionalExact : JITRT_CastExact);
          } else {
            func = reinterpret_cast<uint64_t>(
                instr->optional() ? JITRT_CastOptional : JITRT_Cast);
          }
        } else {
          if (instr->pytype() == &PyFloat_Type) {
            pass_type = false;
            func = reinterpret_cast<uint64_t>(
                instr->optional() ? JITRT_TypeCheckFloatOptional
                                  : JITRT_TypeCheckFloat);
          } else if (instr->exact()) {
            func = reinterpret_cast<uint64_t>(
                instr->optional() ? JITRT_TypeCheckOptionalExact
                                  : JITRT_TypeCheckExact);
          } else {
            func = reinterpret_cast<uint64_t>(
                instr->optional() ? JITRT_TypeCheckOptional : JITRT_TypeCheck);
          }
        }

        if (pass_type) {
          bbb.AppendCode(
              "Call {}, {:#x}, {}, {:#x}\n",
              instr->dst(),
              func,
              instr->value(),
              reinterpret_cast<uint64_t>(instr->pytype()));
        } else {
          bbb.AppendCode(
              "Call {}, {:#x}, {}\n", instr->dst(), func, instr->value());
        }
        break;
      }

      case Opcode::kTpAlloc: {
        auto instr = static_cast<const TpAlloc*>(&i);

        bbb.AppendCode(
            "Call {}, {:#x}, {:#x}, {:#x}\n",
            instr->dst(),
            reinterpret_cast<uint64_t>(instr->pytype()->tp_alloc),
            reinterpret_cast<uint64_t>(instr->pytype()),
            /*nitems=*/0);
        break;
      }

      case Opcode::kMakeListTuple: {
        auto instr = static_cast<const MakeListTuple*>(&i);

        bbb.AppendCode(
            "Call {}, {:#x}, {}",
            instr->dst(),
            instr->is_tuple() ? reinterpret_cast<uint64_t>(PyTuple_New)
                              : reinterpret_cast<uint64_t>(PyList_New),
            instr->nvalues());
        break;
      }
      case Opcode::kInitListTuple: {
        auto instr = static_cast<const InitListTuple*>(&i);
        auto is_tuple = instr->is_tuple();

        std::string base = instr->GetOperand(0)->name();

        std::string tmp_id = GetSafeTempName();
        if (!is_tuple && instr->NumOperands() > 1) {
          bbb.AppendCode(
              "Load {}, {}, {}", tmp_id, base, offsetof(PyListObject, ob_item));
          base = std::move(tmp_id);
        }

        const size_t ob_item_offset =
            is_tuple ? offsetof(PyTupleObject, ob_item) : 0;
        for (size_t i = 1; i < instr->NumOperands(); i++) {
          bbb.AppendCode(
              "Store {}, {}, {}",
              instr->GetOperand(i),
              base,
              ob_item_offset + ((i - 1) * kPointerSize));
        }
        break;
      }
      case Opcode::kLoadTupleItem: {
        auto instr = static_cast<const LoadTupleItem*>(&i);

        const size_t item_offset =
            offsetof(PyTupleObject, ob_item) + instr->idx() * kPointerSize;
        bbb.AppendCode(
            "Load {} {} {}", instr->GetOutput(), instr->tuple(), item_offset);
        break;
      }
      case Opcode::kCheckSequenceBounds: {
        auto instr = static_cast<const CheckSequenceBounds*>(&i);
        bbb.AppendCode(
            "Call {}, {:#x}, {}, {}",
            instr->dst(),
            reinterpret_cast<uint64_t>(_PySequence_CheckBounds),
            instr->GetOperand(0),
            instr->GetOperand(1));
        break;
      }
      case Opcode::kLoadArrayItem: {
        auto instr = static_cast<const LoadArrayItem*>(&i);
        auto type = instr->type();
        uint64_t func = 0;
        if (type <= TObject && instr->idx()->type().hasIntSpec()) {
          // TODO: We could support more array types here, or in general
          // attempt to support more array types w/ register inputs w/o
          // calling to a helper.
          const size_t item_offset =
              instr->idx()->type().intSpec() * kPointerSize + instr->offset();
          bbb.AppendCode(
              "Load {} {} {}",
              instr->GetOutput(),
              instr->ob_item(),
              item_offset);
          break;
        } else if (type <= TCInt8) {
          func = reinterpret_cast<uint64_t>(JITRT_GetI8_FromArray);
        } else if (type <= TCUInt8) {
          func = reinterpret_cast<uint64_t>(JITRT_GetU8_FromArray);
        } else if (type <= TCInt16) {
          func = reinterpret_cast<uint64_t>(JITRT_GetI16_FromArray);
        } else if (type <= TCUInt16) {
          func = reinterpret_cast<uint64_t>(JITRT_GetU16_FromArray);
        } else if (type <= TCInt32) {
          func = reinterpret_cast<uint64_t>(JITRT_GetI32_FromArray);
        } else if (type <= TCUInt32) {
          func = reinterpret_cast<uint64_t>(JITRT_GetU32_FromArray);
        } else if (type <= TCInt64) {
          func = reinterpret_cast<uint64_t>(JITRT_GetI64_FromArray);
        } else if (type <= TCUInt64) {
          func = reinterpret_cast<uint64_t>(JITRT_GetU64_FromArray);
        } else if (type <= TObject) {
          func = reinterpret_cast<uint64_t>(JITRT_GetObj_FromArray);
        }
        JIT_CHECK(func != 0, "unknown array type %s", type.toString().c_str());

        bbb.AppendCode(
            "Call {}, {:#x}, {}, {}, {:#x}",
            instr->dst(),
            func,
            instr->ob_item(),
            instr->idx(),
            instr->offset());
        break;
      }
      case Opcode::kStoreArrayItem: {
        auto instr = static_cast<const StoreArrayItem*>(&i);
        auto type = instr->type();
        uint64_t func = 0;

        if (type <= TCInt8) {
          func = reinterpret_cast<uint64_t>(JITRT_SetI8_InArray);
        } else if (type <= TCUInt8) {
          func = reinterpret_cast<uint64_t>(JITRT_SetU8_InArray);
        } else if (type <= TCInt16) {
          func = reinterpret_cast<uint64_t>(JITRT_SetI16_InArray);
        } else if (type <= TCUInt16) {
          func = reinterpret_cast<uint64_t>(JITRT_SetU16_InArray);
        } else if (type <= TCInt32) {
          func = reinterpret_cast<uint64_t>(JITRT_SetI32_InArray);
        } else if (type <= TCUInt32) {
          func = reinterpret_cast<uint64_t>(JITRT_SetU32_InArray);
        } else if (type <= TCInt64) {
          func = reinterpret_cast<uint64_t>(JITRT_SetI64_InArray);
        } else if (type <= TCUInt64) {
          func = reinterpret_cast<uint64_t>(JITRT_SetU64_InArray);
        } else if (type <= TObject) {
          func = reinterpret_cast<uint64_t>(JITRT_SetObj_InArray);
        }
        JIT_CHECK(func != 0, "unknown array type %s", type.toString().c_str());

        bbb.AppendCode(
            "Invoke {:#x}, {}, {}, {}",
            func,
            instr->ob_item(),
            instr->value(),
            instr->idx());
        break;
      }
      case Opcode::kRepeatList: {
        auto instr = static_cast<const RepeatList*>(&i);
        auto func = reinterpret_cast<uint64_t>(_PyList_Repeat);
        bbb.AppendCode(
            "Call {}, {:#x}, {}, {}\n",
            instr->dst(),
            func,
            instr->seq(),
            instr->num());
        break;
      }
      case Opcode::kRepeatTuple: {
        auto instr = static_cast<const RepeatTuple*>(&i);
        auto func = reinterpret_cast<uint64_t>(_PyTuple_Repeat);
        bbb.AppendCode(
            "Call {}, {:#x}, {}, {}\n",
            instr->dst(),
            func,
            instr->seq(),
            instr->num());
        break;
      }
      case Opcode::kMakeCheckedList: {
        auto instr = static_cast<const MakeCheckedList*>(&i);
        auto capacity = instr->GetCapacity();
        bbb.AppendCode(
            "Call {}, {:#x}, {:#x}, {}",
            instr->GetOutput(),
            reinterpret_cast<uint64_t>(_PyCheckedList_New),
            reinterpret_cast<uint64_t>(instr->type().typeSpec()),
            capacity);
        break;
      }
      case Opcode::kMakeCheckedDict: {
        auto instr = static_cast<const MakeCheckedDict*>(&i);
        auto capacity = instr->GetCapacity();
        if (capacity == 0) {
          bbb.AppendCode(
              "Call {}, {:#x}, {:#x}",
              instr->GetOutput(),
              reinterpret_cast<uint64_t>(_PyCheckedDict_New),
              reinterpret_cast<uint64_t>(instr->type().typeSpec()));
        } else {
          bbb.AppendCode(
              "Call {}, {:#x}, {:#x}, {}",
              instr->GetOutput(),
              reinterpret_cast<uint64_t>(_PyCheckedDict_NewPresized),
              reinterpret_cast<uint64_t>(instr->type().typeSpec()),
              capacity);
        }
        break;
      }
      case Opcode::kMakeDict: {
        auto instr = static_cast<const MakeDict*>(&i);
        auto capacity = instr->GetCapacity();
        if (capacity == 0) {
          bbb.AppendCode(
              "Call {}, {:#x}",
              instr->GetOutput(),
              reinterpret_cast<uint64_t>(PyDict_New));
        } else {
          bbb.AppendCode(
              "Call {}, {:#x}, {}",
              instr->GetOutput(),
              reinterpret_cast<uint64_t>(_PyDict_NewPresized),
              capacity);
        }
        break;
      }
      case Opcode::kMakeSet: {
        auto instr = static_cast<const MakeSet*>(&i);
        bbb.AppendCode(
            "Call {}, {:#x}, 0",
            instr->GetOutput(),
            reinterpret_cast<uint64_t>(PySet_New));
        break;
      }
      case Opcode::kMergeDictUnpack: {
        auto instr = static_cast<const MergeDictUnpack*>(&i);
        bbb.AppendCode(
            "Call {}, {:#x}, __asm_tstate, {}, {}, {}",
            instr->GetOutput(),
            reinterpret_cast<uint64_t>(__Invoke_PyDict_MergeEx),
            instr->GetOperand(0),
            instr->GetOperand(1),
            instr->GetOperand(2));
        break;
      }
      case Opcode::kMergeSetUnpack: {
        auto instr = static_cast<const MergeSetUnpack*>(&i);
        bbb.AppendCode(
            "Call {}, {:#x}, {}, {}",
            instr->GetOutput(),
            reinterpret_cast<uint64_t>(_PySet_Update),
            instr->GetOperand(0),
            instr->GetOperand(1));
        break;
      }
      case Opcode::kSetDictItem: {
        auto instr = static_cast<const SetDictItem*>(&i);
        bbb.AppendCode(
            "Call {}, {:#x}, {}, {}, {}",
            instr->GetOutput(),
            reinterpret_cast<uint64_t>(_PyDict_SetItem),
            instr->GetOperand(0),
            instr->GetOperand(1),
            instr->GetOperand(2));
        break;
      }
      case Opcode::kSetSetItem: {
        auto instr = static_cast<const SetSetItem*>(&i);
        bbb.AppendCode(
            "Call {}, {:#x}, {}, {}",
            instr->GetOutput(),
            reinterpret_cast<uint64_t>(PySet_Add),
            instr->GetOperand(0),
            instr->GetOperand(1));
        break;
      }
      case Opcode::kStoreSubscr: {
        auto instr = static_cast<const StoreSubscr*>(&i);
        bbb.AppendCode(
            "Call {}, {:#x}, {}, {}, {}",
            instr->dst(),
            reinterpret_cast<uint64_t>(PyObject_SetItem),
            instr->container(),
            instr->index(),
            instr->value());

        break;
      }
      case Opcode::kInPlaceOp: {
        auto instr = static_cast<const InPlaceOp*>(&i);

        // NB: This needs to be in the order that the values appear in the
        // InPlaceOpKind enum
        static const uint64_t helpers[] = {
            reinterpret_cast<uint64_t>(PyNumber_InPlaceAdd),
            reinterpret_cast<uint64_t>(PyNumber_InPlaceAnd),
            reinterpret_cast<uint64_t>(PyNumber_InPlaceFloorDivide),
            reinterpret_cast<uint64_t>(PyNumber_InPlaceLshift),
            reinterpret_cast<uint64_t>(PyNumber_InPlaceMatrixMultiply),
            reinterpret_cast<uint64_t>(PyNumber_InPlaceRemainder),
            reinterpret_cast<uint64_t>(PyNumber_InPlaceMultiply),
            reinterpret_cast<uint64_t>(PyNumber_InPlaceOr),
            reinterpret_cast<uint64_t>(PyNumber_InPlacePower),
            reinterpret_cast<uint64_t>(PyNumber_InPlaceRshift),
            reinterpret_cast<uint64_t>(PyNumber_InPlaceSubtract),
            reinterpret_cast<uint64_t>(PyNumber_InPlaceTrueDivide),
            reinterpret_cast<uint64_t>(PyNumber_InPlaceXor),
        };
        JIT_CHECK(
            static_cast<unsigned long>(instr->op()) < sizeof(helpers),
            "unsupported inplaceop");

        auto op_kind = static_cast<int>(instr->op());

        if (instr->op() != InPlaceOpKind::kPower) {
          bbb.AppendCode(
              "Call {} {:#x}, {}, {}",
              instr->dst(),
              helpers[op_kind],
              instr->left(),
              instr->right());
        } else {
          bbb.AppendCode(
              "Call {} {:#x}, {}, {}, {:#x}",
              instr->dst(),
              helpers[op_kind],
              instr->left(),
              instr->right(),
              reinterpret_cast<uint64_t>(Py_None));
        }
        break;
      }
      case Opcode::kBranch: {
        break;
      }
      case Opcode::kBuildSlice: {
        auto instr = static_cast<const BuildSlice*>(&i);

        bbb.AppendCode(
            "Call {}, {:#x}, {}, {}, {}",
            instr->dst(),
            reinterpret_cast<uint64_t>(_Invoke_PySlice_New),
            instr->start(),
            instr->stop(),
            instr->step() != nullptr ? instr->step()->name() : "0x0");

        break;
      }
      case Opcode::kGetIter: {
        auto instr = static_cast<const GetIter*>(&i);

        bbb.AppendCode(
            "Call {}, {:#x}, {}",
            instr->GetOutput(),
            reinterpret_cast<uint64_t>(PyObject_GetIter),
            instr->GetOperand(0));

        break;
      }
      case Opcode::kPhi: {
        auto instr = static_cast<const Phi*>(&i);

        std::stringstream ss;
        fmt::print(ss, "Phi {}", instr->GetOutput());
        // ss << "Phi " << *instr->GetOutput();

        for (size_t i = 0; i < instr->NumOperands(); i++) {
          fmt::print(
              ss,
              ", {:#x}, {}",
              reinterpret_cast<uint64_t>(instr->basic_blocks().at(i)),
              // Phis don't support constant inputs yet
              instr->GetOperand(i)->name());
        }

        bbb.AppendCode(ss.str());
        break;
      }
      case Opcode::kInitFunction: {
        auto instr = static_cast<const InitFunction*>(&i);

        bbb.AppendCode(
            "Invoke {:#x}, {}",
            reinterpret_cast<uint64_t>(PyEntry_init),
            instr->GetOperand(0));
        break;
      }
      case Opcode::kMakeFunction: {
        auto instr = static_cast<const MakeFunction*>(&i);
        auto qualname = instr->GetOperand(0);
        auto code = instr->GetOperand(1);
        PyObject* globals = instr->frameState()->globals;
        env_->code_rt->addReference(globals);

        bbb.AppendCode(
            "Call {}, {:#x}, {}, {:#x}, {}",
            instr->GetOutput(),
            reinterpret_cast<uint64_t>(PyFunction_NewWithQualName),
            code,
            reinterpret_cast<uint64_t>(globals),
            qualname);
        break;
      }
      case Opcode::kSetFunctionAttr: {
        auto instr = static_cast<const SetFunctionAttr*>(&i);

        bbb.AppendCode(
            "Store {}, {}, {:#x}",
            instr->value(),
            instr->base(),
            instr->offset());
        break;
      }
      case Opcode::kListAppend: {
        auto instr = static_cast<const ListAppend*>(&i);

        bbb.AppendCode(
            "Call {}, {:#x}, {}, {}",
            instr->dst(),
            reinterpret_cast<uint64_t>(_PyList_APPEND),
            instr->GetOperand(0),
            instr->GetOperand(1));
        break;
      }
      case Opcode::kListExtend: {
        auto instr = static_cast<const ListExtend*>(&i);
        bbb.AppendCode(
            "Call {}, {:#x}, __asm_tstate, {}, {}, {}",
            instr->dst(),
            reinterpret_cast<uint64_t>(__Invoke_PyList_Extend),
            instr->GetOperand(0),
            instr->GetOperand(1),
            instr->GetOperand(2));
        break;
      }
      case Opcode::kMakeTupleFromList: {
        auto instr = static_cast<const MakeTupleFromList*>(&i);
        bbb.AppendCode(
            "Call {}, {:#x}, {}",
            instr->dst(),
            reinterpret_cast<uint64_t>(PyList_AsTuple),
            instr->GetOperand(0));
        break;
      }
      case Opcode::kGetTuple: {
        auto instr = static_cast<const GetTuple*>(&i);

        bbb.AppendCode(
            "Call {}, {:#x}, {}",
            instr->dst(),
            reinterpret_cast<uint64_t>(PySequence_Tuple),
            instr->GetOperand(0));
        break;
      }
      case Opcode::kInvokeIterNext: {
        auto instr = static_cast<const InvokeIterNext*>(&i);
        bbb.AppendCode(
            "Call {}, {:#x}, {}",
            instr->GetOutput(),
            reinterpret_cast<uint64_t>(jit::invokeIterNext),
            instr->GetOperand(0));
        break;
      }
      case Opcode::kLoadEvalBreaker: {
        // NB: This corresponds to an atomic load with
        // std::memory_order_relaxed. It's correct on x86-64 but probably isn't
        // on other architectures.
        static_assert(
            sizeof(_PyRuntime.ceval.eval_breaker._value) == 4,
            "Eval breaker is not a 4 byte value");
        auto eval_breaker =
            reinterpret_cast<uint64_t>(&_PyRuntime.ceval.eval_breaker._value);
        JIT_CHECK(
            i.GetOutput()->type() == TCInt32,
            "eval breaker output should be int");
        bbb.AppendCode("Load {}, {:#x}", i.GetOutput(), eval_breaker);
        break;
      }
      case Opcode::kRunPeriodicTasks: {
        auto func = reinterpret_cast<uint64_t>(&jit::runPeriodicTasks);
        bbb.AppendCode("Call {}, {:#x}", i.GetOutput(), func);
        break;
      }
      case Opcode::kSnapshot: {
        // Snapshots are purely informative
        break;
      }
      case Opcode::kUseType: {
        // UseTypes are purely informative
        break;
      }
      case Opcode::kHintType: {
        // HintTypes are purely informative
        break;
      }
      case Opcode::kBeginInlinedFunction: {
        // TODO(T109706798): Support calling from generators and inlining
        // generators.
        // TODO(emacs): Link all shadow frame prev pointers in function
        // prologue, since they need not happen with every call -- just the
        // data pointers need to be reset with every call.
        // TODO(emacs): If we manage to optimize leaf calls to a series of
        // non-deopting instructions, remove BeginInlinedFunction and
        // EndInlinedFunction completely.
        if (py_debug) {
          bbb.AppendCode(
              "Call {}, {:#x}, __asm_tstate",
              GetSafeTempName(),
              reinterpret_cast<uint64_t>(assertShadowCallStackConsistent));
        }
        auto instr = static_cast<const BeginInlinedFunction*>(&i);
        auto caller_shadow_frame = GetSafeTempName();
        bbb.AppendCode(
            "Lea {}, __native_frame_base, {}",
            caller_shadow_frame,
            shadowFrameOffsetBefore(instr));
        // There is already a shadow frame for the caller function.
        auto callee_shadow_frame = GetSafeTempName();
        bbb.AppendCode(
            "Lea {}, __native_frame_base, {}",
            callee_shadow_frame,
            shadowFrameOffsetOf(instr));
        bbb.AppendCode(
            "Store {}, {}, {}",
            caller_shadow_frame,
            callee_shadow_frame,
            SHADOW_FRAME_FIELD_OFF(prev));
        // Set code object data
        PyCodeObject* code = instr->code();
        env_->code_rt->addReference(reinterpret_cast<PyObject*>(code));
        PyObject* globals = instr->globals();
        env_->code_rt->addReference(reinterpret_cast<PyObject*>(globals));
        RuntimeFrameState* rtfs =
            env_->code_rt->allocateRuntimeFrameState(code, globals);
        uintptr_t data = _PyShadowFrame_MakeData(rtfs, PYSF_RTFS, PYSF_JIT);
        auto data_reg = GetSafeTempName();
        bbb.AppendCode("Move {}, {:#x}", data_reg, data);
        bbb.AppendCode(
            "Store {}, {}, {}",
            data_reg,
            callee_shadow_frame,
            SHADOW_FRAME_FIELD_OFF(data));
        // Set our shadow frame as top of shadow stack
        bbb.AppendCode(
            "Store {}, __asm_tstate, {}",
            callee_shadow_frame,
            offsetof(PyThreadState, shadow_frame));
        if (py_debug) {
          bbb.AppendCode(
              "Call {}, {:#x}, __asm_tstate",
              GetSafeTempName(),
              reinterpret_cast<uint64_t>(assertShadowCallStackConsistent));
        }
        break;
      }
      case Opcode::kEndInlinedFunction: {
        // TODO(T109706798): Support calling from generators and inlining
        // generators.
        if (py_debug) {
          bbb.AppendCode(
              "Call {}, {:#x}, __asm_tstate",
              GetSafeTempName(),
              reinterpret_cast<uint64_t>(assertShadowCallStackConsistent));
        }
        // callee_shadow_frame <- tstate.shadow_frame
        auto callee_shadow_frame = GetSafeTempName();
        bbb.AppendCode(
            "Load {}, __asm_tstate, {}",
            callee_shadow_frame,
            offsetof(PyThreadState, shadow_frame));

        // Check if the callee has been materialized into a PyFrame. Use the
        // flags below.
        static_assert(
            PYSF_PYFRAME == 1 && _PyShadowFrame_NumPtrKindBits == 2,
            "Unexpected constants");
        auto shadow_frame_data = GetSafeTempName();
        bbb.AppendCode(
            "Load {}, {}, {}",
            shadow_frame_data,
            callee_shadow_frame,
            SHADOW_FRAME_FIELD_OFF(data));
        bbb.AppendCode("BitTest {}, 0", shadow_frame_data);

        // caller_shadow_frame <- callee_shadow_frame.prev
        auto caller_shadow_frame = GetSafeTempName();
        bbb.AppendCode(
            "Load {}, {}, {}",
            caller_shadow_frame,
            callee_shadow_frame,
            SHADOW_FRAME_FIELD_OFF(prev));
        // caller_shadow_frame -> tstate.shadow_frame
        bbb.AppendCode(
            "Store {}, __asm_tstate, {}",
            caller_shadow_frame,
            offsetof(PyThreadState, shadow_frame));

        // Unlink PyFrame if needed. Someone might have materialized all of the
        // PyFrames via PyEval_GetFrame or similar.
        auto done = GetSafeLabelName();
        bbb.AppendCode("BranchNC {}", done);
        // TODO(T109445584): Remove this unused label.
        bbb.AppendCode("{}:", GetSafeLabelName());
        bbb.AppendCode(
            "Call {}, {}, __asm_tstate",
            GetSafeTempName(),
            reinterpret_cast<uint64_t>(JITRT_UnlinkFrame));
        bbb.AppendCode("{}:", done);
        if (py_debug) {
          bbb.AppendCode(
              "Call {}, {:#x}, __asm_tstate",
              GetSafeTempName(),
              reinterpret_cast<uint64_t>(assertShadowCallStackConsistent));
        }
        break;
      }
      case Opcode::kIsTruthy: {
        auto func = reinterpret_cast<uint64_t>(&PyObject_IsTrue);
        bbb.AppendCode(
            "Call {}, {:#x}, {}", i.GetOutput(), func, i.GetOperand(0));
        break;
      }
      case Opcode::kImportFrom: {
        auto& instr = static_cast<const ImportFrom&>(i);
        PyCodeObject* code = instr.frameState()->code;
        PyObject* name = PyTuple_GET_ITEM(code->co_names, instr.nameIdx());
        bbb.AppendCode(
            "Call {}, {:#x}, __asm_tstate, {}, {}",
            i.GetOutput(),
            reinterpret_cast<uint64_t>(&_Py_DoImportFrom),
            instr.module(),
            name);
        break;
      }
      case Opcode::kImportName: {
        auto instr = static_cast<const ImportName*>(&i);
        PyCodeObject* code = instr->frameState()->code;
        PyObject* name = PyTuple_GET_ITEM(code->co_names, instr->name_idx());
        bbb.AppendCode(
            "Call {}, {:#x}, __asm_tstate, {}, {}, {}",
            i.GetOutput(),
            reinterpret_cast<uint64_t>(&JITRT_ImportName),
            name,
            instr->GetFromList(),
            instr->GetLevel());
        break;
      }
      case Opcode::kRaise: {
        const auto& instr = static_cast<const Raise&>(i);
        std::string exc = "0";
        std::string cause = "0";
        switch (instr.kind()) {
          case Raise::Kind::kReraise:
            break;
          case Raise::Kind::kRaiseWithExcAndCause:
            cause = instr.GetOperand(1)->name();
            // Fallthrough
          case Raise::Kind::kRaiseWithExc:
            exc = instr.GetOperand(0)->name();
        }
        bbb.AppendCode(
            "Invoke {:#x}, __asm_tstate, {}, {}",
            reinterpret_cast<uint64_t>(&_Py_DoRaise),
            exc,
            cause);
        bbb.AppendCode(MakeGuard("AlwaysFail", instr));
        break;
      }
      case Opcode::kRaiseStatic: {
        const auto& instr = static_cast<const RaiseStatic&>(i);
        std::stringstream args;
        for (size_t i = 0; i < instr.NumOperands(); i++) {
          args << ", " << *instr.GetOperand(i);
        }
        bbb.AppendCode(
            "Invoke {:#x}, {:#x}, {:#x}{}",
            reinterpret_cast<uint64_t>(&PyErr_Format),
            reinterpret_cast<uint64_t>(instr.excType()),
            reinterpret_cast<uint64_t>(instr.fmt()),
            args.str());
        bbb.AppendCode(MakeGuard("AlwaysFail", instr));
        break;
      }
      case Opcode::kFormatValue: {
        const auto& instr = static_cast<const FormatValue&>(i);
        bbb.AppendCode(
            "Call {}, {:#x}, __asm_tstate, {}, {}, {}",
            instr.dst(),
            reinterpret_cast<uint64_t>(JITRT_FormatValue),
            instr.GetOperand(0),
            instr.GetOperand(1),
            instr.conversion());
        break;
      }
      case Opcode::kBuildString: {
        const auto& instr = static_cast<const BuildString&>(i);

        // using vectorcall here although this is not strictly a vector call.
        // the callable is always null, and all the components to be
        // concatenated will be in the args argument.
        std::string s = fmt::format(
            "Vectorcall {}, {}, 0, 0",
            instr.dst(),
            reinterpret_cast<uint64_t>(JITRT_BuildString));
        for (size_t i = 0; i < instr.NumOperands(); i++) {
          s += fmt::format(", {}", instr.GetOperand(i));
        }

        s += ", 0";

        bbb.AppendCode(s);
        break;
      }
      case Opcode::kWaitHandleLoadWaiter: {
        const auto& instr = static_cast<const WaitHandleLoadWaiter&>(i);
        bbb.AppendCode(
            "Load {}, {}, {}",
            instr.GetOutput()->name(),
            instr.reg(),
            offsetof(PyWaitHandleObject, wh_waiter));
        break;
      }
      case Opcode::kWaitHandleLoadCoroOrResult: {
        const auto& instr = static_cast<const WaitHandleLoadCoroOrResult&>(i);
        bbb.AppendCode(
            "Load {}, {}, {}",
            instr.GetOutput()->name(),
            instr.reg(),
            offsetof(PyWaitHandleObject, wh_coro_or_result));
        break;
      }
      case Opcode::kWaitHandleRelease: {
        const auto& instr = static_cast<const WaitHandleRelease&>(i);
        std::string null_var = GetSafeTempName();
        bbb.AppendCode(
            "Store 0, {}, {}",
            instr.reg(),
            offsetof(PyWaitHandleObject, wh_coro_or_result));
        bbb.AppendCode(
            "Store 0, {}, {}",
            instr.reg(),
            offsetof(PyWaitHandleObject, wh_waiter));
        break;
      }
      case Opcode::kDeleteSubscr: {
        auto tmp = GetSafeTempName();
        const auto& instr = static_cast<const DeleteSubscr&>(i);
        bbb.AppendCode(
            "Call {}:CInt32, {:#x}, {}, {}",
            tmp,
            reinterpret_cast<uint64_t>(PyObject_DelItem),
            instr.GetOperand(0),
            instr.GetOperand(1));
        bbb.AppendCode(MakeGuard("NotNegative", instr, tmp));
        break;
      }
      case Opcode::kUnpackExToTuple: {
        auto instr = static_cast<const UnpackExToTuple*>(&i);
        bbb.AppendCode(
            "Call {}, {:#x}, __asm_tstate, {}, {}, {}",
            instr->dst(),
            reinterpret_cast<uint64_t>(JITRT_UnpackExToTuple),
            instr->seq(),
            instr->before(),
            instr->after());
        break;
      }
      case Opcode::kIsErrStopAsyncIteration: {
        auto instr = static_cast<const IsErrStopAsyncIteration*>(&i);
        bbb.AppendCode(
            "Call {}, {:#x}, __asm_tstate, {:#x}",
            instr->dst(),
            reinterpret_cast<uint64_t>(_PyErr_ExceptionMatches),
            reinterpret_cast<uint64_t>(PyExc_StopAsyncIteration));
        break;
      }
      case Opcode::kClearError: {
        bbb.AppendCode(
            "Invoke {:#x}, __asm_tstate",
            reinterpret_cast<uint64_t>(_PyErr_Clear));
        break;
      }
    }

    if (auto db = dynamic_cast<const DeoptBase*>(&i)) {
      switch (db->opcode()) {
        case Opcode::kCheckExc:
        case Opcode::kCheckField:
        case Opcode::kCheckVar:
        case Opcode::kDeleteAttr:
        case Opcode::kDeleteSubscr:
        case Opcode::kDeopt:
        case Opcode::kDeoptPatchpoint:
        case Opcode::kGuard:
        case Opcode::kGuardIs:
        case Opcode::kGuardType:
        case Opcode::kInvokeStaticFunction:
        case Opcode::kRaiseAwaitableError:
        case Opcode::kRaise:
        case Opcode::kRaiseStatic: {
          break;
        }
        case Opcode::kCompare: {
          auto op = static_cast<const Compare&>(i).op();
          if (op == CompareOp::kIs || op == CompareOp::kIsNot) {
            // These are implemented using pointer equality and cannot
            // throw.
            break;
          }
          emitExceptionCheck(*db, bbb);
          break;
        }
        default: {
          emitExceptionCheck(*db, bbb);
          break;
        }
      }
    }
  }

  // the last instruction must be kBranch, kCondBranch, or kReturn
  auto bbs = bbb.Generate();
  basic_blocks_.insert(basic_blocks_.end(), bbs.begin(), bbs.end());

  return {bbs.front(), bbs.back()};
}