Jit/bytecode.h (192 lines of code) (raw):
// Copyright (c) Facebook, Inc. and its affiliates. (http://www.facebook.com)
#pragma once
#include "Python.h"
#include "opcode.h"
#include "Jit/log.h"
#include <iterator>
#include <unordered_set>
namespace jit {
// Set of opcodes that `BytecodeInstruction::IsBranch` reports as branches.
// Only declared here; the definition is not part of this header.
extern const std::unordered_set<int> kBranchOpcodes;
// Set of opcodes whose jump target `BytecodeInstruction::GetJumpTarget`
// computes as "offset of the next instruction + oparg". For every other
// opcode the oparg itself is returned as the target. Only declared here; the
// definition is not part of this header.
extern const std::unordered_set<int> kRelBranchOpcodes;
// A structured representation of a single CPython bytecode instruction: the
// byte offset at which it lives, its opcode, and its oparg.
//
// Instances behave like plain values. The only mutating member is
// `ExtendOpArgWith`, which ORs extra high-order bits into the oparg.
class BytecodeInstruction {
 public:
  // Decodes the code unit found at position `idx` (counted in code units) of
  // `instrs`. The stored offset is `idx` scaled to bytes.
  BytecodeInstruction(_Py_CODEUNIT* instrs, Py_ssize_t idx)
      : offset_(idx * sizeof(_Py_CODEUNIT)),
        opcode_(_Py_OPCODE(instrs[idx])),
        oparg_(_Py_OPARG(instrs[idx])) {}

  // Builds an instruction from already-decoded parts. `offset` is in bytes.
  BytecodeInstruction(int opcode, int oparg, Py_ssize_t offset)
      : offset_(offset), opcode_(opcode), oparg_(oparg) {}

  // Byte offset of this instruction.
  Py_ssize_t offset() const {
    return offset_;
  }

  // Same position as `offset()`, expressed in code units instead of bytes.
  Py_ssize_t index() const {
    return offset() / sizeof(_Py_CODEUNIT);
  }

  int opcode() const {
    return opcode_;
  }

  int oparg() const {
    return oparg_;
  }

  // The oparg divided by the size of one code unit.
  int opargAsIndex() const {
    return oparg() / sizeof(_Py_CODEUNIT);
  }

  // True when the opcode is a member of `kBranchOpcodes`.
  bool IsBranch() const {
    return kBranchOpcodes.count(opcode()) != 0;
  }

  // True for the opcodes listed below.
  bool IsCondBranch() const {
    // TODO(mpage): Fill this out
    // NOTE(review): POP_JUMP_IF_TRUE is not in this list - confirm whether
    // that omission is intentional before relying on this predicate.
    const int op = opcode_;
    return op == FOR_ITER || op == POP_JUMP_IF_FALSE ||
        op == JUMP_IF_FALSE_OR_POP || op == JUMP_IF_NONZERO_OR_POP ||
        op == JUMP_IF_TRUE_OR_POP || op == JUMP_IF_ZERO_OR_POP;
  }

  bool IsRaiseVarargs() const {
    return opcode() == RAISE_VARARGS;
  }

  bool IsReturn() const {
    return opcode() == RETURN_VALUE;
  }

  // An instruction is a terminator if it is a branch, a return, or a
  // RAISE_VARARGS.
  bool IsTerminator() const {
    if (IsBranch()) {
      return true;
    }
    if (IsReturn()) {
      return true;
    }
    return IsRaiseVarargs();
  }

  // Byte offset this instruction jumps to. Opcodes in `kRelBranchOpcodes`
  // are relative to the next instruction; for all others the oparg is the
  // target.
  Py_ssize_t GetJumpTarget() const {
    const bool is_relative = kRelBranchOpcodes.count(opcode()) != 0;
    return is_relative ? NextInstrOffset() + oparg() : oparg();
  }

  // `GetJumpTarget()` expressed in code units instead of bytes.
  Py_ssize_t GetJumpTargetAsIndex() const {
    return GetJumpTarget() / sizeof(_Py_CODEUNIT);
  }

  // Byte offset of the code unit immediately after this one.
  Py_ssize_t NextInstrOffset() const {
    return offset_ + sizeof(_Py_CODEUNIT);
  }

  // `NextInstrOffset()` expressed in code units instead of bytes.
  Py_ssize_t NextInstrIndex() const {
    return NextInstrOffset() / sizeof(_Py_CODEUNIT);
  }

  // ORs `changes`, shifted left by 8 bits, into the current oparg.
  void ExtendOpArgWith(int changes) {
    oparg_ |= (changes << 8);
  }

 private:
  Py_ssize_t offset_;
  int opcode_;
  int oparg_;
};
// A half open block of bytecode [start, end) viewed as a sequence of
// `BytecodeInstruction`s
//
// Extended args are handled automatically when iterating over the bytecode;
// they will not appear in the stream of `BytecodeInstruction`s.
class BytecodeInstructionBlock {
public:
explicit BytecodeInstructionBlock(PyCodeObject* code)
: instrs_(code->co_rawcode),
start_idx_(0),
end_idx_(code->co_codelen / sizeof(_Py_CODEUNIT)) {}
BytecodeInstructionBlock(
_Py_CODEUNIT* instrs,
Py_ssize_t start,
Py_ssize_t end)
: instrs_(instrs), start_idx_(start), end_idx_(end) {}
class Iterator {
public:
using iterator_category = std::input_iterator_tag;
using difference_type = std::ptrdiff_t;
using value_type = BytecodeInstruction;
using pointer = const value_type*;
using reference = const value_type&;
Iterator(_Py_CODEUNIT* instr, Py_ssize_t idx, Py_ssize_t end_idx)
: instr_(instr), idx_(idx), end_idx_(end_idx), bci_(0, 0, 0) {
if (!atEnd()) {
// Iterator end() methods are supposed to be past the logical end
// of the underlying data structure and should not be accessed
// directly. Dereferencing instr would be a heap buffer overflow.
bci_ = BytecodeInstruction(
_Py_OPCODE(*instr), _Py_OPARG(*instr), idx * sizeof(_Py_CODEUNIT));
consumeExtendedArgs();
}
}
bool atEnd() const {
return idx_ == end_idx_;
}
reference operator*() {
JIT_DCHECK(
!atEnd(), "cannot read past the end of BytecodeInstructionBlock");
return bci_;
}
pointer operator->() {
JIT_DCHECK(
!atEnd(), "cannot read past the end of BytecodeInstructionBlock");
return &bci_;
}
Iterator& operator++() {
instr_++;
idx_++;
consumeExtendedArgs();
return *this;
}
Iterator operator++(int) {
Iterator tmp = *this;
++(*this);
return tmp;
}
bool operator==(const Iterator& other) const {
return instr_ == other.instr_;
}
bool operator!=(const Iterator& other) const {
return !(*this == other);
}
Py_ssize_t remainingInstrs() const {
return end_idx_ - idx_ - 1;
}
private:
void consumeExtendedArgs() {
int accum = 0;
while (!atEnd() && (_Py_OPCODE(*instr_) == EXTENDED_ARG)) {
accum = (accum << 8) | _Py_OPARG(*instr_);
instr_++;
idx_++;
}
if (!atEnd()) {
int opcode = _Py_OPCODE(*instr_);
int oparg = (accum << 8) | _Py_OPARG(*instr_);
bci_ = BytecodeInstruction(opcode, oparg, idx_ * sizeof(_Py_CODEUNIT));
}
}
_Py_CODEUNIT* instr_;
Py_ssize_t idx_;
Py_ssize_t end_idx_;
BytecodeInstruction bci_;
};
Iterator begin() const {
return Iterator(instrs_ + start_idx_, start_idx_, end_idx_);
}
Iterator end() const {
return Iterator(instrs_ + end_idx_, end_idx_, end_idx_);
}
Py_ssize_t startOffset() const {
return start_idx_ * sizeof(_Py_CODEUNIT);
}
Py_ssize_t endOffset() const {
return end_idx_ * sizeof(_Py_CODEUNIT);
}
Py_ssize_t size() const {
return end_idx_ - start_idx_;
}
BytecodeInstruction at(Py_ssize_t idx) const {
return BytecodeInstruction(instrs_, start_idx_ + idx);
}
BytecodeInstruction lastInstr() const {
return BytecodeInstruction(instrs_, end_idx_ - 1);
}
_Py_CODEUNIT* bytecode() const {
return instrs_;
}
private:
_Py_CODEUNIT* instrs_;
Py_ssize_t start_idx_;
Py_ssize_t end_idx_;
};
} // namespace jit