runtime/marshal.cpp (489 lines of code) (raw):
// Copyright (c) Facebook, Inc. and its affiliates. (http://www.facebook.com)
#include "marshal.h"
#include <cstdlib>
#include <cstring>
#include <memory>
#include "handles.h"
#include "heap.h"
#include "modules.h"
#include "runtime.h"
#include "set-builtins.h"
#include "tuple-builtins.h"
#include "utils.h"
#include "view.h"
namespace py {
// Tag bytes of the marshal format. Every serialized object begins with one of
// the TYPE_* tags, optionally combined with FLAG_REF.
enum {
  // Flag bit OR-ed into a tag: the decoded object is also appended to the
  // reference list so that a later TYPE_REF can point back at it.
  FLAG_REF = '\x80',

  // Singletons and markers.
  TYPE_NULL = '0',
  TYPE_NONE = 'N',
  TYPE_FALSE = 'F',
  TYPE_TRUE = 'T',
  TYPE_STOPITER = 'S',
  TYPE_ELLIPSIS = '.',
  TYPE_UNKNOWN = '?',

  // Numbers.
  TYPE_INT = 'i',
  TYPE_LONG = 'l',
  TYPE_FLOAT = 'f',
  TYPE_BINARY_FLOAT = 'g',
  TYPE_COMPLEX = 'x',
  TYPE_BINARY_COMPLEX = 'y',

  // Bytes and strings.
  TYPE_STRING = 's',
  TYPE_INTERNED = 't',
  TYPE_UNICODE = 'u',
  TYPE_ASCII = 'a',
  TYPE_ASCII_INTERNED = 'A',
  TYPE_SHORT_ASCII = 'z',
  TYPE_SHORT_ASCII_INTERNED = 'Z',

  // Containers.
  TYPE_TUPLE = '(',
  TYPE_SMALL_TUPLE = ')',
  TYPE_LIST = '[',
  TYPE_DICT = '{',
  TYPE_SET = '<',
  TYPE_FROZENSET = '>',

  // Code objects and back-references.
  TYPE_CODE = 'c',
  TYPE_REF = 'r',
};
// Constructs a reader positioned at the start of `buffer`.
//
// `thread` supplies the runtime used for all allocations. `scope` is the handle
// scope that keeps the reference list (`refs_`) alive. The reader records the
// data pointer and length of `buffer`.
Marshal::Reader::Reader(HandleScope* scope, Thread* thread, View<byte> buffer)
: thread_(thread),
runtime_(thread->runtime()),
// The reference list starts out as a freshly allocated, empty list.
refs_(scope, runtime_->newList()),
start_(buffer.data()),
length_(buffer.length()),
pos_(0) {
// One-past-the-end pointer of the input bytes.
end_ = start_ + length_;
}
// Consumes and validates the .pyc header at the current position.
//
// Returns None on success. Raises EOFError when the buffer is too short to
// hold the header, and ImportError when the magic number is not kPycMagic.
// `filename` is only used to build the error messages.
RawObject Marshal::Reader::readPycHeader(const Str& filename) {
  // The magic number occupies the first four bytes.
  if (length_ - pos_ < 4) {
    return thread_->raiseWithFmt(
        LayoutId::kEOFError, "reached end of file while reading header of '%S'",
        &filename);
  }
  const int32_t magic = readLong();
  if (magic != kPycMagic) {
    return thread_->raiseWithFmt(LayoutId::kImportError,
                                 "unsupported magic number in '%S'", &filename);
  }

  // Three more 32-bit fields follow the magic number.
  if (length_ - pos_ < 12) {
    return thread_->raiseWithFmt(
        LayoutId::kEOFError,
        "reached end of file while reading header of '%S'", &filename);
  }
  readLong();  // flags
  readLong();  // source timestamp
  readLong();  // source length
  DCHECK(pos_ == 16, "size mismatch");
  return NoneType::object();
}
// Installs the lookup tables that readTypeCode() uses to resolve builtin
// function indices and intrinsic IDs to native function pointers. Each table is
// passed as a pointer plus its element count; the arrays are not copied.
void Marshal::Reader::setBuiltinFunctions(
    const BuiltinFunction* builtin_functions, word num_builtin_functions,
    const IntrinsicFunction* intrinsic_functions,
    word num_intrinsic_functions) {
  // Builtin function table.
  num_builtin_functions_ = num_builtin_functions;
  builtin_functions_ = builtin_functions;

  // Intrinsic function table.
  num_intrinsic_functions_ = num_intrinsic_functions;
  intrinsic_functions_ = intrinsic_functions;
}
// Returns a pointer to the next `length` bytes of the input and advances the
// read position past them.
//
// The request must be non-negative and must fit in the bytes that remain.
// A length that violates this comes from truncated or corrupt marshal data
// and fails the CHECK instead of handing out a pointer outside the buffer.
const byte* Marshal::Reader::readBytes(int length) {
  CHECK(length >= 0 && length <= length_ - pos_,
        "bad marshal data (read past end of buffer)");
  const byte* result = &start_[pos_];
  pos_ += length;
  return result;
}
// Reads a single byte and advances the read position. Yields 0xFF when
// readBytes() returns a null pointer.
byte Marshal::Reader::readByte() {
  const byte* cursor = readBytes(1);
  return cursor == nullptr ? static_cast<byte>(0xFF) : cursor[0];
}
int16_t Marshal::Reader::readShort() {
int16_t result = -1;
const byte* buffer = readBytes(sizeof(result));
if (buffer != nullptr) {
result = buffer[0];
result |= buffer[1] << 8;
}
return result;
}
int32_t Marshal::Reader::readLong() {
int32_t result = -1;
const byte* buffer = readBytes(4);
if (buffer != nullptr) {
result = buffer[0];
result |= buffer[1] << 8;
result |= buffer[2] << 16;
result |= buffer[3] << 24;
}
return result;
}
double Marshal::Reader::readBinaryFloat() {
double result;
const byte* buffer = readBytes(sizeof(result));
std::memcpy(&result, buffer, sizeof(result));
return result;
}
// Decodes the next object from the stream.
//
// The first byte is a type tag whose FLAG_REF bit, when set, requests that the
// decoded object also be appended to the reference list. The flag is stored in
// `isRef_` so that the per-type readers can honor it.
RawObject Marshal::Reader::readObject() {
  const byte tag = readByte();
  const byte flag = tag & FLAG_REF;
  const byte type = tag & ~FLAG_REF;
  isRef_ = flag;

  // Shared tail of the scalar cases: root the new object in a handle, record
  // it in the reference list if requested, and hand it back.
  const auto track = [this](RawObject raw) -> RawObject {
    HandleScope scope(thread_);
    Object result(&scope, raw);
    if (isRef_) {
      addRef(result);
    }
    return *result;
  };

  switch (type) {
    // Singletons: returned directly and never recorded as references.
    case TYPE_NULL:
      return SmallInt::fromWord(0);
    case TYPE_NONE:
      return NoneType::object();
    case TYPE_STOPITER:
      UNIMPLEMENTED("TYPE_STOPITER");
    case TYPE_ELLIPSIS:
      return runtime_->ellipsis();
    case TYPE_FALSE:
      return Bool::falseObj();
    case TYPE_TRUE:
      return Bool::trueObj();

    // Numbers.
    case TYPE_INT: {
      // NB: this will continue to work as long as SmallInt can contain the
      // full range of 32 bit signed integer values. Notably, this will break if
      // we need to support 32 bit machines.
      const word n = readLong();
      if (!SmallInt::isValid(n)) {
        UNIMPLEMENTED("value '%ld' outside range supported by RawSmallInt", n);
      }
      return track(SmallInt::fromWord(n));
    }
    case TYPE_FLOAT:
      UNIMPLEMENTED("TYPE_FLOAT");
    case TYPE_BINARY_FLOAT: {
      const double n = readBinaryFloat();
      return track(runtime_->newFloat(n));
    }
    case TYPE_COMPLEX:
      UNIMPLEMENTED("TYPE_COMPLEX");
    case TYPE_BINARY_COMPLEX: {
      // The real part precedes the imaginary part in the stream.
      const double real = readBinaryFloat();
      const double imag = readBinaryFloat();
      return track(runtime_->newComplex(real, imag));
    }
    case TYPE_LONG:
      return readLongObject();

    // Bytes and strings.
    case TYPE_STRING:  // Misnomer, should be TYPE_BYTES
      return readTypeString();
    case TYPE_INTERNED:
    case TYPE_ASCII_INTERNED:
      return readTypeAsciiInterned();
    case TYPE_UNICODE:
    case TYPE_ASCII:
      return readTypeAscii();
    case TYPE_SHORT_ASCII_INTERNED:
      return readTypeShortAsciiInterned();
    case TYPE_SHORT_ASCII:
      return readTypeShortAscii();

    // Containers.
    case TYPE_SMALL_TUPLE:
      return readTypeSmallTuple();
    case TYPE_TUPLE:
      return readTypeTuple();
    case TYPE_LIST:
      UNIMPLEMENTED("TYPE_LIST");
    case TYPE_DICT:
      UNIMPLEMENTED("TYPE_DICT");
    case TYPE_SET:
      return readTypeSet();
    case TYPE_FROZENSET:
      return readTypeFrozenSet();

    // Code objects and back-references.
    case TYPE_CODE:
      return readTypeCode();
    case TYPE_REF:
      return readTypeRef();

    default:
      UNREACHABLE("unknown type '%c' (flags=%x)", type, flag);
  }
  UNREACHABLE("all cases should be covered");
}
// Appends `value` to the reference list and returns the index it was stored
// at, which is the list's length before the append.
word Marshal::Reader::addRef(const Object& value) {
  const word index = refs_.numItems();
  runtime_->listAdd(thread_, refs_, value);
  return index;
}
// Overwrites the reference-list entry at `index` with `value`. Used to fill in
// a slot that was reserved earlier with a placeholder.
void Marshal::Reader::setRef(word index, RawObject value) {
  refs_.atPut(index, value);
}
// Returns the reference-list entry stored at `index`.
RawObject Marshal::Reader::getRef(word index) {
  return refs_.at(index);
}
// Returns how many entries the reference list currently holds.
word Marshal::Reader::numRefs() {
  return refs_.numItems();
}
// Reads a TYPE_STRING payload (a bytes object): a 32-bit length followed by
// that many raw bytes.
//
// Returns the new bytes object, recorded in the reference list when the tag
// carried FLAG_REF. Raises ValueError when the encoded length is negative,
// matching the validation done for the string readers.
RawObject Marshal::Reader::readTypeString() {
  int32_t length = readLong();
  if (length < 0) {
    // A negative size can only come from corrupt data and must not reach
    // readBytes() or View<byte>.
    return thread_->raiseWithFmt(
        LayoutId::kValueError,
        "bad marshal data (bytes object size out of range)");
  }
  const byte* data = readBytes(length);
  HandleScope scope(thread_);
  Object result(&scope, runtime_->newBytesWithAll(View<byte>(data, length)));
  if (isRef_) {
    addRef(result);
  }
  return *result;
}
// Reads a string payload prefixed by a 32-bit length. Raises ValueError when
// the length is negative; otherwise delegates to readStr().
RawObject Marshal::Reader::readTypeAscii() {
  const word num_bytes = readLong();
  if (num_bytes >= 0) {
    return readStr(num_bytes);
  }
  return thread_->raiseWithFmt(LayoutId::kValueError,
                               "bad marshal data (string size out of range)");
}
// Reads an interned string payload prefixed by a 32-bit length. Raises
// ValueError when the length is negative; otherwise delegates to
// readAndInternStr().
RawObject Marshal::Reader::readTypeAsciiInterned() {
  const word num_bytes = readLong();
  if (num_bytes >= 0) {
    return readAndInternStr(num_bytes);
  }
  return thread_->raiseWithFmt(LayoutId::kValueError,
                               "bad marshal data (string size out of range)");
}
// Reads a string payload whose length is stored in a single byte.
RawObject Marshal::Reader::readTypeShortAscii() {
  const byte num_bytes = readByte();
  return readStr(static_cast<word>(num_bytes));
}
// Reads an interned string payload whose length is stored in a single byte.
RawObject Marshal::Reader::readTypeShortAsciiInterned() {
  const byte num_bytes = readByte();
  return readAndInternStr(static_cast<word>(num_bytes));
}
// Consumes `length` bytes and builds a str object from them. The new string is
// recorded in the reference list when the current tag carried FLAG_REF.
RawObject Marshal::Reader::readStr(word length) {
  const byte* chars = readBytes(length);
  HandleScope scope(thread_);
  Object str(&scope, runtime_->newStrWithAll(View<byte>(chars, length)));
  if (!isRef_) {
    return *str;
  }
  addRef(str);
  return *str;
}
// Consumes `length` bytes and returns the interned str object for them. The
// string is recorded in the reference list when the current tag carried
// FLAG_REF.
RawObject Marshal::Reader::readAndInternStr(word length) {
  const byte* chars = readBytes(length);
  HandleScope scope(thread_);
  Object interned(
      &scope, Runtime::internStrFromAll(thread_, View<byte>(chars, length)));
  if (!isRef_) {
    return *interned;
  }
  addRef(interned);
  return *interned;
}
// Reads a tuple whose element count is stored in a single byte.
RawObject Marshal::Reader::readTypeSmallTuple() {
  const int32_t num_elements = readByte();
  return doTupleElements(num_elements);
}
// Reads a tuple whose element count is stored as a 32-bit integer.
//
// Raises ValueError when the encoded count is negative, so that corrupt data
// never reaches the tuple allocation in doTupleElements().
RawObject Marshal::Reader::readTypeTuple() {
  int32_t n = readLong();
  if (n < 0) {
    return thread_->raiseWithFmt(LayoutId::kValueError,
                                 "bad marshal data (tuple size out of range)");
  }
  return doTupleElements(n);
}
// Decodes `length` consecutive objects into a tuple.
//
// The tuple is recorded in the reference list before its elements are read, so
// its index precedes the indices of any nested objects. A length of zero
// yields the runtime's shared empty tuple.
RawObject Marshal::Reader::doTupleElements(int32_t length) {
  HandleScope scope(thread_);
  if (length != 0) {
    MutableTuple items(&scope, runtime_->newMutableTuple(length));
    if (isRef_) {
      addRef(items);
    }
    for (int32_t slot = 0; slot < length; slot++) {
      RawObject element = readObject();
      items.atPut(slot, element);
    }
    return items.becomeImmutable();
  }

  Object empty(&scope, runtime_->emptyTuple());
  if (isRef_) {
    addRef(empty);
  }
  return *empty;
}
// Reads a set: a 32-bit element count followed by that many objects, which
// are inserted into a freshly created set.
RawObject Marshal::Reader::readTypeSet() {
  const int32_t num_elements = readLong();
  HandleScope scope(thread_);
  Set target(&scope, runtime_->newSet());
  return doSetElements(num_elements, target);
}
// Reads a frozenset: a 32-bit element count followed by that many objects.
//
// A count of zero yields the runtime's shared empty frozenset. That object
// must still be recorded in the reference list when the tag carried FLAG_REF.
// Otherwise every later TYPE_REF index would be off by one. Non-empty
// frozensets are recorded by doSetElements().
RawObject Marshal::Reader::readTypeFrozenSet() {
  int32_t n = readLong();
  HandleScope scope(thread_);
  if (n == 0) {
    Object empty(&scope, runtime_->emptyFrozenSet());
    if (isRef_) {
      addRef(empty);
    }
    return *empty;
  }
  FrozenSet set(&scope, runtime_->newFrozenSet());
  return doSetElements(n, set);
}
// Decodes `length` consecutive objects and inserts each one into `set`.
//
// `set` is recorded in the reference list before its elements are read.
// Returns `set`, or the error result of setAdd() if an insertion fails.
RawObject Marshal::Reader::doSetElements(int32_t length, const SetBase& set) {
  if (isRef_) {
    addRef(set);
  }
  HandleScope scope(thread_);
  // Handles are created once and reassigned on every iteration.
  Object element(&scope, NoneType::object());
  Object element_hash(&scope, NoneType::object());
  for (int32_t remaining = length; remaining > 0; remaining--) {
    element = readObject();
    element_hash = Interpreter::hash(thread_, element);
    DCHECK(!element_hash.isErrorException(), "must be hashable");
    const word hash = SmallInt::cast(*element_hash).value();
    RawObject added = setAdd(thread_, set, element, hash);
    if (added.isError()) {
      return added;
    }
  }
  return *set;
}
// Reads a marshalled code object and returns the resulting Code.
//
// The payload is a fixed sequence of 32-bit integer fields interleaved with
// nested objects decoded through readObject(). Depending on the flags, the
// result is built with newBuiltinCode() (native code) or newCode() (bytecode).
// Malformed payloads fail the CHECKs below.
RawObject Marshal::Reader::readTypeCode() {
word index = -1;
HandleScope scope(thread_);
if (isRef_) {
// Reserve a reflist index
// The slot has to be claimed before any nested object is read, because
// nested objects may add references of their own. It holds None until the
// code object exists and is filled in at the end of this function.
Object none(&scope, NoneType::object());
index = addRef(none);
}
// Fields are consumed strictly in stream order.
int32_t argcount = readLong();
int32_t posonlyargcount = readLong();
int32_t kwonlyargcount = readLong();
int32_t nlocals = readLong();
// For builtin code this field is reused as an index into the builtin
// function table (see `function_index` below).
uint32_t stacksize = readLong();
int32_t flags = readLong();
CHECK(flags <= (Code::Flags::kLast << 1) - 1, "unknown flags in code object");
Object code(&scope, readObject());
Tuple consts(&scope, readObject());
Object names(&scope, readObject());
Tuple varnames(&scope, readObject());
Tuple freevars(&scope, readObject());
Tuple cellvars(&scope, readObject());
Object filename(&scope, readObject());
Object name(&scope, readObject());
int32_t firstlineno = readLong();
Object lnotab(&scope, readObject());
word intrinsic_index = 0;
if (flags & Code::Flags::kMetadata) {
// With kMetadata set, consts[0] is a 1-tuple holding the intrinsic ID as a
// SmallInt. It is removed from consts once it has been read.
Object metadata_obj(&scope, consts.at(0));
CHECK(metadata_obj.isTuple() && Tuple::cast(*metadata_obj).length() == 1,
"malformed metadata");
Tuple metadata(&scope, *metadata_obj);
Object intrinsic(&scope, metadata.at(0));
CHECK(intrinsic.isSmallInt(), "malformed intrinsic ID");
intrinsic_index = SmallInt::cast(*intrinsic).value();
consts = runtime_->tupleSubseq(thread_, consts, 1, consts.length() - 1);
}
// Resolve the intrinsic ID only when a table was installed through
// setBuiltinFunctions() and the code object names an intrinsic.
IntrinsicFunction intrinsic = nullptr;
if (intrinsic_functions_ != nullptr && intrinsic_index != 0) {
CHECK_INDEX(intrinsic_index - 1, num_intrinsic_functions_);
// The intrinsic IDs are biased by 1 so that 0 means no intrinsic
intrinsic = intrinsic_functions_[intrinsic_index - 1];
}
Object result(&scope, NoneType::object());
if (flags & Code::Flags::kBuiltin) {
// Native code: it must carry no bytecode, constants, names, free variables
// or cell variables.
word function_index = stacksize;
CHECK(code.isBytes() && Bytes::cast(*code).length() == 0,
"must not have bytecode in native code");
CHECK(consts.length() == 0, "consts should contain only metadata");
CHECK(names.isTuple() && Tuple::cast(*names).length() == 0,
"must not have variables in native code");
CHECK(freevars.length() == 0, "must not have free vars in native code");
CHECK(cellvars.length() == 0, "must not have cell vars in native code");
CHECK_INDEX(function_index, num_builtin_functions_);
BuiltinFunction function = builtin_functions_[function_index];
result = runtime_->newBuiltinCode(argcount, posonlyargcount, kwonlyargcount,
flags, function, varnames, name);
// newBuiltinCode() is not passed these two fields, so set them afterwards.
Code::cast(*result).setFilename(*filename);
Code::cast(*result).setFirstlineno(firstlineno);
} else {
result = runtime_->newCode(argcount, posonlyargcount, kwonlyargcount,
nlocals, stacksize, flags, code, consts, names,
varnames, freevars, cellvars, filename, name,
firstlineno, lnotab);
}
Code::cast(*result).setIntrinsic(reinterpret_cast<void*>(intrinsic));
// Replace the None placeholder reserved at the top with the finished code
// object.
if (index >= 0) {
setRef(index, *result);
}
return *result;
}
// Reads a TYPE_REF payload: a 32-bit index into the reference list.
//
// Returns the previously recorded object. Raises ValueError when the index
// does not name an existing entry, so corrupt data cannot index outside the
// list.
RawObject Marshal::Reader::readTypeRef() {
  int32_t n = readLong();
  if (n < 0 || n >= numRefs()) {
    return thread_->raiseWithFmt(LayoutId::kValueError,
                                 "bad marshal data (invalid reference)");
  }
  return getRef(n);
}
// Reads a TYPE_LONG payload and returns the corresponding int object.
//
// The payload is a signed 32-bit count `n` followed by |n| 16-bit marshal
// digits, least significant first. Each digit contributes kBitsPerLongDigit
// bits of magnitude, and the sign of `n` is the sign of the value. The digits
// are repacked into machine words and converted to two's complement. The
// result is a SmallInt when it fits and a LargeInt otherwise, recorded in the
// reference list when the tag carried FLAG_REF.
//
// Raises ValueError when the digit count is out of range, when a digit is
// negative, or when the most significant digit is zero (unnormalized data).
RawObject Marshal::Reader::readLongObject() {
  int32_t n = readLong();
  if (n == 0) {
    HandleScope scope(thread_);
    Object zero(&scope, SmallInt::fromWord(0));
    if (isRef_) {
      addRef(zero);
    }
    return *zero;
  }
  // The digit count is |n|. kMinInt32 has no positive counterpart in 32 bits,
  // so it cannot be a valid count.
  if (n == kMinInt32) {
    return thread_->raiseWithFmt(LayoutId::kValueError,
                                 "bad marshal data (long size out of range)");
  }

  word bits_consumed = 0;
  // Widen to `word` before negating and multiplying so that large counts
  // cannot overflow 32-bit arithmetic.
  word num_marshal_digits = n < 0 ? -static_cast<word>(n) : static_cast<word>(n);
  word n_bits = num_marshal_digits * kBitsPerLongDigit;
  // Room for the magnitude plus one extra word for sign/zero extension.
  word num_digits = ((n_bits + kBitsPerWord + 1) / kBitsPerWord) + 1;
  std::unique_ptr<uword[]> digits{new uword[num_digits]};
  word digits_idx = 0;
  uword buf = 0;        // word currently being assembled
  word word_offset = 0;  // number of bits already placed in `buf`
  bool top_digit_is_zero = false;
  while (bits_consumed < n_bits) {
    int16_t digit = readShort();
    if (digit < 0) {
      return thread_->raiseWithFmt(LayoutId::kValueError,
                                   "bad marshal data (negative long digit)");
    }
    // Remember whether the digit read last, which is the most significant
    // one, is zero.
    top_digit_is_zero = digit == 0;
    auto unsigned_digit = static_cast<uword>(digit);
    if (word_offset + kBitsPerLongDigit <= kBitsPerWord) {
      // The whole digit fits into the current word.
      buf |= unsigned_digit << word_offset;
      word_offset += kBitsPerLongDigit;
      if (word_offset == kBitsPerWord) {
        digits[digits_idx++] = buf;
        buf = 0;
        word_offset = 0;
      }
    } else {
      // The digit straddles a word boundary: its low bits complete the current
      // word and its high bits start the next one.
      word extra_bits = (word_offset + kBitsPerLongDigit) % kBitsPerWord;
      word bits_to_include = kBitsPerLongDigit - extra_bits;
      buf |= (unsigned_digit & ((1 << bits_to_include) - 1)) << word_offset;
      digits[digits_idx++] = buf;
      buf = (unsigned_digit >> bits_to_include) & ((1 << extra_bits) - 1);
      word_offset = extra_bits;
    }
    bits_consumed += kBitsPerLongDigit;
  }

  // Well-formed data never has a zero most significant digit. Rejecting it
  // guarantees that the magnitude is non-zero and that at least one word has
  // been produced whenever `digits[digits_idx - 1]` is inspected below.
  if (top_digit_is_zero) {
    return thread_->raiseWithFmt(LayoutId::kValueError,
                                 "bad marshal data (unnormalized long data)");
  }

  if (word_offset > 0 && buf != 0) {
    // Flush the partially filled top word.
    digits[digits_idx++] = buf;
  } else if (n > 0 && (digits[digits_idx - 1] >> (kBitsPerWord - 1))) {
    // Zero extend when the value is positive and the MSB of the top word is
    // set, so that the word sequence is not read as negative.
    digits[digits_idx++] = 0;
  }

  if (n < 0) {
    // Negate the magnitude in place: invert every word and add one.
    uword carry = 1;
    for (word i = 0; i < digits_idx; i++) {
      uword digit = digits[i];
      carry = __builtin_uaddl_overflow(~digit, carry, &digit);
      digits[i] = digit;
    }
    DCHECK(carry == 0, "Carry should be zero");
    // Sign extend if the negated top word does not have its MSB set.
    if ((digits[digits_idx - 1] >> (kBitsPerWord - 1)) == 0) {
      digits[digits_idx++] = kMaxUword;
    }
  }

  HandleScope scope(thread_);
  Object result(&scope, NoneType::object());
  if (digits_idx == 0) {
    result = SmallInt::fromWord(0);
  } else if (digits_idx == 1 &&
             SmallInt::isValid(static_cast<word>(digits[0]))) {
    result = SmallInt::fromWord(static_cast<word>(digits[0]));
  } else {
    result =
        runtime_->newLargeIntWithDigits(View<uword>(digits.get(), digits_idx));
  }
  if (isRef_) {
    addRef(result);
  }
  return *result;
}
} // namespace py