runtime/byteslike.cpp (132 lines of code) (raw):

// Copyright (c) Facebook, Inc. and its affiliates. (http://www.facebook.com) #include "byteslike.h" #include "array-module.h" #include "handles.h" #include "runtime.h" #include "unicode.h" namespace py { Byteslike::Byteslike(HandleScope* scope, Thread* thread, RawObject object) : d_{} { if (object.isSmallBytes()) { RawSmallBytes small_bytes = SmallBytes::cast(object); initWithSmallData(small_bytes, small_bytes.length()); return; } if (object.isLargeBytes()) { RawLargeBytes bytes = LargeBytes::cast(object); initWithLargeBytes(scope, bytes, bytes.length()); return; } if (object.isMemoryView()) { RawMemoryView memory_view = MemoryView::cast(object); RawObject buffer = memory_view.buffer(); word length = memory_view.length(); word start = memory_view.start(); if (buffer.isLargeBytes()) { if (start != 0) { UNIMPLEMENTED("non-zero start on DataArray not supported yet"); } initWithLargeBytes(scope, LargeBytes::cast(buffer), length); return; } if (buffer.isPointer()) { byte* data = static_cast<byte*>(Pointer::cast(buffer).cptr()) + start; initWithMemory(data, length); return; } if (buffer.isSmallBytes()) { initWithSmallData(SmallBytes::cast(buffer), length); d_.small.reference += start; return; } UNIMPLEMENTED("TODO memoryview from C extension object?"); } Runtime* runtime = thread->runtime(); if (runtime->isInstanceOfBytearray(object)) { RawBytearray bytearray = object.rawCast<RawBytearray>(); initWithLargeBytes(scope, MutableBytes::cast(bytearray.items()), bytearray.numItems()); return; } if (runtime->isInstanceOfBytes(object)) { RawObject bytes = object.rawCast<RawUserBytesBase>().value(); if (bytes.isImmediateObjectNotSmallInt()) { RawSmallBytes small_bytes = SmallBytes::cast(bytes); initWithSmallData(small_bytes, small_bytes.length()); return; } RawLargeBytes large_bytes = LargeBytes::cast(bytes); initWithLargeBytes(scope, large_bytes, large_bytes.length()); return; } if (runtime->isInstanceOfArray(object)) { RawArray array = object.rawCast<RawArray>(); word length = arrayByteLength(array); initWithLargeBytes(scope, MutableBytes::cast(array.buffer()), length); return; } DCHECK(!runtime->isByteslike(object), "expected non-byteslike"); d_.handle.object = Error::error(); next_ = nullptr; } inline void Byteslike::initWithLargeBytes(HandleScope* scope, RawLargeBytes bytes, word length) { static_assert(sizeof(Byteslike) == sizeof(Object) + sizeof(length_), "size mismatch"); DCHECK_BOUND(length, bytes.length()); Thread* thread = scope->thread(); d_.handle.object = bytes; d_.handle.thread = thread; Handle<RawObject>* as_handle = reinterpret_cast<Handle<RawObject>*>(this); next_ = thread->handles()->push(as_handle); length_ = length; } inline void Byteslike::initWithMemory(byte* data, word length) { // Add `kHeapObjectTag` to the pointer. This mirrors the way references into // the managed heap work (compare with `RawHeapObject::fromAddress`) so we can // use the same code to access non-managed and managed memory. d_.reference = reinterpret_cast<uword>(data) + Object::kHeapObjectTag; next_ = nullptr; length_ = length; } inline void Byteslike::initWithSmallData(RawSmallBytes bytes, word length) { d_.small.small_storage = bytes; const byte* data = smallDataData(&d_.small.small_storage); // Add `kHeapObjectTag` to the pointer. This mirrors the way references into // the managed heap work (compare with `RawHeapObject::fromAddress`) so we can // use the same code to access non-managed and managed memory. d_.small.reference = reinterpret_cast<uword>(data) + Object::kHeapObjectTag; next_ = nullptr; length_ = length; } RawObject byteslikeReprSmartQuotes(Thread* thread, const Byteslike& byteslike) { // Precalculate the length of the result to minimize allocation. word length = byteslike.length(); word num_single_quotes = 0; bool has_double_quotes = false; word result_length = length + 3; // b'' for (word i = 0; i < length; i++) { byte current = byteslike.byteAt(i); switch (current) { case '\'': num_single_quotes++; break; case '"': has_double_quotes = true; break; case '\t': case '\n': case '\r': case '\\': result_length++; break; default: if (!ASCII::isPrintable(current)) { result_length += 3; } } } byte delimiter = '\''; if (num_single_quotes > 0) { if (has_double_quotes) { result_length += num_single_quotes; } else { delimiter = '"'; } } return thread->runtime()->byteslikeRepr(thread, byteslike, result_length, delimiter); } } // namespace py