runtime/bytes-builtins.cpp (993 lines of code) (raw):
// Copyright (c) Facebook, Inc. and its affiliates. (http://www.facebook.com)
#include "bytes-builtins.h"
#include "builtins.h"
#include "bytearray-builtins.h"
#include "byteslike.h"
#include "formatter-utils.h"
#include "frame.h"
#include "int-builtins.h"
#include "runtime.h"
#include "slice-builtins.h"
#include "strarray-builtins.h"
#include "type-builtins.h"
#include "unicode.h"
#include "utils.h"
namespace py {
// Reinterprets `bytes` as a str when it contains only ASCII data.
// Returns Unbound::object() if `bytes.isASCII()` is false.
RawObject bytesDecodeASCII(Thread* thread, const Bytes& bytes) {
  HandleScope scope(thread);
  if (!bytes.isASCII()) {
    return Unbound::object();
  }
  // Small bytes are converted directly without a copy.
  if (bytes.isSmallBytes()) return SmallBytes::cast(*bytes).becomeStr();

  // Large bytes: copy the payload into a fresh mutable buffer and turn that
  // buffer into the resulting str. The raw object is re-read from the handle
  // after the allocation rather than cached across it.
  const word num_bytes = LargeBytes::cast(*bytes).length();
  MutableBytes copy(
      &scope, thread->runtime()->newMutableBytesUninitialized(num_bytes));
  copy.replaceFromWith(0, LargeBytes::cast(*bytes), num_bytes);
  return copy.becomeStr();
}
// Counts the non-overlapping occurrences of the first `needle_len` bytes of
// `needle` inside the window [start, end) of the first `haystack_len` bytes
// of `haystack`. `start` and `end` are adjusted with
// Slice::adjustSearchIndices before use.
//
// Returns 0 when `start` lies past the end of the haystack. For an empty
// needle the result is the number of positions in the window plus one
// (`end - start + 1`), or 0 when the adjusted window is inverted; this matches
// Python's `bytes.count(b"", start, end)`.
word bytesCount(const Bytes& haystack, word haystack_len, const Bytes& needle,
                word needle_len, word start, word end) {
  DCHECK_BOUND(haystack_len, haystack.length());
  DCHECK_BOUND(needle_len, needle.length());
  if (start > haystack_len) {
    return 0;
  }
  Slice::adjustSearchIndices(&start, &end, haystack_len);
  if (needle_len == 0) {
    // The empty needle matches once before every byte of the window and once
    // after its last byte. The count must honor `end`, not the full haystack
    // length, and an inverted window contains no match at all.
    return end < start ? 0 : end - start + 1;
  }
  word count = 0;
  word index =
      bytesFind(haystack, haystack_len, needle, needle_len, start, end);
  while (index != -1) {
    count++;
    // Resume right after the match so occurrences never overlap.
    index = bytesFind(haystack, haystack_len, needle, needle_len,
                      index + needle_len, end);
  }
  return count;
}
// Searches the window [start, end) of `haystack` (after index adjustment via
// Slice::adjustSearchIndices) for the first occurrence of the first
// `needle_len` bytes of `needle`. Returns the index of the match or -1.
word bytesFind(const Bytes& haystack, word haystack_len, const Bytes& needle,
               word needle_len, word start, word end) {
  DCHECK_BOUND(haystack_len, haystack.length());
  DCHECK_BOUND(needle_len, needle.length());
  Slice::adjustSearchIndices(&start, &end, haystack_len);
  // Last position at which the whole needle still fits inside the window.
  const word last_candidate = end - needle_len;
  for (word pos = start; pos <= last_candidate; pos++) {
    // Count how many leading needle bytes agree with the haystack at `pos`.
    word matched = 0;
    while (matched < needle_len &&
           haystack.byteAt(pos + matched) == needle.byteAt(matched)) {
      matched++;
    }
    if (matched >= needle_len) {
      return pos;
    }
  }
  return -1;
}
// Builds a str holding two hexadecimal digits for each of the first `length`
// bytes of `bytes`.
RawObject bytesHex(Thread* thread, const Bytes& bytes, word length) {
  HandleScope scope(thread);
  Runtime* runtime = thread->runtime();
  MutableBytes hex_digits(&scope,
                          runtime->newMutableBytesUninitialized(length * 2));
  for (word src = 0; src < length; src++) {
    // Byte number `src` occupies output positions 2*src and 2*src + 1.
    const byte value = bytes.byteAt(src);
    uwordToHexadecimalWithMutableBytes(*hex_digits, /*index=*/src * 2,
                                       /*num_digits=*/2, value);
  }
  return hex_digits.becomeStr();
}
// Joins the first `src_length` items of `src` with `sep` between them into a
// SmallBytes. The items and separators are assembled in a stack buffer of
// SmallBytes::kMaxLength bytes; `result_length` is the total size computed by
// the caller.
static RawObject smallBytesJoin(Thread* thread, const Bytes& sep,
                                word sep_length, const Tuple& src,
                                word src_length, word result_length) {
  HandleScope scope(thread);
  byte buffer[SmallBytes::kMaxLength];
  word written = 0;
  for (word i = 0; i < src_length; i++) {
    // Every item except the first is preceded by the separator.
    if (i != 0) {
      sep.copyTo(buffer + written, sep_length);
      written += sep_length;
    }
    Byteslike item(&scope, thread, src.at(i));
    const word item_length = item.length();
    item.copyTo(buffer + written, item_length);
    written += item_length;
  }
  DCHECK(buffer + written == buffer + result_length,
         "unexpected number of bytes written");
  return SmallBytes::fromBytes({buffer, result_length});
}
// Concatenates the first `src_length` items of `src`, inserting the first
// `sep_length` bytes of `sep` between consecutive items.
//
// Every item must be accepted by Byteslike; otherwise a TypeError naming the
// offending index is raised. When `sep` is a MutableBytes the result is
// mutable as well, otherwise it is immutable (and a SmallBytes when it fits).
RawObject bytesJoin(Thread* thread, const Bytes& sep, word sep_length,
                    const Tuple& src, word src_length) {
  DCHECK_BOUND(src_length, src.length());
  Runtime* runtime = thread->runtime();
  const bool is_mutable = sep.isMutableBytes();
  if (src_length == 0) {
    if (!is_mutable) {
      return Bytes::empty();
    }
    return runtime->emptyMutableBytes();
  }
  HandleScope scope(thread);

  // Pass 1: validate every item and add up the size of the result.
  word result_length = sep_length * (src_length - 1);
  Object item(&scope, Unbound::object());
  for (word i = 0; i < src_length; i++) {
    item = src.at(i);
    Byteslike view(&scope, thread, *item);
    if (!view.isValid()) {
      return thread->raiseWithFmt(
          LayoutId::kTypeError,
          "sequence item %w: expected a bytes-like object, '%T' found", i,
          &item);
    }
    result_length += view.length();
  }

  // Pass 2: copy the pieces. Short immutable results use the stack-buffer
  // helper.
  if (!is_mutable && result_length <= SmallBytes::kMaxLength) {
    return smallBytesJoin(thread, sep, sep_length, src, src_length,
                          result_length);
  }
  MutableBytes result(&scope,
                      runtime->newMutableBytesUninitialized(result_length));
  word dst_offset = 0;
  for (word i = 0; i < src_length; i++) {
    if (i > 0) {
      result.replaceFromWithBytes(dst_offset, *sep, sep_length);
      dst_offset += sep_length;
    }
    Byteslike piece(&scope, thread, src.at(i));
    const word piece_length = piece.length();
    result.replaceFromWithByteslike(dst_offset, piece, piece_length);
    dst_offset += piece_length;
  }
  DCHECK(dst_offset == result_length, "offset must match expected length");
  if (is_mutable) {
    return *result;
  }
  return result.becomeImmutable();
}
// Searches the window [start, end) of `haystack` (after index adjustment via
// Slice::adjustSearchIndices) for the last occurrence of the first
// `needle_len` bytes of `needle`. Returns the index of the match or -1.
word bytesRFind(const Bytes& haystack, word haystack_len, const Bytes& needle,
                word needle_len, word start, word end) {
  DCHECK_BOUND(haystack_len, haystack.length());
  DCHECK_BOUND(needle_len, needle.length());
  Slice::adjustSearchIndices(&start, &end, haystack_len);
  // Walk candidate positions from the rightmost one that still fits.
  word pos = end - needle_len;
  while (pos >= start) {
    word matched = 0;
    while (matched < needle_len &&
           haystack.byteAt(pos + matched) == needle.byteAt(matched)) {
      matched++;
    }
    if (matched >= needle_len) {
      return pos;
    }
    pos--;
  }
  return -1;
}
// Produces the repr of `bytes` delimited by single quotes. The size of the
// output is computed up front and handed to Runtime::byteslikeRepr. Raises
// OverflowError when that size does not fit in a SmallInt.
RawObject bytesReprSingleQuotes(Thread* thread, const Bytes& bytes) {
  HandleScope scope(thread);
  Byteslike byteslike(&scope, thread, *bytes);
  const word length = byteslike.length();
  // One output character per input byte plus the three characters of b''.
  word result_length = length + 3;
  for (word i = 0; i < length; i++) {
    const byte current = byteslike.byteAt(i);
    const bool is_short_escape = current == '\t' || current == '\n' ||
                                 current == '\r' || current == '\'' ||
                                 current == '\\';
    if (is_short_escape) {
      // These bytes need one additional character.
      result_length += 1;
    } else if (!ASCII::isPrintable(current)) {
      // Non-printable bytes need three additional characters.
      result_length += 3;
    }
  }
  if (result_length > SmallInt::kMaxValue) {
    return thread->raiseWithFmt(LayoutId::kOverflowError,
                                "bytes object is too large to make repr");
  }
  Runtime* runtime = thread->runtime();
  return runtime->byteslikeRepr(thread, byteslike, result_length, '\'');
}
// Scans `bytes` from the left and returns the index of the first byte that
// does not occur among the first `chars_len` bytes of `chars`. Returns
// `bytes_len` when every byte occurs in `chars`.
static word bytesSpanLeft(const Bytes& bytes, word bytes_len,
                          const Bytes& chars, word chars_len) {
  word left = 0;
  while (left < bytes_len) {
    const byte current = bytes.byteAt(left);
    // Find the position of `current` in `chars`; reaching `chars_len` means
    // it is absent.
    word k = 0;
    while (k < chars_len && chars.byteAt(k) != current) {
      k++;
    }
    if (k >= chars_len) {
      return left;
    }
    left++;
  }
  return bytes_len;
}
// Scans `bytes` from the right and returns the index one past the last byte
// that does not occur among the first `chars_len` bytes of `chars`. The scan
// never moves below `left`; `left` is returned when every byte to its right
// occurs in `chars`.
static word bytesSpanRight(const Bytes& bytes, word bytes_len,
                           const Bytes& chars, word chars_len, word left) {
  word right = bytes_len;
  while (left < right) {
    const byte current = bytes.byteAt(right - 1);
    // Find the position of `current` in `chars`; reaching `chars_len` means
    // it is absent.
    word k = 0;
    while (k < chars_len && chars.byteAt(k) != current) {
      k++;
    }
    if (k >= chars_len) {
      return right;
    }
    right--;
  }
  return left;
}
// Splits the first `length` bytes of `bytes` at line boundaries and returns a
// list with one entry per line. A line ends at '\n', at '\r', or at the pair
// "\r\n" (treated as a single terminator). The terminator is part of the
// entry only when `keepends` is true.
RawObject bytesSplitLines(Thread* thread, const Bytes& bytes, word length,
bool keepends) {
HandleScope scope(thread);
Runtime* runtime = thread->runtime();
List result(&scope, runtime->newList());
Object subseq(&scope, Unbound::object());
// `j` is the start of the current line, `i` the scan position. After each
// iteration `j` is moved to `i`, i.e. just past the consumed terminator.
for (word i = 0, j = 0; i < length; j = i) {
// Advance `i` to the next line terminator, or to `length` if none is left.
while (i < length) {
byte b = bytes.byteAt(i);
// PEP-278
if (b == '\n' || b == '\r') {
break;
}
i++;
}
// End (exclusive) of the piece to emit; excludes the terminator unless
// `keepends` extends it below.
word eol_pos = i;
if (i < length) {
word cur = i;
word next = i + 1;
i++;
// Check for \r\n specifically
if (bytes.byteAt(cur) == '\r' && next < length &&
bytes.byteAt(next) == '\n') {
i++;
}
if (keepends) {
eol_pos = i;
}
}
// If there are no newlines, the bytes returned should be identity-equal
// (this also applies when the single line spans all `length` bytes because
// its trailing terminator is kept).
if (j == 0 && eol_pos == length) {
runtime->listAdd(thread, result, bytes);
return *result;
}
subseq = bytesSubseq(thread, bytes, j, eol_pos - j);
runtime->listAdd(thread, result, subseq);
}
return *result;
}
// Returns a copy of `bytes` without the leading and trailing bytes that occur
// among the first `chars_len` bytes of `chars`.
RawObject bytesStrip(Thread* thread, const Bytes& bytes, word bytes_len,
                     const Bytes& chars, word chars_len) {
  const word first_kept = bytesSpanLeft(bytes, bytes_len, chars, chars_len);
  // The right-hand scan stops at `first_kept`, so the kept range is never
  // negative.
  const word end_kept =
      bytesSpanRight(bytes, bytes_len, chars, chars_len, first_kept);
  const word kept_length = end_kept - first_kept;
  return bytesSubseq(thread, bytes, first_kept, kept_length);
}
// Returns a copy of `bytes` without the leading bytes that occur among the
// first `chars_len` bytes of `chars`.
RawObject bytesStripLeft(Thread* thread, const Bytes& bytes, word bytes_len,
                         const Bytes& chars, word chars_len) {
  const word first_kept = bytesSpanLeft(bytes, bytes_len, chars, chars_len);
  const word kept_length = bytes_len - first_kept;
  return bytesSubseq(thread, bytes, first_kept, kept_length);
}
// Returns a copy of `bytes` without the trailing bytes that occur among the
// first `chars_len` bytes of `chars`.
RawObject bytesStripRight(Thread* thread, const Bytes& bytes, word bytes_len,
                          const Bytes& chars, word chars_len) {
  // Scan all the way down to index 0; the result is the kept prefix length.
  const word kept_length =
      bytesSpanRight(bytes, bytes_len, chars, chars_len, /*left=*/0);
  return bytesSubseq(thread, bytes, /*start=*/0, kept_length);
}
// Returns a copy of the first `len` bytes of `bytes` without leading and
// trailing bytes for which ASCII::isSpace is true.
RawObject bytesStripSpace(Thread* thread, const Bytes& bytes, word len) {
  word first_kept = 0;
  for (; first_kept < len; first_kept++) {
    if (!ASCII::isSpace(bytes.byteAt(first_kept))) break;
  }
  word end_kept = len;
  for (; end_kept > first_kept; end_kept--) {
    if (!ASCII::isSpace(bytes.byteAt(end_kept - 1))) break;
  }
  return bytesSubseq(thread, bytes, first_kept, end_kept - first_kept);
}
// Returns a copy of the first `len` bytes of `bytes` without leading bytes
// for which ASCII::isSpace is true.
RawObject bytesStripSpaceLeft(Thread* thread, const Bytes& bytes, word len) {
  word first_kept = 0;
  for (; first_kept < len; first_kept++) {
    if (!ASCII::isSpace(bytes.byteAt(first_kept))) break;
  }
  const word kept_length = len - first_kept;
  return bytesSubseq(thread, bytes, first_kept, kept_length);
}
// Returns a copy of the first `len` bytes of `bytes` without trailing bytes
// for which ASCII::isSpace is true.
RawObject bytesStripSpaceRight(Thread* thread, const Bytes& bytes, word len) {
  word kept_length = len;
  for (; kept_length > 0; kept_length--) {
    if (!ASCII::isSpace(bytes.byteAt(kept_length - 1))) break;
  }
  return bytesSubseq(thread, bytes, /*start=*/0, kept_length);
}
// Returns a new immutable bytes object holding `length` bytes of `bytes`
// beginning at index `start`.
RawObject bytesSubseq(Thread* thread, const Bytes& bytes, word start,
                      word length) {
  DCHECK_BOUND(start, bytes.length());
  DCHECK_BOUND(length, bytes.length() - start);
  if (length > SmallBytes::kMaxLength) {
    // Too long for a SmallBytes: copy into a freshly allocated buffer.
    HandleScope scope(thread);
    MutableBytes copy(&scope,
                      thread->runtime()->newMutableBytesUninitialized(length));
    copy.replaceFromWithStartAt(/*dst_start=*/0, DataArray::cast(*bytes),
                                length, start);
    return copy.becomeImmutable();
  }
  // Short results are gathered in a stack buffer and packed into a
  // SmallBytes.
  byte buffer[SmallBytes::kMaxLength];
  for (word i = 0; i < length; i++) {
    buffer[i] = bytes.byteAt(start + i);
  }
  return SmallBytes::fromBytes({buffer, length});
}
// Returns true if `bytes` consists entirely of well-formed UTF-8 sequences.
// Rejected are: stray continuation bytes, truncated sequences, overlong
// encodings, values above kMaxUnicode and invalid lead bytes. Encoded
// surrogates (0xD800 - 0xDFFF) are rejected unless `allow_surrogates` is
// true.
static bool bytesIsValidUTF8Impl(RawBytes bytes, bool allow_surrogates) {
for (word i = 0, length = bytes.length(); i < length;) {
byte b0 = bytes.byteAt(i++);
// ASCII bytes have the topmost bit zero.
static_assert(kMaxASCII == 0x7F, "unexpected kMaxASCII value");
if (b0 <= 0x7F) continue;
// Bytes past this point have the high bit set (0b1xxxxxxx).
// 0b110xxxxx begins a sequence with one continuation byte.
// `b0 < 0b11100000` overestimates and we filter in a 2nd comparison.
if (b0 < 0xE0) {
// b0 < 0xC0 catches 0b10xxxxxx bytes (invalid continuation bytes).
// 0xC0 and 0xC1 (0b11000000 and 0b11000001) would encode values in
// range(0x80), which should have been encoded as ASCII.
if (b0 < 0xC2) {
return false;
}
// The single continuation byte must exist.
if (i >= length) {
return false;
}
byte b1 = bytes.byteAt(i++);
if (!UTF8::isTrailByte(b1)) {
return false;
}
if (DCHECK_IS_ON()) {
uword decoded =
static_cast<uword>(b0 & 0x1F) << 6 | static_cast<uword>(b1 & 0x3F);
DCHECK(0x80 <= decoded && decoded <= 0x7FF, "unexpected value");
}
// 0b1110xxxx starts a sequence with two continuation bytes.
} else if (b0 < 0xF0) {
// Both continuation bytes (indices i and i + 1) must exist.
if (i + 1 >= length) {
return false;
}
byte b1 = bytes.byteAt(i++);
byte b2 = bytes.byteAt(i++);
if (!UTF8::isTrailByte(b1) || !UTF8::isTrailByte(b2)) {
return false;
}
// Catch sequences that should have been encoded in 1-2 bytes instead.
if (b0 == 0xE0) {
if (b1 < 0xA0) {
return false;
}
} else if (!allow_surrogates && b0 == 0xED && b1 >= 0xA0) {
// 0b11011xxxxxxxxxxx (0xD800 - 0xDFFF) is declared invalid by unicode
// as they look like utf-16 surrogates making it easier to detect
// mix-ups.
return false;
}
if (DCHECK_IS_ON()) {
uword decoded = static_cast<uword>(b0 & 0x0F) << 12 |
static_cast<uword>(b1 & 0x3F) << 6 |
static_cast<uword>(b2 & 0x3F);
DCHECK(0x0800 <= decoded && decoded <= 0xFFFF, "unexpected value");
}
static_assert(kMaxUnicode == 0x10FFFF, "unexpected maxunicode value");
// 0b11110xxx starts a sequence with three continuation bytes.
// However values bigger than 0x10FFFF are not valid unicode, so we test
// b0 < 0b11110101 to overestimate that.
} else if (b0 < 0xF5) {
// All three continuation bytes (indices i through i + 2) must exist.
if (i + 2 >= length) {
return false;
}
byte b1 = bytes.byteAt(i++);
byte b2 = bytes.byteAt(i++);
byte b3 = bytes.byteAt(i++);
if (!UTF8::isTrailByte(b1) || !UTF8::isTrailByte(b2) ||
!UTF8::isTrailByte(b3)) {
return false;
}
// Catch sequences that should have been encoded with 1-3 bytes instead.
if (b0 == 0xF0) {
if (b1 < 0x90) {
return false;
}
} else if (b0 == 0xF4 && b1 >= 0x90) {
// Bigger than kMaxUnicode.
return false;
}
if (DCHECK_IS_ON()) {
uword decoded = static_cast<uword>(b0 & 0x07) << 16 |
static_cast<uword>(b1 & 0x3F) << 12 |
static_cast<uword>(b2 & 0x3F) << 6 |
static_cast<uword>(b3 & 0x3F);
DCHECK(0x10000 <= decoded && decoded <= kMaxUnicode,
"unexpected value");
}
} else {
// Invalid prefix byte.
return false;
}
}
return true;
}
// Returns true if `bytes` is well-formed UTF-8; encoded surrogates are
// rejected.
bool bytesIsValidUTF8(RawBytes bytes) {
  const bool allow_surrogates = false;
  return bytesIsValidUTF8Impl(bytes, allow_surrogates);
}
// Returns true if `bytes` is well-formed UTF-8, with encoded surrogates
// additionally accepted.
bool bytesIsValidStr(RawBytes bytes) {
  const bool allow_surrogates = true;
  return bytesIsValidUTF8Impl(bytes, allow_surrogates);
}
// Used only for UserBytesBase as a heap-allocated object.
// Attribute table passed to addBuiltinType when the `bytes` type is
// registered in initializeBytesTypes. It declares a single hidden attribute
// located at RawUserBytesBase::kValueOffset.
static const BuiltinAttribute kUserBytesBaseAttributes[] = {
{ID(_UserBytes__value), RawUserBytesBase::kValueOffset,
AttributeFlags::kHidden},
};
// Attribute table passed to addBuiltinType when the `bytes_iterator` type is
// registered in initializeBytesTypes. Both entries are hidden: the iterated
// object (RawBytesIterator::kIterableOffset) and the current position
// (RawBytesIterator::kIndexOffset).
static const BuiltinAttribute kBytesIteratorAttributes[] = {
{ID(_bytes_iterator__iterable), RawBytesIterator::kIterableOffset,
AttributeFlags::kHidden},
{ID(_bytes_iterator__index), RawBytesIterator::kIndexOffset,
AttributeFlags::kHidden},
};
// Registers the builtin types defined in this file: `bytes`, the two
// immediate types `largebytes` and `smallbytes` (whose layouts are pointed
// back at `bytes` via setDescribedType), and `bytes_iterator`.
void initializeBytesTypes(Thread* thread) {
  Runtime* runtime = thread->runtime();
  HandleScope scope(thread);

  // `bytes` can be subclassed (basetype is true).
  Type bytes_type(
      &scope, addBuiltinType(thread, ID(bytes), LayoutId::kBytes,
                             /*superclass_id=*/LayoutId::kObject,
                             kUserBytesBaseAttributes, RawUserBytesBase::kSize,
                             /*basetype=*/true));

  {
    Type large_bytes_type(
        &scope, addImmediateBuiltinType(thread, ID(largebytes),
                                        LayoutId::kLargeBytes,
                                        /*builtin_base=*/LayoutId::kBytes,
                                        /*superclass_id=*/LayoutId::kObject,
                                        /*basetype=*/false));
    Layout::cast(large_bytes_type.instanceLayout())
        .setDescribedType(*bytes_type);
    runtime->setLargeBytesType(large_bytes_type);
  }

  {
    Type small_bytes_type(
        &scope, addImmediateBuiltinType(thread, ID(smallbytes),
                                        LayoutId::kSmallBytes,
                                        /*builtin_base=*/LayoutId::kBytes,
                                        /*superclass_id=*/LayoutId::kObject,
                                        /*basetype=*/false));
    Layout::cast(small_bytes_type.instanceLayout())
        .setDescribedType(*bytes_type);
    runtime->setSmallBytesType(small_bytes_type);
  }

  addBuiltinType(thread, ID(bytes_iterator), LayoutId::kBytesIterator,
                 /*superclass_id=*/LayoutId::kObject, kBytesIteratorAttributes,
                 BytesIterator::kSize, /*basetype=*/false);
}
// bytes.__add__(self, other): concatenates `self` with a bytes or bytearray
// operand. Raises TypeError for a non-bytes receiver or an unsupported
// operand.
RawObject METH(bytes, __add__)(Thread* thread, Arguments args) {
  HandleScope scope(thread);
  Runtime* runtime = thread->runtime();
  Object self_obj(&scope, args.get(0));
  if (!runtime->isInstanceOfBytes(*self_obj)) {
    return thread->raiseRequiresType(self_obj, ID(bytes));
  }
  Bytes lhs(&scope, bytesUnderlying(*self_obj));

  // Normalize the right operand to a plain bytes object first.
  Object other_obj(&scope, args.get(1));
  if (runtime->isInstanceOfBytes(*other_obj)) {
    other_obj = bytesUnderlying(*other_obj);
  } else if (runtime->isInstanceOfBytearray(*other_obj)) {
    Bytearray array(&scope, *other_obj);
    other_obj = bytearrayAsBytes(thread, array);
  } else {
    // TODO(T38246066): buffers besides bytes/bytearray
    return thread->raiseWithFmt(LayoutId::kTypeError,
                                "can't concat %T to bytes", &other_obj);
  }
  Bytes rhs(&scope, *other_obj);
  return runtime->bytesConcat(thread, lhs, rhs);
}
// bytes.__eq__(self, other): True when both operands hold the same byte
// sequence. Returns NotImplemented when `other` is not a bytes instance.
RawObject METH(bytes, __eq__)(Thread* thread, Arguments args) {
  HandleScope scope(thread);
  Runtime* runtime = thread->runtime();
  Object receiver(&scope, args.get(0));
  if (!runtime->isInstanceOfBytes(*receiver)) {
    return thread->raiseRequiresType(receiver, ID(bytes));
  }
  Object operand(&scope, args.get(1));
  if (!runtime->isInstanceOfBytes(*operand)) {
    return NotImplementedType::object();
  }
  Bytes lhs(&scope, bytesUnderlying(*receiver));
  Bytes rhs(&scope, bytesUnderlying(*operand));
  const bool is_equal = lhs.compare(*rhs) == 0;
  return Bool::fromBool(is_equal);
}
// bytes.__ge__(self, other): True when `self` compares greater than or equal
// to `other`. Returns NotImplemented when `other` is not a bytes instance.
RawObject METH(bytes, __ge__)(Thread* thread, Arguments args) {
  HandleScope scope(thread);
  Runtime* runtime = thread->runtime();
  Object receiver(&scope, args.get(0));
  if (!runtime->isInstanceOfBytes(*receiver)) {
    return thread->raiseRequiresType(receiver, ID(bytes));
  }
  Object operand(&scope, args.get(1));
  if (!runtime->isInstanceOfBytes(*operand)) {
    return NotImplementedType::object();
  }
  Bytes lhs(&scope, bytesUnderlying(*receiver));
  Bytes rhs(&scope, bytesUnderlying(*operand));
  const bool is_greater_or_equal = lhs.compare(*rhs) >= 0;
  return Bool::fromBool(is_greater_or_equal);
}
// bytes.__gt__(self, other): True when `self` compares strictly greater than
// `other`. Returns NotImplemented when `other` is not a bytes instance.
RawObject METH(bytes, __gt__)(Thread* thread, Arguments args) {
  HandleScope scope(thread);
  Runtime* runtime = thread->runtime();
  Object receiver(&scope, args.get(0));
  if (!runtime->isInstanceOfBytes(*receiver)) {
    return thread->raiseRequiresType(receiver, ID(bytes));
  }
  Object operand(&scope, args.get(1));
  if (!runtime->isInstanceOfBytes(*operand)) {
    return NotImplementedType::object();
  }
  Bytes lhs(&scope, bytesUnderlying(*receiver));
  Bytes rhs(&scope, bytesUnderlying(*operand));
  const bool is_greater = lhs.compare(*rhs) > 0;
  return Bool::fromBool(is_greater);
}
// bytes.__hash__(self): returns the value of bytesHash for the underlying
// bytes as a SmallInt.
RawObject METH(bytes, __hash__)(Thread* thread, Arguments args) {
  HandleScope scope(thread);
  Object receiver(&scope, args.get(0));
  const bool is_bytes = thread->runtime()->isInstanceOfBytes(*receiver);
  if (!is_bytes) {
    return thread->raiseRequiresType(receiver, ID(bytes));
  }
  Bytes data(&scope, bytesUnderlying(*receiver));
  return SmallInt::fromWord(bytesHash(thread, *data));
}
// bytes.__iter__(self): returns a new bytes iterator over the underlying
// bytes of `self`.
RawObject METH(bytes, __iter__)(Thread* thread, Arguments args) {
  Runtime* runtime = thread->runtime();
  HandleScope scope(thread);
  Object receiver(&scope, args.get(0));
  if (!runtime->isInstanceOfBytes(*receiver)) {
    return thread->raiseRequiresType(receiver, ID(bytes));
  }
  Bytes data(&scope, bytesUnderlying(*receiver));
  return runtime->newBytesIterator(thread, data);
}
// bytes.__le__(self, other): True when `self` compares less than or equal to
// `other`. Returns NotImplemented when `other` is not a bytes instance.
RawObject METH(bytes, __le__)(Thread* thread, Arguments args) {
  HandleScope scope(thread);
  Runtime* runtime = thread->runtime();
  Object receiver(&scope, args.get(0));
  if (!runtime->isInstanceOfBytes(*receiver)) {
    return thread->raiseRequiresType(receiver, ID(bytes));
  }
  Object operand(&scope, args.get(1));
  if (!runtime->isInstanceOfBytes(*operand)) {
    return NotImplementedType::object();
  }
  Bytes lhs(&scope, bytesUnderlying(*receiver));
  Bytes rhs(&scope, bytesUnderlying(*operand));
  const bool is_less_or_equal = lhs.compare(*rhs) <= 0;
  return Bool::fromBool(is_less_or_equal);
}
// bytes.__len__(self): returns the number of bytes in `self` as a SmallInt.
RawObject METH(bytes, __len__)(Thread* thread, Arguments args) {
  HandleScope scope(thread);
  Runtime* runtime = thread->runtime();
  Object receiver(&scope, args.get(0));
  if (!runtime->isInstanceOfBytes(*receiver)) {
    return thread->raiseRequiresType(receiver, ID(bytes));
  }
  Bytes data(&scope, bytesUnderlying(*receiver));
  const word num_bytes = data.length();
  return SmallInt::fromWord(num_bytes);
}
// bytes.__lt__(self, other): True when `self` compares strictly less than
// `other`. Returns NotImplemented when `other` is not a bytes instance.
RawObject METH(bytes, __lt__)(Thread* thread, Arguments args) {
  HandleScope scope(thread);
  Runtime* runtime = thread->runtime();
  Object receiver(&scope, args.get(0));
  if (!runtime->isInstanceOfBytes(*receiver)) {
    return thread->raiseRequiresType(receiver, ID(bytes));
  }
  Object operand(&scope, args.get(1));
  if (!runtime->isInstanceOfBytes(*operand)) {
    return NotImplementedType::object();
  }
  Bytes lhs(&scope, bytesUnderlying(*receiver));
  Bytes rhs(&scope, bytesUnderlying(*operand));
  const bool is_less = lhs.compare(*rhs) < 0;
  return Bool::fromBool(is_less);
}
// bytes.__mul__(self, count): returns `self` repeated `count` times.
//
// `count` is converted with intFromIndex; any error from that conversion is
// propagated. OverflowError is raised when the count does not fit in a
// SmallInt or when the resulting length overflows / is not a valid SmallInt.
RawObject METH(bytes, __mul__)(Thread* thread, Arguments args) {
  HandleScope scope(thread);
  Runtime* runtime = thread->runtime();
  Object receiver(&scope, args.get(0));
  if (!runtime->isInstanceOfBytes(*receiver)) {
    return thread->raiseRequiresType(receiver, ID(bytes));
  }
  Object index_arg(&scope, args.get(1));
  Object count_obj(&scope, intFromIndex(thread, index_arg));
  if (count_obj.isError()) {
    return *count_obj;
  }
  Bytes self(&scope, bytesUnderlying(*receiver));
  const word count = intUnderlying(*count_obj).asWordSaturated();
  if (!SmallInt::isValid(count)) {
    return thread->raiseWithFmt(LayoutId::kOverflowError,
                                "cannot fit '%T' into an index-sized integer",
                                &count_obj);
  }

  // Trivial results that need no allocation.
  const word length = self.length();
  if (length == 0 || count <= 0) {
    return Bytes::empty();
  }
  if (count == 1) {
    return *self;
  }

  // Only used to detect an oversized result; bytesRepeat receives the
  // original length and count.
  word total_length;
  const bool overflowed =
      __builtin_mul_overflow(length, count, &total_length);
  if (overflowed || !SmallInt::isValid(total_length)) {
    return thread->raiseWithFmt(LayoutId::kOverflowError,
                                "repeated bytes are too long");
  }
  return runtime->bytesRepeat(thread, self, length, count);
}
// bytes.__ne__(self, other): True when the operands hold different byte
// sequences. Returns NotImplemented when `other` is not a bytes instance.
RawObject METH(bytes, __ne__)(Thread* thread, Arguments args) {
  HandleScope scope(thread);
  Runtime* runtime = thread->runtime();
  Object receiver(&scope, args.get(0));
  if (!runtime->isInstanceOfBytes(*receiver)) {
    return thread->raiseRequiresType(receiver, ID(bytes));
  }
  Object operand(&scope, args.get(1));
  if (!runtime->isInstanceOfBytes(*operand)) {
    return NotImplementedType::object();
  }
  Bytes lhs(&scope, bytesUnderlying(*receiver));
  Bytes rhs(&scope, bytesUnderlying(*operand));
  const bool differs = lhs.compare(*rhs) != 0;
  return Bool::fromBool(differs);
}
// bytes.__repr__(self): delegates to byteslikeReprSmartQuotes to build the
// repr of `self`.
RawObject METH(bytes, __repr__)(Thread* thread, Arguments args) {
  HandleScope scope(thread);
  Runtime* runtime = thread->runtime();
  Object receiver(&scope, args.get(0));
  const bool is_bytes = runtime->isInstanceOfBytes(*receiver);
  if (!is_bytes) {
    return thread->raiseRequiresType(receiver, ID(bytes));
  }
  Byteslike view(&scope, thread, *receiver);
  return byteslikeReprSmartQuotes(thread, view);
}
// bytes.hex(self): returns a str with two hexadecimal digits per byte of
// `self` (see bytesHex).
RawObject METH(bytes, hex)(Thread* thread, Arguments args) {
  HandleScope scope(thread);
  Object receiver(&scope, args.get(0));
  const bool is_bytes = thread->runtime()->isInstanceOfBytes(*receiver);
  if (!is_bytes) {
    return thread->raiseRequiresType(receiver, ID(bytes));
  }
  Bytes data(&scope, bytesUnderlying(*receiver));
  const word num_bytes = data.length();
  return bytesHex(thread, data, num_bytes);
}
// bytes.isalnum(self): True when `self` is non-empty and ASCII::isAlnum holds
// for every byte.
RawObject METH(bytes, isalnum)(Thread* thread, Arguments args) {
  HandleScope scope(thread);
  Object receiver(&scope, args.get(0));
  if (!thread->runtime()->isInstanceOfBytes(*receiver)) {
    return thread->raiseRequiresType(receiver, ID(bytes));
  }
  Bytes self(&scope, bytesUnderlying(*receiver));
  const word length = self.length();
  // Advance past the leading run of matching bytes.
  word checked = 0;
  while (checked < length && ASCII::isAlnum(self.byteAt(checked))) {
    checked++;
  }
  return Bool::fromBool(length != 0 && checked == length);
}
// bytes.isalpha(self): True when `self` is non-empty and ASCII::isAlpha holds
// for every byte.
RawObject METH(bytes, isalpha)(Thread* thread, Arguments args) {
  HandleScope scope(thread);
  Object receiver(&scope, args.get(0));
  if (!thread->runtime()->isInstanceOfBytes(*receiver)) {
    return thread->raiseRequiresType(receiver, ID(bytes));
  }
  Bytes self(&scope, bytesUnderlying(*receiver));
  const word length = self.length();
  // Advance past the leading run of matching bytes.
  word checked = 0;
  while (checked < length && ASCII::isAlpha(self.byteAt(checked))) {
    checked++;
  }
  return Bool::fromBool(length != 0 && checked == length);
}
// bytes.isdigit(self): True when `self` is non-empty and ASCII::isDigit holds
// for every byte.
RawObject METH(bytes, isdigit)(Thread* thread, Arguments args) {
  HandleScope scope(thread);
  Object receiver(&scope, args.get(0));
  if (!thread->runtime()->isInstanceOfBytes(*receiver)) {
    return thread->raiseRequiresType(receiver, ID(bytes));
  }
  Bytes self(&scope, bytesUnderlying(*receiver));
  const word length = self.length();
  // Advance past the leading run of matching bytes.
  word checked = 0;
  while (checked < length && ASCII::isDigit(self.byteAt(checked))) {
    checked++;
  }
  return Bool::fromBool(length != 0 && checked == length);
}
// bytes.islower(self): True when `self` contains at least one lowercase ASCII
// letter and no uppercase ASCII letter. Bytes that are not letters (digits,
// punctuation, whitespace, non-ASCII values) are ignored, matching Python's
// `bytes.islower`: b"abc1".islower() is True, b"123".islower() and
// b"".islower() are False.
RawObject METH(bytes, islower)(Thread* thread, Arguments args) {
  HandleScope scope(thread);
  Object self_obj(&scope, args.get(0));
  if (!thread->runtime()->isInstanceOfBytes(*self_obj)) {
    return thread->raiseRequiresType(self_obj, ID(bytes));
  }
  Bytes self(&scope, bytesUnderlying(*self_obj));
  word length = self.length();
  // Stays false for empty or letter-free input.
  bool has_lower = false;
  for (word i = 0; i < length; i++) {
    const auto current = self.byteAt(i);
    if (ASCII::isUpper(current)) {
      // A single uppercase letter disqualifies the whole sequence.
      return Bool::falseObj();
    }
    if (ASCII::isLower(current)) {
      has_lower = true;
    }
  }
  return Bool::fromBool(has_lower);
}
// bytes.isspace(self): True when `self` is non-empty and ASCII::isSpace holds
// for every byte.
RawObject METH(bytes, isspace)(Thread* thread, Arguments args) {
  HandleScope scope(thread);
  Object receiver(&scope, args.get(0));
  if (!thread->runtime()->isInstanceOfBytes(*receiver)) {
    return thread->raiseRequiresType(receiver, ID(bytes));
  }
  Bytes self(&scope, bytesUnderlying(*receiver));
  const word length = self.length();
  // Advance past the leading run of matching bytes.
  word checked = 0;
  while (checked < length && ASCII::isSpace(self.byteAt(checked))) {
    checked++;
  }
  return Bool::fromBool(length != 0 && checked == length);
}
// bytes.istitle(self): True when `self` contains at least one ASCII letter,
// every uppercase letter directly follows a non-letter (or starts the
// sequence) and every lowercase letter directly follows a letter.
RawObject METH(bytes, istitle)(Thread* thread, Arguments args) {
  HandleScope scope(thread);
  Object receiver(&scope, args.get(0));
  if (!thread->runtime()->isInstanceOfBytes(*receiver)) {
    return thread->raiseRequiresType(receiver, ID(bytes));
  }
  Bytes self(&scope, bytesUnderlying(*receiver));
  const word length = self.length();
  bool seen_letter = false;      // any cased byte encountered so far
  bool after_letter = false;     // previous byte was a cased byte
  for (word i = 0; i < length; i++) {
    const byte current = self.byteAt(i);
    const bool is_upper = ASCII::isUpper(current);
    const bool is_lower = !is_upper && ASCII::isLower(current);
    if (!is_upper && !is_lower) {
      after_letter = false;
      continue;
    }
    // Uppercase must begin a word, lowercase must continue one.
    const bool misplaced = is_upper ? after_letter : !after_letter;
    if (misplaced) {
      return Bool::falseObj();
    }
    seen_letter = true;
    after_letter = true;
  }
  return Bool::fromBool(seen_letter);
}
// bytes.isupper(self): True when `self` contains at least one uppercase ASCII
// letter and no lowercase ASCII letter. Bytes that are not letters (digits,
// punctuation, whitespace, non-ASCII values) are ignored, matching Python's
// `bytes.isupper`: b"ABC1".isupper() is True, b"123".isupper() and
// b"".isupper() are False.
RawObject METH(bytes, isupper)(Thread* thread, Arguments args) {
  HandleScope scope(thread);
  Object self_obj(&scope, args.get(0));
  if (!thread->runtime()->isInstanceOfBytes(*self_obj)) {
    return thread->raiseRequiresType(self_obj, ID(bytes));
  }
  Bytes self(&scope, bytesUnderlying(*self_obj));
  word length = self.length();
  // Stays false for empty or letter-free input.
  bool has_upper = false;
  for (word i = 0; i < length; i++) {
    const auto current = self.byteAt(i);
    if (ASCII::isLower(current)) {
      // A single lowercase letter disqualifies the whole sequence.
      return Bool::falseObj();
    }
    if (ASCII::isUpper(current)) {
      has_upper = true;
    }
  }
  return Bool::fromBool(has_upper);
}
// bytes.lower(self): returns a new bytes object in which every byte has been
// mapped through ASCII::toLower.
RawObject METH(bytes, lower)(Thread* thread, Arguments args) {
  Runtime* runtime = thread->runtime();
  HandleScope scope(thread);
  Object self(&scope, args.get(0));
  if (!runtime->isInstanceOfBytes(*self)) {
    return thread->raiseRequiresType(self, ID(bytes));
  }
  self = bytesUnderlying(*self);
  if (!self.isSmallBytes()) {
    // Large payload: write the converted bytes into a fresh buffer.
    LargeBytes source(&scope, *self);
    const word length = source.length();
    MutableBytes converted(&scope,
                           runtime->newMutableBytesUninitialized(length));
    for (word i = 0; i < length; i++) {
      converted.byteAtPut(i, ASCII::toLower(source.byteAt(i)));
    }
    return converted.becomeImmutable();
  }
  // Small payload: convert in a stack buffer and repack as SmallBytes.
  SmallBytes source(&scope, *self);
  const word length = source.length();
  byte buffer[SmallBytes::kMaxLength];
  source.copyTo(buffer, length);
  for (word i = 0; i < length; i++) {
    buffer[i] = ASCII::toLower(buffer[i]);
  }
  return SmallBytes::fromBytes(View<byte>(buffer, length));
}
// bytes.lstrip(self, chars): strips leading bytes from `self`. With
// `chars` == None ASCII whitespace is removed; otherwise `chars` must be a
// bytes or bytearray naming the bytes to remove, else TypeError is raised.
RawObject METH(bytes, lstrip)(Thread* thread, Arguments args) {
  Runtime* runtime = thread->runtime();
  HandleScope scope(thread);
  Object receiver(&scope, args.get(0));
  if (!runtime->isInstanceOfBytes(*receiver)) {
    return thread->raiseRequiresType(receiver, ID(bytes));
  }
  Bytes self(&scope, bytesUnderlying(*receiver));
  const word self_length = self.length();
  Object chars_obj(&scope, args.get(1));
  if (chars_obj.isNoneType()) {
    return bytesStripSpaceLeft(thread, self, self_length);
  }
  if (runtime->isInstanceOfBytes(*chars_obj)) {
    Bytes strip_set(&scope, bytesUnderlying(*chars_obj));
    return bytesStripLeft(thread, self, self_length, strip_set,
                          strip_set.length());
  }
  if (runtime->isInstanceOfBytearray(*chars_obj)) {
    // Only the first numItems() bytes of the backing storage are passed on.
    Bytearray array(&scope, *chars_obj);
    Bytes strip_set(&scope, array.items());
    return bytesStripLeft(thread, self, self_length, strip_set,
                          array.numItems());
  }
  // TODO(T38246066): support bytes-like objects other than bytes, bytearray
  return thread->raiseWithFmt(LayoutId::kTypeError,
                              "a bytes-like object is required, not '%T'",
                              &chars_obj);
}
// bytes.rstrip(self, chars): strips trailing bytes from `self`. With
// `chars` == None ASCII whitespace is removed; otherwise `chars` must be a
// bytes or bytearray naming the bytes to remove, else TypeError is raised.
RawObject METH(bytes, rstrip)(Thread* thread, Arguments args) {
  Runtime* runtime = thread->runtime();
  HandleScope scope(thread);
  Object receiver(&scope, args.get(0));
  if (!runtime->isInstanceOfBytes(*receiver)) {
    return thread->raiseRequiresType(receiver, ID(bytes));
  }
  Bytes self(&scope, bytesUnderlying(*receiver));
  const word self_length = self.length();
  Object chars_obj(&scope, args.get(1));
  if (chars_obj.isNoneType()) {
    return bytesStripSpaceRight(thread, self, self_length);
  }
  if (runtime->isInstanceOfBytes(*chars_obj)) {
    Bytes strip_set(&scope, bytesUnderlying(*chars_obj));
    return bytesStripRight(thread, self, self_length, strip_set,
                           strip_set.length());
  }
  if (runtime->isInstanceOfBytearray(*chars_obj)) {
    // Only the first numItems() bytes of the backing storage are passed on.
    Bytearray array(&scope, *chars_obj);
    Bytes strip_set(&scope, array.items());
    return bytesStripRight(thread, self, self_length, strip_set,
                           array.numItems());
  }
  // TODO(T38246066): support bytes-like objects other than bytes, bytearray
  return thread->raiseWithFmt(LayoutId::kTypeError,
                              "a bytes-like object is required, not '%T'",
                              &chars_obj);
}
// bytes.strip(self, chars): strips leading and trailing bytes from `self`.
// With `chars` == None ASCII whitespace is removed; otherwise `chars` must be
// a bytes or bytearray naming the bytes to remove, else TypeError is raised.
RawObject METH(bytes, strip)(Thread* thread, Arguments args) {
  Runtime* runtime = thread->runtime();
  HandleScope scope(thread);
  Object receiver(&scope, args.get(0));
  if (!runtime->isInstanceOfBytes(*receiver)) {
    return thread->raiseRequiresType(receiver, ID(bytes));
  }
  Bytes self(&scope, bytesUnderlying(*receiver));
  const word self_length = self.length();
  Object chars_obj(&scope, args.get(1));
  if (chars_obj.isNoneType()) {
    return bytesStripSpace(thread, self, self_length);
  }
  if (runtime->isInstanceOfBytes(*chars_obj)) {
    Bytes strip_set(&scope, bytesUnderlying(*chars_obj));
    return bytesStrip(thread, self, self_length, strip_set,
                      strip_set.length());
  }
  if (runtime->isInstanceOfBytearray(*chars_obj)) {
    // Only the first numItems() bytes of the backing storage are passed on.
    Bytearray array(&scope, *chars_obj);
    Bytes strip_set(&scope, array.items());
    return bytesStrip(thread, self, self_length, strip_set, array.numItems());
  }
  // TODO(T38246066): support bytes-like objects other than bytes, bytearray
  return thread->raiseWithFmt(LayoutId::kTypeError,
                              "a bytes-like object is required, not '%T'",
                              &chars_obj);
}
// bytes.splitlines(self, keepends): splits `self` into a list of lines via
// bytesSplitLines. `keepends` must be an int instance; any non-zero value
// keeps the line terminators.
RawObject METH(bytes, splitlines)(Thread* thread, Arguments args) {
  Runtime* runtime = thread->runtime();
  HandleScope scope(thread);
  Object receiver(&scope, args.get(0));
  Object keepends_arg(&scope, args.get(1));
  if (!runtime->isInstanceOfBytes(*receiver)) {
    return thread->raiseRequiresType(receiver, ID(bytes));
  }
  if (!runtime->isInstanceOfInt(*keepends_arg)) {
    return thread->raiseRequiresType(keepends_arg, ID(int));
  }
  Bytes data(&scope, bytesUnderlying(*receiver));
  const bool is_zero = intUnderlying(*keepends_arg).isZero();
  return bytesSplitLines(thread, data, data.length(), /*keepends=*/!is_zero);
}
// bytes.translate(self, table, delete): forwards to Runtime::bytesTranslate.
//
// `table` may be None (an empty bytes object is passed on as the table), or a
// bytes/bytearray of exactly kByteTranslationTableLength bytes; any other
// length raises ValueError. `delete` must be a bytes or bytearray. Arguments
// of other types raise TypeError.
RawObject METH(bytes, translate)(Thread* thread, Arguments args) {
  Runtime* runtime = thread->runtime();
  HandleScope scope(thread);
  Object receiver(&scope, args.get(0));
  if (!runtime->isInstanceOfBytes(*receiver)) {
    return thread->raiseRequiresType(receiver, ID(bytes));
  }
  Bytes self(&scope, bytesUnderlying(*receiver));

  // Resolve the translation table and its length.
  Object table_obj(&scope, args.get(1));
  word table_length;
  if (table_obj.isNoneType()) {
    table_obj = Bytes::empty();
    table_length = kByteTranslationTableLength;
  } else if (runtime->isInstanceOfBytes(*table_obj)) {
    Bytes table_bytes(&scope, bytesUnderlying(*table_obj));
    table_obj = *table_bytes;
    table_length = table_bytes.length();
  } else if (runtime->isInstanceOfBytearray(*table_obj)) {
    Bytearray table_array(&scope, *table_obj);
    table_length = table_array.numItems();
    table_obj = table_array.items();
  } else {
    // TODO(T38246066): allow any bytes-like object
    return thread->raiseWithFmt(LayoutId::kTypeError,
                                "a bytes-like object is required, not '%T'",
                                &table_obj);
  }
  if (table_length != kByteTranslationTableLength) {
    return thread->raiseWithFmt(LayoutId::kValueError,
                                "translation table must be %w characters long",
                                kByteTranslationTableLength);
  }
  Bytes table(&scope, *table_obj);

  // Resolve the set of bytes to delete and run the translation.
  Object del(&scope, args.get(2));
  const word self_length = self.length();
  if (runtime->isInstanceOfBytes(*del)) {
    Bytes deleted(&scope, bytesUnderlying(*del));
    return runtime->bytesTranslate(thread, self, self_length, table,
                                   table_length, deleted, deleted.length());
  }
  if (runtime->isInstanceOfBytearray(*del)) {
    Bytearray deleted_array(&scope, *del);
    Bytes deleted(&scope, deleted_array.items());
    return runtime->bytesTranslate(thread, self, self_length, table,
                                   table_length, deleted,
                                   deleted_array.numItems());
  }
  // TODO(T38246066): allow any bytes-like object
  return thread->raiseWithFmt(
      LayoutId::kTypeError, "a bytes-like object is required, not '%T'", &del);
}
// bytes.upper(self): returns a new bytes object in which every byte has been
// mapped through ASCII::toUpper.
RawObject METH(bytes, upper)(Thread* thread, Arguments args) {
  Runtime* runtime = thread->runtime();
  HandleScope scope(thread);
  Object self(&scope, args.get(0));
  if (!runtime->isInstanceOfBytes(*self)) {
    return thread->raiseRequiresType(self, ID(bytes));
  }
  self = bytesUnderlying(*self);
  if (!self.isSmallBytes()) {
    // Large payload: write the converted bytes into a fresh buffer.
    LargeBytes source(&scope, *self);
    const word length = source.length();
    MutableBytes converted(&scope,
                           runtime->newMutableBytesUninitialized(length));
    for (word i = 0; i < length; i++) {
      converted.byteAtPut(i, ASCII::toUpper(source.byteAt(i)));
    }
    return converted.becomeImmutable();
  }
  // Small payload: convert in a stack buffer and repack as SmallBytes.
  SmallBytes source(&scope, *self);
  const word length = source.length();
  byte buffer[SmallBytes::kMaxLength];
  source.copyTo(buffer, length);
  for (word i = 0; i < length; i++) {
    buffer[i] = ASCII::toUpper(buffer[i]);
  }
  return SmallBytes::fromBytes(View<byte>(buffer, length));
}
// bytes_iterator.__iter__(self): returns the iterator itself. Raises a
// requires-type error when `self` is not a bytes iterator.
RawObject METH(bytes_iterator, __iter__)(Thread* thread, Arguments args) {
  HandleScope scope(thread);
  Object receiver(&scope, args.get(0));
  if (receiver.isBytesIterator()) {
    return *receiver;
  }
  return thread->raiseRequiresType(receiver, ID(bytes_iterator));
}
// bytes_iterator.__next__(self): returns the byte at the iterator's current
// index as a SmallInt and advances the index by one. Raises StopIteration
// once the index reaches the length of the iterated bytes.
RawObject METH(bytes_iterator, __next__)(Thread* thread, Arguments args) {
  HandleScope scope(thread);
  Object receiver(&scope, args.get(0));
  if (!receiver.isBytesIterator()) {
    return thread->raiseRequiresType(receiver, ID(bytes_iterator));
  }
  BytesIterator iter(&scope, *receiver);
  Bytes data(&scope, iter.iterable());
  const word position = iter.index();
  if (position < data.length()) {
    iter.setIndex(position + 1);
    return SmallInt::fromWord(data.byteAt(position));
  }
  return thread->raise(LayoutId::kStopIteration, NoneType::object());
}
// bytes_iterator.__length_hint__(self): returns the length of the iterated
// bytes minus the iterator's current index, as a SmallInt.
RawObject METH(bytes_iterator, __length_hint__)(Thread* thread,
                                                Arguments args) {
  HandleScope scope(thread);
  Object receiver(&scope, args.get(0));
  if (!receiver.isBytesIterator()) {
    return thread->raiseRequiresType(receiver, ID(bytes_iterator));
  }
  BytesIterator iter(&scope, *receiver);
  Bytes data(&scope, iter.iterable());
  const word remaining = data.length() - iter.index();
  return SmallInt::fromWord(remaining);
}
} // namespace py