in runtime/under-codecs-module.cpp [1239:1345]
RawObject FUNC(_codecs, backslashreplace_errors)(Thread* thread,
Arguments args) {
HandleScope scope(thread);
Runtime* runtime = thread->runtime();
Object error(&scope, args.get(0));
Object object(&scope, NoneType::object());
word start;
word end;
if (runtime->isInstanceOfUnicodeDecodeError(*error)) {
UnicodeErrorBase unicode_error(&scope, *error);
start = SmallInt::cast(unicode_error.start()).value();
end = SmallInt::cast(unicode_error.end()).value();
object = unicode_error.object();
if (!runtime->isInstanceOfBytes(*object)) {
return thread->raiseWithFmt(LayoutId::kTypeError,
"object attribute must be bytes");
}
Bytes bytes(&scope, bytesUnderlying(*object));
word length = bytes.length();
if (start >= length) start = length - 1;
if (start < 0) start = 0;
if (end >= length) end = length;
if (end < 1) end = 1;
word result_size = end - start;
if (result_size < 0) {
return thread->raiseWithFmt(LayoutId::kValueError, "end before start");
}
result_size *= 4;
MutableBytes result(&scope,
runtime->newMutableBytesUninitialized(result_size));
word pos = 0;
for (word i = start; i < end; i++) {
byte b = bytes.byteAt(i);
result.byteAtPut(pos++, '\\');
result.byteAtPut(pos++, 'x');
uwordToHexadecimalWithMutableBytes(*result, pos, /*num_digits=*/2, b);
pos += 2;
}
DCHECK(pos == result.length(), "size mismatch");
Object result_str(&scope, result.becomeStr());
Object end_obj(&scope, SmallInt::fromWord(end));
return runtime->newTupleWith2(result_str, end_obj);
}
if (runtime->isInstanceOfUnicodeEncodeError(*error) ||
runtime->isInstanceOfUnicodeTranslateError(*error)) {
UnicodeErrorBase unicode_error(&scope, *error);
start = SmallInt::cast(unicode_error.start()).value();
end = SmallInt::cast(unicode_error.end()).value();
object = unicode_error.object();
if (!runtime->isInstanceOfStr(*object)) {
return thread->raiseWithFmt(LayoutId::kTypeError,
"object attribute must be unicode");
}
Str str(&scope, strUnderlying(*object));
if (start < 0) start = 0;
if (end < 1) end = 1;
if (end < start) {
return thread->raiseWithFmt(LayoutId::kValueError, "end before start");
}
word start_byte = str.offsetByCodePoints(0, start);
word end_byte = str.offsetByCodePoints(start_byte, end - start);
word result_size = 0;
for (word i = start_byte; i < end_byte;) {
word num_bytes;
int32_t cp = str.codePointAt(i, &num_bytes);
i += num_bytes;
if (cp > kMaxUint16) {
result_size += 10; // Will replace with `\Uxxxxxxxx`
} else if (cp > kMaxByte) {
result_size += 6; // Will replace with `\uxxxx`
} else {
result_size += 4; // Will replace with `\xyy`
}
}
MutableBytes result(&scope,
runtime->newMutableBytesUninitialized(result_size));
word pos = 0;
for (word i = start_byte; i < end_byte;) {
word num_bytes;
int32_t cp = str.codePointAt(i, &num_bytes);
i += num_bytes;
result.byteAtPut(pos++, '\\');
if (cp > kMaxUint16) {
result.byteAtPut(pos++, 'U');
uwordToHexadecimalWithMutableBytes(*result, pos, /*num_digits=*/8, cp);
pos += 8;
} else if (cp > kMaxByte) {
result.byteAtPut(pos++, 'u');
uwordToHexadecimalWithMutableBytes(*result, pos, /*num_digits=*/4, cp);
pos += 4;
} else {
result.byteAtPut(pos++, 'x');
uwordToHexadecimalWithMutableBytes(*result, pos, /*num_digits=*/2, cp);
pos += 2;
}
}
DCHECK(pos == result.length(), "size mismatch");
Object result_bytes(&scope, result.becomeStr());
Object end_obj(&scope, SmallInt::fromWord(end));
return runtime->newTupleWith2(result_bytes, end_obj);
}
return thread->raiseWithFmt(LayoutId::kTypeError,
"don't know how to handle %T in error callback",
&error);
}