in runtime/str-builtins.cpp [1301:1413]
// Implements str.__repr__: returns a new str holding `self` wrapped in quotes
// with backslash escapes applied.
//
// - Raises (via raiseRequiresType) when args[0] is not a str instance.
// - The outer quote is a single quote unless the string contains single quotes
//   and no double quotes, in which case double quotes are used and nothing
//   inside needs a quote escape.
// - Backslash, tab, carriage return and newline use their two-character
//   escapes; other non-printable code points become \xNN, \uNNNN or
//   \UNNNNNNNN depending on magnitude; printable code points are copied
//   verbatim.
//
// The function makes two passes: the first computes the exact output size so
// the buffer is allocated once, the second fills the buffer.
RawObject METH(str, __repr__)(Thread* thread, Arguments args) {
  Runtime* runtime = thread->runtime();
  HandleScope scope(thread);
  Object self_obj(&scope, args.get(0));
  if (!runtime->isInstanceOfStr(*self_obj)) {
    return thread->raiseRequiresType(self_obj, ID(str));
  }
  Str self(&scope, strUnderlying(*self_obj));
  const word self_len = self.length();
  word result_len = 0;
  word squote = 0;
  word dquote = 0;
  // Pass 1: precompute the size so that only one allocation is necessary.
  // Quotes are counted as one byte each here; the extra backslashes (if any)
  // are added once the outer quote character has been chosen.
  for (word i = 0, char_len; i < self_len; i += char_len) {
    int32_t code_point = self.codePointAt(i, &char_len);
    if (code_point == '\'') {
      squote++;
      result_len += 1;
    } else if (code_point == '"') {
      dquote++;
      result_len += 1;
    } else if (code_point == '\\' || code_point == '\t' || code_point == '\r' ||
               code_point == '\n') {
      result_len += 2;  // backslash + one letter
    } else if (Unicode::isPrintable(code_point)) {
      result_len += char_len;  // copied verbatim
    } else if (code_point < 0x100) {
      result_len += 4;  // \xNN
    } else if (code_point < 0x10000) {
      result_len += 6;  // \uNNNN
    } else {
      result_len += 10;  // \UNNNNNNNN
    }
  }

  // Choose the outer quote. If there are both single quotes and double quotes,
  // the outer quote will be singles and every internal single quote needs an
  // escape backslash. If there are only single quotes, switching the outer
  // quote to doubles avoids escaping altogether.
  byte quote = '\'';
  if (squote > 0) {
    if (dquote > 0) {
      result_len += squote;
    } else {
      quote = '"';
    }
  }
  // Every escape accounted for above makes the output strictly longer than
  // the bytes it replaces (assuming `char_len` is the UTF-8 encoded length),
  // so equal lengths mean that the body can be copied as-is. Evaluating this
  // after the quote adjustment also lets strings that merely contain single
  // quotes take the bulk-copy path.
  const bool unchanged = (result_len == self_len);
  result_len += 2;  // quotes

  MutableBytes buf(&scope, runtime->newMutableBytesUninitialized(result_len));
  buf.byteAtPut(0, quote);
  buf.byteAtPut(result_len - 1, quote);
  if (unchanged) {
    // Remaining characters were unmodified, so copy them directly.
    buf.replaceFromWithStr(1, *self, self_len);
    return buf.becomeStr();
  }

  // Pass 2: emit the escaped body between the two quote bytes.
  word out = 1;
  for (word in = 0, char_len; in < self_len; in += char_len) {
    int32_t code_point = self.codePointAt(in, &char_len);

    // Two-byte escapes: backslash followed by a single marker byte.
    byte marker = 0;
    if (code_point == quote) {
      marker = quote;
    } else {
      switch (code_point) {
        case '\\':
          marker = '\\';
          break;
        case '\t':
          marker = 't';
          break;
        case '\r':
          marker = 'r';
          break;
        case '\n':
          marker = 'n';
          break;
        default:
          break;
      }
    }
    if (marker != 0) {
      buf.byteAtPut(out++, '\\');
      buf.byteAtPut(out++, marker);
      continue;
    }

    // ASCII is classified with a cheap range check (0x20..0x7e is printable)
    // so that the Unicode table lookup is only done for non-ASCII input.
    const bool printable =
        code_point <= kMaxASCII
            ? (' ' <= code_point && code_point < kMaxASCII)
            : Unicode::isPrintable(code_point);
    if (printable) {
      // Copy the encoded bytes of this code point unchanged.
      for (word i = 0; i < char_len; i++) {
        buf.byteAtPut(out + i, self.byteAt(in + i));
      }
      out += char_len;
    } else if (code_point <= 0xff) {
      buf.byteAtPut(out++, '\\');
      buf.byteAtPut(out++, 'x');
      uwordToHexadecimalWithMutableBytes(*buf, /*index=*/out, /*num_digits=*/2,
                                         code_point);
      out += 2;
    } else if (code_point <= 0xffff) {
      buf.byteAtPut(out++, '\\');
      buf.byteAtPut(out++, 'u');
      uwordToHexadecimalWithMutableBytes(*buf, /*index=*/out, /*num_digits=*/4,
                                         code_point);
      out += 4;
    } else {
      buf.byteAtPut(out++, '\\');
      buf.byteAtPut(out++, 'U');
      uwordToHexadecimalWithMutableBytes(*buf, /*index=*/out,
                                         /*num_digits=*/8, code_point);
      out += 8;
    }
  }
  // `out` must now sit on the closing quote written earlier; anything else
  // means the two passes disagreed about the output size.
  DCHECK(out == result_len - 1, "wrote %ld characters, expected %ld", out - 1,
         result_len - 2);
  return buf.becomeStr();
}