RawObject METH()

in runtime/str-builtins.cpp [1301:1413]


// Implements str.__repr__: returns a quoted copy of the receiver in which the
// enclosing quote character, backslashes, tabs, carriage returns, newlines and
// non-printable code points are written as backslash escapes.
//
// args.get(0) is the receiver; when it is not an instance of str the result of
// thread->raiseRequiresType() is returned instead.
//
// The work is done in two passes over the string: the first computes the exact
// output size (so only one buffer is allocated), the second fills the buffer.
RawObject METH(str, __repr__)(Thread* thread, Arguments args) {
  HandleScope scope(thread);
  Runtime* runtime = thread->runtime();
  Object receiver(&scope, args.get(0));
  if (!runtime->isInstanceOfStr(*receiver)) {
    return thread->raiseRequiresType(receiver, ID(str));
  }
  Str self(&scope, strUnderlying(*receiver));
  const word src_len = self.length();

  // Pass 1: count both kinds of quotes and add up how much room every code
  // point needs in the output. Quotes are counted as a single unit here; the
  // backslashes for escaped quotes are added once the delimiter is known.
  word num_single = 0;
  word num_double = 0;
  word repr_len = 0;
  for (word pos = 0, char_len; pos < src_len; pos += char_len) {
    const int32_t code_point = self.codePointAt(pos, &char_len);
    switch (code_point) {
      case '\'':
        num_single++;
        repr_len += 1;
        break;
      case '"':
        num_double++;
        repr_len += 1;
        break;
      case '\\':
      case '\t':
      case '\r':
      case '\n':
        // Backslash followed by one character.
        repr_len += 2;
        break;
      default:
        if (Unicode::isPrintable(code_point)) {
          // Copied through verbatim.
          repr_len += char_len;
        } else if (code_point < 0x100) {
          repr_len += 4;  // \xNN
        } else if (code_point < 0x10000) {
          repr_len += 6;  // \uNNNN
        } else {
          repr_len += 10;  // \UNNNNNNNN
        }
        break;
    }
  }

  const bool has_single = num_single > 0;
  const bool has_double = num_double > 0;
  // Every escape is longer than the character it replaces, so matching lengths
  // mean nothing needs escaping. Any single quote still forces the slow path.
  const bool verbatim = !has_single && repr_len == src_len;
  // Single quotes delimit the result unless the string contains single quotes
  // and no double quotes, in which case double quotes are used.
  const byte quote = (has_single && !has_double) ? '"' : '\'';
  if (has_single && has_double) {
    // Both kinds are present: the delimiter stays a single quote, so each
    // embedded single quote needs one extra byte for its backslash.
    repr_len += num_single;
  }
  repr_len += 2;  // opening and closing quote

  MutableBytes buf(&scope, runtime->newMutableBytesUninitialized(repr_len));
  buf.byteAtPut(0, quote);
  buf.byteAtPut(repr_len - 1, quote);
  if (verbatim) {
    // Nothing to escape: copy the contents in one go between the quotes.
    buf.replaceFromWithStr(1, *self, src_len);
    return buf.becomeStr();
  }

  // Pass 2: write the escaped contents, starting right after the opening
  // quote.
  word dst = 1;
  // Writes a backslash followed by `marker`.
  auto put_escaped = [&](byte marker) {
    buf.byteAtPut(dst++, '\\');
    buf.byteAtPut(dst++, marker);
  };
  // Writes a backslash, `marker`, and then `num_digits` hexadecimal digits of
  // `value`.
  auto put_hex = [&](byte marker, int num_digits, int32_t value) {
    put_escaped(marker);
    uwordToHexadecimalWithMutableBytes(*buf, /*index=*/dst, num_digits, value);
    dst += num_digits;
  };
  for (word src = 0, char_len; src < src_len; src += char_len) {
    const int32_t code_point = self.codePointAt(src, &char_len);
    if (code_point == quote) {
      put_escaped(quote);
      continue;
    }
    switch (code_point) {
      case '\\':
        put_escaped('\\');
        continue;
      case '\t':
        put_escaped('t');
        continue;
      case '\r':
        put_escaped('r');
        continue;
      case '\n':
        put_escaped('n');
        continue;
      default:
        break;
    }
    if (code_point <= kMaxASCII) {
      if (' ' <= code_point && code_point < kMaxASCII) {
        // Remaining ASCII from space up to (not including) kMaxASCII is
        // emitted as-is.
        buf.byteAtPut(dst++, code_point);
      } else {
        // Everything else in the ASCII range becomes \xNN.
        put_hex('x', 2, code_point);
      }
      continue;
    }
    if (Unicode::isPrintable(code_point)) {
      // Printable non-ASCII: copy the encoded bytes unchanged.
      for (word k = 0; k < char_len; k++) {
        buf.byteAtPut(dst + k, self.byteAt(src + k));
      }
      dst += char_len;
    } else if (code_point <= 0xff) {
      put_hex('x', 2, code_point);
    } else if (code_point <= 0xffff) {
      put_hex('u', 4, code_point);
    } else {
      put_hex('U', 8, code_point);
    }
  }
  DCHECK(dst == repr_len - 1, "wrote %ld characters, expected %ld", dst - 1,
         repr_len - 2);
  return buf.becomeStr();
}