static RawObject maybeDecode()

in runtime/under-json-module.cpp [139:193]


// Sniffs the encoding of a JSON payload from its leading bytes and, when it
// does not look like UTF-8, has `_codecs.decode` transcode it.
//
// `bytes`/`length` describe the raw data being inspected. `s` is the object
// handed to `_codecs.decode` when transcoding is needed (presumably the
// original input object that `bytes` was taken from -- confirm against the
// caller). `*next` is advanced by 3 when a UTF-8 BOM is found, which
// presumably makes the caller start reading after the BOM.
//
// Returns `*bytes` unchanged when the data is taken to be UTF-8; otherwise
// returns whatever `_codecs.decode(s, <encoding>, "surrogatepass")` yields.
static RawObject maybeDecode(Thread* thread, const Object& s,
                             const Bytes& bytes, word length, word* next) {
  // With fewer than two bytes there is nothing to base a guess on, so the
  // input is treated as UTF-8.
  if (length < 2) return *bytes;

  const byte b0 = bytes.byteAt(0);
  const byte b1 = bytes.byteAt(1);
  // Bytes 2 and 3 may only be read when the input is long enough.
  const bool has_third = length >= 3;
  const bool has_fourth = length >= 4;

  // A UTF-8 BOM needs no transcoding; just step over it.
  if (has_third && b0 == UTF8::kBOM[0] && b1 == UTF8::kBOM[1] &&
      bytes.byteAt(2) == UTF8::kBOM[2]) {
    *next += 3;
    return *bytes;
  }

  // The UTF-32 BOMs are four bytes long while the UTF-16 checks only look at
  // the first two bytes, so UTF-32 has to be tested first.
  const bool utf32_bom =
      has_fourth && ((b0 == UTF32::kBOMLittleEndian[0] &&
                      b1 == UTF32::kBOMLittleEndian[1] &&
                      bytes.byteAt(2) == UTF32::kBOMLittleEndian[2] &&
                      bytes.byteAt(3) == UTF32::kBOMLittleEndian[3]) ||
                     (b0 == UTF32::kBOMBigEndian[0] &&
                      b1 == UTF32::kBOMBigEndian[1] &&
                      bytes.byteAt(2) == UTF32::kBOMBigEndian[2] &&
                      bytes.byteAt(3) == UTF32::kBOMBigEndian[3]));
  const bool utf16_bom = (b0 == UTF16::kBOMLittleEndian[0] &&
                          b1 == UTF16::kBOMLittleEndian[1]) ||
                         (b0 == UTF16::kBOMBigEndian[0] &&
                          b1 == UTF16::kBOMBigEndian[1]);

  const char* codec_name;
  if (utf32_bom) {
    // The generic codec name is used when a BOM is present.
    codec_name = "utf-32";
  } else if (utf16_bom) {
    codec_name = "utf-16";
  } else if (b0 == 0) {
    // No BOM. Legal JSON starts with an ASCII character, so a leading zero
    // byte is a strong sign of the high bits of a big-endian UTF-16/UTF-32
    // code unit; a second zero byte (with at least four bytes available)
    // selects UTF-32.
    codec_name = (b1 == 0 && has_fourth) ? "utf-32-be" : "utf-16-be";
  } else if (b1 == 0) {
    DCHECK(b0 != 0, "Expected b0 != 0");
    // Non-zero byte followed by a zero byte: little-endian. Two further zero
    // bytes select UTF-32, anything else UTF-16.
    const bool four_byte_unit =
        has_fourth && bytes.byteAt(2) == 0 && bytes.byteAt(3) == 0;
    codec_name = four_byte_unit ? "utf-32-le" : "utf-16-le";
  } else {
    // No BOM and no zero bytes up front: default to UTF-8, which needs no
    // transcoding here.
    return *bytes;
  }

  HandleScope scope(thread);
  Object encoding_obj(&scope, Runtime::internStrFromCStr(thread, codec_name));
  Object errors_obj(&scope,
                    Runtime::internStrFromCStr(thread, "surrogatepass"));
  return thread->invokeFunction3(ID(_codecs), ID(decode), s, encoding_obj,
                                 errors_obj);
}