static int32_t decodeUnicodeEscaped()

in runtime/under-codecs-module.cpp [438:558]


static int32_t decodeUnicodeEscaped(const Byteslike& bytes, word* i,
                                    word* invalid_escape_index,
                                    const char** error_message) {
  switch (byte ch = bytes.byteAt((*i)++)) {
    // \x escapes
    case '\n':
      return -1;
    case '\\':
    case '\'':
    case '\"':
      return ch;
    case 'b':
      return '\b';
    case 't':
      return '\t';
    case 'n':
      return '\n';
    case 'r':
      return '\r';
    // BEL
    case 'a':
      return '\007';
    // FF
    case 'f':
      return '\014';
    // VT
    case 'v':
      return '\013';

    // \OOO (octal) escapes
    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7': {
      word escaped = ch - '0';
      word octal_index = *i;
      word length = bytes.length();
      if (octal_index < length) {
        word ch2 = bytes.byteAt(octal_index);
        if ('0' <= ch2 && ch2 <= '7') {
          escaped = (escaped << 3) + ch2 - '0';
          if (++octal_index < length) {
            word ch3 = bytes.byteAt(octal_index);
            if ('0' <= ch3 && ch3 <= '7') {
              octal_index++;
              escaped = (escaped << 3) + ch3 - '0';
            }
          }
        }
      }
      *i = octal_index;
      return escaped;
    }

    // hex escapes
    // \xXX
    case 'x': {
      word escaped;
      if ((escaped = decodeHexEscaped(bytes, i, 2)) < 0) {
        *error_message = (escaped == -1 ? "truncated \\xXX escape"
                                        : "illegal Unicode character");
        return -1;
      }
      return escaped;
    }

    // \uXXXX
    case 'u': {
      word escaped;
      if ((escaped = decodeHexEscaped(bytes, i, 4)) < 0) {
        *error_message = (escaped == -1 ? "truncated \\uXXXX escape"
                                        : "illegal Unicode character");
        return -1;
      }
      return escaped;
    }

    // \UXXXXXXXX
    case 'U': {
      word escaped;
      if ((escaped = decodeHexEscaped(bytes, i, 8)) < 0) {
        *error_message = (escaped == -1 ? "truncated \\uXXXXXXXX escape"
                                        : "illegal Unicode character");
        return -1;
      }
      return escaped;
    }

    // \N{name}
    case 'N': {
      *error_message = "malformed \\N character escape";
      word length = bytes.length();
      if (*i >= length || bytes.byteAt(*i) != '{') {
        return -1;
      }
      word start = ++(*i);
      while (*i < length && bytes.byteAt(*i) != '}') {
        *i += 1;
      }
      word size = *i - start;
      if (size == 0 || *i == length) {
        return -1;
      }
      *i += 1;
      *error_message = "unknown Unicode character name";

      unique_c_ptr<byte> buffer(reinterpret_cast<byte*>(std::malloc(size)));
      bytes.copyToStartAt(buffer.get(), size, start);
      return codePointFromName(buffer.get(), size);
    }

    default: {
      *invalid_escape_index = *i - 1;
      return ch;
    }
  }
}