static bool CUnescapeInternal()

in src/kudu/gutil/strings/escaping.cc [258:438]


static bool CUnescapeInternal(const StringPiece& source,
                              bool leave_nulls_escaped,
                              char* dest,
                              int* dest_len,
                              string* error) {
  char* d = dest;
  const char* p = source.data();
  const char* end = source.end();
  const char* last_byte = end - 1;

  // Small optimization for case where source = dest and there's no escaping
  while (p == d && p < end && *p != '\\')
    p++, d++;

  while (p < end) {
    if (*p != '\\') {
      *d++ = *p++;
    } else {
      if (++p > last_byte) {       // skip past the '\\'
        if (error) *error = "String cannot end with \\";
        return false;
      }
      switch (*p) {
        case 'a':  *d++ = '\a';  break;
        case 'b':  *d++ = '\b';  break;
        case 'f':  *d++ = '\f';  break;
        case 'n':  *d++ = '\n';  break;
        case 'r':  *d++ = '\r';  break;
        case 't':  *d++ = '\t';  break;
        case 'v':  *d++ = '\v';  break;
        case '\\': *d++ = '\\';  break;
        case '?':  *d++ = '\?';  break;    // \?  Who knew?
        case '\'': *d++ = '\'';  break;
        case '"':  *d++ = '\"';  break;
        case '0': case '1': case '2': case '3':  // octal digit: 1 to 3 digits
        case '4': case '5': case '6': case '7': {
          const char *octal_start = p;
          unsigned int ch = *p - '0';
          if (p < last_byte && IS_OCTAL_DIGIT(p[1]))
            ch = ch * 8 + *++p - '0';
          if (p < last_byte && IS_OCTAL_DIGIT(p[1]))
            ch = ch * 8 + *++p - '0';      // now points at last digit
          if (ch > 0xff) {
            if (error) {
              *error = "Value of \\" +
                  string(octal_start, p + 1 - octal_start) +
                  " exceeds 0xff";
            }
            return false;
          }
          if ((ch == 0) && leave_nulls_escaped) {
            // Copy the escape sequence for the null character
            const int octal_size = p + 1 - octal_start;
            *d++ = '\\';
            memcpy(d, octal_start, octal_size);
            d += octal_size;
            break;
          }
          *d++ = ch;
          break;
        }
        case 'x': case 'X': {
          if (p >= last_byte) {
            if (error) *error = "String cannot end with \\x";
            return false;
          } else if (!ascii_isxdigit(p[1])) {
            if (error) *error = "\\x cannot be followed by a non-hex digit";
            return false;
          }
          unsigned int ch = 0;
          const char *hex_start = p;
          while (p < last_byte && ascii_isxdigit(p[1]))
            // Arbitrarily many hex digits
            ch = (ch << 4) + hex_digit_to_int(*++p);
          if (ch > 0xFF) {
            if (error) {
              *error = "Value of \\" + string(hex_start, p + 1 - hex_start) +
                  " exceeds 0xff";
            }
            return false;
          }
          if ((ch == 0) && leave_nulls_escaped) {
            // Copy the escape sequence for the null character
            const int hex_size = p + 1 - hex_start;
            *d++ = '\\';
            memcpy(d, hex_start, hex_size);
            d += hex_size;
            break;
          }
          *d++ = ch;
          break;
        }
        case 'u': {
          // \uhhhh => convert 4 hex digits to UTF-8
          char32 rune = 0;
          const char *hex_start = p;
          if (p + 4 >= end) {
            if (error) {
              *error = "\\u must be followed by 4 hex digits: \\" +
                  string(hex_start, p + 1 - hex_start);
            }
            return false;
          }
          for (int i = 0; i < 4; ++i) {
            // Look one char ahead.
            if (ascii_isxdigit(p[1])) {
              rune = (rune << 4) + hex_digit_to_int(*++p);  // Advance p.
            } else {
              if (error) {
                *error = "\\u must be followed by 4 hex digits: \\" +
                    string(hex_start, p + 1 - hex_start);
              }
              return false;
            }
          }
          if ((rune == 0) && leave_nulls_escaped) {
            // Copy the escape sequence for the null character
            *d++ = '\\';
            memcpy(d, hex_start, 5);  // u0000
            d += 5;
            break;
          }
          d += runetochar(d, &rune);
          break;
        }
        case 'U': {
          // \Uhhhhhhhh => convert 8 hex digits to UTF-8
          char32 rune = 0;
          const char *hex_start = p;
          if (p + 8 >= end) {
            if (error) {
              *error = "\\U must be followed by 8 hex digits: \\" +
                  string(hex_start, p + 1 - hex_start);
            }
            return false;
          }
          for (int i = 0; i < 8; ++i) {
            // Look one char ahead.
            if (ascii_isxdigit(p[1])) {
              // Don't change rune until we're sure this
              // is within the Unicode limit, but do advance p.
              char32 newrune = (rune << 4) + hex_digit_to_int(*++p);
              if (newrune > 0x10FFFF) {
                if (error) {
                  *error = "Value of \\" +
                      string(hex_start, p + 1 - hex_start) +
                      " exceeds Unicode limit (0x10FFFF)";
                }
                return false;
              } else {
                rune = newrune;
              }
            } else {
              if (error) {
                *error = "\\U must be followed by 8 hex digits: \\" +
                    string(hex_start, p + 1 - hex_start);
              }
              return false;
            }
          }
          if ((rune == 0) && leave_nulls_escaped) {
            // Copy the escape sequence for the null character
            *d++ = '\\';
            memcpy(d, hex_start, 9);  // U00000000
            d += 9;
            break;
          }
          d += runetochar(d, &rune);
          break;
        }
        default: {
          if (error) *error = string("Unknown escape sequence: \\") + *p;
          return false;
        }
      }
      p++;                                 // read past letter we escaped
    }
  }
  *dest_len = d - dest;
  return true;
}