in src/kudu/gutil/strings/escaping.cc [258:438]
// Decodes the C-style escape sequences in 'source' and writes the resulting
// bytes to 'dest'.
//
// Recognized escapes: \a \b \f \n \r \t \v \\ \? \' \", one to three octal
// digits, \x or \X followed by one or more hex digits, \u followed by exactly
// four hex digits and \U followed by exactly eight hex digits. The \u and \U
// values are encoded into 'dest' via runetochar().
//
// Parameters:
//   source              - the escaped input.
//   leave_nulls_escaped - when true, an escape whose value is 0 is copied to
//                         the output verbatim (backslash included) instead of
//                         being decoded to a zero byte.
//   dest                - output buffer. May be the same buffer as
//                         source.data() (in-place unescaping). No terminating
//                         NUL is written. Assumed to have room for at least
//                         source.size() bytes -- TODO confirm with callers.
//   dest_len            - on success, receives the number of bytes written to
//                         'dest'. Not modified on failure.
//   error               - if non-NULL, receives a description of the problem
//                         on failure.
//
// Returns true on success. Returns false if the input ends in the middle of
// an escape, uses an unknown escape, or encodes a value that is out of range
// (> 0xff for octal and hex escapes, > 0x10FFFF for \U).
static bool CUnescapeInternal(const StringPiece& source,
                              bool leave_nulls_escaped,
                              char* dest,
                              int* dest_len,
                              string* error) {
  char* d = dest;
  const char* p = source.data();
  const char* end = source.end();
  const char* last_byte = end - 1;

  // Small optimization for case where source = dest and there's no escaping
  while (p == d && p < end && *p != '\\')
    p++, d++;

  while (p < end) {
    if (*p != '\\') {
      *d++ = *p++;
    } else {
      if (++p > last_byte) {       // skip past the '\\'
        if (error) *error = "String cannot end with \\";
        return false;
      }
      switch (*p) {
        case 'a':  *d++ = '\a';  break;
        case 'b':  *d++ = '\b';  break;
        case 'f':  *d++ = '\f';  break;
        case 'n':  *d++ = '\n';  break;
        case 'r':  *d++ = '\r';  break;
        case 't':  *d++ = '\t';  break;
        case 'v':  *d++ = '\v';  break;
        case '\\': *d++ = '\\';  break;
        case '?':  *d++ = '\?';  break;    // \?  Who knew?
        case '\'': *d++ = '\'';  break;
        case '"':  *d++ = '\"';  break;
        case '0': case '1': case '2': case '3':  // octal digit: 1 to 3 digits
        case '4': case '5': case '6': case '7': {
          const char *octal_start = p;
          unsigned int ch = *p - '0';
          if (p < last_byte && IS_OCTAL_DIGIT(p[1]))
            ch = ch * 8 + *++p - '0';
          if (p < last_byte && IS_OCTAL_DIGIT(p[1]))
            ch = ch * 8 + *++p - '0';      // now points at last digit
          if (ch > 0xff) {
            if (error) {
              *error = "Value of \\" +
                  string(octal_start, p + 1 - octal_start) +
                  " exceeds 0xff";
            }
            return false;
          }
          if ((ch == 0) && leave_nulls_escaped) {
            // Copy the escape sequence for the null character.
            // memmove, not memcpy: when unescaping in place, the destination
            // range can overlap the source digits.
            const int octal_size = p + 1 - octal_start;
            *d++ = '\\';
            memmove(d, octal_start, octal_size);
            d += octal_size;
            break;
          }
          *d++ = ch;
          break;
        }
        case 'x': case 'X': {
          if (p >= last_byte) {
            if (error) *error = "String cannot end with \\x";
            return false;
          } else if (!ascii_isxdigit(p[1])) {
            if (error) *error = "\\x cannot be followed by a non-hex digit";
            return false;
          }
          unsigned int ch = 0;
          const char *hex_start = p;
          // Arbitrarily many hex digits are consumed, but the value stops
          // accumulating once it is already out of range. Otherwise a long
          // digit run could wrap 'ch' back into [0, 0xFF] and slip past the
          // range check below.
          while (p < last_byte && ascii_isxdigit(p[1])) {
            ++p;
            if (ch <= 0xFF) {
              ch = (ch << 4) + hex_digit_to_int(*p);
            }
          }
          if (ch > 0xFF) {
            if (error) {
              *error = "Value of \\" + string(hex_start, p + 1 - hex_start) +
                  " exceeds 0xff";
            }
            return false;
          }
          if ((ch == 0) && leave_nulls_escaped) {
            // Copy the escape sequence for the null character.
            // memmove because source and destination may overlap in place.
            const int hex_size = p + 1 - hex_start;
            *d++ = '\\';
            memmove(d, hex_start, hex_size);
            d += hex_size;
            break;
          }
          *d++ = ch;
          break;
        }
        case 'u': {
          // \uhhhh => convert 4 hex digits to UTF-8
          char32 rune = 0;
          const char *hex_start = p;
          // p is on the 'u'; p[1]..p[4] must all lie inside the input.
          if (p + 4 >= end) {
            if (error) {
              *error = "\\u must be followed by 4 hex digits: \\" +
                  string(hex_start, p + 1 - hex_start);
            }
            return false;
          }
          for (int i = 0; i < 4; ++i) {
            // Look one char ahead.
            if (ascii_isxdigit(p[1])) {
              rune = (rune << 4) + hex_digit_to_int(*++p);  // Advance p.
            } else {
              if (error) {
                *error = "\\u must be followed by 4 hex digits: \\" +
                    string(hex_start, p + 1 - hex_start);
              }
              return false;
            }
          }
          if ((rune == 0) && leave_nulls_escaped) {
            // Copy the escape sequence for the null character.
            // memmove because source and destination may overlap in place.
            *d++ = '\\';
            memmove(d, hex_start, 5);  // u0000
            d += 5;
            break;
          }
          d += runetochar(d, &rune);
          break;
        }
        case 'U': {
          // \Uhhhhhhhh => convert 8 hex digits to UTF-8
          char32 rune = 0;
          const char *hex_start = p;
          // p is on the 'U'; p[1]..p[8] must all lie inside the input.
          if (p + 8 >= end) {
            if (error) {
              *error = "\\U must be followed by 8 hex digits: \\" +
                  string(hex_start, p + 1 - hex_start);
            }
            return false;
          }
          for (int i = 0; i < 8; ++i) {
            // Look one char ahead.
            if (ascii_isxdigit(p[1])) {
              // Don't change rune until we're sure this
              // is within the Unicode limit, but do advance p.
              char32 newrune = (rune << 4) + hex_digit_to_int(*++p);
              if (newrune > 0x10FFFF) {
                if (error) {
                  *error = "Value of \\" +
                      string(hex_start, p + 1 - hex_start) +
                      " exceeds Unicode limit (0x10FFFF)";
                }
                return false;
              } else {
                rune = newrune;
              }
            } else {
              if (error) {
                *error = "\\U must be followed by 8 hex digits: \\" +
                    string(hex_start, p + 1 - hex_start);
              }
              return false;
            }
          }
          if ((rune == 0) && leave_nulls_escaped) {
            // Copy the escape sequence for the null character.
            // memmove because source and destination may overlap in place.
            *d++ = '\\';
            memmove(d, hex_start, 9);  // U00000000
            d += 9;
            break;
          }
          d += runetochar(d, &rune);
          break;
        }
        default: {
          if (error) *error = string("Unknown escape sequence: \\") + *p;
          return false;
        }
      }
      p++;                                 // read past letter we escaped
    }
  }
  *dest_len = d - dest;
  return true;
}