in hphp/runtime/base/zend-scanf.cpp [567:1071]
/*
 * Scan `string` according to the scanf-style `format` and collect the
 * converted fields.
 *
 * The format is first checked with ValidateFormat(); a rejected format makes
 * the function call scan_set_error_return(numVars, return_value) and return
 * SCAN_ERROR_INVALID_FORMAT.
 *
 * Every conversion that is not suppressed with '*' appends one element to a
 * local Array, keyed by the array's size at that moment:
 *   - %d %D %i %o %x %X %u  -> integer (a %u result that would be negative
 *                              as int64_t is stored as a decimal string)
 *   - %f %e %E %g           -> double
 *   - %s %c %[...]          -> String copy of the matched bytes
 *   - %n                    -> number of input bytes consumed so far
 *
 * Scanning stops at the end of the format, at the end of the input, or at the
 * first mismatch. If the input ran out before a single conversion succeeded,
 * scan_set_error_return() is called and SCAN_ERROR_EOF is returned. Otherwise
 * the collected Array is assigned to `return_value` and SCAN_SUCCESS is
 * returned (the array may hold fewer elements than the format requested).
 *
 * `numVars` is only forwarded to ValidateFormat() and
 * scan_set_error_return(); this function does not interpret it itself.
 */
int string_sscanf(const char *string, const char *format, int numVars,
                  Variant &return_value) {
  int nconversions;
  int totalVars = -1;
  int64_t value;
  char *end;
  const char *baseString;
  char op = 0;
  int base = 0;
  int underflow = 0;
  size_t width;
  const char *ch;
  char sch;
  int flags;
  char buf[64];  /* Temporary buffer to hold scanned number
                  * strings before they are passed to
                  * strtol()/strtoul()/strtod() */
  Array returnArray;

  /*
   * Check for errors in the format string.
   */
  if (ValidateFormat(format, numVars, &totalVars) != SCAN_SUCCESS) {
    scan_set_error_return(numVars, return_value);
    return SCAN_ERROR_INVALID_FORMAT;
  }

  /* Remembered so that %n can report how far into the input we are. */
  baseString = string;

  /*
   * Iterate over the format string filling in the result objects until
   * we reach the end of input, the end of the format string, or there
   * is a mismatch.
   */
  nconversions = 0;
  while (*format != '\0') {
    ch = format++;
    flags = 0;

    /*
     * If we see whitespace in the format, skip whitespace in the string.
     * The <cctype> classifiers require a value representable as
     * unsigned char, so plain char is converted before the call.
     */
    if (isspace(static_cast<unsigned char>(*ch))) {
      while (isspace(static_cast<unsigned char>(*string))) {
        string++;
      }
      continue;
    }

    if (*ch != '%') {
literal:
      /*
       * A literal format character (or "%%", which jumps here) must match
       * the next input character exactly.
       */
      if (*string == '\0') {
        underflow = 1;
        goto done;
      }
      sch = *string;
      string++;
      if (*ch != sch) {
        goto done;
      }
      continue;
    }

    ch = format++;
    if (*ch == '%') {
      goto literal;
    }

    /*
     * Check for assignment suppression ('*') or an XPG3-style
     * assignment ('%n$').
     */
    if (*ch == '*') {
      flags |= SCAN_SUPPRESS;
      ch = format++;
    } else if (isdigit(UCHAR(*ch))) {
      /*
       * The parsed position is not used below; results are always
       * appended in scan order. Only the "n$" prefix is skipped.
       */
      value = strtoul(format - 1, &end, 10);
      if (*end == '$') {
        format = end + 1;
        ch = format++;
      }
    }

    /*
     * Parse any width specifier.
     */
    if (isdigit(UCHAR(*ch))) {
      char *endptr;
      width = strtoul(format - 1, &endptr, 10);
      format = endptr;
      ch = format++;
    } else {
      width = 0;
    }

    /*
     * Ignore size specifier.
     */
    if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) {
      ch = format++;
    }

    /*
     * Handle the various field types. This only selects the scanning
     * operation (`op`), the numeric base and the flags; the input is
     * consumed by the second switch further down.
     */
    switch (*ch) {
      case 'n':
        /* %n consumes no input, so it is handled completely here. */
        if (!(flags & SCAN_SUPPRESS)) {
          auto const key = safe_cast<int64_t>(returnArray.size());
          returnArray.set(key, (int)(string - baseString));
        }
        nconversions++;
        continue;

      case 'd':
      case 'D':
        op = 'i';
        base = 10;
        break;
      case 'i':
        /* base 0: the base is deduced from the number's prefix. */
        op = 'i';
        base = 0;
        break;
      case 'o':
        op = 'i';
        base = 8;
        break;
      case 'x':
      case 'X':
        op = 'i';
        base = 16;
        break;
      case 'u':
        op = 'i';
        base = 10;
        flags |= SCAN_UNSIGNED;
        break;

      case 'f':
      case 'e':
      case 'E':
      case 'g':
        op = 'f';
        break;

      case 's':
        op = 's';
        break;

      case 'c':
        /* %c shares the string scanner but defaults to a single char. */
        op = 's';
        flags |= SCAN_NOSKIP;
        /*-cc-*/
        if (0 == width) {
          width = 1;
        }
        /*-cc-*/
        break;
      case '[':
        op = '[';
        flags |= SCAN_NOSKIP;
        break;
    }   /* switch */

    /*
     * At this point, we will need additional characters from the
     * string to proceed.
     */
    if (*string == '\0') {
      underflow = 1;
      goto done;
    }

    /*
     * Skip any leading whitespace at the beginning of a field unless
     * the format suppresses this behavior.
     */
    if (!(flags & SCAN_NOSKIP)) {
      while (*string != '\0') {
        if (!isspace(static_cast<unsigned char>(*string))) {
          break;
        }
        string++;
      }
      if (*string == '\0') {
        underflow = 1;
        goto done;
      }
    }

    /*
     * Perform the requested scanning operation.
     */
    switch (op) {
      case 'c':   /* never selected above: %c is mapped to op 's' */
      case 's':
        /*
         * Scan a string up to width characters or whitespace.
         */
        if (width == 0) {
          width = (size_t) ~0;
        }
        end = (char*)string;
        while (*end != '\0') {
          if (isspace(static_cast<unsigned char>(*end))) {
            break;
          }
          end++;
          if (--width == 0) {
            break;
          }
        }
        if (!(flags & SCAN_SUPPRESS)) {
          auto const key = safe_cast<int64_t>(returnArray.size());
          returnArray.set(key, String(string, end - string, CopyString));
        }
        string = end;
        break;

      case '[': {
        /*
         * Scan the longest run (up to width) of characters accepted by
         * the character set described in the format.
         */
        CharSet cset;

        if (width == 0) {
          width = (size_t) ~0;
        }
        end = (char*)string;

        /* BuildCharSet() returns the format position to continue from. */
        format = BuildCharSet(&cset, format);
        while (*end != '\0') {
          sch = *end;
          if (!CharInSet(&cset, (int)sch)) {
            break;
          }
          end++;
          if (--width == 0) {
            break;
          }
        }
        ReleaseCharSet(&cset);

        if (string == end) {
          /*
           * Nothing matched the range, stop processing
           */
          goto done;
        }
        if (!(flags & SCAN_SUPPRESS)) {
          auto const key = safe_cast<int64_t>(returnArray.size());
          returnArray.set(key, String(string, end - string, CopyString));
        }
        string = end;
        break;
      }

      case 'i':
        /*
         * Scan an unsigned or signed integer.
         */
        /*-cc-*/
        buf[0] = '\0';
        /*-cc-*/
        /* Clamp the field width so the digits plus the NUL fit in buf. */
        if ((width == 0) || (width > sizeof(buf) - 1)) {
          width = sizeof(buf) - 1;
        }

        flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO;
        for (end = buf; width > 0; width--) {
          switch (*string) {
            /*
             * The 0 digit has special meaning at the beginning of
             * a number.  If we are unsure of the base, it
             * indicates that we are in base 8 or base 16 (if it is
             * followed by an 'x').
             */
            case '0':
              /*-cc-*/
              if (base == 16) {
                flags |= SCAN_XOK;
              }
              /*-cc-*/
              if (base == 0) {
                base = 8;
                flags |= SCAN_XOK;
              }
              if (flags & SCAN_NOZERO) {
                /* First zero seen: keep SCAN_XOK so "0x" can follow. */
                flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO);
              } else {
                flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
              }
              goto addToInt;

            case '1': case '2': case '3': case '4':
            case '5': case '6': case '7':
              if (base == 0) {
                base = 10;
              }
              flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
              goto addToInt;

            case '8': case '9':
              if (base == 0) {
                base = 10;
              }
              if (base <= 8) {
                break;
              }
              flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
              goto addToInt;

            case 'A': case 'B': case 'C':
            case 'D': case 'E': case 'F':
            case 'a': case 'b': case 'c':
            case 'd': case 'e': case 'f':
              if (base <= 10) {
                break;
              }
              flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
              goto addToInt;

            case '+': case '-':
              if (flags & SCAN_SIGNOK) {
                flags &= ~SCAN_SIGNOK;
                goto addToInt;
              }
              break;

            case 'x': case 'X':
              /* Only valid as the second character, right after a '0'. */
              if ((flags & SCAN_XOK) && (end == buf + 1)) {
                base = 16;
                flags &= ~SCAN_XOK;
                goto addToInt;
              }
              break;
          }

          /*
           * We got an illegal character so we are done accumulating.
           */
          break;

addToInt:
          /*
           * Add the character to the temporary buffer.
           */
          *end++ = *string++;
          if (*string == '\0') {
            break;
          }
        }

        /*
         * Check to see if we need to back up because we only got a
         * sign or a trailing x after a 0.
         */
        if (flags & SCAN_NODIGITS) {
          if (*string == '\0') {
            underflow = 1;
          }
          goto done;
        } else if (end[-1] == 'x' || end[-1] == 'X') {
          end--;
          string--;
        }

        /*
         * Scan the value from the temporary buffer.  If we are
         * returning a large unsigned value, we have to convert it back
         * to a string since PHP only supports signed values.
         */
        if (!(flags & SCAN_SUPPRESS)) {
          *end = '\0';
          /*
           * Call the converter directly instead of through a function
           * pointer of a mismatching type: SCAN_UNSIGNED is set exactly
           * for %u, the only conversion that needs strtoul().
           */
          if (flags & SCAN_UNSIGNED) {
            value = (int64_t) strtoul(buf, nullptr, base);
          } else {
            value = (int64_t) strtol(buf, nullptr, base);
          }
          auto const key = safe_cast<int64_t>(returnArray.size());
          if ((flags & SCAN_UNSIGNED) && (value < 0)) {
            /* %lu expects an unsigned long argument. */
            snprintf(buf, sizeof(buf), "%lu",
                     (unsigned long)value); /* INTL: ISO digit */
            returnArray.set(key, String(buf, CopyString));
          } else {
            returnArray.set(key, value);
          }
        }
        break;

      case 'f':
        /*
         * Scan a floating point number
         */
        buf[0] = '\0';     /* call me pedantic */
        /* Clamp the field width so the text plus the NUL fit in buf. */
        if ((width == 0) || (width > sizeof(buf) - 1)) {
          width = sizeof(buf) - 1;
        }
        flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_PTOK | SCAN_EXPOK;
        for (end = buf; width > 0; width--) {
          switch (*string) {
            case '0': case '1': case '2': case '3':
            case '4': case '5': case '6': case '7':
            case '8': case '9':
              flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS);
              goto addToFloat;

            case '+':
            case '-':
              if (flags & SCAN_SIGNOK) {
                flags &= ~SCAN_SIGNOK;
                goto addToFloat;
              }
              break;

            case '.':
              if (flags & SCAN_PTOK) {
                flags &= ~(SCAN_SIGNOK | SCAN_PTOK);
                goto addToFloat;
              }
              break;

            case 'e':
            case 'E':
              /*
               * An exponent is not allowed until there has
               * been at least one digit.
               */
              if ((flags & (SCAN_NODIGITS | SCAN_EXPOK)) == SCAN_EXPOK) {
                /* The exponent may have a sign and needs its own digits. */
                flags = (flags & ~(SCAN_EXPOK | SCAN_PTOK))
                  | SCAN_SIGNOK | SCAN_NODIGITS;
                goto addToFloat;
              }
              break;
          }

          /*
           * We got an illegal character so we are done accumulating.
           */
          break;

addToFloat:
          /*
           * Add the character to the temporary buffer.
           */
          *end++ = *string++;
          if (*string == '\0') {
            break;
          }
        }

        /*
         * Check to see if we need to back up because we saw a
         * trailing 'e' or sign.
         */
        if (flags & SCAN_NODIGITS) {
          if (flags & SCAN_EXPOK) {
            /*
             * There were no digits at all so scanning has
             * failed and we are done.
             */
            if (*string == '\0') {
              underflow = 1;
            }
            goto done;
          }

          /*
           * We got a bad exponent ('e' and maybe a sign).
           */
          end--;
          string--;
          if (*end != 'e' && *end != 'E') {
            end--;
            string--;
          }
        }

        /*
         * Scan the value from the temporary buffer.
         */
        if (!(flags & SCAN_SUPPRESS)) {
          double dvalue;
          *end = '\0';
          dvalue = strtod(buf, nullptr);
          auto const key = safe_cast<int64_t>(returnArray.size());
          returnArray.set(key, dvalue);
        }
        break;
    } /* switch (op) */

    nconversions++;
  } /*  while (*format != '\0') */

done:
  if (underflow && (0 == nconversions)) {
    /* Input ran out before anything could be converted. */
    scan_set_error_return(numVars, return_value);
    return SCAN_ERROR_EOF;
  } else if (nconversions < totalVars) {
    /* TODO: not all elements converted. we need to prune the list - cc */
  }
  return_value = returnArray;
  return SCAN_SUCCESS;
}