in hphp/runtime/base/zend-pack.cpp [547:964]
/*
 * Decode the binary string `data` according to the pack-style format string
 * `fmt` and return the decoded values as a dict.
 *
 * `fmt` is a sequence of elements separated by '/'. Each element is a
 * one-character type code, an optional repeat argument (a decimal number or
 * '*'), and an optional name; only the first 200 characters of a name are
 * used. A value is stored under its name when the repeat count is 1 and the
 * name is non-empty; otherwise the 1-based element number is appended to the
 * name. Keys that are strictly-integer strings are stored as integer keys.
 *
 * Returns false (after raising a warning) when a type code is unknown or when
 * a counted element needs more input than remains. A '*' repeater simply
 * stops when the input runs out.
 */
Variant ZendPack::unpack(const String& fmt, const String& data) {
  const char *format = fmt.c_str();
  int formatlen = fmt.size();
  const char *input = data.c_str();
  int inputlen = data.size();
  int inputpos = 0;
  Array ret = Array::CreateDict();

  while (formatlen-- > 0) {
    char type = *(format++);
    int arg = 1, argb;
    const char *name;
    int namelen;
    int size = 0;

    /* Handle format arguments if any */
    if (formatlen > 0) {
      char c = *format;
      if (c >= '0' && c <= '9') {
        /*
         * Parse the repeat count with strtol and clamp it to INT_MAX: atoi
         * has undefined behaviour when the digits do not fit in an int. The
         * first character is a digit, so the parsed value is never negative.
         */
        const long parsed = strtol(format, nullptr, 10);
        arg = (parsed > INT_MAX) ? INT_MAX : static_cast<int>(parsed);
        while (formatlen > 0 && *format >= '0' && *format <= '9') {
          format++;
          formatlen--;
        }
      } else if (c == '*') {
        arg = -1;
        format++;
        formatlen--;
      }
    }

    /* Get the name of the new value in the array: everything up to the next
       '/' separator or the end of the format string */
    name = format;
    argb = arg; /* remember the original argument; 'h'/'H' need its parity */
    while (formatlen > 0 && *format != '/') {
      formatlen--;
      format++;
    }
    namelen = format - name;
    if (namelen > 200)
      namelen = 200;

    /* Work out how many input bytes one element of this type consumes */
    switch ((int) type) {
      /* Never use any input */
      case 'X':
        /* Each repetition moves the input position back by one byte */
        size = -1;
        if (arg < 0) {
          raise_invalid_argument_warning("Type %c: '*' ignored", type);
          arg = 1;
        }
        break;
      case '@':
        size = 0;
        break;
      case 'a':
      case 'A':
      case 'Z':
        /* The argument is the string length, not a repeat count */
        size = arg;
        arg = 1;
        break;
      case 'h':
      case 'H':
        /*
         * The argument counts nibbles; round up to whole bytes. Written as
         * arg / 2 + arg % 2 so that arg == INT_MAX does not overflow.
         */
        size = (arg > 0) ? (arg / 2 + arg % 2) : arg;
        arg = 1;
        break;
      /* Use 1 byte of input */
      case 'c':
      case 'C':
      case 'x':
        size = 1;
        break;
      /* Use 2 bytes of input */
      case 's':
      case 'S':
      case 'n':
      case 'v':
        size = 2;
        break;
      /* Use machine dependent bytes of input */
      case 'i':
      case 'I':
        size = sizeof(int);
        break;
      /* Use 4 bytes of input */
      case 'l':
      case 'L':
      case 'N':
      case 'V':
        size = 4;
        break;
      /* Use 8 bytes of input */
      case 'q':
      case 'Q':
      case 'J':
      case 'P':
        size = 8;
        break;
      /* Use sizeof(float) bytes of input */
      case 'f':
        size = sizeof(float);
        break;
      /* Use sizeof(double) bytes of input */
      case 'd':
        size = sizeof(double);
        break;
      default:
        raise_invalid_argument_warning("Invalid format type %c", type);
        return false;
    }

    /* Do actual unpacking */
    for (int i = 0; i != arg; i++ ) {
      /* Space for name + number, safe as namelen is ensured <= 200 */
      char n[256];
      if (arg != 1 || namelen == 0) {
        /* Need to add element number to name */
        snprintf(n, sizeof(n), "%.*s%d", namelen, name, i + 1);
      } else {
        /* Truncate name to next format code or end of string */
        snprintf(n, sizeof(n), "%.*s", namelen, name);
      }

      /* Strictly-integer names become integer keys */
      const auto n_str = String(n, CopyString);
      int64_t n_int;
      const auto n_key = n_str.get()->isStrictlyInteger(n_int)
        ? Variant(n_int)
        : Variant(n_str);

      /* Guard the inputpos + size addition below against int overflow */
      if (size != 0 && size != -1 && INT_MAX - size + 1 < inputpos) {
        raise_invalid_argument_warning("Type %c: integer overflow", type);
        inputpos = 0;
      }

      if ((inputpos + size) <= inputlen) {
        switch ((int) type) {
          case 'a':
          case 'A':
          case 'Z': {
            int len = inputlen - inputpos; /* Remaining string */

            /* If size was given take minimum of len and size */
            if ((size >= 0) && (len > size)) {
              len = size;
            }
            /* The input position advances by the unstripped length */
            size = len;

            /* A will strip any trailing whitespace */
            if (type == 'A')
            {
              char padn = '\0'; char pads = ' '; char padt = '\t';
              char padc = '\r'; char padl = '\n';
              while (--len >= 0) {
                if (input[inputpos + len] != padn
                    && input[inputpos + len] != pads
                    && input[inputpos + len] != padt
                    && input[inputpos + len] != padc
                    && input[inputpos + len] != padl
                   )
                  break;
              }
            }
            /* Remove everything after the first null */
            if (type=='Z') {
              int s;
              for (s=0 ; s < len ; s++) {
                if (input[inputpos + s] == '\0')
                  break;
              }
              len = s;
            }
            /* The 'A' loop above leaves len at the index of the last kept
               character (or -1), so convert it back to a length */
            if (type=='A')
              len++;

            ret.set(n_key, String(input + inputpos, len, CopyString));
            break;
          }
          case 'h':
          case 'H': {
            /*
             * NOTE(review): this doubling is done in int and could overflow
             * for more than INT_MAX / 2 remaining input bytes -- confirm
             * whether String's size limit rules that out.
             */
            int len = (inputlen - inputpos) * 2; /* Remaining */
            /* 'h' emits the low nibble of each byte first, 'H' the high */
            int nibbleshift = (type == 'h') ? 0 : 4;
            int first = 1;
            char *buf;
            int ipos, opos;

            /* If size was given take minimum of len and size */
            if (size >= 0 && len > (size * 2)) {
              len = size * 2;
            }

            /* An odd nibble count uses only half of the last byte */
            if (argb > 0) {
              len -= argb % 2;
            }

            String s = String(len, ReserveString);
            buf = s.mutableData();

            for (ipos = opos = 0; opos < len; opos++) {
              char c = (input[inputpos + ipos] >> nibbleshift) & 0xf;

              if (c < 10) {
                c += '0';
              } else {
                c += 'a' - 10;
              }

              buf[opos] = c;
              nibbleshift = (nibbleshift + 4) & 7;

              /* Move to the next input byte after every second nibble */
              if (first-- == 0) {
                ipos++;
                first = 1;
              }
            }

            s.setSize(len);
            ret.set(n_key, s);
            break;
          }
          case 'c':
          case 'C': {
            int issigned = (type == 'c') ? (input[inputpos] & 0x80) : 0;
            ret.set(n_key, unpack(&input[inputpos], 1, issigned, byte_map));
            break;
          }
          case 's':
          case 'S':
          case 'n':
          case 'v': {
            int issigned = 0;
            int64_t *map = machine_endian_short_map;

            if (type == 's') {
              issigned = input[inputpos + (machine_little_endian ? 1 : 0)] &
                0x80;
            } else if (type == 'n') {
              map = big_endian_short_map;
            } else if (type == 'v') {
              map = little_endian_short_map;
            }

            ret.set(n_key, unpack(&input[inputpos], 2, issigned, map));
            break;
          }
          case 'i':
          case 'I': {
            int32_t v = 0;
            int issigned = 0;

            if (type == 'i') {
              issigned = input[inputpos + (machine_little_endian ?
                                           (sizeof(int) - 1) : 0)] & 0x80;
            } else if (sizeof(int32_t) > 4 &&
                       (input[inputpos + machine_endian_int32_map[3]]
                        & 0x80) == 0x80) {
              v = ~INT_MAX;
            }

            v |= unpack(&input[inputpos], sizeof(int), issigned, int_map);
            if (type == 'i') {
              ret.set(n_key, v);
            } else {
              /* 'I' is unsigned: widen without sign extension */
              uint64_t u64 = uint32_t(v);
              ret.set(n_key, u64);
            }
            break;
          }
          case 'l':
          case 'L':
          case 'N':
          case 'V': {
            int issigned = 0;
            int64_t *map = machine_endian_int32_map;
            int64_t v = 0;

            if (type == 'l' || type == 'L') {
              issigned = input[inputpos + (machine_little_endian ? 3 : 0)]
                & 0x80;
            } else if (type == 'N') {
              issigned = input[inputpos] & 0x80;
              map = big_endian_int32_map;
            } else if (type == 'V') {
              issigned = input[inputpos + 3] & 0x80;
              map = little_endian_int32_map;
            }

            if (sizeof(int32_t) > 4 && issigned) {
              v = ~INT_MAX;
            }

            v |= unpack(&input[inputpos], 4, issigned, map);
            if (type == 'l') {
              ret.set(n_key, v);
            } else {
              /* 'L', 'N' and 'V' are unsigned: keep only the low 32 bits */
              uint64_t u64 = uint32_t(v);
              ret.set(n_key, u64);
            }
            break;
          }
          case 'q':
          case 'Q':
          case 'J':
          case 'P': {
            int issigned = 0;
            int64_t *map = machine_endian_int64_map;
            int64_t v = 0;
            if (type == 'q' || type == 'Q') {
              issigned = input[inputpos + (machine_little_endian ? 7 : 0)] & 0x80;
            } else if (type == 'J') {
              issigned = input[inputpos] & 0x80;
              map = big_endian_int64_map;
            } else if (type == 'P') {
              issigned = input[inputpos + 7] & 0x80;
              map = little_endian_int64_map;
            }

            v = unpack(&input[inputpos], 8, issigned, map);

            if (type == 'q') {
              ret.set(n_key, v);
            } else {
              uint64_t u64 = uint64_t(v);
              ret.set(n_key, u64);
            }
            break;
          }
          case 'f': {
            float v;

            memcpy(&v, &input[inputpos], sizeof(float));
            ret.set(n_key, (double)v);
            break;
          }
          case 'd': {
            double v;

            memcpy(&v, &input[inputpos], sizeof(double));
            ret.set(n_key, v);
            break;
          }
          case 'x':
            /* Do nothing with input, just skip it */
            break;
          case 'X':
            if (inputpos < size) {
              inputpos = -size;
              i = arg - 1; /* Break out of for loop */

              if (arg >= 0) {
                raise_invalid_argument_warning("Type %c: outside of string", type);
              }
            }
            break;
          case '@':
            /* The argument is an absolute position in the input */
            if (arg <= inputlen) {
              inputpos = arg;
            } else {
              raise_invalid_argument_warning("Type %c: outside of string", type);
            }

            i = arg - 1; /* Done, break out of for loop */
            break;
        }

        inputpos += size;
        if (inputpos < 0) {
          if (size != -1) { /* only print warning if not working with * */
            raise_invalid_argument_warning("Type %c: outside of string", type);
          }
          inputpos = 0;
        }
      } else if (arg < 0) {
        /* Reached end of input for '*' repeater */
        break;
      } else {
        raise_invalid_argument_warning
          ("Type %c: not enough input, need %d, have %d",
           type, size, inputlen - inputpos);
        return false;
      }
    }

    /* Skip '/' separator; harmless at the end of the format string, where
       formatlen drops below zero and the loop condition then fails */
    formatlen--;
    format++;
  }

  return ret;
}