hphp/runtime/base/variable-unserializer.cpp

/* +----------------------------------------------------------------------+ | HipHop for PHP | +----------------------------------------------------------------------+ | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) | +----------------------------------------------------------------------+ | This source file is subject to version 3.01 of the PHP license, | | that is bundled with this package in the file LICENSE, and is | | available through the world-wide-web at the following url: | | http://www.php.net/license/3_01.txt | | If you did not receive a copy of the PHP license and are unable to | | obtain it through the world-wide-web, please send a note to | | license@php.net so we can mail you a copy immediately. | +----------------------------------------------------------------------+ */ #include "hphp/runtime/base/variable-unserializer.h" #include <algorithm> #include <utility> #include <folly/Conv.h> #include <folly/Range.h> #include <folly/lang/Launder.h> #include "hphp/runtime/base/array-init.h" #include "hphp/runtime/base/array-iterator.h" #include "hphp/runtime/base/autoload-handler.h" #include "hphp/runtime/base/collections.h" #include "hphp/runtime/base/comparisons.h" #include "hphp/runtime/base/dummy-resource.h" #include "hphp/runtime/base/execution-context.h" #include "hphp/runtime/base/runtime-error.h" #include "hphp/runtime/base/runtime-option.h" #include "hphp/runtime/base/struct-log-util.h" #include "hphp/runtime/base/request-info.h" #include "hphp/runtime/base/vanilla-keyset.h" #include "hphp/runtime/base/variable-serializer.h" #include "hphp/runtime/ext/collections/ext_collections-map.h" #include "hphp/runtime/ext/collections/ext_collections-pair.h" #include "hphp/runtime/ext/collections/ext_collections-set.h" #include "hphp/runtime/ext/collections/ext_collections-vector.h" #include "hphp/runtime/ext/std/ext_std_classobj.h" #include "hphp/runtime/vm/native-data.h" #include "hphp/runtime/vm/repo-file.h" #include "hphp/runtime/vm/repo-global-data.h" #include "hphp/runtime/vm/jit/perf-counters.h" #include "hphp/zend/zend-strtod.h" namespace HPHP { namespace { enum class ArrayKind { PHP, Dict, Vec, Keyset }; [[noreturn]] NEVER_INLINE void throwUnexpectedSep(char expect, char actual) { throw Exception("Expected '%c' but got '%c'", expect, actual); } [[noreturn]] NEVER_INLINE void throwOutOfRange(int64_t id) { throw Exception("Id %" PRId64 " out of range", id); } [[noreturn]] NEVER_INLINE void throwUnexpectedStr(const char* expect, folly::StringPiece& actual) { throw Exception("Expected '%s' but got '%.*s'", expect, (int)actual.size(), actual.data()); } [[noreturn]] NEVER_INLINE void throwUnknownType(char type) { throw Exception("Unknown type '%c'", type); } [[noreturn]] NEVER_INLINE void throwInvalidPair() { throw Exception("Pair objects must have exactly 2 elements"); } [[noreturn]] NEVER_INLINE void throwInvalidOFormat(const String& clsName) { throw Exception("%s does not support the 'O' serialization format", clsName.data()); } [[noreturn]] NEVER_INLINE void throwMangledPrivateProperty() { throw Exception("Mangled private object property"); } [[noreturn]] NEVER_INLINE void throwUnterminatedProperty() { throw Exception("Object property not terminated properly"); } [[noreturn]] NEVER_INLINE void throwNotCollection(const String& clsName) { throw Exception("%s is not a collection class", clsName.data()); } [[noreturn]] NEVER_INLINE void throwUnexpectedType(const String& key, const ObjectData* obj, TypedValue type) { auto msg = folly::format( "Property {} for class {} was deserialized with type ({}) that " "didn't match what we inferred in static analysis", key, obj->getVMClass()->name(), tname(type.m_type) ).str(); throw Exception(msg); } [[noreturn]] NEVER_INLINE void throwUnexpectedType(const StringData* key, const ObjectData* obj, TypedValue type) { String str(key->data(), key->size(), CopyString); throwUnexpectedType(str, obj, type); } [[noreturn]] NEVER_INLINE void throwArraySizeOutOfBounds() { throw Exception("Array size out of bounds"); } [[noreturn]] NEVER_INLINE void throwInvalidKey() { throw Exception("Invalid key"); } [[noreturn]] NEVER_INLINE void throwUnterminatedElement() { throw Exception("Array element not terminated properly"); } [[noreturn]] NEVER_INLINE void throwLargeStringSize(int64_t size) { throw Exception("Size of serialized string (%" PRId64 ") exceeds max", size); } [[noreturn]] NEVER_INLINE void throwNegativeStringSize(int64_t size) { throw Exception("Size of serialized string (%" PRId64 ") " "must not be negative", size); } [[noreturn]] NEVER_INLINE void throwBadFormat(const ObjectData* obj, char type) { throw Exception("%s does not support the '%c' serialization format", header_names[(int)obj->headerKind()], type); } [[noreturn]] NEVER_INLINE void throwInvalidHashKey(const ObjectData* obj) { throw Exception("%s values must be integers or strings", header_names[(int)obj->headerKind()]); } [[noreturn]] NEVER_INLINE void throwColRKey() { throw Exception("Referring to collection keys using the 'r' encoding " "is not supported"); } [[noreturn]] NEVER_INLINE void throwColRefValue() { throw Exception("Collection values cannot be taken by reference"); } [[noreturn]] NEVER_INLINE void throwColRefKey() { throw Exception("Collection keys cannot be taken by reference"); } [[noreturn]] NEVER_INLINE void throwUnexpectedEOB() { throw Exception("Unexpected end of buffer during unserialization"); } [[noreturn]] NEVER_INLINE void throwVecRefValue() { throw Exception("Vecs cannot contain references"); } [[noreturn]] NEVER_INLINE void throwDictRefValue() { throw Exception("Dicts cannot contain references"); } [[noreturn]] NEVER_INLINE void throwKeysetValue() { throw Exception("Keysets can only contain integers and strings"); } [[noreturn]] NEVER_INLINE void throwInvalidClassName() { throw Exception("Provided class name is invalid"); } void warnOrThrowUnknownClass(const String& clsName) { if (RuntimeOption::EvalForbidUnserializeIncompleteClass) { auto const msg = folly::sformat( "Attempted to unserialize class named '{}' but it doesn't exist", clsName.toCppString() ); if (RuntimeOption::EvalForbidUnserializeIncompleteClass > 1) { throw_object("Exception", make_vec_array(msg)); } else { raise_warning(msg); } } } } const StaticString s_serialized("serialized"), s_unserialize("unserialize"), s_PHP_Incomplete_Class("__PHP_Incomplete_Class"), s_PHP_Incomplete_Class_Name("__PHP_Incomplete_Class_Name"), s___wakeup("__wakeup"); /////////////////////////////////////////////////////////////////////////////// const StaticString s_force_darrays{"force_darrays"}; const StaticString s_mark_legacy_arrays{"mark_legacy_arrays"}; VariableUnserializer::VariableUnserializer( const char* str, size_t len, Type type, bool allowUnknownSerializableClass, const Array& options) : m_type(type) , m_readOnly(false) , m_buf(str) , m_end(str + len) , m_unknownSerializable(allowUnknownSerializableClass) , m_options(options) , m_begin(str) , m_forceDArrays{m_options[s_force_darrays].toBoolean()} , m_markLegacyArrays{m_options[s_mark_legacy_arrays].toBoolean()} {} VariableUnserializer::Type VariableUnserializer::type() const { return m_type; } bool VariableUnserializer::allowUnknownSerializableClass() const { return m_unknownSerializable; } const char* VariableUnserializer::head() const { return m_buf; } const char* VariableUnserializer::begin() const { return m_begin; } const char* VariableUnserializer::end() const { return m_end; } char VariableUnserializer::peek() const { check(); return *m_buf; } char VariableUnserializer::peekBack() const { return m_buf[-1]; } bool VariableUnserializer::endOfBuffer() const { return m_buf >= m_end; } char VariableUnserializer::readChar() { check(); return *(m_buf++); } void VariableUnserializer::add(tv_lval v, UnserializeMode mode) { switch (mode) { case UnserializeMode::Value: m_refs.emplace_back(v); break; // We don't support refs to collection keys; use nullptr as a sentinel. case UnserializeMode::ColKey: m_refs.emplace_back(nullptr); break; case UnserializeMode::Key: break; } } void VariableUnserializer::reserveForAdd(size_t count) { // If the array is large, the space for the backrefs could be // significant, so we need to check for OOM beforehand. To do this, // we need to do some guess work to estimate what memory the vector // will consume once we've done the reserve (we assume the vector // doubles in capacity as necessary). auto const newSize = m_refs.size() + count; auto const capacity = m_refs.capacity(); if (newSize <= capacity) return; auto const total = folly::nextPowTwo(newSize) * sizeof(decltype(m_refs)::value_type); if (UNLIKELY(total > kMaxSmallSize && tl_heap->preAllocOOM(total))) { check_non_safepoint_surprise(); } m_refs.reserve(newSize); check_non_safepoint_surprise(); } TypedValue VariableUnserializer::getByVal(int id) { if (id <= 0 || id > m_refs.size()) throwOutOfRange(id); auto const result = m_refs[id - 1]; if (!result) throwColRKey(); return result.tv(); } void VariableUnserializer::check() const { if (m_buf >= m_end) throwUnexpectedEOB(); } void VariableUnserializer::checkElemTermination() const { auto const ch = peekBack(); if (ch != ';' && ch != '}') throwUnterminatedElement(); } void VariableUnserializer::set(const char* buf, const char* end) { m_buf = buf; m_end = end; } Variant VariableUnserializer::unserialize() { Variant v; unserializeVariant(v.asTypedValue()); if (UNLIKELY(StructuredLog::coinflip(RuntimeOption::EvalSerDesSampleRate))) { String ser(m_begin, m_end - m_begin, CopyString); auto const fmt = folly::sformat("VU{}", (int)m_type); StructuredLog::logSerDes(fmt.c_str(), "des", ser, v); } auto const providedCoeffects = m_pure ? RuntimeCoeffects::pure() : RuntimeCoeffects::defaults(); for (auto& obj : m_sleepingObjects) { obj->invokeWakeup(providedCoeffects); } return v; } namespace { std::pair<int64_t,const char*> hh_strtoll_base10(const char* p) { int64_t x = 0; bool neg = false; if (*p == '-') { neg = true; ++p; } while (*p >= '0' && *p <= '9') { x = (x * 10) + ('0' - *p); ++p; } if (!neg) { x = -x; } return std::pair<int64_t,const char*>(x, p); } } int64_t VariableUnserializer::readInt() { check(); auto r = hh_strtoll_base10(m_buf); m_buf = r.second; return r.first; } double VariableUnserializer::readDouble() { check(); const char* newBuf; double r = zend_strtod(m_buf, &newBuf); m_buf = newBuf; return r; } folly::StringPiece VariableUnserializer::readStr(unsigned n) { check(); auto const bufferLimit = std::min(size_t(m_end - m_buf), size_t(n)); auto str = folly::StringPiece(m_buf, bufferLimit); m_buf += bufferLimit; return str; } void VariableUnserializer::expectChar(char expected) { char ch = readChar(); if (UNLIKELY(ch != expected)) { throwUnexpectedSep(expected, ch); } } namespace { bool isWhitelistClass(const String& requestedClassName, const Array& list, bool includeSubclasses) { if (!list.empty()) { for (ArrayIter iter(list); iter; ++iter) { auto allowedClassName = iter.second().toString(); auto const matches = includeSubclasses ? HHVM_FN(is_a)(requestedClassName, allowedClassName, true) : allowedClassName.get()->isame(requestedClassName.get()); if (matches) return true; } } return false; } } const StaticString s_throw("throw"); const StaticString s_allowed_classes("allowed_classes"); const StaticString s_include_subclasses("include_subclasses"); bool VariableUnserializer::whitelistCheck(const String& clsName) const { if (m_type != Type::Serialize || m_options.isNull()) { return true; } // PHP7-style class whitelisting // Allowed classes are allowed, // all others result in __Incomplete_PHP_Class if (m_options.exists(s_allowed_classes)) { auto allowed_classes = m_options[s_allowed_classes]; auto const ok = [&] { if (allowed_classes.isArray()) { auto const subs = m_options[s_include_subclasses].toBoolean(); return isWhitelistClass(clsName, allowed_classes.toArray(), subs); } else if (allowed_classes.isBoolean()) { return allowed_classes.toBoolean(); } else { throw InvalidAllowedClassesException(); } }(); if (!ok && m_options[s_throw].toBoolean()) { throw_object(m_options[s_throw].toString(), make_vec_array(clsName)); } return ok; } if (!RuntimeOption::UnserializationWhitelistCheck) { // No need for BC HHVM-style whitelist check, // since the check isn't enabled. // Go with PHP5 default behavior of allowing all return true; } // Check for old-style whitelist if (isWhitelistClass(clsName, m_options, false)) { return true; } // Non-whitelisted class with a check enabled, // are we willing to hard-error over it? const char* err_msg = "The object being unserialized with class name '%s' " "is not in the given whitelist"; // followed by ' in <filename> on line %d'. if (RuntimeOption::UnserializationWhitelistCheckWarningOnly) { // Nope, just whine to the user and let it through raise_warning(err_msg, clsName.c_str()); return true; } else { // Yes, shut it down. raise_error(err_msg, clsName.c_str()); return false; } } void VariableUnserializer::addSleepingObject(const Object& o) { m_sleepingObjects.emplace_back(o); } bool VariableUnserializer::matchString(folly::StringPiece str) { const char* p = m_buf; assertx(p <= m_end); int total = 0; if (*p == 'S' && type() == VariableUnserializer::Type::APCSerialize) { total = 2 + 8 + 1; if (p + total > m_end) return false; p++; if (*p++ != ':') return false; auto const sd = *reinterpret_cast<StringData*const*>(p); assertx(sd->isStatic()); if (str.compare(sd->slice()) != 0) return false; p += size_t(8); } else { const auto ss = str.size(); if (ss >= 100) return false; int digits = ss >= 10 ? 2 : 1; total = 2 + digits + 2 + ss + 2; if (p + total > m_end) return false; if (*p++ != 's') return false; if (*p++ != ':') return false; if (digits == 2) { if (*p++ != '0' + ss/10) return false; if (*p++ != '0' + ss%10) return false; } else { if (*p++ != '0' + ss) return false; } if (*p++ != ':') return false; if (*p++ != '\"') return false; if (memcmp(p, str.data(), ss)) return false; p += ss; if (*p++ != '\"') return false; } if (*p++ != ';') return false; assertx(m_buf + total == p); m_buf = p; return true; } /////////////////////////////////////////////////////////////////////////////// // remainingProps should include the current property being unserialized. void VariableUnserializer::unserializePropertyValue(tv_lval v, int remainingProps) { assertx(remainingProps > 0); unserializeVariant(v); if (--remainingProps > 0) { auto lastChar = peekBack(); if (lastChar != ';' && lastChar != '}') { throwUnterminatedProperty(); } } } // nProp should include the current property being unserialized. NEVER_INLINE void VariableUnserializer::unserializeProp(ObjectData* obj, const String& key, Class* ctx, const String& realKey, int nProp) { auto const cls = obj->getVMClass(); auto const lookup = cls->getDeclPropSlot(ctx, key.get()); auto const slot = lookup.slot; tv_lval t; if (slot == kInvalidSlot || !lookup.accessible) { // Unserialize as a dynamic property. If this is the first, we need to // pre-allocate space in the array to ensure the elements don't move during // unserialization. obj->reserveDynProps(nProp); t = obj->makeDynProp(realKey.get()); } else { // We'll check if this doesn't violate the type-hint once we're done // unserializing all the props. t = obj->getPropLval(ctx, key.get()); } unserializePropertyValue(t, nProp); if (!RuntimeOption::RepoAuthoritative) return; if (!RepoFile::globalData().HardPrivatePropInference) return; /* * We assume for performance reasons in repo authoriative mode that * we can see all the sets to private properties in a class. * * It's a hole in this if we don't check unserialization doesn't * violate what we've seen, which we handle by throwing if the repo * was built with this option. */ if (UNLIKELY(slot == kInvalidSlot)) return; auto const repoTy = cls->declPropRepoAuthType(slot); if (LIKELY(tvMatchesRepoAuthType(*t, repoTy))) return; if (t.type() == KindOfUninit && (cls->declProperties()[slot].attrs & AttrLateInit)) { return; } throwUnexpectedType(key, obj, *t); } NEVER_INLINE void VariableUnserializer::unserializeRemainingProps( Object& obj, int remainingProps, Variant& serializedNativeData, bool& hasSerializedNativeData) { obj->unlockObject(); SCOPE_EXIT { obj->lockObject(); }; while (remainingProps > 0) { /* use the number of properties remaining as an estimate for the total number of dynamic properties when we see the first dynamic prop. see getVariantPtr */ Variant v; unserializeVariant(v.asTypedValue(), UnserializeMode::Key); String key = v.toString(); int ksize = key.size(); const char *kdata = key.data(); int subLen = 0; if (key == s_serializedNativeDataKey) { unserializePropertyValue(serializedNativeData.asTypedValue(), remainingProps--); hasSerializedNativeData = true; } else if (kdata[0] == '\0') { if (UNLIKELY(!ksize)) { raise_error("Cannot access empty property"); } // private or protected subLen = strlen(folly::launder(kdata) + 1) + 2; if (UNLIKELY(subLen >= ksize)) { if (subLen == ksize) { raise_error("Cannot access empty property"); } else { throwMangledPrivateProperty(); } } String k(kdata + subLen, ksize - subLen, CopyString); Class* ctx = (Class*)-1; if (kdata[1] != '*') { ctx = Class::lookup( String(kdata + 1, subLen - 2, CopyString).get()); } unserializeProp(obj.get(), k, ctx, key, remainingProps--); } else { unserializeProp(obj.get(), key, nullptr, key, remainingProps--); } } } namespace { static const StaticString s_Vector("Vector"), s_HH_Vector("HH\\Vector"), s_Map("Map"), s_HH_Map("HH\\Map"), s_Set("Set"), s_HH_Set("HH\\Set"), s_Pair("Pair"), s_HH_Pair("HH\\Pair"), s_StableMap("StableMap"); /* * For namespaced collections, returns an "alternate" name, which is a * collection name with or without the namespace qualifier, depending on * what's passed. * If no alternate name is found, returns nullptr. */ const StringData* getAlternateCollectionName(const StringData* clsName) { typedef hphp_hash_map<const StringData*, const StringData*, string_data_hash, string_data_isame> ClsNameMap; auto getAltMap = [] { typedef std::pair<StaticString, StaticString> SStringPair; static ClsNameMap m; static std::vector<SStringPair> mappings { std::make_pair(s_Vector, s_HH_Vector), std::make_pair(s_Map, s_HH_Map), std::make_pair(s_Set, s_HH_Set), std::make_pair(s_Pair, s_HH_Pair) }; for (const auto& p : mappings) { m[p.first.get()] = p.second.get(); m[p.second.get()] = p.first.get(); } // As part of StableMap merging into Map, StableMap is an alias for HH\\Map, // but Map is the sole alias for HH\\Map m[s_StableMap.get()] = s_HH_Map.get(); return &m; }; static const ClsNameMap* altMap = getAltMap(); auto it = altMap->find(clsName); return it != altMap->end() ? it->second : nullptr; } Class* tryAlternateCollectionClass(const StringData* clsName) { auto altName = getAlternateCollectionName(clsName); return altName ? Class::get(altName, /* autoload */ false) : nullptr; } /* * Try to read 'str' while advancing 'cur' without reaching 'end'. */ ALWAYS_INLINE static bool match(const char*& cur, const char* expected, const char* const end) { if (cur + strlen(expected) >= end) return false; while (*expected) { if (*cur++ != *expected++) return false; } return true; } ALWAYS_INLINE static int64_t read64(const char*& cur) { auto p = hh_strtoll_base10(cur); cur = p.second; return p.first; } /* * Read an int64 from 'cur' into 'out'. Returns false on unexpected * (but possibly still legal) format or if 'end' is reached. */ ALWAYS_INLINE bool readInt64(const char*& cur, const char* const end, int64_t& out) { if (!match(cur, "i:", end)) return false; out = read64(cur); return match(cur, ";", end); } /* * Read, allocate, and return a string from 'cur'. Returns null on unexpected * (but possibly still legal) format or if 'end' is reached, without allocating. */ ALWAYS_INLINE static StringData* readStringData(const char*& cur, const char* const end, int maxLen) { if (!match(cur, "s:", end)) return nullptr; auto len = read64(cur); if (len < 0 || len >= maxLen) return nullptr; if (!match(cur, ":\"", end)) return nullptr; auto const slice = folly::StringPiece(cur, len); if ((cur += len) >= end) return nullptr; if (!match(cur, "\";", end)) return nullptr; // TODO(11398853): Consider streaming/non-temporal stores here. auto sd = StringData::Make(slice, CopyString); return sd; } } NEVER_INLINE void VariableUnserializer::unserializeVariant( tv_lval self, UnserializeMode mode /* = UnserializeMode::Value */) { // If we're overwriting an array element or property value, save the old // value in case it's later referenced via an r: or R: ref. if (isRefcountedType(self.type()) && mode == UnserializeMode::Value) { m_overwrittenList.append(*self); } // NOTE: If you make changes to how serialization and unserialization work, // make sure to update reserialize() here and test_apc_reserialize() // in "test/ext/test_ext_apc.cpp". char type = readChar(); char sep = readChar(); if (type != 'R') { add(self, mode); } if (type == 'N') { if (sep != ';') throwUnexpectedSep(';', sep); tvSetNull(self); // NULL *IS* the value, without we get undefined warnings return; } if (sep != ':') throwUnexpectedSep(':', sep); switch (type) { case 'r': case 'R': { int64_t id = readInt(); tvSet(getByVal(id), self); } break; case 'b': { int64_t v = readInt(); tvSetBool((bool)v, self); break; } case 'i': { int64_t v = readInt(); tvSetInt(v, self); break; } case 'd': { char ch = peek(); bool negative = false; if (ch == '-') { negative = true; readChar(); ch = peek(); } double v; if (ch == 'I') { auto str = readStr(3); if (str.size() != 3 || str[1] != 'N' || str[2] != 'F') { throwUnexpectedStr("INF", str); } v = std::numeric_limits<double>::infinity(); } else if (ch == 'N') { auto str = readStr(3); if (str.size() != 3 || str[1] != 'A' || str[2] != 'N') { throwUnexpectedStr("NAN", str); } v = std::numeric_limits<double>::quiet_NaN(); } else { v = readDouble(); } tvSetDouble(negative ? -v : v, self); } break; case 'l': { String c = unserializeString(); if (mode == UnserializeMode::Value) { tvMove( make_tv<KindOfLazyClass>( LazyClassData::create(makeStaticString(c.get())) ), self ); } else { if (RuntimeOption::EvalRaiseClassConversionWarning) { raise_class_to_string_conversion_warning(); } tvMove( make_tv<KindOfPersistentString>(makeStaticString(c.get())), self ); } } break; case 's': { String v = unserializeString(); tvMove(make_tv<KindOfString>(v.detach()), self); if (!endOfBuffer()) { // Semicolon *should* always be required, // but PHP's implementation allows omitting it // and still functioning. // Worse, it throws it away without any check. // So we'll do the same. Sigh. readChar(); } } return; case 'S': if (this->type() == VariableUnserializer::Type::APCSerialize) { auto str = readStr(8); assertx(str.size() == 8); auto const sd = *reinterpret_cast<StringData*const*>(&str[0]); assertx(sd->isStatic()); tvMove(make_tv<KindOfPersistentString>(sd), self); } else { throwUnknownType(type); } break; case 'a': // PHP array case 'D': // Dict { // Check stack depth to avoid overflow. check_recursion_throw(); // It seems silly to check this here, but GCC actually generates much // better code this way. auto a = (type == 'a') ? unserializeArray() : unserializeDict(); if (UNLIKELY(m_markLegacyArrays && type == 'a')) { a.setLegacyArray(true); } tvMove(make_array_like_tv(a.detach()), self); } return; // array has '}' terminating case 'X': // MarkedDArray case 'Y': // DArray { // Check stack depth to avoid overflow. check_recursion_throw(); auto a = unserializeDArray(); if (UNLIKELY(m_markLegacyArrays || type == 'X')) { a.setLegacyArray(true); } tvMove(make_array_like_tv(a.detach()), self); } return; // array has '}' terminating case 'x': // MarkedVArray case 'y': // VArray { // Check stack depth to avoid overflow. check_recursion_throw(); auto a = unserializeVArray(); if (UNLIKELY(m_markLegacyArrays || type == 'x')) { a.setLegacyArray(true); } tvMove(make_array_like_tv(a.detach()), self); } return; // array has '}' terminating case 'v': // Vec { // Check stack depth to avoid overflow. check_recursion_throw(); auto a = unserializeVec(); tvMove(make_tv<KindOfVec>(a.detach()), self); } return; // array has '}' terminating case 'k': // Keyset { // Check stack depth to avoid overflow. check_recursion_throw(); auto a = unserializeKeyset(); tvMove(make_tv<KindOfKeyset>(a.detach()), self); } return; // array has '}' terminating case 'L': { int64_t id = readInt(); expectChar(':'); String rsrcName = unserializeString(); expectChar('{'); expectChar('}'); auto rsrc = req::make<DummyResource>(); rsrc->o_setResourceId(id); rsrc->m_class_name = std::move(rsrcName); tvMove(make_tv<KindOfResource>(rsrc.detach()->hdr()), self); } return; // resource has '}' terminating case 'O': case 'V': case 'K': { String clsName = unserializeString(); expectChar(':'); const int64_t size = readInt(); expectChar(':'); expectChar('{'); const bool allowObjectFormatForCollections = true; Class* cls = nullptr; // If we are potentially dealing with a collection, we need to try to // load the collection class under an alternate name so that we can // deserialize data that was serialized before the migration of // collections to the HH namespace. if (type == 'O') { if (whitelistCheck(clsName)) { if (allowObjectFormatForCollections) { // In order to support the legacy {O|V}:{Set|Vector|Map} // serialization, we defer autoloading until we know that there's // no alternate (builtin) collection class. cls = Class::get(clsName.get(), /* autoload */ false); if (!cls) { cls = tryAlternateCollectionClass(clsName.get()); } } // No valid class was found, lets try the autoloader. if (!cls) { if (!is_valid_class_name(clsName.slice())) { throwInvalidClassName(); } cls = Class::load(clsName.get()); // with autoloading } } } else { // Collections are CPP builtins; don't attempt to autoload cls = Class::get(clsName.get(), /* autoload */ false); if (!cls) { cls = tryAlternateCollectionClass(clsName.get()); } if (!cls || !cls->isCollectionClass()) { throwNotCollection(clsName); } } Object obj; auto remainingProps = size; if (cls) { // Only unserialize CPP extension types which can actually support // it. Otherwise, we risk creating a CPP object without having it // initialized completely. if (cls->instanceCtor() && !cls->isCppSerializable() && !cls->isCollectionClass()) { assertx(obj.isNull()); throw_null_pointer_exception(); } else { if (UNLIKELY(collections::isType(cls, CollectionType::Pair))) { if (UNLIKELY(size != 2)) { throwInvalidPair(); } // pairs can't be constructed without elements obj = Object{req::make<c_Pair>(make_tv<KindOfNull>(), make_tv<KindOfNull>(), c_Pair::NoIncRef{})}; } else if (UNLIKELY(cls->hasReifiedGenerics())) { // First prop on the serialized list is the reified generics prop if (!matchString(s_86reified_prop.slice())) { throwInvalidOFormat(clsName); } TypedValue tv = make_tv<KindOfNull>(); auto const t = tv_lval{&tv}; unserializePropertyValue(t, remainingProps--); if (!TypeStructure::coerceToTypeStructureList_SERDE_ONLY(t)) { throwInvalidOFormat(clsName); } assertx(tvIsVec(t)); obj = Object{cls, t.val().parr}; } else { obj = Object{cls}; } } } else { warnOrThrowUnknownClass(clsName); obj = Object{SystemLib::s___PHP_Incomplete_ClassClass}; obj->setProp(nullptr, s_PHP_Incomplete_Class_Name.get(), clsName.asTypedValue()); } assertx(!obj.isNull()); tvSet(make_tv<KindOfObject>(obj.get()), self); if (remainingProps > 0) { // Check stack depth to avoid overflow. check_recursion_throw(); if (type == 'O') { // Collections are not allowed if (obj->isCollection()) { throwInvalidOFormat(clsName); } Variant serializedNativeData = init_null(); bool hasSerializedNativeData = false; bool checkRepoAuthType = RuntimeOption::RepoAuthoritative && RepoFile::globalData().HardPrivatePropInference; Class* objCls = obj->getVMClass(); // Try fast case. if (remainingProps >= objCls->numDeclProperties() - (objCls->hasReifiedGenerics() ? 1 : 0)) { auto mismatch = false; auto const objProps = obj->props(); auto const declProps = objCls->declProperties(); for (auto const& p : declProps) { auto slot = p.serializationIdx; auto index = objCls->propSlotToIndex(slot); auto const& prop = declProps[slot]; if (prop.name == s_86reified_prop.get()) continue; if (!matchString(prop.mangledName->slice())) { mismatch = true; break; } // don't need to worry about overwritten list, because // this is definitely the first time we're setting this // property. auto const t = objProps->at(index); unserializePropertyValue(t, remainingProps--); if (UNLIKELY(checkRepoAuthType && !tvMatchesRepoAuthType(*t, prop.repoAuthType))) { throwUnexpectedType(prop.name, obj.get(), *t); } } // If everything matched, all remaining properties are dynamic. if (!mismatch && remainingProps > 0) { // the dynPropTable can be mutated while we're deserializing // the contents of this object's prop array. Don't hold a // reference to this object's entry in the table while looping. obj->reserveDynProps(remainingProps); while (remainingProps > 0) { Variant v; unserializeVariant(v.asTypedValue(), UnserializeMode::Key); String key = v.toString(); if (key == s_serializedNativeDataKey) { unserializePropertyValue(serializedNativeData.asTypedValue(), remainingProps--); hasSerializedNativeData = true; } else { auto kdata = key.data(); if (kdata[0] == '\0') { auto ksize = key.size(); if (UNLIKELY(ksize == 0)) { raise_error("Cannot access empty property"); } // private or protected auto subLen = strlen(folly::launder(kdata) + 1) + 2; if (UNLIKELY(subLen >= ksize)) { if (subLen == ksize) { raise_error("Cannot access empty property"); } else { throwMangledPrivateProperty(); } } } auto const lval = obj->makeDynProp(key.get()); unserializePropertyValue(lval, remainingProps--); } } } } if (remainingProps > 0) { INC_TPC(unser_prop_slow); unserializeRemainingProps(obj, remainingProps, serializedNativeData, hasSerializedNativeData); remainingProps = 0; } else { INC_TPC(unser_prop_fast); } // Verify that all the unserialized properties satisfy their // type-hints. Its safe to do it like this (after we've set the values // in the properties) because this object hasn't escaped to the // outside world yet. obj->verifyPropTypeHints(); // nativeDataWakeup is called last to ensure that all properties are // already unserialized. We also ensure that nativeDataWakeup is // invoked regardless of whether or not serialized native data exists // within the serialized content. if (obj->hasNativeData() && obj->getVMClass()->getNativeDataInfo()->isSerializable()) { Native::nativeDataWakeup(obj.get(), serializedNativeData); } else if (hasSerializedNativeData) { raise_warning("%s does not expect any serialized native data.", clsName.data()); } } else { assertx(type == 'V' || type == 'K'); if (!obj->isCollection()) { throwNotCollection(clsName); } unserializeCollection(obj.get(), size, type); } } expectChar('}'); if (cls && cls->lookupMethod(s___wakeup.get()) && (this->type() != VariableUnserializer::Type::DebuggerSerialize || (cls->instanceCtor() && cls->isCppSerializable()))) { // Don't call wakeup when unserializing for the debugger, except for // natively implemented classes. addSleepingObject(obj); } check_non_safepoint_surprise(); } return; // object has '}' terminating case 'C': { if (this->type() == VariableUnserializer::Type::DebuggerSerialize) { raise_error("Debugger shouldn't call custom unserialize method"); } String clsName = unserializeString(); expectChar(':'); String serialized = unserializeString('{', '}'); auto obj = [&]() -> Object { if (whitelistCheck(clsName)) { // Try loading without the autoloader first auto cls = Class::get(clsName.get(), /* autoload */ false); if (!cls) { if (!is_valid_class_name(clsName.slice())) { throwInvalidClassName(); } cls = Class::load(clsName.get()); } if (cls) { return Object::attach(g_context->createObject(cls, init_null_variant, false /* init */)); } } if (!allowUnknownSerializableClass()) { raise_error("unknown class %s", clsName.data()); } warnOrThrowUnknownClass(clsName); Object ret = create_object_only(s_PHP_Incomplete_Class); ret->setProp(nullptr, s_PHP_Incomplete_Class_Name.get(), clsName.asTypedValue()); ret->setProp(nullptr, s_serialized.get(), serialized.asTypedValue()); return ret; }(); if (!obj->instanceof(SystemLib::s_SerializableClass)) { raise_warning("Class %s has no unserializer", obj->getClassName().data()); } else { obj->o_invoke_few_args(s_unserialize, RuntimeCoeffects::fixme(), 1, serialized); } tvMove(make_tv<KindOfObject>(obj.detach()), self); } return; // object has '}' terminating default: throwUnknownType(type); } expectChar(';'); } Array VariableUnserializer::unserializeArray() { int64_t size = readInt(); expectChar(':'); expectChar('{'); if (UNLIKELY(size < 0 || size > std::numeric_limits<int>::max())) { throwArraySizeOutOfBounds(); } unserializeProvenanceTag(); if (size == 0) { expectChar('}'); return Array::CreateDict(); } // For large arrays, do a naive pre-check for OOM. auto const allocsz = VanillaDict::computeAllocBytesFromMaxElms(size); if (UNLIKELY(allocsz > kMaxSmallSize && tl_heap->preAllocOOM(allocsz))) { check_non_safepoint_surprise(); } // Pre-allocate an ArrayData of the given size, to avoid escalation in the // middle, which breaks references. auto arr = m_forceDArrays || type() == Type::Serialize ? DictInit(size).toArray() : DictInit(size).toArray(); reserveForAdd(size); for (int64_t i = 0; i < size; i++) { Variant key; unserializeVariant(key.asTypedValue(), UnserializeMode::Key); if (!key.isString() && !key.isInteger()) throwInvalidKey(); unserializeVariant(VanillaDict::LvalInPlace(arr.get(), key)); if (i < size - 1) checkElemTermination(); } check_non_safepoint_surprise(); expectChar('}'); return arr; } void VariableUnserializer::unserializeProvenanceTag() { if (type() != VariableUnserializer::Type::Internal && type() != VariableUnserializer::Type::Serialize) { return; } auto const read_line = [&]() -> int { expectChar(':'); expectChar('i'); expectChar(':'); auto const line = static_cast<int>(readInt()); expectChar(';'); return line; }; auto const read_name = [&]() -> const StringData* { if (peek() == 't') { assertx(m_unitFilename); expectChar('t'); return m_unitFilename; } else { expectChar('s'); expectChar(':'); return makeStaticString(unserializeString().get()); } }; if (peek() != 'p') return; expectChar('p'); auto const peeked = peek(); if (peeked == ':') { read_line(); read_name(); expectChar(';'); } else if (peeked == 'f') { readChar(); read_line(); read_name(); expectChar(';'); } else if (peeked == 'c' || peeked == 'e' || peeked == 'r' || peeked == 'z') { readChar(); expectChar(':'); read_name(); expectChar(';'); } } Array VariableUnserializer::unserializeDict() { int64_t size = readInt(); expectChar(':'); expectChar('{'); if (UNLIKELY(size < 0 || size > std::numeric_limits<int>::max())) { throwArraySizeOutOfBounds(); } unserializeProvenanceTag(); if (size == 0) { expectChar('}'); return Array::attach(staticEmptyDictArray()); } // For large arrays, do a naive pre-check for OOM. auto const allocsz = VanillaDict::computeAllocBytesFromMaxElms(size); if (UNLIKELY(allocsz > kMaxSmallSize && tl_heap->preAllocOOM(allocsz))) { check_non_safepoint_surprise(); } Array arr = DictInit(size).toArray(); for (int64_t i = 0; i < size; i++) { Variant key; unserializeVariant(key.asTypedValue(), UnserializeMode::Key); if (!key.isString() && !key.isInteger()) throwInvalidKey(); unserializeVariant(VanillaDict::LvalInPlace(arr.get(), key)); if (i < size - 1) checkElemTermination(); } check_non_safepoint_surprise(); expectChar('}'); return arr; } Array VariableUnserializer::unserializeVec() { int64_t size = readInt(); expectChar(':'); expectChar('{'); if (UNLIKELY(size < 0 || size > std::numeric_limits<int>::max())) { throwArraySizeOutOfBounds(); } unserializeProvenanceTag(); if (size == 0) { expectChar('}'); return Array::attach(staticEmptyVec()); } auto const sizeClass = VanillaVec::capacityToSizeIndex(size); auto const allocsz = MemoryManager::sizeIndex2Size(sizeClass); // For large arrays, do a naive pre-check for OOM. if (UNLIKELY(allocsz > kMaxSmallSize && tl_heap->preAllocOOM(allocsz))) { check_non_safepoint_surprise(); } Array arr = VecInit(size).toArray(); reserveForAdd(size); for (int64_t i = 0; i < size; i++) { unserializeVariant(VanillaVec::LvalNewInPlace(arr.get())); if (i < size - 1) checkElemTermination(); } check_non_safepoint_surprise(); expectChar('}'); return arr; } Array VariableUnserializer::unserializeVArray() { int64_t size = readInt(); expectChar(':'); expectChar('{'); if (UNLIKELY(size < 0 || size > std::numeric_limits<int>::max())) { throwArraySizeOutOfBounds(); } unserializeProvenanceTag(); if (size == 0) { expectChar('}'); if (m_type != Type::Serialize) return Array::CreateVec(); return m_forceDArrays ? Array::CreateDict() : Array::CreateVec(); } auto const oomCheck = [&](size_t allocsz) { // For large arrays, do a naive pre-check for OOM. if (UNLIKELY(allocsz > kMaxSmallSize && tl_heap->preAllocOOM(allocsz))) { check_non_safepoint_surprise(); } }; auto arr = Array{}; if (m_forceDArrays && m_type == Type::Serialize) { // Deserialize to vector-ish darray. Use direct calls to VanillaDict. oomCheck(VanillaDict::computeAllocBytesFromMaxElms(size)); arr = DictInit(size).toArray(); reserveForAdd(size); for (int64_t i = 0; i < size; i++) { unserializeVariant(VanillaDict::LvalInPlace(arr.get(), i)); if (i < size - 1) checkElemTermination(); } } else { // Deserialize to varray. Use direct calls to VanillaDict. auto const index = VanillaVec::capacityToSizeIndex(size); oomCheck(MemoryManager::sizeIndex2Size(index)); arr = VecInit(size).toArray(); reserveForAdd(size); for (int64_t i = 0; i < size; i++) { unserializeVariant(VanillaVec::LvalNewInPlace(arr.get())); if (i < size - 1) checkElemTermination(); } } check_non_safepoint_surprise(); expectChar('}'); return arr; } Array VariableUnserializer::unserializeDArray() { int64_t size = readInt(); expectChar(':'); expectChar('{'); if (UNLIKELY(size < 0 || size > std::numeric_limits<int>::max())) { throwArraySizeOutOfBounds(); } unserializeProvenanceTag(); if (size == 0) { expectChar('}'); return Array::CreateDict(); } // For large arrays, do a naive pre-check for OOM. auto const allocsz = VanillaDict::computeAllocBytesFromMaxElms(size); if (UNLIKELY(allocsz > kMaxSmallSize && tl_heap->preAllocOOM(allocsz))) { check_non_safepoint_surprise(); } auto arr = DictInit(size).toArray(); reserveForAdd(size); for (int64_t i = 0; i < size; i++) { Variant key; unserializeVariant(key.asTypedValue(), UnserializeMode::Key); if (!key.isString() && !key.isInteger()) throwInvalidKey(); unserializeVariant(VanillaDict::LvalInPlace(arr.get(), key)); if (i < size - 1) checkElemTermination(); } check_non_safepoint_surprise(); expectChar('}'); return arr; } Array VariableUnserializer::unserializeKeyset() { int64_t size = readInt(); expectChar(':'); expectChar('{'); if (size == 0) { expectChar('}'); return Array::CreateKeyset(); } if (UNLIKELY(size < 0 || size > std::numeric_limits<int>::max())) { throwArraySizeOutOfBounds(); } // For large arrays, do a naive pre-check for OOM. auto const allocsz = VanillaKeyset::computeAllocBytesFromMaxElms(size); if (UNLIKELY(allocsz > kMaxSmallSize && tl_heap->preAllocOOM(allocsz))) { check_non_safepoint_surprise(); } KeysetInit init(size); for (int64_t i = 0; i < size; i++) { Variant key; // Use key mode to stop the unserializer from keeping a pointer to this // variant (since its stack-allocated). unserializeVariant(key.asTypedValue(), UnserializeMode::Key); auto const type = key.getType(); if (UNLIKELY(!isStringType(type) && !isIntType(type))) { throwKeysetValue(); } init.add(key); if (i < (size - 1)) { auto lastChar = peekBack(); if ((lastChar != ';' && lastChar != '}')) { throwUnterminatedElement(); } } } check_non_safepoint_surprise(); expectChar('}'); return init.toArray(); } folly::StringPiece VariableUnserializer::unserializeStringPiece(char delimiter0, char delimiter1) { int64_t size = readInt(); if (size >= RuntimeOption::MaxSerializedStringSize) { throwLargeStringSize(size); } if (size < 0) { throwNegativeStringSize(size); } expectChar(':'); expectChar(delimiter0); auto const piece = readStr(size); expectChar(delimiter1); return piece; } String VariableUnserializer::unserializeString(char delimiter0, char delimiter1) { auto const piece = unserializeStringPiece(delimiter0, delimiter1); return String::attach(readOnly() ? makeStaticString(piece) : StringData::Make(piece, CopyString)); } void VariableUnserializer::unserializeCollection(ObjectData* obj, int64_t sz, char type) { switch (obj->collectionType()) { case CollectionType::Pair: unserializePair(obj, sz, type); break; case CollectionType::Vector: case CollectionType::ImmVector: unserializeVector(obj, sz, type); break; case CollectionType::Map: case CollectionType::ImmMap: unserializeMap(obj, sz, type); break; case CollectionType::Set: case CollectionType::ImmSet: unserializeSet(obj, sz, type); break; } } void VariableUnserializer::unserializeVector(ObjectData* obj, int64_t sz, char type) { if (type != 'V') throwBadFormat(obj, type); auto const sizeClass = VanillaVec::capacityToSizeIndex(sz); auto const allocsz = MemoryManager::sizeIndex2Size(sizeClass); // For large vectors, do a naive pre-check for OOM. if (UNLIKELY(allocsz > kMaxSmallSize && tl_heap->preAllocOOM(allocsz))) { check_non_safepoint_surprise(); } auto bvec = static_cast<BaseVector*>(obj); bvec->reserve(sz); reserveForAdd(sz); for (int64_t i = 0; i < sz; ++i) { auto tv = bvec->appendForUnserialize(i); HPHP::type(tv) = KindOfNull; unserializeVariant(tv); } } /* * Attempts to unserialize into an initially empty HH\Map of string->int/string. * Returns false and leaves both 'map' and 'uns' untouched on failure, including * unexpected types and possibly legal, but uncommon, encodings. */ NEVER_INLINE bool VariableUnserializer::tryUnserializeStrIntMap(BaseMap* map, int64_t sz) { auto b = head(); /* * For efficiency, we don't add the keys/values to m_refs, so don't support * back-references appearing after this point. For simplicity, we thus require * this map to be the root object being unserialized. */ if (folly::StringPiece(begin(), b) != folly::to<std::string>("K:6:\"HH\\Map\":", sz, ":{")) { return false; } auto const end = this->end(); auto const maxKeyLen = RuntimeOption::MaxSerializedStringSize; /* * First, parse the entire input and allocate the keys (accessing lots of * data, but mostly sequentially). */ auto checkPoint = map->batchInsertBegin(sz); int64_t i = 0; for (; i < sz; ++i) { auto sd = readStringData(b, end, maxKeyLen); if (!sd) break; String key = String::attach(sd); auto tv = map->batchInsert(key.get()); tv->m_type = KindOfNull; if (*b == 'i') { if (!readInt64(b, end, tv->m_data.num)) break; tv->m_type = KindOfInt64; } else if (*b == 's') { auto sd = readStringData(b, end, maxKeyLen); if (!sd) break; tv->m_data.pstr = sd; tv->m_type = KindOfString; } else { break; } } /* * On success, finalize the hash table insertion (very random access). */ if (i == sz && map->tryBatchInsertEnd(checkPoint)) { set(b, end); return true; } map->batchInsertAbort(checkPoint); return false; } void VariableUnserializer::unserializeMap(ObjectData* obj, int64_t sz, char type) { if (type != 'K') throwBadFormat(obj, type); // For large maps, do a naive pre-check for OOM. auto const allocsz = VanillaDict::computeAllocBytesFromMaxElms(sz); if (UNLIKELY(allocsz > kMaxSmallSize && tl_heap->preAllocOOM(allocsz))) { check_non_safepoint_surprise(); } auto map = static_cast<BaseMap*>(obj); map->reserve(sz); if (sz >= RuntimeOption::UnserializationBigMapThreshold && tryUnserializeStrIntMap(map, sz)) { return; } reserveForAdd(sz + sz); // keys + values for (int64_t i = 0; i < sz; ++i) { Variant k; unserializeVariant(k.asTypedValue(), UnserializeMode::ColKey); TypedValue* tv = nullptr; if (k.isInteger()) { auto h = k.toInt64(); tv = map->findForUnserialize(h); // Be robust against manually crafted inputs with conflicting elements if (UNLIKELY(!tv)) { tv = k.asTypedValue(); goto do_unserialize; } } else if (k.isString()) { auto key = k.getStringData(); tv = map->findForUnserialize(key); // Be robust against manually crafted inputs with conflicting elements if (UNLIKELY(!tv)) { tv = k.asTypedValue(); goto do_unserialize; } } else { throwInvalidKey(); } tv->m_type = KindOfNull; do_unserialize: unserializeVariant(tv); } } void VariableUnserializer::unserializeSet(ObjectData* obj, int64_t sz, char type) { if (type != 'V') throwBadFormat(obj, type); // For large maps, do a naive pre-check for OOM. auto const allocsz = VanillaDict::computeAllocBytesFromMaxElms(sz); if (UNLIKELY(allocsz > kMaxSmallSize && tl_heap->preAllocOOM(allocsz))) { check_non_safepoint_surprise(); } auto set = static_cast<BaseSet*>(obj); set->reserve(sz); reserveForAdd(sz); for (int64_t i = 0; i < sz; ++i) { // When unserializing an element of a Set, we use Mode::ColKey for now. // This will make the unserializer to reserve an id for the element // but won't allow referencing the element via 'r' or 'R'. Variant k; unserializeVariant(k.asTypedValue(), UnserializeMode::ColKey); if (k.isInteger()) { set->add(k.toInt64()); } else if (k.isString()) { set->add(k.getStringData()); } else { throwInvalidHashKey(obj); } } } void VariableUnserializer::unserializePair(ObjectData* obj, int64_t sz, char type) { assertx(sz == 2); if (type != 'V') throwBadFormat(obj, type); auto pair = static_cast<c_Pair*>(obj); unserializeVariant(pair->at(0)); unserializeVariant(pair->at(1)); } //////////////////////////////////////////////////////////////////////////////// void VariableUnserializer::reserialize(StringBuffer& buf) { char type = readChar(); char sep = readChar(); if (type == 'N') { buf.append(type); buf.append(sep); return; } switch (type) { case 'r': case 'R': case 'b': case 'i': case 'd': case 'l': { buf.append(type); buf.append(sep); while (peek() != ';') { char ch; ch = readChar(); buf.append(ch); } } break; case 'S': case 'A': { // shouldn't happen, but keep the code here anyway. buf.append(type); buf.append(sep); auto str = readStr(8); buf.append(str.data(), str.size()); } break; case 's': { String v = unserializeString(); assertx(!v.isNull()); if (v.get()->isStatic()) { union { char pointer[8]; StringData *sd; } u; u.sd = v.get(); buf.append("S:"); buf.append(u.pointer, 8); buf.append(';'); } else { buf.append("s:"); buf.append(v.size()); buf.append(":\""); buf.append(v.data(), v.size()); buf.append("\";"); } sep = readChar(); return; } break; case 'a': case 'D': case 'Y': case 'H': { buf.append(type == 'a' ? "a:" : (type == 'Y' ? "Y:" : (type == 'D' ? "D:" : "H:"))); int64_t size = readInt(); char sep2 = readChar(); buf.append(size); buf.append(sep2); sep2 = readChar(); buf.append(sep2); for (int64_t i = 0; i < size; i++) { reserialize(buf); // key reserialize(buf); // value } sep2 = readChar(); // '}' buf.append(sep2); return; } break; case 'v': case 'k': case 'y': { buf.append(type == 'v' ? "v:" : (type == 'y' ? "y:" : "k:")); int64_t size = readInt(); char sep2 = readChar(); buf.append(size); buf.append(sep2); sep2 = readChar(); buf.append(sep2); for (int64_t i = 0; i < size; ++i) { reserialize(buf); } sep2 = readChar(); // '}' buf.append(sep2); return; } case 'o': case 'O': case 'V': case 'K': { buf.append(type); buf.append(sep); auto const clsName = unserializeStringPiece(); buf.append(static_cast<int>(clsName.size())); buf.append(":\""); buf.append(clsName.data(), clsName.size()); buf.append("\":"); readChar(); int64_t size = readInt(); char sep2 = readChar(); buf.append(size); buf.append(sep2); sep2 = readChar(); // '{' buf.append(sep2); // 'V' type is a series with values only, while all other // types are series with keys and values int64_t i = type == 'V' ? size : size * 2; while (i--) { reserialize(buf); } sep2 = readChar(); // '}' buf.append(sep2); return; } break; case 'C': { buf.append(type); buf.append(sep); auto const clsName = unserializeStringPiece(); buf.append(static_cast<int>(clsName.size())); buf.append(":\""); buf.append(clsName.data(), clsName.size()); buf.append("\":"); sep = readChar(); // ':' auto const serialized = unserializeStringPiece('{', '}'); buf.append(static_cast<int>(serialized.size())); buf.append(":{"); buf.append(serialized.data(), serialized.size()); buf.append('}'); return; } break; default: throwUnknownType(type); } sep = readChar(); // the last ';' buf.append(sep); } }

hphp/runtime/base/variable-unserializer.cpp (1,507 lines of code) (raw):