void VariableUnserializer::unserializeVariant()

in hphp/runtime/base/variable-unserializer.cpp [746:1212]


/**
 * Reads one serialized value from the input and stores it into `self`.
 *
 * Every value starts with a one-character type tag followed by a separator
 * character. The null tag 'N' must be followed by ';'. Every other tag must
 * be followed by ':' and a tag-specific payload. Tags handled here:
 *
 *   N        null
 *   r, R     copy of a previously seen value, looked up by id via getByVal()
 *   b, i     bool, int (both read with readInt())
 *   d        double; also accepts INF, -INF, NAN and -NAN spellings
 *   l        lazy class (stored as a static string when mode is not Value)
 *   s        string
 *   S        raw StringData pointer, only accepted for APCSerialize
 *   a, D     PHP array, dict
 *   X, Y     marked darray, darray
 *   x, y     marked varray, varray
 *   v, k     vec, keyset
 *   L        resource, materialized as a DummyResource
 *   O        object
 *   V, K     collection object
 *   C        object restored through its own unserialize method
 *
 * Any other tag is reported through throwUnknownType().
 *
 * Cases that leave the switch with `break` (r/R, b, i, d, l, S) share the
 * trailing expectChar(';') at the bottom of the function. Cases whose
 * payload ends in '}' and the 's' case consume their own terminator and
 * return directly.
 *
 * @param self  Destination for the decoded value. If it currently holds a
 *              refcounted value and mode is Value, that old value is saved
 *              in m_overwrittenList before being replaced.
 * @param mode  Defaults to UnserializeMode::Value. It is forwarded to add()
 *              and changes how the 'l' tag is materialized. The dynamic
 *              property loop below calls this function recursively with
 *              UnserializeMode::Key to read property names.
 */
void VariableUnserializer::unserializeVariant(
    tv_lval self,
    UnserializeMode mode /* = UnserializeMode::Value */) {

  // If we're overwriting an array element or property value, save the old
  // value in case it's later referenced via an r: or R: ref.
  if (isRefcountedType(self.type()) && mode == UnserializeMode::Value) {
    m_overwrittenList.append(*self);
  }

  // NOTE: If you make changes to how serialization and unserialization work,
  // make sure to update reserialize() here and test_apc_reserialize()
  // in "test/ext/test_ext_apc.cpp".

  // Every value begins with a type tag and a separator character.
  char type = readChar();
  char sep = readChar();

  // Everything except an 'R' entry is passed to add().
  // NOTE(review): presumably add() records the slot so that later r:/R: ids
  // can resolve to it -- confirm against add().
  if (type != 'R') {
    add(self, mode);
  }

  // Null is the only tag whose separator is ';' and it has no payload.
  if (type == 'N') {
    if (sep != ';') throwUnexpectedSep(';', sep);
    tvSetNull(self); // NULL *IS* the value; without this we get undefined warnings
    return;
  }
  if (sep != ':') throwUnexpectedSep(':', sep);

  switch (type) {
  // Back-reference: the payload is an id; the referenced value is copied
  // into self with tvSet().
  case 'r':
  case 'R':
    {
      int64_t id = readInt();
      tvSet(getByVal(id), self);
    }
    break;
  // Bool: read as an integer, any non-zero value becomes true.
  case 'b':
    {
      int64_t v = readInt();
      tvSetBool((bool)v, self);
      break;
    }
  case 'i':
    {
      int64_t v = readInt();
      tvSetInt(v, self);
      break;
    }
  // Double: an optional leading '-' is consumed here, then either the
  // literal "INF" / "NAN" or a number parsed by readDouble().
  case 'd':
    {
      char ch = peek();
      bool negative = false;
      if (ch == '-') {
        negative = true;
        readChar();
        ch = peek();
      }
      double v;
      // The first letter was already inspected through peek(), so only the
      // length and the remaining two letters are validated below.
      if (ch == 'I') {
        auto str = readStr(3);
        if (str.size() != 3 || str[1] != 'N' || str[2] != 'F') {
          throwUnexpectedStr("INF", str);
        }
        v = std::numeric_limits<double>::infinity();
      } else if (ch == 'N') {
        auto str = readStr(3);
        if (str.size() != 3 || str[1] != 'A' || str[2] != 'N') {
          throwUnexpectedStr("NAN", str);
        }
        v = std::numeric_limits<double>::quiet_NaN();
      } else {
        v = readDouble();
      }
      // The sign is applied after parsing, so it also negates INF and NAN.
      tvSetDouble(negative ? -v : v, self);
    }
    break;
  // Lazy class: the payload is the class name as a string.
  case 'l':
    {
      String c = unserializeString();
      if (mode == UnserializeMode::Value) {
        tvMove(
          make_tv<KindOfLazyClass>(
            LazyClassData::create(makeStaticString(c.get()))
          ),
          self
        );
      } else {
        // In any other mode the class name is stored as a static string,
        // optionally raising the class-to-string conversion warning.
        if (RuntimeOption::EvalRaiseClassConversionWarning) {
          raise_class_to_string_conversion_warning();
        }
        tvMove(
          make_tv<KindOfPersistentString>(makeStaticString(c.get())), self
        );
      }
    }
    break;
  case 's':
    {
      String v = unserializeString();
      // detach() hands the string's reference over to the TypedValue that
      // tvMove() stores into self.
      tvMove(make_tv<KindOfString>(v.detach()), self);
      if (!endOfBuffer()) {
        // Semicolon *should* always be required,
        // but PHP's implementation allows omitting it
        // and still functioning.
        // Worse, it throws it away without any check.
        // So we'll do the same.  Sigh.
        readChar();
      }
    }
    return;
  // Static string by address: only accepted for APCSerialize. The payload
  // is 8 raw bytes that are reinterpreted as a StringData pointer.
  // NOTE(review): this assumes pointers are 8 bytes wide -- confirm for all
  // supported targets.
  case 'S':
    if (this->type() == VariableUnserializer::Type::APCSerialize) {
      auto str = readStr(8);
      assertx(str.size() == 8);
      auto const sd = *reinterpret_cast<StringData*const*>(&str[0]);
      assertx(sd->isStatic());
      tvMove(make_tv<KindOfPersistentString>(sd), self);
    } else {
      throwUnknownType(type);
    }
    break;
  case 'a': // PHP array
  case 'D': // Dict
    {
      // Check stack depth to avoid overflow.
      check_recursion_throw();
      // It seems silly to check this here, but GCC actually generates much
      // better code this way.
      auto a = (type == 'a') ?
        unserializeArray() :
        unserializeDict();
      // Only 'a' payloads are marked legacy here; 'D' never is.
      if (UNLIKELY(m_markLegacyArrays && type == 'a')) {
        a.setLegacyArray(true);
      }
      tvMove(make_array_like_tv(a.detach()), self);
    }
    return; // array has '}' terminating
  case 'X': // MarkedDArray
  case 'Y': // DArray
    {
      // Check stack depth to avoid overflow.
      check_recursion_throw();
      auto a = unserializeDArray();
      // 'X' is always marked legacy; 'Y' only when m_markLegacyArrays is set.
      if (UNLIKELY(m_markLegacyArrays || type == 'X')) {
        a.setLegacyArray(true);
      }
      tvMove(make_array_like_tv(a.detach()), self);
    }
    return; // array has '}' terminating
  case 'x': // MarkedVArray
  case 'y': // VArray
    {
      // Check stack depth to avoid overflow.
      check_recursion_throw();
      auto a = unserializeVArray();
      // 'x' is always marked legacy; 'y' only when m_markLegacyArrays is set.
      if (UNLIKELY(m_markLegacyArrays || type == 'x')) {
        a.setLegacyArray(true);
      }
      tvMove(make_array_like_tv(a.detach()), self);
    }
    return; // array has '}' terminating
  case 'v': // Vec
    {
      // Check stack depth to avoid overflow.
      check_recursion_throw();
      auto a = unserializeVec();
      tvMove(make_tv<KindOfVec>(a.detach()), self);
    }
    return; // array has '}' terminating
  case 'k': // Keyset
    {
      // Check stack depth to avoid overflow.
      check_recursion_throw();
      auto a = unserializeKeyset();
      tvMove(make_tv<KindOfKeyset>(a.detach()), self);
    }
    return; // array has '}' terminating
  // Resource: an integer id, ':', the resource class name, then an empty
  // '{' '}' body. The result is a DummyResource carrying that id and name.
  case 'L':
    {
      int64_t id = readInt();
      expectChar(':');
      String rsrcName = unserializeString();
      expectChar('{');
      expectChar('}');
      auto rsrc = req::make<DummyResource>();
      rsrc->o_setResourceId(id);
      rsrc->m_class_name = std::move(rsrcName);
      tvMove(make_tv<KindOfResource>(rsrc.detach()->hdr()), self);
    }
    return; // resource has '}' terminating
  // Object ('O') or collection ('V', 'K'): the class name, ':', the number
  // of entries, ':', then a '{' ... '}' body.
  case 'O':
  case 'V':
  case 'K':
    {
      String clsName = unserializeString();

      expectChar(':');
      const int64_t size = readInt();
      expectChar(':');
      expectChar('{');

      // Constant true, so the non-autoloading lookup below always runs for
      // 'O' before the autoloader is tried.
      const bool allowObjectFormatForCollections = true;

      Class* cls = nullptr;

      // If we are potentially dealing with a collection, we need to try to
      // load the collection class under an alternate name so that we can
      // deserialize data that was serialized before the migration of
      // collections to the HH namespace.

      if (type == 'O') {
        // If whitelistCheck() rejects the name, cls stays null and the
        // object is built as a __PHP_Incomplete_Class further down.
        if (whitelistCheck(clsName)) {
          if (allowObjectFormatForCollections) {
            // In order to support the legacy {O|V}:{Set|Vector|Map}
            // serialization, we defer autoloading until we know that there's
            // no alternate (builtin) collection class.
            cls = Class::get(clsName.get(), /* autoload */ false);
            if (!cls) {
              cls = tryAlternateCollectionClass(clsName.get());
            }
          }

          // No valid class was found, let's try the autoloader.
          if (!cls) {
            if (!is_valid_class_name(clsName.slice())) {
              throwInvalidClassName();
            }
            cls = Class::load(clsName.get()); // with autoloading
          }
        }
      } else {
        // Collections are CPP builtins; don't attempt to autoload
        cls = Class::get(clsName.get(), /* autoload */ false);
        if (!cls) {
          cls = tryAlternateCollectionClass(clsName.get());
        }
        if (!cls || !cls->isCollectionClass()) {
          throwNotCollection(clsName);
        }
      }

      Object obj;
      // Counts down as properties are consumed. `size` itself stays
      // untouched and is what unserializeCollection() receives below.
      auto remainingProps = size;
      if (cls) {
        // Only unserialize CPP extension types which can actually support
        // it. Otherwise, we risk creating a CPP object without having it
        // initialized completely.
        if (cls->instanceCtor() && !cls->isCppSerializable() &&
            !cls->isCollectionClass()) {
          assertx(obj.isNull());
          throw_null_pointer_exception();
        } else {
          if (UNLIKELY(collections::isType(cls, CollectionType::Pair))) {
            if (UNLIKELY(size != 2)) {
              throwInvalidPair();
            }
            // pairs can't be constructed without elements
            obj = Object{req::make<c_Pair>(make_tv<KindOfNull>(),
                                           make_tv<KindOfNull>(),
                                           c_Pair::NoIncRef{})};
          } else if (UNLIKELY(cls->hasReifiedGenerics())) {
            // First prop on the serialized list is the reified generics prop
            if (!matchString(s_86reified_prop.slice())) {
              throwInvalidOFormat(clsName);
            }
            // The value is decoded into a temporary because it is needed
            // as a constructor argument for the object itself.
            TypedValue tv = make_tv<KindOfNull>();
            auto const t = tv_lval{&tv};
            // Post-decrement: the callee receives the count from before
            // this property is consumed.
            unserializePropertyValue(t, remainingProps--);
            if (!TypeStructure::coerceToTypeStructureList_SERDE_ONLY(t)) {
              throwInvalidOFormat(clsName);
            }
            assertx(tvIsVec(t));
            obj = Object{cls, t.val().parr};
          } else {
            obj = Object{cls};
          }
        }
      } else {
        // Unknown or rejected class: keep the data in a
        // __PHP_Incomplete_Class that remembers the original class name.
        warnOrThrowUnknownClass(clsName);
        obj = Object{SystemLib::s___PHP_Incomplete_ClassClass};
        obj->setProp(nullptr, s_PHP_Incomplete_Class_Name.get(),
                     clsName.asTypedValue());
      }
      assertx(!obj.isNull());
      // The object is stored into self before any of its properties are
      // read. tvSet() is used with obj.get() because `obj` keeps its own
      // reference for the rest of this block.
      tvSet(make_tv<KindOfObject>(obj.get()), self);

      // NOTE(review): everything inside this block, including
      // verifyPropTypeHints() and the native data wakeup, is skipped when
      // no properties remain -- confirm that this is intended.
      if (remainingProps > 0) {
        // Check stack depth to avoid overflow.
        check_recursion_throw();

        if (type == 'O') {
          // Collections are not allowed
          if (obj->isCollection()) {
            throwInvalidOFormat(clsName);
          }

          Variant serializedNativeData = init_null();
          bool hasSerializedNativeData = false;
          bool checkRepoAuthType =
            RuntimeOption::RepoAuthoritative &&
            RepoFile::globalData().HardPrivatePropInference;
          Class* objCls = obj->getVMClass();
          // Try fast case.
          // It is only attempted when the input holds at least as many
          // properties as the class declares, not counting the reified
          // generics property.
          if (remainingProps >= objCls->numDeclProperties() -
                                (objCls->hasReifiedGenerics() ? 1 : 0)) {
            auto mismatch = false;
            auto const objProps = obj->props();

            auto const declProps = objCls->declProperties();
            // Walk the declared properties in serialization order and
            // require each mangled name to appear next in the input. The
            // first name that does not match abandons the fast path.
            for (auto const& p : declProps) {
              auto slot = p.serializationIdx;
              auto index = objCls->propSlotToIndex(slot);
              auto const& prop = declProps[slot];
              if (prop.name == s_86reified_prop.get()) continue;
              if (!matchString(prop.mangledName->slice())) {
                mismatch = true;
                break;
              }

              // don't need to worry about overwritten list, because
              // this is definitely the first time we're setting this
              // property.
              auto const t = objProps->at(index);
              unserializePropertyValue(t, remainingProps--);

              if (UNLIKELY(checkRepoAuthType &&
                           !tvMatchesRepoAuthType(*t, prop.repoAuthType))) {
                throwUnexpectedType(prop.name, obj.get(), *t);
              }
            }
            // If everything matched, all remaining properties are dynamic.
            if (!mismatch && remainingProps > 0) {
              // the dynPropTable can be mutated while we're deserializing
              // the contents of this object's prop array. Don't hold a
              // reference to this object's entry in the table while looping.
              obj->reserveDynProps(remainingProps);
              while (remainingProps > 0) {
                // Property names are read in Key mode and then converted
                // to a string.
                Variant v;
                unserializeVariant(v.asTypedValue(), UnserializeMode::Key);
                String key = v.toString();
                if (key == s_serializedNativeDataKey) {
                  // Reserved key: its value is kept aside for the native
                  // data wakeup instead of becoming a property.
                  unserializePropertyValue(serializedNativeData.asTypedValue(),
                                           remainingProps--);
                  hasSerializedNativeData = true;
                } else {
                  auto kdata = key.data();
                  // A leading NUL byte marks a mangled private or protected
                  // name. The size check below implies that an empty key
                  // is also expected to reach this branch.
                  if (kdata[0] == '\0') {
                    auto ksize = key.size();
                    if (UNLIKELY(ksize == 0)) {
                      raise_error("Cannot access empty property");
                    }
                    // private or protected
                    // subLen is the length of the prefix: the leading NUL,
                    // the text up to the next NUL, and that NUL itself.
                    // subLen == ksize means nothing follows the prefix, so
                    // the property name is empty. subLen > ksize presumably
                    // means the key has no closing NUL of its own.
                    auto subLen = strlen(folly::launder(kdata) + 1) + 2;
                    if (UNLIKELY(subLen >= ksize)) {
                      if (subLen == ksize) {
                        raise_error("Cannot access empty property");
                      } else {
                        throwMangledPrivateProperty();
                      }
                    }
                  }
                  auto const lval = obj->makeDynProp(key.get());
                  unserializePropertyValue(lval, remainingProps--);
                }
              }
            }
          }
          // Anything the fast path did not consume (it was skipped or hit
          // a name mismatch) is handed to the generic slow path.
          if (remainingProps > 0) {
            INC_TPC(unser_prop_slow);
            unserializeRemainingProps(obj, remainingProps,
                                      serializedNativeData,
                                      hasSerializedNativeData);
            remainingProps = 0;
          } else {
            INC_TPC(unser_prop_fast);
          }

          // Verify that all the unserialized properties satisfy their
          // type-hints. It's safe to do it like this (after we've set the
          // values in the properties) because this object hasn't escaped to
          // the outside world yet.
          obj->verifyPropTypeHints();

          // nativeDataWakeup is called last to ensure that all properties are
          // already unserialized. We also ensure that nativeDataWakeup is
          // invoked regardless of whether or not serialized native data exists
          // within the serialized content.
          if (obj->hasNativeData() &&
              obj->getVMClass()->getNativeDataInfo()->isSerializable()) {
            Native::nativeDataWakeup(obj.get(), serializedNativeData);
          } else if (hasSerializedNativeData) {
            raise_warning("%s does not expect any serialized native data.",
                          clsName.data());
          }
        } else {
          assertx(type == 'V' || type == 'K');
          if (!obj->isCollection()) {
            throwNotCollection(clsName);
          }
          // Receives the original `size`, not remainingProps.
          unserializeCollection(obj.get(), size, type);
        }
      }
      expectChar('}');

      // The object is queued with addSleepingObject() only when its class
      // has a __wakeup method.
      if (cls &&
          cls->lookupMethod(s___wakeup.get()) &&
          (this->type() != VariableUnserializer::Type::DebuggerSerialize ||
           (cls->instanceCtor() && cls->isCppSerializable()))) {
        // Don't call wakeup when unserializing for the debugger, except for
        // natively implemented classes.
        addSleepingObject(obj);
      }

      check_non_safepoint_surprise();
    }
    return; // object has '}' terminating
  // Custom-serialized object: the class name, ':', then a payload string
  // delimited by '{' and '}' that is passed to the object's unserialize
  // method.
  case 'C':
    {
      if (this->type() == VariableUnserializer::Type::DebuggerSerialize) {
        raise_error("Debugger shouldn't call custom unserialize method");
      }
      String clsName = unserializeString();

      expectChar(':');
      String serialized = unserializeString('{', '}');

      // Builds the target object: an instance of the named class when it
      // is allowed and can be loaded, otherwise a __PHP_Incomplete_Class
      // that keeps the class name and the raw payload.
      auto obj = [&]() -> Object {
        if (whitelistCheck(clsName)) {
          // Try loading without the autoloader first
          auto cls = Class::get(clsName.get(), /* autoload */ false);
          if (!cls) {
            if (!is_valid_class_name(clsName.slice())) {
              throwInvalidClassName();
            }
            cls = Class::load(clsName.get());
          }
          if (cls) {
            return Object::attach(g_context->createObject(cls, init_null_variant,
                                                          false /* init */));
          }
        }
        if (!allowUnknownSerializableClass()) {
          raise_error("unknown class %s", clsName.data());
        }
        warnOrThrowUnknownClass(clsName);
        Object ret = create_object_only(s_PHP_Incomplete_Class);
        ret->setProp(nullptr, s_PHP_Incomplete_Class_Name.get(),
                     clsName.asTypedValue());
        ret->setProp(nullptr, s_serialized.get(), serialized.asTypedValue());
        return ret;
      }();

      // Only instances of the Serializable class get their unserialize
      // method invoked. Anything else only produces a warning, and the
      // object is still stored into self below.
      if (!obj->instanceof(SystemLib::s_SerializableClass)) {
        raise_warning("Class %s has no unserializer",
                      obj->getClassName().data());
      } else {
        obj->o_invoke_few_args(s_unserialize, RuntimeCoeffects::fixme(), 1, serialized);
      }

      tvMove(make_tv<KindOfObject>(obj.detach()), self);
    }
    return; // object has '}' terminating
  default:
    throwUnknownType(type);
  }
  // Shared terminator for every case that left the switch with `break`.
  expectChar(';');
}