hphp/runtime/base/variable-serializer.h (231 lines of code) (raw):
/*
+----------------------------------------------------------------------+
| HipHop for PHP |
+----------------------------------------------------------------------+
| Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
+----------------------------------------------------------------------+
| This source file is subject to version 3.01 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
| available through the world-wide-web at the following url: |
| http://www.php.net/license/3_01.txt |
| If you did not receive a copy of the PHP license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@php.net so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
*/
#pragma once
#include "hphp/runtime/base/req-hash-map.h"
#include "hphp/runtime/base/req-vector.h"
#include "hphp/runtime/base/runtime-option.h"
#include "hphp/runtime/base/string-buffer.h"
#include "hphp/runtime/base/string-data.h"
#include "hphp/runtime/base/tv-mutate.h"
#include "hphp/runtime/base/tv-variant.h"
#include "hphp/runtime/base/type-variant.h"
#include "hphp/runtime/vm/class.h"
#include "hphp/runtime/vm/class-meth-data-ref.h"
#include "hphp/util/rds-local.h"
namespace HPHP {
///////////////////////////////////////////////////////////////////////////////
/**
* Maintaining states during serialization of a variable. We use this single
* class to uniformly serialize variables according to different formats.
*/
struct VariableSerializer {
/**
* Supported formats.
*/
enum class Type {
PrintR, //print_r()
VarExport, //var_export()
VarDump, //var_dump()
DebugDump, //debug_zval_dump()
DebuggerDump, //used by hphp debugger to obtain user visible output
Serialize, // serialize()
Internal, // used internally by the compiler. No compatibility guarantees.
JSON, //json_encode()
APCSerialize, //used in APC serialization (controlled by switch)
DebuggerSerialize, //used by hphp debugger for client<->proxy communication
PHPOutput, //used by compiler to output scalar values into byte code
Last = PHPOutput,
};
/**
* Constructor and destructor.
*/
explicit VariableSerializer(Type type, int option = 0, int maxRecur = 3);
~VariableSerializer();
VariableSerializer(const VariableSerializer&) = delete;
VariableSerializer& operator=(const VariableSerializer&) = delete;
// Use UnlimitSerializationScope to suspend this temporarily.
struct SerializationLimitWrapper {
int64_t value = StringData::MaxSize;
};
static RDS_LOCAL(SerializationLimitWrapper, serializationSizeLimit);
/**
* Top level entry function called by f_ functions.
*/
String serialize(const_variant_ref v, bool ret, bool keepCount = false);
String serialize(const Variant& var, bool ret, bool keepCount = false) {
return serialize(const_variant_ref{var}, ret, keepCount);
}
String serializeValue(const Variant& v, bool limit);
// Serialize with limit size of output, always return the serialized string.
// It does not work with Serialize, JSON, APCSerialize, DebuggerSerialize.
String serializeWithLimit(const Variant& v, int limit);
// for ext_json
void setDepthLimit(size_t depthLimit) { m_maxDepth = depthLimit; }
// for ext_std_variable
void incMaxCount() { m_maxCount++; }
Type getType() const { return m_type; }
// By default, for Type::Serialize, d/varrays are serialized as normal
// arrays. This flag can override that behavior.
void keepDVArrays() { m_keepDVArrays = true; }
// Force Hack arrays to serialize as PHP arrays.
void setForcePHPArrays() { m_forcePHPArrays = true; }
// Force PHP arrays to serialize as Hack arrays. This mode is preferred.
// In this mode, all decisions are driven by types. E.g., for JSON:
// - varrays and vecs are encoded as lists: [...]
// - darrays and dicts are encoded as maps: {...}
void setForceHackArrays() { m_forceHackArrays = true; }
// Emit a HAC notice on serialization of the specified kind of array.
void setHackWarn() { m_hackWarn = true; }
void setDictWarn() { m_dictWarn = true; }
void setKeysetWarn() { m_keysetWarn = true; }
void setPHPWarn() { m_phpWarn = true; }
void setEmptyDArrayWarn() { m_edWarn = true; }
void setVecLikeDArrayWarn() { m_vdWarn = true; }
void setDictLikeDArrayWarn() { m_ddWarn = true; }
// ignore uninitialized late init props and do not attempt to serialize them
void setIgnoreLateInit() { m_ignoreLateInit = true; }
// Serialize legacy bit and provenance tag, using same format as
// Type::Internal serializer. This is only supported Type::Serialize.
void setSerializeProvenanceAndLegacy() {
assertx(getType() == Type::Serialize);
m_serializeProvenanceAndLegacy = true;
}
void setDisallowObjects() { m_disallowObjects = true; }
// Should we be calling the pure callbacks
void setPure() { m_pure = true; }
// MarkedVArray and MarkedDArray are used for serialization formats, which
// can distinguish between all 3 possible array states (unmarked varray,
// unmarked vec, marked varray/vec). Now corresponds to marked vec/dict.
enum class ArrayKind { PHP, Dict, Vec, Keyset, VArray, DArray,
MarkedVArray, MarkedDArray };
void setUnitFilename(const StringData* name) {
assertx(name->isStatic());
assertx(getType() == Type::Internal);
m_unitFilename = name;
}
private:
/**
* Type specialized output functions.
*/
void write(bool v);
void write(char v) { write((int64_t)v);}
void write(short v) { write((int64_t)v);}
void write(int v) { write((int64_t)v);}
void write(int64_t v);
void write(double v);
void write(const char *v, int len = -1, bool isArrayKey = false,
bool noQuotes = false);
void write(const String& v);
void write(const Object& v);
void write(const_variant_ref v, bool isArrayKey = false);
void writeNull();
// what to write if recursive level is over limit?
void writeOverflow(tv_rval tv);
void writeRefCount(); // for DebugDump only
void writeArrayHeader(int size, bool isVectorData, ArrayKind kind);
void writeArrayKey(const Variant& key, ArrayKind kind);
void writeArrayValue(
const Variant& value,
ArrayKind kind
);
void writeCollectionKey(
const Variant& key,
ArrayKind kind
);
void writeArrayFooter(ArrayKind kind);
void writeSerializableObject(const String& clsname, const String& serialized);
/**
* Helpers.
*/
void indent();
void setRefCount(RefCount count) { m_refCount = count; }
bool incNestedLevel(tv_rval tv);
void decNestedLevel(tv_rval tv);
void pushObjectInfo(const String& objClass, char objCode);
void popObjectInfo();
void pushResourceInfo(const String& rsrcName, int rsrcId);
void popResourceInfo();
ArrayKind getKind(const ArrayData* arr) const;
// The func parameter will be invoked only if there is no overflow.
// Otherwise, writeOverflow will be invoked instead.
void preventOverflow(const Object& v, const std::function<void()>& func);
void writePropertyKey(const String& prop);
// Serialize a Variant recursively.
// The last param noQuotes indicates to serializer to not put the output in
// double quotes (used when printing the output of a __toDebugDisplay() of
// an object when it is a string.
void serializeVariant(tv_rval value,
bool isArrayKey = false,
bool skipNestCheck = false,
bool noQuotes = false);
void serializeObject(const Object&);
void serializeObject(const ObjectData*);
void serializeObjectImpl(const ObjectData* obj);
void serializeCollection(ObjectData* obj);
void serializeObjProps(Array&);
void serializeArray(const ArrayData*, bool skipNestCheck = false);
void serializeArrayImpl(const ArrayData* arr, bool isVectorData);
void serializeResource(const ResourceData*);
void serializeResourceImpl(const ResourceData* res);
void serializeString(const String&);
void serializeRFunc(const RFuncData* func);
void serializeFunc(const Func* func);
void serializeClass(const Class* cls);
void serializeLazyClass(LazyClassData);
void serializeClsMeth(ClsMethDataRef clsMeth, bool skipNestCheck = false);
void serializeRClsMeth(RClsMethData* rclsMeth);
Array getSerializeProps(const ObjectData* obj) const;
private:
// Sentinel used to indicate that a member of SavedRefMap has a count but no
// ID.
static constexpr int NO_ID = -1;
struct SavedRefMap {
~SavedRefMap();
struct MapData {
int m_count{0};
int m_id{-1};
};
MapData& operator[](tv_rval tv) {
auto& elm = m_mapping[*tv];
if (!elm.m_count) tvIncRefGen(*tv);
return elm;
}
const MapData& operator[](tv_rval tv) const {
return m_mapping.at(*tv);
}
private:
struct TvHash {
std::size_t operator()(const TypedValue& tv) const {
return pointer_hash<void>()(tv.m_data.parr);
}
};
struct TvEq {
bool operator()(const TypedValue& a, const TypedValue& b) const {
return a.m_data.parr == b.m_data.parr;
}
};
req::fast_map<TypedValue, MapData, TvHash, TvEq> m_mapping;
};
private:
Type m_type;
int m_option; // type specific extra options
StringBuffer *m_buf{nullptr};
int m_indent{0};
SavedRefMap m_refs; // reference ids and counts for objs/arrays
int m_valueCount{0}; // current ref index
bool m_keepDVArrays; // serialize d/varrays as themselves or arrays
bool m_forcePHPArrays{false}; // serialize PHP and Hack arrays as PHP arrays
bool m_forceHackArrays{false}; // serialize PHP and Hack arrays as Hack arrays
bool m_serializeProvenanceAndLegacy{false}; // serialize provenance tags and
// legacy bit
bool m_hackWarn{false}; // warn when attempting on Hack arrays
bool m_dictWarn{false}; // warn when attempting on dicts
bool m_keysetWarn{false}; // warn when attempting on keysets
bool m_phpWarn{false}; // warn when attempting on PHP arrays
bool m_edWarn{false}; // warn when attempting on empty darrays
bool m_vdWarn{false}; // warn when attempting on vec-like darrays
bool m_ddWarn{false}; // warn when attempting on non-vec-like darrays
bool m_ignoreLateInit{false}; // ignore uninitalized late init props
bool m_disallowObjects{false}; // throw if serializing non-collection object
bool m_hasHackWarned{false}; // have we already warned on Hack arrays?
bool m_hasDictWarned{false}; // have we already warned on dicts?
bool m_hasKeysetWarned{false}; // have we already warned on dicts?
bool m_hasEDWarned{false}; // have we already warned on empty darrays?
bool m_hasVDWarned{false}; // have we already warned on vec-like darrays?
bool m_hasDDWarned{false}; // have we already warned on non-vec-like darrays?
bool m_pure{false}; // should we call the pure callbacks?
RefCount m_refCount{OneReference}; // current variable's reference count
String m_objClass; // for object serialization
char m_objCode{0}; // for object serialization
String m_rsrcName; // for resource serialization
int m_rsrcId{0}; // for resource serialization
int m_maxCount; // for max recursive levels
int m_levelDebugger{0}; // keep track of levels for DebuggerSerialize
int m_maxLevelDebugger{0}; // for max level of DebuggerSerialize
size_t m_currentDepth{0}; // current depth (nasted objects/arrays)
size_t m_maxDepth{0}; // max depth limit before an error (0 -> none)
bool m_keyPrinted{false};
struct ArrayInfo {
bool is_object; // nested arrays or objects
bool is_vector; // whether current array is a vector
bool first_element; // whether this is first array element
int indent_delta; // the extra indent to serialize this object
int size; // the number of elements in the array
};
req::vector<ArrayInfo> m_arrayInfos;
struct ObjectInfo {
String objClass;
char objCode;
String rsrcName;
int rsrcId;
};
req::vector<ObjectInfo> m_objectInfos;
/* unitFilename should be set when we are serializing
* an adata for a unit in the repo--it is needed to correctly
* compress the provenance tag */
const StringData* m_unitFilename{nullptr};
};
inline String internal_serialize(const Variant& v) {
VariableSerializer vs{VariableSerializer::Type::Internal};
return vs.serializeValue(v, false);
}
// TODO: Move to util/folly?
template<typename T> struct TmpAssign {
TmpAssign(T& v, const T tmp) : cur(v), save(cur) { cur = tmp; }
~TmpAssign() { cur = save; }
T& cur;
const T save;
};
struct UnlimitSerializationScope {
static constexpr int32_t kTmpLimit = StringData::MaxSize;
TmpAssign<int64_t> v{VariableSerializer::serializationSizeLimit->value,
kTmpLimit};
TmpAssign<int64_t> rs{RuntimeOption::SerializationSizeLimit, kTmpLimit};
TmpAssign<int32_t> rm{RuntimeOption::MaxSerializedStringSize, kTmpLimit};
};
extern const StaticString s_serializedNativeDataKey;
///////////////////////////////////////////////////////////////////////////////
}