lib/VM/IdentifierTable.cpp (454 lines of code) (raw):

/* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #define DEBUG_TYPE "identtable" #include "hermes/VM/IdentifierTable.h" #include "hermes/Support/UTF8.h" #include "hermes/VM/CallResult.h" #include "hermes/VM/StringBuilder.h" #include "hermes/VM/StringPrimitive.h" #include "hermes/VM/StringView.h" #include "llvh/Support/Debug.h" namespace hermes { namespace vm { IdentifierTable::LookupEntry::LookupEntry( StringPrimitive *str, bool isNotUniqued) : strPrim_(str), isUTF16_(false), isNotUniqued_(isNotUniqued), num_(NON_LAZY_STRING_PRIM_TAG) { assert(str && "Invalid string primitive pointer"); llvh::SmallVector<char16_t, 32> storage{}; str->appendUTF16String(storage); hash_ = hermes::hashString(llvh::ArrayRef<char16_t>(storage)); } IdentifierTable::IdentifierTable() { hashTable_.setIdentifierTable(this); } CallResult<Handle<SymbolID>> IdentifierTable::getSymbolHandle( Runtime &runtime, UTF16Ref str, uint32_t hash) { auto cr = getOrCreateIdentifier( runtime, str, Runtime::makeNullHandle<StringPrimitive>(), hash); if (LLVM_UNLIKELY(cr == ExecutionStatus::EXCEPTION)) return ExecutionStatus::EXCEPTION; return runtime.makeHandle(*cr); } CallResult<Handle<SymbolID>> IdentifierTable::getSymbolHandle( Runtime &runtime, ASCIIRef str, uint32_t hash) { auto cr = getOrCreateIdentifier( runtime, str, Runtime::makeNullHandle<StringPrimitive>(), hash); if (LLVM_UNLIKELY(cr == ExecutionStatus::EXCEPTION)) return ExecutionStatus::EXCEPTION; return runtime.makeHandle(*cr); } SymbolID IdentifierTable::registerLazyIdentifier(ASCIIRef str) { return registerLazyIdentifierImpl(str, hermes::hashString(str)); } SymbolID IdentifierTable::registerLazyIdentifier(ASCIIRef str, uint32_t hash) { return registerLazyIdentifierImpl(str, hash); } SymbolID IdentifierTable::registerLazyIdentifier(UTF16Ref str) { return registerLazyIdentifierImpl(str, hermes::hashString(str)); } SymbolID IdentifierTable::registerLazyIdentifier(UTF16Ref str, uint32_t hash) { return registerLazyIdentifierImpl(str, hash); } CallResult<Handle<SymbolID>> IdentifierTable::getSymbolHandleFromPrimitive( Runtime &runtime, PseudoHandle<StringPrimitive> str) { assert(str && "null string primitive"); if (str->isUniqued()) { // If the string was already uniqued, we can return directly. SymbolID id = str->getUniqueID(); symbolReadBarrier(id.unsafeGetIndex()); return runtime.makeHandle(id); } auto handle = runtime.makeHandle(std::move(str)); // Force the string primitive to flatten if it's a rope. handle = StringPrimitive::ensureFlat(runtime, handle); auto cr = handle->isASCII() ? getOrCreateIdentifier(runtime, handle->castToASCIIRef(), handle) : getOrCreateIdentifier(runtime, handle->castToUTF16Ref(), handle); if (LLVM_UNLIKELY(cr == ExecutionStatus::EXCEPTION)) return ExecutionStatus::EXCEPTION; return runtime.makeHandle(*cr); } StringPrimitive *IdentifierTable::getStringPrim(Runtime &runtime, SymbolID id) { auto &entry = getLookupTableEntry(id); if (entry.isStringPrim()) { return entry.getStringPrimRef(); } // This is a lazy identifier, since a string primitive is requested, we must // materialize it. return materializeLazyIdentifier(runtime, id); } StringView IdentifierTable::getStringView(Runtime &runtime, SymbolID id) const { auto &entry = getLookupTableEntry(id); if (entry.isStringPrim()) { // The const_cast is a mechanical requirement as it's not worth it to // add const version constructors for Handle. Handle<StringPrimitive> handle{ runtime, const_cast<StringPrimitive *>(entry.getStringPrim())}; // We know that this string already exists in the identifier table, // and hence it's safe to call the const version of getStringView. return StringPrimitive::createStringViewMustBeFlat(handle); } if (entry.isLazyASCII()) { return StringView(entry.getLazyASCIIRef()); } return StringView(entry.getLazyUTF16Ref()); } StringView IdentifierTable::getStringViewForDev(Runtime &runtime, SymbolID id) const { if (id == SymbolID::empty()) { assert(false && "getStringViewForDev on empty SymbolID"); return "<<empty>>"; } if (id == SymbolID::deleted()) { assert(false && "getStringViewForDev on deleted SymbolID"); return "<<deleted>>"; } if (id.isInvalid()) { assert(false && "getStringViewForDev on invalid SymbolID"); return "<<invalid>>"; } return getStringView(runtime, id); } std::string IdentifierTable::convertSymbolToUTF8(SymbolID id) { auto &vectorEntry = getLookupTableEntry(id); if (vectorEntry.isStringPrim()) { const StringPrimitive *stringPrim = vectorEntry.getStringPrim(); llvh::SmallVector<char16_t, 16> tmp; stringPrim->appendUTF16String(tmp); std::string out; convertUTF16ToUTF8WithReplacements(out, UTF16Ref{tmp}); return out; } else if (vectorEntry.isLazyASCII()) { auto asciiRef = vectorEntry.getLazyASCIIRef(); return std::string{asciiRef.begin(), asciiRef.end()}; } else if (vectorEntry.isLazyUTF16()) { auto ref = vectorEntry.getLazyUTF16Ref(); std::string out; convertUTF16ToUTF8WithReplacements(out, ref); return out; } else { llvm_unreachable("Invalid symbol given"); } // To avoid compiler warnings. return ""; } void IdentifierTable::markIdentifiers(RootAcceptor &acceptor, GC *gc) { for (auto &vectorEntry : lookupVector_) { if (!vectorEntry.isFreeSlot() && vectorEntry.isStringPrim()) { assert( !gc->inYoungGen(vectorEntry.getStringPrimRef()) && "Identifiers must be allocated in the old gen"); acceptor.acceptPtr(vectorEntry.getStringPrimRef()); } } } void IdentifierTable::snapshotAddNodes(HeapSnapshot &snap) { snap.beginNode(); snap.endNode( HeapSnapshot::NodeType::Native, "std::vector<LookupEntry>", GCBase::IDTracker::reserved( GCBase::IDTracker::ReservedObjectID::IdentifierTableLookupVector), lookupVector_.capacity() * sizeof(LookupEntry), 0); snap.beginNode(); snap.endNode( HeapSnapshot::NodeType::Native, "CompactArray", GCBase::IDTracker::reserved( GCBase::IDTracker::ReservedObjectID::IdentifierTableHashTable), hashTable_.additionalMemorySize(), 0); snap.beginNode(); snap.endNode( HeapSnapshot::NodeType::Native, "BitVector", GCBase::IDTracker::reserved( GCBase::IDTracker::ReservedObjectID::IdentifierTableMarkedSymbols), markedSymbols_.getMemorySize(), 0); } void IdentifierTable::snapshotAddEdges(HeapSnapshot &snap) { snap.addNamedEdge( HeapSnapshot::EdgeType::Internal, "lookupVector", GCBase::IDTracker::reserved( GCBase::IDTracker::ReservedObjectID::IdentifierTableLookupVector)); snap.addNamedEdge( HeapSnapshot::EdgeType::Internal, "hashTable", GCBase::IDTracker::reserved( GCBase::IDTracker::ReservedObjectID::IdentifierTableHashTable)); snap.addNamedEdge( HeapSnapshot::EdgeType::Internal, "markedSymbols", GCBase::IDTracker::reserved( GCBase::IDTracker::ReservedObjectID::IdentifierTableMarkedSymbols)); } void IdentifierTable::visitIdentifiers( const std::function<void(SymbolID, const StringPrimitive *)> &acceptor) { for (uint32_t i = 0; i < getSymbolsEnd(); ++i) { auto &vectorEntry = getLookupTableEntry(i); if (!vectorEntry.isFreeSlot()) { const StringPrimitive *str = vectorEntry.isStringPrim() ? vectorEntry.getStringPrim() : nullptr; acceptor(SymbolID::unsafeCreate(i), str); } } } template <typename T, bool Unique> CallResult<PseudoHandle<StringPrimitive>> IdentifierTable::allocateDynamicString( Runtime &runtime, llvh::ArrayRef<T> str, Handle<StringPrimitive> primHandle) { size_t length = str.size(); assert( (!primHandle || primHandle->isFlat()) && "StringPrimitive must be flat"); GCScope gcScope(runtime); PseudoHandle<StringPrimitive> result; if (StringPrimitive::isExternalLength(length)) { if (LLVM_UNLIKELY(length > StringPrimitive::MAX_STRING_LENGTH)) { return runtime.raiseRangeError("String length exceeds limit"); } std::basic_string<T> stdString(str.begin(), str.end()); auto cr = ExternalStringPrimitive<T>::createLongLived( runtime, std::move(stdString)); if (LLVM_UNLIKELY(cr == ExecutionStatus::EXCEPTION)) { return ExecutionStatus::EXCEPTION; } result = createPseudoHandle(vmcast<StringPrimitive>(*cr)); } else { auto *tmp = runtime.makeAVariable< DynamicStringPrimitive<T, Unique>, HasFinalizer::No, LongLived::Yes>( DynamicStringPrimitive<T, Unique>::allocationSize((uint32_t)length), length); // Since we keep a raw pointer to mem, no more JS heap allocations after // this point. NoAllocScope _(runtime); if (primHandle) { str = primHandle->getStringRef<T>(); } std::copy(str.begin(), str.end(), tmp->getRawPointerForWrite()); result = createPseudoHandle<StringPrimitive>(tmp); } return result; } void IdentifierTable::symbolReadBarrier(uint32_t id) { // Set the mark bool inside the symbol table entry so that this symbol isn't // garbage collected. // The reason this exists is that a Symbol can be retrieved via a string hash // that doesn't otherwise keep the symbol alive while in the middle of a GC. markedSymbols_.set(id); } uint32_t IdentifierTable::allocIDAndInsert( uint32_t hashTableIndex, StringPrimitive *strPrim) { uint32_t nextId = allocNextID(); SymbolID symbolId = SymbolID::unsafeCreate(nextId); assert(lookupVector_[nextId].isFreeSlot() && "Allocated a non-free slot"); strPrim->convertToUniqued(symbolId); // We must assign strPrim to the lookupVector before inserting to // hashTable_, because inserting to hashTable_ could trigger a grow/rehash, // which requires accessing the newly inserted string primitive. new (&lookupVector_[nextId]) LookupEntry(strPrim); hashTable_.insert(hashTableIndex, symbolId); LLVM_DEBUG( llvh::dbgs() << "allocated symbol " << nextId << " '" << strPrim << "'\n"); return nextId; } template <typename T> CallResult<SymbolID> IdentifierTable::getOrCreateIdentifier( Runtime &runtime, llvh::ArrayRef<T> str, Handle<StringPrimitive> maybeIncomingPrimHandle, uint32_t hash) { assert( !(maybeIncomingPrimHandle && maybeIncomingPrimHandle->isUniqued()) && "Should not call getOrCreateIdentifier with a uniqued StrPrim"); assert( (!maybeIncomingPrimHandle || maybeIncomingPrimHandle->isFlat()) && "StringPrimitive must be flat"); auto idx = hashTable_.lookupString(str, hash); if (hashTable_.isValid(idx)) { NoAllocScope scope{runtime}; const auto id = hashTable_.get(idx); // Read barrier here because a symbol value is getting read out of the hash // map. symbolReadBarrier(id); return SymbolID::unsafeCreate(id); } // It is tempting here to check whether the incoming StringPrimitive can be // uniqued, and use it instead of allocating a new one. The problem is that // identifiers must always be allocated in "long-lived" memory, but we don't // (yet) have a way of checking whether that is the case. If in the future we // did get that GC API, the check would look something like this: // \code // if (maybeIncomingPrimHandle && // LLVM_UNLIKELY(maybeIncomingPrimHandle->canBeUniqued()) && // runtime.getHeap().isLongLived(*maybeIncomingPrimHandle)) { // return SymbolID::unsafeCreate( // allocIDAndInsert(idx, maybeIncomingPrimHandle.get())); // } // \endcode CallResult<PseudoHandle<StringPrimitive>> cr = allocateDynamicString(runtime, str, maybeIncomingPrimHandle); if (cr == ExecutionStatus::EXCEPTION) { return ExecutionStatus::EXCEPTION; } // Allocate the id after we have performed memory allocations because a GC // would have freed id. return SymbolID::unsafeCreate(allocIDAndInsert(idx, cr->get())); } StringPrimitive *IdentifierTable::getExistingStringPrimitiveOrNull( Runtime &runtime, llvh::ArrayRef<char16_t> str) { auto idx = hashTable_.lookupString(str, hashString(str)); if (!hashTable_.isValid(idx)) { return nullptr; } // Use a handle since getStringPrim may need to materialize the string. const auto id = hashTable_.get(idx); symbolReadBarrier(id); Handle<SymbolID> sym(runtime, SymbolID::unsafeCreate(id)); return getStringPrim(runtime, *sym); } template <typename T> SymbolID IdentifierTable::registerLazyIdentifierImpl( llvh::ArrayRef<T> str, uint32_t hash) { auto idx = hashTable_.lookupString(str, hash); if (hashTable_.isValid(idx)) { // If the string is already in the table, return it. const auto id = hashTable_.get(idx); symbolReadBarrier(id); return SymbolID::unsafeCreate(id); } uint32_t nextId = allocNextID(); SymbolID symbolId = SymbolID::unsafeCreate(nextId); assert(lookupVector_[nextId].isFreeSlot() && "Allocated a non-free slot"); new (&lookupVector_[nextId]) LookupEntry(str, hash); hashTable_.insert(idx, symbolId); LLVM_DEBUG( llvh::dbgs() << "Allocated lazy identifier: " << nextId << " " << str << "\n"); return symbolId; } StringPrimitive *IdentifierTable::materializeLazyIdentifier( Runtime &runtime, SymbolID id) { auto &entry = getLookupTableEntry(id); assert( (entry.isLazyASCII() || entry.isLazyUTF16()) && "identifier is not lazy"); // This in theory can throw if running out of memory. However there are only // finite number of persistent identifiers, and we should always have enough // memory to hold them. PseudoHandle<StringPrimitive> strPrim = runtime.ignoreAllocationFailure( entry.isLazyASCII() ? allocateDynamicString( runtime, entry.getLazyASCIIRef(), Runtime::makeNullHandle<StringPrimitive>()) : allocateDynamicString( runtime, entry.getLazyUTF16Ref(), Runtime::makeNullHandle<StringPrimitive>())); if (id.isUniqued()) strPrim->convertToUniqued(id); LLVM_DEBUG(llvh::dbgs() << "Materializing lazy identifier " << id << "\n"); entry.materialize(strPrim.get()); return strPrim.get(); } void IdentifierTable::freeSymbol(uint32_t index) { assert(index < lookupVector_.size() && "Invalid symbol index to free"); assert( lookupVector_[index].isNonLazyStringPrim() && "The specified symbol cannot be freed"); LLVM_DEBUG( llvh::dbgs() << "Freeing symbol index " << index << " '" << lookupVector_[index].getStringPrim() << "'\n"); if (LLVM_LIKELY(!lookupVector_[index].isNotUniqued())) { auto *strPrim = const_cast<StringPrimitive *>(lookupVector_[index].getStringPrim()); strPrim->clearUniquedBit(); hashTable_.remove(strPrim); } freeID(index); } uint32_t IdentifierTable::allocNextID() { // If the free list is empty, grow the array. if (firstFreeID_ == LookupEntry::FREE_LIST_END) { uint32_t newID = lookupVector_.size(); if (LLVM_UNLIKELY(newID > LookupEntry::MAX_IDENTIFIER)) { hermes_fatal("Failed to allocate Identifier: IdentifierTable is full"); } lookupVector_.emplace_back(); markedSymbols_.push_back(true); LLVM_DEBUG( llvh::dbgs() << "Allocated new symbol id at end " << newID << "\n"); // Don't need to tell the GC about this ID, it will assume any growth is // live. return newID; } // Allocate from the free list. uint32_t nextId = firstFreeID_; auto &entry = getLookupTableEntry(nextId); assert(entry.isFreeSlot() && "firstFreeID_ is not a free slot"); firstFreeID_ = entry.getNextFreeSlot(); markedSymbols_.set(nextId); LLVM_DEBUG(llvh::dbgs() << "Allocated freed symbol id " << nextId << "\n"); return nextId; } void IdentifierTable::freeID(uint32_t id) { assert( lookupVector_[id].isStringPrim() && "The specified symbol cannot be freed"); // Add the freed index to the free list. lookupVector_[id].free(firstFreeID_); firstFreeID_ = id; LLVM_DEBUG(llvh::dbgs() << "Freeing ID " << id << "\n"); } void IdentifierTable::unmarkSymbols() { markedSymbols_.reset(); } void IdentifierTable::freeUnmarkedSymbols( const llvh::BitVector &markedSymbols, GC::IDTracker &tracker) { assert( markedSymbols.size() <= lookupVector_.size() && "Size of markedSymbols must be less than the current lookupVector"); assert( markedSymbols_.size() == lookupVector_.size() && "Size of markedSymbols_ must be the same as the lookupVector"); markedSymbols_ |= markedSymbols; // Flip and find set bits, which will correspond to symbols that weren't // marked. markedSymbols_.flip(); const bool isTrackingIDs = tracker.isTrackingIDs(); const uint32_t markedSymbolsSize = markedSymbols.size(); for (const uint32_t i : markedSymbols_.set_bits()) { // Don't check any bits after the passed-in bits, which represent the number // of symbols alive at the start of the collection. if (i >= markedSymbolsSize) { break; } // We never free StringPrimitives that are materialized from a lazy // identifier. if (lookupVector_[i].isNonLazyStringPrim()) { if (isTrackingIDs) { tracker.untrackSymbol(i); } freeSymbol(i); } } markedSymbols_.reset(); } #ifdef HERMES_SLOW_DEBUG bool IdentifierTable::isSymbolLive(SymbolID id) const { auto &entry = getLookupTableEntry(id); // If the entry is not a free slot, then it is live. return !entry.isFreeSlot(); } const StringPrimitive *IdentifierTable::getStringForSymbol(SymbolID id) const { auto &entry = getLookupTableEntry(id); if (entry.isStringPrim()) { return entry.getStringPrim(); } return nullptr; } #endif SymbolID IdentifierTable::createNotUniquedLazySymbol(ASCIIRef desc) { uint32_t nextID = allocNextID(); new (&lookupVector_[nextID]) LookupEntry(desc, 0, true); return SymbolID::unsafeCreateNotUniqued(nextID); } CallResult<SymbolID> IdentifierTable::createNotUniquedSymbol( Runtime &runtime, Handle<StringPrimitive> desc) { uint32_t nextID = allocNextID(); if (runtime.getHeap().inYoungGen(desc.get())) { // Need to reallocate in the old gen if the description is in the young gen. CallResult<PseudoHandle<StringPrimitive>> longLivedStr = desc->isASCII() ? allocateDynamicString<char, /* Unique */ false>( runtime, desc->castToASCIIRef(), desc) : allocateDynamicString<char16_t, /* Unique */ false>( runtime, desc->castToUTF16Ref(), desc); // Since we keep a raw pointer to mem, no more JS heap allocations after // this point. NoAllocScope _(runtime); if (LLVM_UNLIKELY(longLivedStr == ExecutionStatus::EXCEPTION)) { return ExecutionStatus::EXCEPTION; } new (&lookupVector_[nextID]) LookupEntry(longLivedStr->get(), true); } else { // Description is already in the old gen, just point to it. new (&lookupVector_[nextID]) LookupEntry(*desc, true); } return SymbolID::unsafeCreateNotUniqued(nextID); } llvh::raw_ostream &operator<<(llvh::raw_ostream &OS, SymbolID symbolID) { if (symbolID.isInvalid()) return OS << "SymbolID(INVALID)"; else if (symbolID == SymbolID::deleted()) return OS << "SymbolID(DELETED)"; else return OS << "SymbolID(" << (symbolID.isUniqued() ? "(Uniqued)" : "(Not Uniqued)") << symbolID.unsafeGetIndex() << ")"; } } // namespace vm } // namespace hermes #undef DEBUG_TYPE