lib/VM/RuntimeModule.cpp (378 lines of code) (raw):

/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include "hermes/VM/RuntimeModule.h"

#include "hermes/BCGen/HBC/BytecodeProviderFromSrc.h"
#include "hermes/Support/PerfSection.h"
#include "hermes/VM/CodeBlock.h"
#include "hermes/VM/Domain.h"
#include "hermes/VM/HiddenClass.h"
#include "hermes/VM/Predefined.h"
#include "hermes/VM/Runtime.h"
#include "hermes/VM/RuntimeModule-inline.h"
#include "hermes/VM/StringPrimitive.h"

namespace hermes {
namespace vm {

// Construct a module and register it with both \p runtime and \p domain.
// Outside of lean builds the module starts out as its own lazy root.
RuntimeModule::RuntimeModule(
    Runtime &runtime,
    Handle<Domain> domain,
    RuntimeModuleFlags flags,
    llvh::StringRef sourceURL,
    facebook::hermes::debugger::ScriptID scriptID)
    : runtime_(runtime),
      domain_(&runtime.getHeap(), domain),
      flags_(flags),
      sourceURL_(sourceURL),
      scriptID_(scriptID) {
  runtime_.addRuntimeModule(this);
  Domain::addRuntimeModule(domain, runtime, this);
#ifndef HERMESVM_LEAN
  lazyRoot_ = this;
#endif
}

// Create the SymbolID for the string table entry \p entry and record it in
// stringIDMap_ under \p stringID.
// \param mhash optional pre-computed hash of the string; when absent the hash
//   is computed here with hashString().
// \return the SymbolID produced by mapStringMayAllocate().
SymbolID RuntimeModule::createSymbolFromStringIDMayAllocate(
    StringID stringID,
    const StringTableEntry &entry,
    OptValue<uint32_t> mhash) {
  // Use manual pointer arithmetic to avoid out of bounds errors on empty
  // string accesses.
  auto strStorage = bcProvider_->getStringStorage();
  if (entry.isUTF16()) {
    const char16_t *s =
        (const char16_t *)(strStorage.begin() + entry.getOffset());
    UTF16Ref str{s, entry.getLength()};
    uint32_t hash = mhash ? *mhash : hashString(str);
    return mapStringMayAllocate(str, stringID, hash);
  } else {
    // ASCII.
    const char *s = (const char *)strStorage.begin() + entry.getOffset();
    ASCIIRef str{s, entry.getLength()};
    uint32_t hash = mhash ? *mhash : hashString(str);
    return mapStringMayAllocate(str, stringID, hash);
  }
}

// Unregister this module (and a buffer-backed bytecode provider, if any) from
// the crash manager, remove it from the runtime, and delete the CodeBlocks
// that report this module as their RuntimeModule.
RuntimeModule::~RuntimeModule() {
  if (bcProvider_ && !bcProvider_->getRawBuffer().empty())
    runtime_.getCrashManager().unregisterMemory(bcProvider_.get());
  runtime_.getCrashManager().unregisterMemory(this);
  runtime_.removeRuntimeModule(this);

  // We may reference other CodeBlocks through lazy compilation, but we only
  // own the ones that reference us.
  for (auto *block : functionMap_) {
    if (block != nullptr && block->getRuntimeModule() == this) {
      runtime_.getHeap().getIDTracker().untrackNative(block);
      delete block;
    }
  }
  runtime_.getHeap().getIDTracker().untrackNative(&functionMap_);
}

// Null out every functionMap_ entry whose CodeBlock belongs to a different
// RuntimeModule, leaving only the entries this module deletes in its
// destructor.
void RuntimeModule::prepareForRuntimeShutdown() {
  for (auto &block : functionMap_) {
    if (block != nullptr && block->getRuntimeModule() != this) {
      block = nullptr;
    }
  }
}

// Allocate a RuntimeModule, register it with the crash manager and, when
// \p bytecode is non-null, initialize it from that bytecode.
// \return the new module, or ExecutionStatus::EXCEPTION if initialization
//   fails.
CallResult<RuntimeModule *> RuntimeModule::create(
    Runtime &runtime,
    Handle<Domain> domain,
    facebook::hermes::debugger::ScriptID scriptID,
    std::shared_ptr<hbc::BCProvider> &&bytecode,
    RuntimeModuleFlags flags,
    llvh::StringRef sourceURL) {
  RuntimeModule *result;
  {
    // The constructor runs while the heap's weak reference mutex is held.
    WeakRefLock lk{runtime.getHeap().weakRefMutex()};
    result = new RuntimeModule(runtime, domain, flags, sourceURL, scriptID);
  }
  runtime.getCrashManager().registerMemory(result, sizeof(*result));
  if (bytecode) {
    if (result->initializeMayAllocate(std::move(bytecode)) ==
        ExecutionStatus::EXCEPTION) {
      // NOTE(review): the module is not deleted here; presumably the Domain
      // it was added to in the constructor is responsible for it - confirm.
      return ExecutionStatus::EXCEPTION;
    }
    // If the BC provider is backed by a buffer, register the BC provider struct
    // (but not the buffer contents, since that might be too large).
    if (result->bcProvider_ && !result->bcProvider_->getRawBuffer().empty())
      runtime.getCrashManager().registerMemory(
          result->bcProvider_.get(), sizeof(hbc::BCProviderFromBuffer));
  }
  return result;
}

// Allocate a RuntimeModule with an empty source URL and no bytecode attached.
// The constructor runs while the heap's weak reference mutex is held.
RuntimeModule *RuntimeModule::createUninitialized(
    Runtime &runtime,
    Handle<Domain> domain,
    RuntimeModuleFlags flags,
    facebook::hermes::debugger::ScriptID scriptID) {
  WeakRefLock lk{runtime.getHeap().weakRefMutex()};
  return new RuntimeModule(runtime, domain, flags, "", scriptID);
}

// Take ownership of \p bytecode, then build the string ID map and size the
// function map from it. The CJS module table is not imported.
void RuntimeModule::initializeWithoutCJSModulesMayAllocate(
    std::shared_ptr<hbc::BCProvider> &&bytecode) {
  assert(!bcProvider_ && "RuntimeModule already initialized");
  bcProvider_ = std::move(bytecode);
  importStringIDMapMayAllocate();
  initializeFunctionMap();
}

// Fully initialize the module from \p bytecode, including the CJS module
// table.
// \return ExecutionStatus::EXCEPTION if importing the CJS module table fails,
//   ExecutionStatus::RETURNED otherwise.
ExecutionStatus RuntimeModule::initializeMayAllocate(
    std::shared_ptr<hbc::BCProvider> &&bytecode) {
  initializeWithoutCJSModulesMayAllocate(std::move(bytecode));
  if (LLVM_UNLIKELY(importCJSModuleTable() == ExecutionStatus::EXCEPTION)) {
    return ExecutionStatus::EXCEPTION;
  }
  return ExecutionStatus::RETURNED;
}

// Create the CodeBlock for function \p index and store it in functionMap_.
// Outside of lean builds, a function the provider reports as lazy gets its
// own lazy RuntimeModule, and that module's single CodeBlock is stored here.
// \return the newly stored CodeBlock.
CodeBlock *RuntimeModule::getCodeBlockSlowPath(unsigned index) {
  assert(index < functionMap_.size() && "Function index out of range");
#ifndef HERMESVM_LEAN
  if (bcProvider_->isFunctionLazy(index)) {
    auto *lazyModule = RuntimeModule::createLazyModule(
        runtime_, getDomain(runtime_), this, index);
    functionMap_[index] = lazyModule->getOnlyLazyCodeBlock();
    return functionMap_[index];
  }
#endif
  functionMap_[index] = CodeBlock::createCodeBlock(
      this,
      bcProvider_->getFunctionHeader(index),
      bcProvider_->getBytecode(index),
      index);
  return functionMap_[index];
}

#ifndef HERMESVM_LEAN
// Create an uninitialized module standing in for the not-yet-compiled function
// \p functionID of \p parent. The new module shares the parent's lazy root and
// that root's script ID, and holds one CodeBlock and one symbol (the function
// name).
RuntimeModule *RuntimeModule::createLazyModule(
    Runtime &runtime,
    Handle<Domain> domain,
    RuntimeModule *parent,
    uint32_t functionID) {
  auto RM = createUninitialized(runtime, domain);
  RM->lazyRoot_ = parent->lazyRoot_;

  // Copy the lazy root's script ID for lazy modules.
  RM->scriptID_ = RM->lazyRoot_->scriptID_;

  // Set the bcProvider's BytecodeModule to point to the parent's.
  assert(parent->isInitialized() && "Parent module must have been initialized");

  auto *bcFunction = &((hbc::BCProviderFromSrc *)parent->getBytecode())
                          ->getBytecodeModule()
                          ->getFunction(functionID);

  RM->bcProvider_ = hbc::BCProviderLazy::createBCProviderLazy(bcFunction);

  // We don't know which function index this block will eventually represent,
  // so just add it as 0 to ensure ownership. We'll move it later in
  // `initializeLazyMayAllocate`.
  RM->functionMap_.emplace_back(CodeBlock::createCodeBlock(
      RM, RM->bcProvider_->getFunctionHeader(functionID), {}, functionID));

  // The module doesn't have a string table until we've compiled the block,
  // so just add the string name as 0 in the mean time for f.name to work via
  // getLazyName(). Since it's in the stringIDMap_, it'll be correctly GC'd.
  RM->stringIDMap_.emplace_back(parent->getSymbolIDFromStringIDMayAllocate(
      bcFunction->getHeader().functionName));

  return RM;
}

// \return the function name symbol stored at stringIDMap_[0] by
//   createLazyModule(). Only valid while the module holds exactly one function
//   and one string.
SymbolID RuntimeModule::getLazyName() {
  assert(functionMap_.size() == 1 && "Not a lazy module?");
  assert(stringIDMap_.size() == 1 && "Missing lazy function name symbol");
  assert(this->stringIDMap_[0].isValid() && "Invalid function name symbol");
  return this->stringIDMap_[0];
}

// Replace the lazy provider with the compiled \p bytecode and move the single
// pre-existing CodeBlock from slot 0 to the provider's global function index.
void RuntimeModule::initializeLazyMayAllocate(
    std::unique_ptr<hbc::BCProvider> bytecode) {
  // Clear the old data provider first.
  bcProvider_ = nullptr;

  // Initialize without CJS module table because this compilation is done
  // separately, and the bytecode will not contain a module table.
  initializeWithoutCJSModulesMayAllocate(std::move(bytecode));

  // createLazyModule added a single codeblock as functionMap_[0]
  assert(functionMap_[0] && "Missing first entry");

  // We should move it to the index where it's supposed to be. This ensures a
  // 1-1 relationship between codeblocks and bytecodefunctions, which the
  // debugger relies on for setting step-out breakpoints in all functions.
  const auto globalFunctionIndex = bcProvider_->getGlobalFunctionIndex();
  if (globalFunctionIndex == 0) {
    // No move needed
    return;
  }
  assert(
      !functionMap_[globalFunctionIndex] && "Entry point is already occupied");
  functionMap_[globalFunctionIndex] = functionMap_[0];
  functionMap_[0] = nullptr;
}
#endif

// Rebuild stringIDMap_ from the bytecode provider's string table: every slot
// starts out as the empty symbol, and the entries the provider marks as
// identifiers get their SymbolID created here using the pre-computed hashes.
void RuntimeModule::importStringIDMapMayAllocate() {
  assert(bcProvider_ && "Uninitialized RuntimeModule");
  PerfSection perf("Import String ID Map");
  GCScope scope(runtime_);

  auto strTableSize = bcProvider_->getStringCount();

  stringIDMap_.clear();

  // Populate the string ID map with empty identifiers.
  stringIDMap_.resize(strTableSize, RootSymbolID(SymbolID::empty()));

  if (runtime_.getVMExperimentFlags() & experiments::MAdviseStringsSequential) {
    bcProvider_->adviseStringTableSequential();
  }

  if (runtime_.getVMExperimentFlags() & experiments::MAdviseStringsWillNeed) {
    bcProvider_->willNeedStringTable();
  }

  // Get the array of pre-computed hashes from identifiers in the bytecode
  // to their runtime representation as SymbolIDs.
  auto kinds = bcProvider_->getStringKinds();
  auto hashes = bcProvider_->getIdentifierHashes();
  assert(
      hashes.size() <= strTableSize &&
      "Should not have more identifiers than strings");

  // Preallocate enough space to store all identifiers to prevent
  // unnecessary allocations. NOTE: If this module is not the first module,
  // then this is an underestimate.
  runtime_.getIdentifierTable().reserve(hashes.size());
  {
    // strID walks the whole string table; hashID walks only the identifiers,
    // since only they have a pre-computed hash.
    StringID strID = 0;
    uint32_t hashID = 0;

    for (auto entry : kinds) {
      switch (entry.kind()) {
        case StringKind::String:
          // Plain strings are skipped and keep the empty symbol.
          strID += entry.count();
          break;

        case StringKind::Identifier:
          for (uint32_t i = 0; i < entry.count(); ++i, ++strID, ++hashID) {
            createSymbolFromStringIDMayAllocate(
                strID, bcProvider_->getStringTableEntry(strID), hashes[hashID]);
          }
          break;
      }
    }

    assert(strID == strTableSize && "Should map every string in the bytecode.");
    assert(hashID == hashes.size() && "Should hash all identifiers.");
  }

  if (runtime_.getVMExperimentFlags() & experiments::MAdviseStringsRandom) {
    bcProvider_->adviseStringTableRandom();
  }

  if (strTableSize == 0) {
    // If the string table turns out to be empty,
    // we always add one empty string to it.
    // Note that this can only happen when we are creating the RuntimeModule
    // in a non-standard way, either in unit tests or the special
    // emptyCodeBlockRuntimeModule_ in Runtime where the creation happens
    // manually instead of going through bytecode module generation.
    // In those cases, functions will be created with a default nameID=0
    // without adding the name string into the string table. Hence here
    // we need to add it manually and it will have index 0.
    ASCIIRef s;
    stringIDMap_.push_back({});
    mapStringMayAllocate(s, 0, hashString(s));
  }
}

// Resize functionMap_ to the provider's function count. The map is only
// expected to grow.
void RuntimeModule::initializeFunctionMap() {
  assert(bcProvider_ && "Uninitialized RuntimeModule");
  assert(
      bcProvider_->getFunctionCount() >= functionMap_.size() &&
      "Unexpected size reduction. Lazy module missing functions?");
  functionMap_.resize(bcProvider_->getFunctionCount());
}

// Delegate to Domain::importCJSModuleTable() for this module's domain.
// \return the status reported by the domain.
ExecutionStatus RuntimeModule::importCJSModuleTable() {
  PerfSection perf("Import CJS Module Table");
  return Domain::importCJSModuleTable(getDomain(runtime_), runtime_, this);
}

// \return the StringPrimitive the runtime associates with the SymbolID of
//   \p stringID.
StringPrimitive *RuntimeModule::getStringPrimFromStringIDMayAllocate(
    StringID stringID) {
  return runtime_.getStringPrimFromSymbolID(
      getSymbolIDFromStringIDMayAllocate(stringID));
}

// \return a std::string copy of string table entry \p stringID, read straight
//   from the provider's string storage. UTF-16 entries go through
//   convertUTF16ToUTF8WithReplacements(); ASCII entries are copied as-is.
std::string RuntimeModule::getStringFromStringID(StringID stringID) {
  auto entry = bcProvider_->getStringTableEntry(stringID);
  auto strStorage = bcProvider_->getStringStorage();
  if (entry.isUTF16()) {
    const char16_t *s =
        (const char16_t *)(strStorage.begin() + entry.getOffset());
    std::string out;
    convertUTF16ToUTF8WithReplacements(out, UTF16Ref{s, entry.getLength()});
    return out;
  } else {
    // ASCII.
    const char *s = (const char *)strStorage.begin() + entry.getOffset();
    return std::string{s, entry.getLength()};
  }
}

// \return the slice of the provider's regexp storage described by entry
//   \p regExpId of the regexp table.
llvh::ArrayRef<uint8_t> RuntimeModule::getRegExpBytecodeFromRegExpID(
    uint32_t regExpId) const {
  assert(
      regExpId < bcProvider_->getRegExpTable().size() && "Invalid regexp id");
  RegExpTableEntry entry = bcProvider_->getRegExpTable()[regExpId];
  return bcProvider_->getRegExpStorage().slice(entry.offset, entry.length);
}

// Obtain a SymbolID for \p str (whose hash is \p hash) and store it in
// stringIDMap_[\p stringID].
// \return the SymbolID that was stored.
template <typename T>
SymbolID RuntimeModule::mapStringMayAllocate(
    llvh::ArrayRef<T> str,
    StringID stringID,
    uint32_t hash) {
  // Create a SymbolID for a given string. In general a SymbolID holds onto an
  // intern'd StringPrimitive. As an optimization, if this RuntimeModule is
  // persistent, then it will not be deallocated before the Runtime, and we can
  // have the SymbolID hold a raw pointer into the storage and produce the
  // StringPrimitive when it is first required.
  SymbolID id;
  if (flags_.persistent) {
    // Registering a lazy identifier does not allocate, so we do not need a
    // GC scope.
    id = runtime_.getIdentifierTable().registerLazyIdentifier(str, hash);
  } else {
    // Accessing a symbol non-lazily may allocate in the GC heap, so add a scope
    // marker.
    GCScopeMarkerRAII scopeMarker{runtime_};
    id = *runtime_.ignoreAllocationFailure(
        runtime_.getIdentifierTable().getSymbolHandle(runtime_, str, hash));
  }
  stringIDMap_[stringID] = RootSymbolID(id);
  return id;
}

// Report this module's roots to \p acceptor: every value in templateMap_ and,
// only when \p markLongLived is set, every valid symbol in stringIDMap_.
void RuntimeModule::markRoots(RootAcceptor &acceptor, bool markLongLived) {
  for (auto &it : templateMap_) {
    acceptor.acceptPtr(it.second);
  }

  if (markLongLived) {
    for (auto symbol : stringIDMap_) {
      if (symbol.isValid()) {
        acceptor.accept(symbol);
      }
    }
  }
}

// Report this module's weak roots to \p acceptor: the hidden classes cached by
// its own CodeBlocks and the non-empty entries of the object literal hidden
// class cache.
void RuntimeModule::markWeakRoots(WeakRootAcceptor &acceptor) {
  for (auto &cbPtr : functionMap_) {
    // Only mark a CodeBlock if it is non-null, and has not been scanned
    // previously in this top-level markRoots invocation.
    if (cbPtr != nullptr && cbPtr->getRuntimeModule() == this) {
      cbPtr->markCachedHiddenClasses(runtime_, acceptor);
    }
  }
  for (auto &entry : objectLiteralHiddenClasses_) {
    if (entry.second) {
      acceptor.acceptWeak(entry.second);
    }
  }
}

// Report the weak reference to this module's domain to \p acceptor.
void RuntimeModule::markDomainRef(WeakRefAcceptor &acceptor) {
  acceptor.accept(domain_);
}

// Look up the hidden class cached for the object literal identified by
// \p keyBufferIndex and \p numLiterals.
// \return a handle to the cached class, or llvh::None when no cache key can be
//   generated, nothing is cached, or the cached weak reference yields null.
llvh::Optional<Handle<HiddenClass>> RuntimeModule::findCachedLiteralHiddenClass(
    Runtime &runtime,
    unsigned keyBufferIndex,
    unsigned numLiterals) const {
  if (canGenerateLiteralHiddenClassCacheKey(keyBufferIndex, numLiterals)) {
    const auto cachedHiddenClassIter = objectLiteralHiddenClasses_.find(
        getLiteralHiddenClassCacheHashKey(keyBufferIndex, numLiterals));
    if (cachedHiddenClassIter != objectLiteralHiddenClasses_.end()) {
      if (HiddenClass *const cachedHiddenClass =
              cachedHiddenClassIter->second.get(runtime, &runtime.getHeap())) {
        // Use the same runtime for the handle as for reading the weak root.
        return runtime.makeHandle(cachedHiddenClass);
      }
    }
  }
  return llvh::None;
}

// Cache \p clazz for the object literal identified by \p keyBufferIndex and
// the class's property count. Does nothing when no cache key can be generated.
void RuntimeModule::tryCacheLiteralHiddenClass(
    Runtime &runtime,
    unsigned keyBufferIndex,
    HiddenClass *clazz) {
  auto numLiterals = clazz->getNumProperties();
  if (canGenerateLiteralHiddenClassCacheKey(keyBufferIndex, numLiterals)) {
    assert(
        !findCachedLiteralHiddenClass(runtime, keyBufferIndex, numLiterals)
             .hasValue() &&
        "Why are we caching an item already cached?");
    objectLiteralHiddenClasses_[getLiteralHiddenClassCacheHashKey(
                                    keyBufferIndex, numLiterals)]
        .set(runtime, clazz);
  }
}

// \return the size in bytes of the string ID map's capacity plus the reported
//   memory sizes of the literal hidden class cache and the template map.
size_t RuntimeModule::additionalMemorySize() const {
  return stringIDMap_.capacity() * sizeof(SymbolID) +
      objectLiteralHiddenClasses_.getMemorySize() +
      templateMap_.getMemorySize();
}

// Add heap snapshot nodes for each CodeBlock owned by this module and for the
// functionMap_ vector itself, with an indexed edge from the vector to each
// owned CodeBlock.
void RuntimeModule::snapshotAddNodes(GC *gc, HeapSnapshot &snap) const {
  // Create a native node for each CodeBlock owned by this module.
  for (const CodeBlock *cb : functionMap_) {
    // Skip the null code blocks, they are lazily inserted the first time
    // they are used.
    if (cb && cb->getRuntimeModule() == this) {
      // Only add a CodeBlock if this runtime module is the owner.
      snap.beginNode();
      snap.endNode(
          HeapSnapshot::NodeType::Native,
          "CodeBlock",
          gc->getNativeID(cb),
          sizeof(CodeBlock) + cb->additionalMemorySize(),
          0);
    }
  }

  // Create a node for functionMap_.
  snap.beginNode();
  // Create an edge to each CodeBlock owned by this module.
  for (unsigned i = 0, e = functionMap_.size(); i < e; ++i) {
    const CodeBlock *cb = functionMap_[i];
    // Skip the null code blocks, they are lazily inserted the first time
    // they are used.
    if (cb && cb->getRuntimeModule() == this) {
      // Only add a CodeBlock if this runtime module is the owner.
      snap.addIndexedEdge(
          HeapSnapshot::EdgeType::Element, i, gc->getNativeID(cb));
    }
  }
  snap.endNode(
      HeapSnapshot::NodeType::Native,
      "std::vector<CodeBlock *>",
      gc->getNativeID(&functionMap_),
      functionMap_.capacity() * sizeof(CodeBlock *),
      0);
}

// Add the named internal edge "functionMap" pointing at the functionMap_ node.
void RuntimeModule::snapshotAddEdges(GC *gc, HeapSnapshot &snap) const {
  snap.addNamedEdge(
      HeapSnapshot::EdgeType::Internal,
      "functionMap",
      gc->getNativeID(&functionMap_));
}

namespace detail {

// Append a new slot to \p module's string ID map and map the ASCII string
// \p str into it.
// \return the StringID (index) of the newly added slot.
StringID mapStringMayAllocate(RuntimeModule &module, const char *str) {
  module.stringIDMap_.push_back({});
  module.mapStringMayAllocate(
      createASCIIRef(str), module.stringIDMap_.size() - 1);
  return module.stringIDMap_.size() - 1;
}

} // namespace detail

} // namespace vm
} // namespace hermes