lib/CompilerDriver/CompilerDriver.cpp

/* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #include "hermes/CompilerDriver/CompilerDriver.h" #include "hermes/AST/CommonJS.h" #include "hermes/AST/Context.h" #include "hermes/AST/ESTreeJSONDumper.h" #include "hermes/AST/SemValidate.h" #include "hermes/AST2JS/AST2JS.h" #include "hermes/BCGen/HBC/BytecodeDisassembler.h" #include "hermes/BCGen/HBC/HBC.h" #include "hermes/BCGen/RegAlloc.h" #include "hermes/ConsoleHost/ConsoleHost.h" #include "hermes/FlowParser/FlowParser.h" #include "hermes/IR/Analysis.h" #include "hermes/IR/IR.h" #include "hermes/IR/IRBuilder.h" #include "hermes/IR/IRVerifier.h" #include "hermes/IR/Instrs.h" #include "hermes/IRGen/IRGen.h" #include "hermes/Optimizer/PassManager/PassManager.h" #include "hermes/Optimizer/PassManager/Pipeline.h" #include "hermes/Parser/JSONParser.h" #include "hermes/Parser/JSParser.h" #include "hermes/Runtime/Libhermes.h" #include "hermes/SourceMap/SourceMapGenerator.h" #include "hermes/SourceMap/SourceMapParser.h" #include "hermes/SourceMap/SourceMapTranslator.h" #include "hermes/Support/Algorithms.h" #include "hermes/Support/MemoryBuffer.h" #include "hermes/Support/OSCompat.h" #include "hermes/Support/OptValue.h" #include "hermes/Support/Warning.h" #include "hermes/Utils/Dumper.h" #include "hermes/Utils/Options.h" #include "llvh/Support/CommandLine.h" #include "llvh/Support/Debug.h" #include "llvh/Support/FileSystem.h" #include "llvh/Support/MemoryBuffer.h" #include "llvh/Support/Path.h" #include "llvh/Support/Process.h" #include "llvh/Support/SHA1.h" #include "llvh/Support/raw_ostream.h" #include "zip/src/zip.h" #include <sstream> #define DEBUG_TYPE "hermes" using llvh::ArrayRef; using llvh::cast; using llvh::dyn_cast; using llvh::Optional; using llvh::raw_fd_ostream; using llvh::sys::fs::F_None; using llvh::sys::fs::F_Text; using namespace hermes; using namespace 
hermes::driver; namespace cl { using llvh::cl::cat; using llvh::cl::desc; using llvh::cl::Hidden; using llvh::cl::init; using llvh::cl::list; using llvh::cl::opt; using llvh::cl::OptionCategory; using llvh::cl::Positional; using llvh::cl::value_desc; using llvh::cl::values; using llvh::cl::ValuesClass; /// Encapsulate a compiler flag: for example, "-fflag/-fno-flag", or /// "-Wflag/-Wno-flag". class CLFlag { std::string yesName_; std::string yesHelp_; std::string noName_; std::string noHelp_; llvh::cl::opt<bool> yes_; llvh::cl::opt<bool> no_; const bool defaultValue_; public: CLFlag(const CLFlag &) = delete; void operator=(CLFlag &) = delete; /// \param flagChar is the character that will be prepended to the flag name. /// \param name is the name for the command line option /// \param defaultValue is the default if neither is specified. /// \param desc is the description starting with lower case like " inlining of /// functions". CLFlag( char flagChar, const llvh::Twine &name, bool defaultValue, const llvh::Twine &desc, llvh::cl::OptionCategory &category) : yesName_((llvh::Twine(flagChar) + name).str()), yesHelp_(("Enable " + desc).str()), noName_((llvh::Twine(flagChar) + "no-" + name).str()), noHelp_(("Disable " + desc).str()), yes_( StringRef(yesName_), llvh::cl::ValueDisallowed, llvh::cl::desc(StringRef(yesHelp_)), llvh::cl::cat(category)), no_(StringRef(noName_), llvh::cl::ValueDisallowed, llvh::cl::Hidden, llvh::cl::desc(StringRef(noHelp_)), llvh::cl::cat(category)), defaultValue_(defaultValue) {} /// Resolve the value of the flag depending on which command line option is /// present and which one is last. bool getValue() const { if (yes_.getPosition() > no_.getPosition()) return true; if (yes_.getPosition() < no_.getPosition()) return false; return defaultValue_; } /// Casting to bool always makes sense, so no "explicit" needed here. 
operator bool() const { return getValue(); } }; static OptionCategory CompilerCategory( "Compiler Options", "These options change how JS is compiled."); list<std::string> InputFilenames(desc("<file1> <file2>..."), Positional); #if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS) static opt<bool> PrintStats("print-stats", desc("Print statistics")); #endif enum class OptLevel { O0, Og, OMax, }; cl::opt<OptLevel> OptimizationLevel( cl::desc("Choose optimization level:"), cl::init(OptLevel::OMax), cl::values( clEnumValN(OptLevel::O0, "O0", "No optimizations"), clEnumValN(OptLevel::Og, "Og", "Optimizations suitable for debugging"), clEnumValN(OptLevel::OMax, "O", "Expensive optimizations")), cl::cat(CompilerCategory)); enum class StaticBuiltinSetting { ForceOn, ForceOff, AutoDetect, }; cl::opt<StaticBuiltinSetting> StaticBuiltins( cl::desc( "recognizing of calls to global functions like Object.keys() statically"), cl::init(StaticBuiltinSetting::AutoDetect), cl::values( clEnumValN( StaticBuiltinSetting::ForceOn, "fstatic-builtins", "Enable static builtins."), clEnumValN( StaticBuiltinSetting::ForceOff, "fno-static-builtins", "Disable static builtins."), clEnumValN( StaticBuiltinSetting::AutoDetect, "fauto-detect-static-builtins", "Automatically detect 'use static builtin' directive from the source.")), cl::cat(CompilerCategory)); static list<std::string> CustomOptimize( "custom-opt", desc("Custom optimzations"), Hidden, cat(CompilerCategory)); static opt<OutputFormatKind> DumpTarget( desc("Choose output:"), init(Execute), values( clEnumValN(Execute, "exec", "Execute the provided script (default)"), clEnumValN(DumpAST, "dump-ast", "Dump the AST as text in JSON"), clEnumValN( DumpTransformedAST, "dump-transformed-ast", "Dump the transformed AST as text after validation"), clEnumValN(DumpJS, "dump-js", "Dump the AST as JS"), clEnumValN( DumpTransformedJS, "dump-transformed-js", "Dump the transformed AST as JS after validation"), #ifndef NDEBUG clEnumValN(ViewCFG, "view-cfg", 
"View the CFG."), #endif clEnumValN(DumpIR, "dump-ir", "Dump the IR as text"), clEnumValN(DumpLIR, "dump-lir", "Dump the Lowered IR as text"), clEnumValN(DumpRA, "dump-ra", "Dump the register-allocated IR as text"), clEnumValN( DumpLRA, "dump-lra", "Dump register-allocated Lowered IR as text"), clEnumValN( DumpPostRA, "dump-postra", "Dump the Lowered IR after register allocation"), clEnumValN(DumpBytecode, "dump-bytecode", "Dump bytecode as text"), clEnumValN(EmitBundle, "emit-binary", "Emit compiled binary")), cat(CompilerCategory)); static opt<bool> Pretty( "pretty", init(true), desc("Pretty print JSON, JS or disassembled bytecode"), cat(CompilerCategory)); static llvh::cl::alias _PrettyJSON( "pretty-json", desc("Alias for --pretty"), Hidden, llvh::cl::aliasopt(Pretty)); static llvh::cl::alias _PrettyDisassemble( "pretty-disassemble", desc("Alias for --pretty"), Hidden, llvh::cl::aliasopt(Pretty)); /// Unused option kept for backwards compatibility. static opt<bool> unused_HermesParser( "hermes-parser", desc("Treat the input as JavaScript"), Hidden, cat(CompilerCategory)); static opt<bool> FlowParser( "Xflow-parser", init(false), desc("Use libflowparser instead of the hermes parser"), Hidden, cat(CompilerCategory)); static opt<bool> BytecodeMode( "b", desc("Treat the input as executable bytecode")); static opt<bool> NonStrictMode( "non-strict", desc("Enable non-strict mode."), cat(CompilerCategory)); static opt<bool> StrictMode("strict", desc("Enable strict mode."), cat(CompilerCategory)); static opt<bool> LazyCompilation( "lazy", init(false), desc("Force fully lazy compilation"), cat(CompilerCategory)); static opt<bool> EagerCompilation( "eager", init(false), desc("Force fully eager compilation"), cat(CompilerCategory)); /// The following flags are exported so it may be used by the VM driver as well. 
opt<bool> BasicBlockProfiling( "basic-block-profiling", init(false), desc("Enable basic block profiling (HBC only)")); opt<bool> EnableEval("enable-eval", init(true), desc("Enable support for eval()")); // This is normally a compiler option, but it also applies to strings given // to eval or the Function constructor. opt<bool> VerifyIR( "verify-ir", #ifdef HERMES_SLOW_DEBUG init(true), #else init(false), Hidden, #endif desc("Verify the IR after creating it"), cat(CompilerCategory)); opt<bool> EmitAsyncBreakCheck( "emit-async-break-check", desc("Emit instruction to check async break request"), init(false), cat(CompilerCategory)); opt<bool> OptimizedEval( "optimized-eval", desc("Turn on compiler optimizations in eval."), init(false)); static list<std::string> IncludeGlobals( "include-globals", desc("Include the definitions of global properties (can be " "specified more than once)"), value_desc("filename")); enum BytecodeFormatKind { HBC, }; // Enable Debug Options to be specified on the command line static opt<BytecodeFormatKind> BytecodeFormat( "target", init(HBC), desc("Set the bytecode format:"), values(clEnumVal(HBC, "Emit HBC bytecode (default)")), cat(CompilerCategory)); static opt<std::string> BytecodeOutputFilename( "out", desc("Output file name"), cat(CompilerCategory)); static opt<std::string> BytecodeManifestFilename( "bytecode-output-manifest", init("manifest.json"), desc( "Name of the manifest file generated when compiling multiple segments to bytecode"), cat(CompilerCategory)); enum class DebugLevel { g0, g1, g2, g3 }; static cl::opt<DebugLevel> DebugInfoLevel( cl::desc("Choose debug info level:"), cl::init(DebugLevel::g1), cl::values( clEnumValN(DebugLevel::g3, "g", "Equivalent to -g3"), clEnumValN(DebugLevel::g0, "g0", "Do not emit debug info"), clEnumValN(DebugLevel::g1, "g1", "Emit location info for backtraces"), clEnumValN( DebugLevel::g2, "g2", "Emit location info for all instructions"), clEnumValN(DebugLevel::g3, "g3", "Emit full info for 
debugging")), cl::cat(CompilerCategory)); static opt<std::string> InputSourceMap( "source-map", desc("Specify a matching source map for the input JS file"), cat(CompilerCategory)); static opt<bool> OutputSourceMap( "output-source-map", desc("Emit a source map to the output filename with .map extension"), cat(CompilerCategory)); static opt<bool> DumpOperandRegisters( "dump-operand-registers", desc("Dump registers assigned to instruction operands"), cat(CompilerCategory)); static opt<bool> DumpUseList( "dump-instr-uselist", desc("Print the use list if the instruction has any users."), init(false), cat(CompilerCategory)); static opt<LocationDumpMode> DumpSourceLocation( "dump-source-location", desc("Print source location information in IR or AST dumps."), init(LocationDumpMode::None), values( clEnumValN( LocationDumpMode::LocAndRange, "both", "Print both source location and byte range"), clEnumValN(LocationDumpMode::Loc, "loc", "Print only source location"), clEnumValN(LocationDumpMode::Range, "range", "Print only byte range")), cat(CompilerCategory)); static opt<bool> IncludeEmptyASTNodes( "Xinclude-empty-ast-nodes", desc("Print all AST nodes, including nodes that are hidden when empty."), Hidden, cat(CompilerCategory)); static opt<bool> IncludeRawASTProp( "Xinclude-raw-ast-prop", desc("Print the 'raw' AST property, when available."), init(true), Hidden, cat(CompilerCategory)); static opt<bool> DumpBetweenPasses( "Xdump-between-passes", init(false), Hidden, desc("Print IR after every optimization pass"), cat(CompilerCategory)); #ifndef NDEBUG static opt<bool> LexerOnly( "Xlexer-only", desc("Only run the lexer on the input (debug builds only)"), Hidden, cat(CompilerCategory)); #endif static opt<int> MaxDiagnosticWidth( "max-diagnostic-width", llvh::cl::desc("Preferred diagnostic maximum width"), llvh::cl::init(0), cat(CompilerCategory)); static opt<bool> CommonJS( "commonjs", desc("Use CommonJS modules"), init(false), cat(CompilerCategory)); #if HERMES_PARSE_JSX 
static opt<bool> JSX("parse-jsx", desc("Parse JSX"), init(false), cat(CompilerCategory)); #endif #if HERMES_PARSE_FLOW static opt<bool> ParseFlow( "parse-flow", desc("Parse Flow"), init(false), cat(CompilerCategory)); #endif #if HERMES_PARSE_TS static opt<bool> ParseTS( "parse-ts", desc("Parse TypeScript"), init(false), cat(CompilerCategory)); #endif static CLFlag StaticRequire( 'f', "static-require", false, "resolving of CommonJS require() calls at compile time", CompilerCategory); static opt<unsigned> ErrorLimit( "ferror-limit", desc("Maximum number of errors (0 means unlimited)"), init(20), cat(CompilerCategory)); static ValuesClass warningValues{ #define WARNING_CATEGORY_HIDDEN(name, specifier, description) \ clEnumValN(Warning::name, specifier, description), #include "hermes/Support/Warnings.def" }; static list<hermes::Warning> Werror( llvh::cl::ValueOptional, "Werror", value_desc("category"), desc( "Treat all warnings as errors, or treat warnings of a particular category as errors"), warningValues, cat(CompilerCategory)); static list<hermes::Warning> Wnoerror( llvh::cl::ValueOptional, "Wno-error", value_desc("category"), Hidden, desc( "Treat no warnings as errors, or treat warnings of a particular category as warnings"), warningValues, cat(CompilerCategory)); static opt<bool> DisableAllWarnings( "w", desc("Disable all warnings"), init(false), cat(CompilerCategory)); static opt<bool> ReusePropCache( "reuse-prop-cache", desc("Reuse property cache entries for same property name"), init(true)); static CLFlag Inline('f', "inline", true, "inlining of functions", CompilerCategory); static CLFlag StripFunctionNames( 'f', "strip-function-names", false, "Strip function names to reduce string table size", CompilerCategory); static opt<bool> EnableTDZ( "Xenable-tdz", init(false), Hidden, desc("UNSUPPORTED: Enable TDZ checks for let/const"), cat(CompilerCategory)); #define WARNING_CATEGORY(name, specifier, description) \ static CLFlag name##Warning( \ 'W', specifier, 
true, description, CompilerCategory); #include "hermes/Support/Warnings.def" static opt<std::string> BaseBytecodeFile( "base-bytecode", llvh::cl::desc("input base bytecode for delta optimizing mode"), llvh::cl::init(""), cat(CompilerCategory)); static opt<unsigned> PadFunctionBodiesPercent( "pad-function-bodies-percent", desc( "Add this much garbage after each function body (relative to its size)."), init(0), Hidden, cat(CompilerCategory)); static opt<bool> InstrumentIR( "instrument", desc("Instrument code for dynamic analysis"), init(false), Hidden, cat(CompilerCategory)); static CLFlag UseUnsafeIntrinsics( 'f', "unsafe-intrinsics", false, "Recognize and lower Asm.js/Wasm unsafe compiler intrinsics.", CompilerCategory); } // namespace cl namespace { struct ModuleInSegment { /// Index of the module, to be used as the ID when generating IR. uint32_t id; /// Input source file. May be a JavaScript source file or an HBC file. std::unique_ptr<llvh::MemoryBuffer> file; /// SourceMap file. nullptr if not specified by the user. std::unique_ptr<llvh::MemoryBuffer> sourceMap; }; /// Encodes a list of files that are part of a given segment. using SegmentTableEntry = std::vector<ModuleInSegment>; /// Mapping from segment index to the file buffers in that segment. /// For a given table, table[i][j] is the j-indexed file in segment i. /// Use an std::map to ensure that the order of iteration is guaranteed here, /// allowing the assumption that the segments have strictly increasing /// module IDs. The entry point must be found at table[0][0]. /// If multiple segments or multiple input files are not being used, /// the only input will be at table[0][0]. using SegmentTable = std::map<uint32_t, SegmentTableEntry>; /// Mapping from file name to module ID. File names are relative to the input /// root path (directory / zip file) and normalized with /// remove_leading_dotslash. 
using ModuleIDsTable = llvh::DenseMap<llvh::StringRef, uint32_t>; /// Read a file at path \p path into a memory buffer. If \p stdinOk is set, /// allow "-" to mean stdin. /// \param silent if true, don't print an error message on failure. /// \return the memory buffer, or nullptr on error, in /// which case an error message will have been printed to llvh::errs(). std::unique_ptr<llvh::MemoryBuffer> memoryBufferFromFile( llvh::StringRef path, bool stdinOk = false, bool silent = false) { auto fileBuf = stdinOk ? llvh::MemoryBuffer::getFileOrSTDIN(path) : llvh::MemoryBuffer::getFile(path); if (!fileBuf) { if (!silent) { llvh::errs() << "Error! Failed to open file: " << path << '\n'; } return nullptr; } return std::move(*fileBuf); } /// Read a file from \p path relative to the root of the zip file \p zip /// into a memory buffer. Print error messages to llvh::errs(). /// \param zip the zip file to read from (must not be null). /// \param path the path in the zip file, must be null-terminated. /// \return the read file, nullptr on error. std::unique_ptr<llvh::MemoryBuffer> memoryBufferFromZipFile(zip_t *zip, const char *path, bool silent = false) { assert(zip && "zip file must not be null"); int result = 0; result = zip_entry_open(zip, path); if (result < 0) { if (!silent) { llvh::errs() << "Zip error: reading " << path << ": " << zip_strerror(result) << "\n"; } return nullptr; } size_t size = zip_entry_size(zip); // Read data from the file, ensuring null termination of the data. std::unique_ptr<llvh::MemoryBuffer> buf = llvh::WritableMemoryBuffer::getNewMemBuffer(size, path); zip_entry_noallocread(zip, const_cast<char *>(buf->getBufferStart()), size); zip_entry_close(zip); return buf; } /// Manage an output file safely. class OutputStream { public: /// Creates an empty object. OutputStream() : os_(nullptr) {} /// Create an object which initially holds the \p defaultStream. 
OutputStream(llvh::raw_ostream &defaultStream) : os_(&defaultStream) {} ~OutputStream() { discard(); } /// Replaces the stream with an open stream to a temporary file /// named based on \p fileName. This method will write error /// messages, if any, to llvh::errs(). This method can only be /// called once on an object. \return true if the temp file was /// created and false otherwise. If the object is destroyed without /// close() being called, the temp file is removed. bool open(llvh::Twine fileName, llvh::sys::fs::OpenFlags openFlags) { assert(!fdos_ && "OutputStream::open() can be called only once."); // Newer versions of llvm have a safe createUniqueFile overload // which takes OpenFlags. Hermes's llvm doesn't, so we have to do // it this way, which is a hypothetical race. std::error_code EC = llvh::sys::fs::getPotentiallyUniqueFileName( fileName + ".%%%%%%", tempName_); if (EC) { llvh::errs() << "Failed to get temp file for " << fileName << ": " << EC.message() << '\n'; return false; } fdos_ = std::make_unique<raw_fd_ostream>(tempName_, EC, openFlags); if (EC) { llvh::errs() << "Failed to open file " << tempName_ << ": " << EC.message() << '\n'; fdos_.reset(); return false; } os_ = fdos_.get(); fileName_ = fileName.str(); return true; } /// If a temporary file was created, it is renamed to \p fileName. /// If renaming fails, it will be deleted. This method will write /// error messages, if any, to llvh::errs(). \return true if a temp /// file was never created or was renamed here; or false otherwise. bool close() { if (!fdos_) { return true; } fdos_->close(); fdos_.reset(); std::error_code EC = llvh::sys::fs::rename(tempName_, fileName_); if (EC) { llvh::errs() << "Failed to write file " << fileName_ << ": " << EC.message() << '\n'; llvh::sys::fs::remove(tempName_); return false; } return true; } /// If a temporary file was created, it is deleted. 
void discard() { if (!fdos_) { return; } fdos_->close(); fdos_.reset(); llvh::sys::fs::remove(tempName_); } raw_ostream &os() { assert(os_ && "OutputStream never initialized"); return *os_; } private: llvh::raw_ostream *os_; llvh::SmallString<32> tempName_; std::unique_ptr<raw_fd_ostream> fdos_; std::string fileName_; }; /// Loads global definitions from MemoryBuffer and adds the definitions to \p /// declFileList. /// \return true on success, false on error. bool loadGlobalDefinition( Context &context, std::unique_ptr<llvh::MemoryBuffer> content, DeclarationFileListTy &declFileList) { parser::JSParser jsParser(context, std::move(content)); auto parsedJs = jsParser.parse(); if (!parsedJs) return false; declFileList.push_back(parsedJs.getValue()); return true; } /// Attempt to guess the best error output options by inspecting stderr SourceErrorOutputOptions guessErrorOutputOptions() { SourceErrorOutputOptions result; result.showColors = oscompat::should_color(STDERR_FILENO); result.preferredMaxErrorWidth = SourceErrorOutputOptions::UnlimitedWidth; if (oscompat::isatty(STDERR_FILENO)) { result.preferredMaxErrorWidth = llvh::sys::Process::StandardErrColumns(); } // Respect MaxDiagnosticWidth if nonzero if (cl::MaxDiagnosticWidth < 0) { result.preferredMaxErrorWidth = SourceErrorOutputOptions::UnlimitedWidth; } else if (cl::MaxDiagnosticWidth > 0) { result.preferredMaxErrorWidth = static_cast<size_t>(cl::MaxDiagnosticWidth); } return result; } /// Parse the given files and return a single AST pointer. /// \p sourceMap any parsed source map associated with \p fileBuf. /// \p sourceMapTranslator input source map coordinate translator. /// \return A pointer to the new validated AST, nullptr if parsing failed. /// If using CJS modules, return a FunctionExpressionNode, else a ProgramNode. 
ESTree::NodePtr parseJS( std::shared_ptr<Context> &context, sem::SemContext &semCtx, std::unique_ptr<llvh::MemoryBuffer> fileBuf, std::unique_ptr<SourceMap> sourceMap = nullptr, std::shared_ptr<SourceMapTranslator> sourceMapTranslator = nullptr, bool wrapCJSModule = false) { assert(fileBuf && "Need a file to compile"); assert(context && "Need a context to compile using"); // This value will be set to true if the parser detected the 'use static // builtin' directive in the source. bool useStaticBuiltinDetected = false; bool isLargeFile = fileBuf->getBufferSize() >= context->getPreemptiveFileCompilationThreshold(); int fileBufId = context->getSourceErrorManager().addNewSourceBuffer(std::move(fileBuf)); if (sourceMap != nullptr && sourceMapTranslator != nullptr) { sourceMapTranslator->addSourceMap(fileBufId, std::move(sourceMap)); } auto mode = parser::FullParse; if (context->isLazyCompilation() && isLargeFile) { if (!parser::JSParser::preParseBuffer( *context, fileBufId, useStaticBuiltinDetected)) { return nullptr; } mode = parser::LazyParse; } Optional<ESTree::ProgramNode *> parsedJs; #ifdef HERMES_USE_FLOWPARSER if (cl::FlowParser) { parsedJs = parser::parseFlowParser(*context, fileBufId); } else #endif { parser::JSParser jsParser(*context, fileBufId, mode); parsedJs = jsParser.parse(); // If we are using lazy parse mode, we should have already detected the 'use // static builtin' directive in the pre-parsing stage. 
if (mode != parser::LazyParse) { useStaticBuiltinDetected = jsParser.getUseStaticBuiltin(); } } if (!parsedJs) return nullptr; ESTree::NodePtr parsedAST = parsedJs.getValue(); if (cl::StaticBuiltins == cl::StaticBuiltinSetting::AutoDetect) { context->setStaticBuiltinOptimization(useStaticBuiltinDetected); } if (wrapCJSModule) { parsedAST = hermes::wrapCJSModule(context, cast<ESTree::ProgramNode>(parsedAST)); if (!parsedAST) { return nullptr; } } if (cl::DumpTarget == DumpAST) { hermes::dumpESTreeJSON( llvh::outs(), parsedAST, cl::Pretty /* pretty */, cl::IncludeEmptyASTNodes ? ESTreeDumpMode::DumpAll : ESTreeDumpMode::HideEmpty, context->getSourceErrorManager(), cl::DumpSourceLocation, cl::IncludeRawASTProp ? ESTreeRawProp::Include : ESTreeRawProp::Exclude); return parsedAST; } if (cl::DumpTarget == DumpJS) { hermes::generateJS(llvh::outs(), parsedAST, cl::Pretty /* pretty */); return parsedAST; } if (!hermes::sem::validateAST(*context, semCtx, parsedAST)) { return nullptr; } if (cl::DumpTarget == DumpTransformedAST) { hermes::dumpESTreeJSON( llvh::outs(), parsedAST, cl::Pretty /* pretty */, cl::IncludeEmptyASTNodes ? ESTreeDumpMode::DumpAll : ESTreeDumpMode::HideEmpty, context->getSourceErrorManager(), cl::DumpSourceLocation, cl::IncludeRawASTProp ? ESTreeRawProp::Include : ESTreeRawProp::Exclude); } if (cl::DumpTarget == DumpTransformedJS) { hermes::generateJS(llvh::outs(), parsedAST, cl::Pretty /* pretty */); } return parsedAST; } /// Apply custom logic for flag initialization. void setFlagDefaults() { // We haven't been given any file names; just use "-", which acts as stdin. if (cl::InputFilenames.empty()) { cl::InputFilenames.push_back("-"); } // If bytecode mode is not explicitly specified, check the input extension. // of the input file. 
if (!cl::BytecodeMode && cl::InputFilenames.size() == 1 &&
      llvh::sys::path::extension(cl::InputFilenames[0]) == ".hbc") {
    // A single input file with the ".hbc" extension is treated as bytecode
    // even when bytecode mode was not requested explicitly.
    cl::BytecodeMode = true;
  }

  // Lazy compilation caps the optimization level at -Og.
  if (cl::LazyCompilation && cl::OptimizationLevel > cl::OptLevel::Og) {
    cl::OptimizationLevel = cl::OptLevel::Og;
  }

  // Emitting a source map raises the debug info level to at least -g2.
  if (cl::OutputSourceMap && cl::DebugInfoLevel < cl::DebugLevel::g2) {
    cl::DebugInfoLevel = cl::DebugLevel::g2;
  }
}

/// Validate command line flags.
/// Only the first problem found is reported; later checks still run but stay
/// silent once an error has been recorded.
/// \return true if the flags are valid, false if not. On a false return, an
/// error will have been printed to stderr.
bool validateFlags() {
  // Helper that records a failure. Only the first message is printed; the
  // function's result is derived from \c errored at the end.
  bool errored = false;
  auto err = [&errored](const char *msg) {
    if (!errored) {
      llvh::errs() << msg << '\n';
      errored = true;
    }
  };

  // Validate strict vs non strict mode.
  if (cl::NonStrictMode && cl::StrictMode) {
    err("Error! Cannot use both -strict and -non-strict");
  }

  // Validate bytecode output file.
  if (cl::DumpTarget == EmitBundle && cl::BytecodeOutputFilename.empty() &&
      oscompat::isatty(STDOUT_FILENO)) {
    // To skip this check and trash the terminal, use -out /dev/stdout.
    err("Refusing to write binary bundle to terminal.\n"
        "Specify output file with -out filename.");
  }

  if (cl::LazyCompilation && cl::EagerCompilation) {
    err("Can't specify both -lazy and -eager");
  }

  // Validate lazy compilation flags.
  if (cl::LazyCompilation) {
    if (cl::BytecodeFormat != cl::BytecodeFormatKind::HBC)
      err("-lazy only works with -target=HBC");
    // NOTE(review): the flag-defaults code right above this function already
    // lowers the level to -Og under -lazy, so this presumably only fires when
    // flags are validated without applying the defaults first -- confirm.
    if (cl::OptimizationLevel > cl::OptLevel::Og)
      err("-lazy does not work with -O");
    if (cl::BytecodeMode) {
      err("-lazy doesn't make sense with bytecode");
    }
    if (!cl::CustomOptimize.empty()) {
      // We don't currently pass these around to be applied later.
      err("-lazy doesn't allow custom optimizations");
    }
    if (cl::CommonJS) {
      err("-lazy doesn't support CommonJS modules");
    }
  }

  // Validate flags for more than one input file.
  if (cl::InputFilenames.size() > 1) {
    if (cl::BytecodeMode)
      err("Hermes can only load one bytecode file.");
    if (cl::BytecodeFormat != cl::BytecodeFormatKind::HBC)
      err("Multiple files are only supported with HBC.");
    if (!cl::CommonJS)
      err("Multiple files must use CommonJS modules.");
  }

  // Validate source map output flags.
  if (cl::OutputSourceMap) {
    if (cl::BytecodeOutputFilename.empty())
      err("-output-source-map requires -out to be set");
    if (cl::BytecodeFormat != cl::BytecodeFormatKind::HBC)
      err("-output-source-map requires HBC target");
    if (cl::DumpTarget != EmitBundle)
      err("-output-source-map only works with -emit-binary");
  }

  // Validate bytecode dumping flags.
  if (cl::BytecodeMode && cl::DumpTarget != Execute) {
    if (cl::BytecodeFormat != cl::BytecodeFormatKind::HBC)
      err("Only Hermes bytecode files may be dumped");
    if (cl::DumpTarget != DumpBytecode)
      err("You can only dump bytecode for HBC bytecode file.");
  }

#ifndef HERMES_ENABLE_IR_INSTRUMENTATION
  if (cl::InstrumentIR) {
    err("Instrumentation is requested, but support is not compiled in");
  }
#endif

  return !errored;
}

/// Apply the -Werror, -Wno-error, -Werror=<category> and -Wno-error=<category>
/// flags to \c sm from left to right.
/// The two flag lists are merged by command line position, so that a later
/// flag is applied after an earlier one regardless of which list it is in.
static void setWarningsAreErrorsFromFlags(SourceErrorManager &sm) {
  std::vector<Warning>::iterator yesIt = cl::Werror.begin();
  std::vector<Warning>::iterator noIt = cl::Wnoerror.begin();
  // Argument positions are indices into argv and start at 1 (or 2 if there's a
  // subcommand). See llvh::cl::CommandLineParser::ParseCommandLineOptions().
  // In this loop, position 0 represents the lack of a value.
  unsigned noPos = 0, yesPos = 0;
  while (true) {
    // Refresh the position of the next unconsumed flag in each list.
    if (noIt != cl::Wnoerror.end()) {
      noPos = cl::Wnoerror.getPosition(noIt - cl::Wnoerror.begin());
    } else {
      noPos = 0;
    }
    if (yesIt != cl::Werror.end()) {
      yesPos = cl::Werror.getPosition(yesIt - cl::Werror.begin());
    } else {
      yesPos = 0;
    }

    // Consume whichever pending flag comes first on the command line; stop
    // when both lists are exhausted.
    Warning warning;
    bool enable;
    if (yesPos != 0 && (noPos == 0 || yesPos < noPos)) {
      warning = *yesIt;
      enable = true;
      ++yesIt;
    } else if (noPos != 0 && (yesPos == 0 || noPos < yesPos)) {
      warning = *noIt;
      enable = false;
      ++noIt;
    } else {
      break;
    }

    // Warning::NoWarning stands for the category-less form of the flag and
    // toggles the setting for all warnings at once.
    if (warning == Warning::NoWarning) {
      sm.setWarningsAreErrors(enable);
    } else {
      sm.setWarningIsError(warning, enable);
    }
  }
}

/// Create a Context, respecting the command line flags.
/// \param resolutionTable the resolution table, moved into the new Context
///   (may be null).
/// \param segments the segment IDs, moved into the new Context.
/// \return the Context.
std::shared_ptr<Context> createContext(
    std::unique_ptr<Context::ResolutionTable> resolutionTable,
    std::vector<uint32_t> segments) {
  CodeGenerationSettings codeGenOpts;
  codeGenOpts.enableTDZ = cl::EnableTDZ;
  codeGenOpts.dumpOperandRegisters = cl::DumpOperandRegisters;
  codeGenOpts.dumpUseList = cl::DumpUseList;
  codeGenOpts.dumpSourceLocation =
      cl::DumpSourceLocation != LocationDumpMode::None;
  codeGenOpts.dumpIRBetweenPasses = cl::DumpBetweenPasses;
  if (cl::BytecodeFormat == cl::BytecodeFormatKind::HBC) {
    codeGenOpts.unlimitedRegisters = false;
  }
  codeGenOpts.instrumentIR = cl::InstrumentIR;

  OptimizationSettings optimizationOpts;

  // Enable aggressiveNonStrictModeOptimizations if the target is HBC.
  optimizationOpts.aggressiveNonStrictModeOptimizations =
      cl::BytecodeFormat == cl::BytecodeFormatKind::HBC;

  // Inlining requires an optimization level above -O0, the HBC target and
  // the inlining flag.
  optimizationOpts.inlining = cl::OptimizationLevel != cl::OptLevel::O0 &&
      cl::BytecodeFormat == cl::BytecodeFormatKind::HBC && cl::Inline;

  optimizationOpts.reusePropCache = cl::ReusePropCache;

  // When the setting is auto-detect, we will set the correct value after
  // parsing.
  optimizationOpts.staticBuiltins =
      cl::StaticBuiltins == cl::StaticBuiltinSetting::ForceOn;
  optimizationOpts.staticRequire = cl::StaticRequire;

  optimizationOpts.useUnsafeIntrinsics = cl::UseUnsafeIntrinsics;

  auto context = std::make_shared<Context>(
      codeGenOpts,
      optimizationOpts,
      std::move(resolutionTable),
      std::move(segments));

  // Default is non-strict mode.
  context->setStrictMode(!cl::NonStrictMode && cl::StrictMode);
  context->setEnableEval(cl::EnableEval);
  context->getSourceErrorManager().setOutputOptions(guessErrorOutputOptions());

  setWarningsAreErrorsFromFlags(context->getSourceErrorManager());

  // Apply the per-category warning flags: one setWarningStatus() call is
  // generated for every WARNING_CATEGORY entry in Warnings.def.
#define WARNING_CATEGORY(name, specifier, description) \
  context->getSourceErrorManager().setWarningStatus(   \
      Warning::name, cl::name##Warning);
#include "hermes/Support/Warnings.def"

  if (cl::DisableAllWarnings)
    context->getSourceErrorManager().disableAllWarnings();

  context->getSourceErrorManager().setErrorLimit(cl::ErrorLimit);

  {
    // Set default lazy mode using defaults from CompileFlags to keep it in one
    // place.
    hermes::hbc::CompileFlags defaultFlags{};
    context->setPreemptiveFileCompilationThreshold(
        defaultFlags.preemptiveFileCompilationThreshold);
    context->setPreemptiveFunctionCompilationThreshold(
        defaultFlags.preemptiveFunctionCompilationThreshold);
  }

  if (cl::EagerCompilation || cl::DumpTarget == EmitBundle ||
      cl::OptimizationLevel > cl::OptLevel::Og) {
    // Make sure nothing is lazy
    context->setLazyCompilation(false);
  } else if (cl::LazyCompilation) {
    // Make sure everything is lazy
    context->setLazyCompilation(true);
    context->setPreemptiveFileCompilationThreshold(0);
    context->setPreemptiveFunctionCompilationThreshold(0);
  } else {
    // By default with no optimization, use lazy compilation for "large" files
    context->setLazyCompilation(true);
  }

  if (cl::CommonJS) {
    context->setUseCJSModules(true);
  }

#if HERMES_PARSE_JSX
  if (cl::JSX) {
    context->setParseJSX(true);
  }
#endif

#if HERMES_PARSE_FLOW
  if (cl::ParseFlow) {
    context->setParseFlow(ParseFlowSetting::ALL);
  }
#endif

#if HERMES_PARSE_TS
  if (cl::ParseTS) {
    context->setParseTS(true);
  }
#endif

  // Map the -g level onto the Context's debug info setting.
  if (cl::DebugInfoLevel >= cl::DebugLevel::g3) {
    context->setDebugInfoSetting(DebugInfoSetting::ALL);
  } else if (cl::DebugInfoLevel == cl::DebugLevel::g2) {
    context->setDebugInfoSetting(DebugInfoSetting::SOURCE_MAP);
  } else {
    // -g1 or -g0. If -g0, we'll strip debug info later.
    context->setDebugInfoSetting(DebugInfoSetting::THROWING);
  }
  context->setEmitAsyncBreakCheck(cl::EmitAsyncBreakCheck);
  return context;
}

/// Parse \p file into a JSON value.
/// \param file the buffer holding the JSON text.
/// \param alloc the allocator to use for JSON parsing.
/// \return the parsed JSONValue, allocated in the user-specified allocator,
/// nullptr on failure. All error messages are printed to stderr.
::hermes::parser::JSONValue *parseJSONFile( std::unique_ptr<llvh::MemoryBuffer> &file, ::hermes::parser::JSLexer::Allocator &alloc) { using namespace ::hermes::parser; JSONFactory factory(alloc); SourceErrorManager sm; JSONParser parser(factory, *file, sm); auto root = parser.parse(); if (!root) { llvh::errs() << "Failed to parse metadata: Unable to parse a valid JSON object\n"; return nullptr; } return root.getValue(); } /// Given the root path to the directory or zip file, the file name, and /// a zip struct that represents the zip file if it's a zip, return /// the memory buffer of the file content. std::unique_ptr<llvh::MemoryBuffer> getFileFromDirectoryOrZip( zip_t *zip, llvh::StringRef rootPath, llvh::Twine fileName, bool silent = false) { llvh::SmallString<32> path{}; if (!zip) { llvh::sys::path::append(path, llvh::sys::path::Style::posix, rootPath); } llvh::sys::path::append(path, llvh::sys::path::Style::posix, fileName); llvh::sys::path::remove_dots(path, false, llvh::sys::path::Style::posix); return zip ? memoryBufferFromZipFile(zip, path.c_str(), silent) : memoryBufferFromFile(path, false, silent); } /// Read a module IDs table. It maps every file name to its unique global module /// ID. Prints out error messages to stderr in case of failure. /// \param metadata the full metadata JSONObject. Contains "moduleIDs". /// \return the module IDs table read from the metadata, None on failure. 
llvh::Optional<ModuleIDsTable> readModuleIDs(
    ::hermes::parser::JSONObject *metadata) {
  // Builds the filename -> module ID table from the "moduleIDs" object of the
  // metadata, rejecting non-numeric values, values that are not exact
  // unsigned 32-bit integers, and duplicate filenames or IDs.
  assert(metadata && "No metadata to read module IDs from");

  using namespace ::hermes::parser;

  JSONObject *moduleIDs =
      llvh::dyn_cast_or_null<JSONObject>(metadata->get("moduleIDs"));
  if (!moduleIDs) {
    // A missing (or non-object) table is reported as None without a message.
    return llvh::None;
  }

  // 2^32: the smallest value that no longer fits into a uint32_t.
  constexpr double kUint32Limit = 4294967296.0;

  ModuleIDsTable result;
  // Inverse mapping, used only to detect two files sharing one ID.
  // NOTE(review): DenseMap presumably reserves sentinel keys near UINT32_MAX
  // for unsigned keys -- confirm whether IDs that large need to be rejected.
  llvh::DenseMap<uint32_t, llvh::StringRef> filenameByModuleID;
  for (auto itFile : *moduleIDs) {
    llvh::StringRef filename =
        llvh::sys::path::remove_leading_dotslash(itFile.first->str());
    JSONNumber *moduleID = llvh::dyn_cast<JSONNumber>(itFile.second);
    if (!moduleID) {
      llvh::errs() << "Invalid value in module ID table for file: " << filename
                   << '\n';
      return llvh::None;
    }

    // Range-check BEFORE converting: casting a negative, too large or NaN
    // floating point value to uint32_t is undefined behavior. The negated
    // comparison also rejects NaN. The round-trip comparison then rejects
    // fractional values.
    const double rawID = moduleID->getValue();
    const bool inRange = rawID >= 0 && rawID < kUint32Limit;
    if (!inRange ||
        static_cast<double>(static_cast<uint32_t>(rawID)) != rawID) {
      llvh::errs() << "Module IDs must be unsigned integers: Found "
                   << moduleID->getValue() << '\n';
      return llvh::None;
    }
    const uint32_t uintModuleID = static_cast<uint32_t>(rawID);

    auto emplaceRes = result.try_emplace(filename, uintModuleID);
    if (!emplaceRes.second) {
      llvh::errs() << "Duplicate entry in module ID table for file: "
                   << filename << '\n';
      return llvh::None;
    }

    auto inverseRes = filenameByModuleID.try_emplace(uintModuleID, filename);
    if (!inverseRes.second) {
      llvh::errs() << "Duplicate entry in module ID table for ID: "
                   << uintModuleID << '\n';
      return llvh::None;
    }
  }

  return result;
}

/// Read input filenames from the given path and populate the files in \p
/// fileBufs.
/// In case of failure, ensure fileBufs is empty.
/// \param inputPath the path to the directory or zip file containing metadata
///   and files.
/// \param[out] fileBufs table of file buffers.
/// \param[out] segmentIDs receives the ID of every segment that was read.
/// \param alloc the allocator to use for JSON parsing of metadata.
/// \param zip the opened zip archive, or null when reading from a directory.
/// \return a pointer to the metadata JSON object, nullptr on failure.
::hermes::parser::JSONObject *readInputFilenamesFromDirectoryOrZip( llvh::StringRef inputPath, SegmentTable &fileBufs, std::vector<uint32_t> &segmentIDs, ::hermes::parser::JSLexer::Allocator &alloc, struct zip_t *zip) { auto metadataBuf = getFileFromDirectoryOrZip(zip, inputPath, "metadata.json"); if (!metadataBuf) { llvh::errs() << "Failed to read metadata: Input must contain a metadata.json file\n"; return nullptr; } auto *metadataVal = parseJSONFile(metadataBuf, alloc); if (!metadataVal) { // parseJSONFile prints any error messages. return nullptr; } // Pull data from the metadata JSON object into C++ data structures. // The metadata format is documented at doc/Modules.md. auto *metadata = dyn_cast<parser::JSONObject>(metadataVal); if (!metadata) { llvh::errs() << "Metadata must be a JSON object\n"; return nullptr; } auto *segments = llvh::dyn_cast_or_null<parser::JSONObject>(metadata->get("segments")); if (!segments) { llvh::errs() << "Metadata must contain segment information\n"; return nullptr; } // Module IDs in metadata, None if none could be read. auto externalModuleIDs = readModuleIDs(metadata); // Module ID table used for assigning auto-incrementing module IDs if we // don't have external module IDs. ModuleIDsTable automaticModuleIDs; uint32_t nextAutomaticModuleID = 0; for (auto it : *segments) { uint32_t segmentID; if (it.first->str().getAsInteger(10, segmentID)) { // getAsInteger returns true to signal error. 
llvh::errs() << "Metadata segment IDs must be unsigned integers: Found " << it.first->str() << '\n'; return nullptr; } auto *segment = llvh::dyn_cast_or_null<parser::JSONArray>(it.second); if (!segment) { llvh::errs() << "Metadata segment information must be an array\n"; return nullptr; } SegmentTableEntry segmentBufs{}; for (auto val : *segment) { auto *relPath = llvh::dyn_cast_or_null<parser::JSONString>(val); if (!relPath) { llvh::errs() << "Segment paths must be strings\n"; return nullptr; } auto filename = llvh::sys::path::remove_leading_dotslash(relPath->str()); auto fileBuf = getFileFromDirectoryOrZip(zip, inputPath, filename); if (!fileBuf) { return nullptr; } auto mapBuf = getFileFromDirectoryOrZip( zip, inputPath, llvh::Twine(filename, ".map"), true); uint32_t moduleID; if (externalModuleIDs.hasValue()) { auto itr = externalModuleIDs->find(filename); if (itr == externalModuleIDs->end()) { llvh::errs() << "Module is missing in externalModuleIDs: " << filename << "\n"; return nullptr; } moduleID = itr->second; } else { auto emplaceRes = automaticModuleIDs.try_emplace(filename, nextAutomaticModuleID); if (emplaceRes.second) { ++nextAutomaticModuleID; } moduleID = emplaceRes.first->second; } // mapBuf is optional, so simply pass it through if it's null. segmentBufs.push_back({moduleID, std::move(fileBuf), std::move(mapBuf)}); } auto emplaceRes = fileBufs.emplace(segmentID, std::move(segmentBufs)); if (!emplaceRes.second) { llvh::errs() << "Duplicate segment entry in metadata: " << segment << "\n"; return nullptr; } segmentIDs.push_back(segmentID); } return metadata; } /// A map from segment ID to the deserialized base bytecode of that segment. using BaseBytecodeMap = llvh::DenseMap<uint32_t, std::unique_ptr<hbc::BCProviderFromBuffer>>; /// Load the base bytecode provider from given file buffer \fileBuf. /// \return the base bytecode provider, or nullptr if an error happened. 
std::unique_ptr<hbc::BCProviderFromBuffer> loadBaseBytecodeProvider( std::unique_ptr<llvh::MemoryBuffer> fileBuf) { if (!fileBuf) { llvh::errs() << "Unable to read from base bytecode file.\n"; return nullptr; } // Transfer ownership to an owned memory buffer. auto ownedBuf = std::make_unique<OwnedMemoryBuffer>(std::move(fileBuf)); auto ret = hbc::BCProviderFromBuffer::createBCProviderFromBuffer( std::move(ownedBuf)); if (!ret.first) { llvh::errs() << "Error deserializing base bytecode: " << ret.second; return nullptr; } return std::move(ret.first); } /// Read the base bytecode provider map from either a directory or a zip file. /// This is used when commonjs is used and we need to optimize for delta /// bytecode updates. A metadata.hbc.json file is expected to exist in the /// directory or zip, which contains a map from segment ID to the file name of /// the base bytecode file for that segment. /// Returns whether the read succeeded. bool readBaseBytecodeFromDirectoryOrZip( BaseBytecodeMap &map, llvh::StringRef inputPath, ::hermes::parser::JSLexer::Allocator &alloc, struct zip_t *zip) { auto manifestBuf = getFileFromDirectoryOrZip(zip, inputPath, "manifest.json"); if (!manifestBuf) { llvh::errs() << "Failed to read manifest: Input must contain a manifest.json file\n"; return false; } auto *manifestVal = parseJSONFile(manifestBuf, alloc); if (!manifestVal) { // parseJSONFile prints any error messages. return false; } // Pull data from the manifest JSON object into C++ data structures. // The manifest format is documented at doc/Modules.md. 
auto *manifest = dyn_cast<parser::JSONArray>(manifestVal); if (!manifest) { llvh::errs() << "Manifest must be a JSON array.\n"; return false; } for (auto it : *manifest) { auto *segment = llvh::dyn_cast_or_null<parser::JSONObject>(it); if (!segment) { llvh::errs() << "Each segment entry must be a JSON object.\n"; return false; } llvh::StringRef prefix{"hbc-seg-"}; auto *flavor = llvh::dyn_cast_or_null<parser::JSONString>(segment->get("flavor")); if (!flavor || flavor->str().size() <= prefix.size() || !flavor->str().startswith(prefix)) { llvh::errs() << "flavor must be a string that prefix a number with " << prefix << ".\n"; return false; } uint32_t segmentID; if (flavor->str().substr(prefix.size()).getAsInteger(10, segmentID)) { // getAsInteger returns true to signal error. llvh::errs() << "flavor must be a string that prefix a number with " << prefix << ". Found " << flavor->str() << '\n'; return false; } auto *location = llvh::dyn_cast_or_null<parser::JSONString>(segment->get("location")); if (!location) { llvh::errs() << "Segment bytecode location must be a string.\n"; return false; } auto fileBuf = getFileFromDirectoryOrZip(zip, inputPath, location->str()); if (!fileBuf) { llvh::errs() << "Base bytecode does not exist: " << location->str() << ".\n"; return false; } auto bcProvider = loadBaseBytecodeProvider(std::move(fileBuf)); if (!bcProvider) { return false; } map[segmentID] = std::move(bcProvider); } return true; } /// Read base bytecode and returns whether it succeeded. 
bool readBaseBytecodeMap( BaseBytecodeMap &map, llvh::StringRef inputPath, ::hermes::parser::JSLexer::Allocator &alloc) { assert(!inputPath.empty() && "No base bytecode file requested"); struct zip_t *zip = zip_open(inputPath.data(), 0, 'r'); if (llvh::sys::fs::is_directory(inputPath) || zip) { auto ret = readBaseBytecodeFromDirectoryOrZip(map, inputPath, alloc, zip); if (zip) { zip_close(zip); } return ret; } auto bcProvider = loadBaseBytecodeProvider(memoryBufferFromFile(inputPath)); if (!bcProvider) { return false; } map[0] = std::move(bcProvider); return true; } /// Read a resolution table. Given a file name, it maps every require string /// to the actual file which must be required. /// Prints out error messages to stderr in case of failure. /// \param metadata the full metadata JSONObject. Contains "resolutionTable". /// \return the resolution table read from the metadata, nullptr on failure. std::unique_ptr<Context::ResolutionTable> readResolutionTable( ::hermes::parser::JSONObject *metadata) { assert(metadata && "No metadata to read resolution table from"); using namespace ::hermes::parser; auto result = std::make_unique<Context::ResolutionTable>(); JSONObject *resolutionTable = llvh::dyn_cast_or_null<JSONObject>(metadata->get("resolutionTable")); if (!resolutionTable) { return nullptr; } for (auto itFile : *resolutionTable) { llvh::StringRef filename = llvh::sys::path::remove_leading_dotslash(itFile.first->str()); JSONObject *fileTable = llvh::dyn_cast<JSONObject>(itFile.second); if (!fileTable) { llvh::errs() << "Invalid value in resolution table for file: " << filename << '\n'; return nullptr; } Context::ResolutionTableEntry map{}; for (auto itEntry : *fileTable) { JSONString *src = itEntry.first; JSONString *dstJSON = llvh::dyn_cast<JSONString>(itEntry.second); if (!dstJSON) { llvh::errs() << "Invalid value in resolution table: " << filename << '@' << src->str() << '\n'; return nullptr; } llvh::StringRef dst = 
llvh::sys::path::remove_leading_dotslash(dstJSON->str()); auto emplaceRes = map.try_emplace(src->str(), dst); if (!emplaceRes.second) { llvh::errs() << "Duplicate entry in resolution table: " << filename << '@' << src->str() << '\n'; return nullptr; } } auto emplaceRes = result->try_emplace(filename, std::move(map)); if (!emplaceRes.second) { llvh::errs() << "Duplicate entry in resolution table for file: " << filename << '\n'; return nullptr; } } return result; } /// Generate IR for CJS modules into the Module \p M for the source files in /// \p fileBufs if IR generation was requested. Otherwise, just parse the files. /// Treat the first element in fileBufs as the entry point. /// \param sourceMapGen the parsed versions of the input source maps, /// in the order in which the files were compiled. /// \return true on success, false on error, in which case an error will be /// printed. bool generateIRForSourcesAsCJSModules( Module &M, sem::SemContext &semCtx, const DeclarationFileListTy &declFileList, SegmentTable fileBufs, SourceMapGenerator *sourceMapGen) { auto context = M.shareContext(); llvh::SmallString<64> rootPath{fileBufs[0][0].file->getBufferIdentifier()}; llvh::sys::path::remove_filename(rootPath, llvh::sys::path::Style::posix); bool generateIR = cl::DumpTarget >= DumpIR; // Construct a MemoryBuffer for our global entry point. llvh::SmallString<64> entryPointFilename{ fileBufs[0][0].file->getBufferIdentifier()}; llvh::sys::path::replace_path_prefix( entryPointFilename, rootPath, "./", llvh::sys::path::Style::posix); // The top-level function is empty, due to the fact that it is not intended to // be executed. The Runtime must choose and execute the correct entry point // (main) module, from which other modules may be `require`d. 
auto globalMemBuffer = llvh::MemoryBuffer::getMemBufferCopy("", "<global>"); auto *globalAST = parseJS(context, semCtx, std::move(globalMemBuffer)); if (generateIR) { // If we aren't planning to do anything with the IR, // don't attempt to generate it. generateIRFromESTree(globalAST, &M, declFileList, {}); } std::vector<std::unique_ptr<SourceMap>> inputSourceMaps{}; inputSourceMaps.push_back(nullptr); std::vector<std::string> sources{"<global>"}; Function *topLevelFunction = generateIR ? M.getTopLevelFunction() : nullptr; llvh::DenseSet<uint32_t> generatedModuleIDs; for (auto &entry : fileBufs) { uint32_t segmentID = entry.first; for (ModuleInSegment &moduleInSegment : entry.second) { auto &fileBuf = moduleInSegment.file; llvh::SmallString<64> filename{fileBuf->getBufferIdentifier()}; if (sourceMapGen && generatedModuleIDs.count(moduleInSegment.id) == 0) { // This is the first time we're generating IR for this module. sources.push_back(fileBuf->getBufferIdentifier()); if (moduleInSegment.sourceMap) { SourceErrorManager sm; auto inputMap = SourceMapParser::parse(*moduleInSegment.sourceMap, sm); if (!inputMap) { // parse() returns nullptr on failure and reports its own errors. return false; } inputSourceMaps.push_back(std::move(inputMap)); } else { inputSourceMaps.push_back(nullptr); } } generatedModuleIDs.insert(moduleInSegment.id); llvh::sys::path::replace_path_prefix( filename, rootPath, "./", llvh::sys::path::Style::posix); // TODO: use sourceMapTranslator for CJS module. 
auto *ast = parseJS( context, semCtx, std::move(fileBuf), /*sourceMap*/ nullptr, /*sourceMapTranslator*/ nullptr, /*wrapCJSModule*/ true); if (!ast) { return false; } if (!generateIR) { continue; } generateIRForCJSModule( cast<ESTree::FunctionExpressionNode>(ast), segmentID, moduleInSegment.id, llvh::sys::path::remove_leading_dotslash(filename), &M, topLevelFunction, declFileList); } } if (sourceMapGen) { for (const auto &source : sources) { sourceMapGen->addSource(source); } sourceMapGen->setInputSourceMaps(std::move(inputSourceMaps)); } return true; } /// Disassemble the BCProvider \p bytecode to the output stream specified by the /// command line flags. \return a CompileResult for the disassembly. CompileResult disassembleBytecode(std::unique_ptr<hbc::BCProvider> bytecode) { assert( cl::BytecodeFormat == cl::BytecodeFormatKind::HBC && "validateFlags() should enforce only HBC files may be disassembled"); OutputStream fileOS(llvh::outs()); if (!cl::BytecodeOutputFilename.empty() && !fileOS.open(cl::BytecodeOutputFilename, F_Text)) { return OutputFileError; } hbc::DisassemblyOptions disassemblyOptions = cl::Pretty ? hbc::DisassemblyOptions::Pretty : hbc::DisassemblyOptions::None; hbc::BytecodeDisassembler disassembler(std::move(bytecode)); disassembler.setOptions(disassemblyOptions); disassembler.disassemble(fileOS.os()); if (!fileOS.close()) return OutputFileError; return Success; } /// Process the bytecode file given in \p fileBuf. Disassemble it if requested, /// otherwise return it as the CompileResult artifact. \return a compile result. 
CompileResult processBytecodeFile(std::unique_ptr<llvh::MemoryBuffer> fileBuf) {
  assert(cl::BytecodeMode && "Input files must be bytecode");
  assert(
      cl::BytecodeFormat == cl::BytecodeFormatKind::HBC &&
      "Only HBC bytecode format may be loaded");

  // Capture everything we need to know about the buffer now: ownership of
  // fileBuf is given away right below.
  bool isMmapped =
      fileBuf->getBufferKind() == llvh::MemoryBuffer::MemoryBuffer_MMap;
  char *bufStart = const_cast<char *>(fileBuf->getBufferStart());
  size_t bufSize = fileBuf->getBufferSize();
  std::string filename = fileBuf->getBufferIdentifier();

  auto buffer = std::make_unique<OwnedMemoryBuffer>(std::move(fileBuf));
  // The result pairs the provider with an error description; the provider is
  // null on failure.
  auto ret =
      hbc::BCProviderFromBuffer::createBCProviderFromBuffer(std::move(buffer));
  if (!ret.first) {
    // Terminate the line like every other diagnostic in this file.
    llvh::errs() << "Error deserializing bytecode: " << ret.second << '\n';
    return InputFileError;
  }
  std::unique_ptr<hbc::BCProviderFromBuffer> bytecode = std::move(ret.first);

  if (cl::DumpTarget != Execute) {
    assert(
        cl::DumpTarget == DumpBytecode &&
        "validateFlags() should enforce bytecode files "
        "may only have a dump target of bytecode");
    return disassembleBytecode(std::move(bytecode));
  }

  // Hand the provider back for execution, together with the description of
  // the underlying buffer.
  CompileResult result{Success};
  result.bytecodeProvider = std::move(bytecode);
  result.bytecodeBufferInfo =
      BytecodeBufferInfo{isMmapped, bufStart, bufSize, std::move(filename)};
  return result;
}

/// Compile the given module \p M with the options \p genOptions in a form
/// suitable for immediate execution (i.e. no expectation of persistence).
/// \return the compile result.
CompileResult generateBytecodeForExecution(
    Module &M,
    const BytecodeGenerationOptions &genOptions) {
  CompileResult result{Success};
  if (cl::BytecodeFormat == cl::BytecodeFormatKind::HBC) {
    // Generate the bytecode module in memory and wrap it in a provider.
    result.bytecodeProvider = hbc::BCProviderFromSrc::createBCProviderFromSrc(
        hbc::generateBytecodeModule(&M, M.getTopLevelFunction(), genOptions));
  } else {
    llvm_unreachable("Invalid bytecode kind for execution");
    result = InvalidFlags;
  }
  return result;
}

/// Compile the module \p M with the options \p genOptions, serializing the
/// result to \p OS. If sourceMapGenOrNull is not null, populate it.
/// When the dump target is DumpBytecode, the generated bytecode is also
/// disassembled, and a failure to do so is reported to the caller.
/// \param segment the segment to generate; its ID (0 when absent) selects the
///   base bytecode to use.
/// \return the CompileResult.
/// The corresponding base bytecode will be removed from \p baseBytecodeMap.
CompileResult generateBytecodeForSerialization(
    raw_ostream &OS,
    Module &M,
    const BytecodeGenerationOptions &genOptions,
    const SHA1 &sourceHash,
    hermes::OptValue<uint32_t> segment,
    SourceMapGenerator *sourceMapGenOrNull,
    BaseBytecodeMap &baseBytecodeMap) {
  // Serialize the bytecode to the file.
  if (cl::BytecodeFormat == cl::BytecodeFormatKind::HBC) {
    std::unique_ptr<hbc::BCProviderFromBuffer> baseBCProvider = nullptr;
    auto itr = baseBytecodeMap.find(segment ? *segment : 0);
    if (itr != baseBytecodeMap.end()) {
      baseBCProvider = std::move(itr->second);
      // We want to erase it from the map because unique_ptr can only
      // have one owner.
      baseBytecodeMap.erase(itr);
    }
    auto bytecodeModule = hbc::generateBytecode(
        &M,
        OS,
        genOptions,
        sourceHash,
        segment,
        sourceMapGenOrNull,
        std::move(baseBCProvider));

    if (cl::DumpTarget == DumpBytecode) {
      // Propagate the outcome instead of dropping it, so that a failure to
      // write the disassembly is not reported as success.
      return disassembleBytecode(
          hbc::BCProviderFromSrc::createBCProviderFromSrc(
              std::move(bytecodeModule)));
    }
  } else {
    llvm_unreachable("Invalid bytecode kind");
  }
  return Success;
}

/// Compiles the given files \p fileBufs with the context \p context,
/// respecting the command line flags.
/// \return a CompileResult containing the compilation status and artifacts.
CompileResult processSourceFiles( std::shared_ptr<Context> context, SegmentTable fileBufs) { assert(!fileBufs.empty() && "Need at least one file to compile"); assert(context && "Need a context to compile using"); assert(!cl::BytecodeMode && "Input files must not be bytecode"); llvh::SHA1 hasher; for (const auto &entry : fileBufs) { for (const auto &fileAndMap : entry.second) { const auto &file = fileAndMap.file; hasher.update( llvh::StringRef(file->getBufferStart(), file->getBufferSize())); } } auto rawFinalHash = hasher.final(); SHA1 sourceHash{}; assert( rawFinalHash.size() == SHA1_NUM_BYTES && "Incorrect length of SHA1 hash"); std::copy(rawFinalHash.begin(), rawFinalHash.end(), sourceHash.begin()); #ifndef NDEBUG if (cl::LexerOnly) { unsigned count = 0; for (auto &entry : fileBufs) { for (auto &fileAndMap : entry.second) { parser::JSLexer jsLexer( std::move(fileAndMap.file), context->getSourceErrorManager(), context->getAllocator()); while (jsLexer.advance()->getKind() != parser::TokenKind::eof) ++count; } } llvh::outs() << count << " tokens lexed\n"; return Success; } #endif // A list of parsed global definition files. DeclarationFileListTy declFileList; // Load the runtime library. std::unique_ptr<llvh::MemoryBuffer> libBuffer; switch (cl::BytecodeFormat) { case cl::BytecodeFormatKind::HBC: libBuffer = llvh::MemoryBuffer::getMemBuffer(libhermes); break; } if (!loadGlobalDefinition(*context, std::move(libBuffer), declFileList)) { return LoadGlobalsFailed; } // Load the global property definitions. for (const auto &fileName : cl::IncludeGlobals) { auto fileBuf = memoryBufferFromFile(fileName); if (!fileBuf) return InputFileError; LLVM_DEBUG( llvh::dbgs() << "Parsing global definitions from " << fileName << '\n'); if (!loadGlobalDefinition(*context, std::move(fileBuf), declFileList)) { return LoadGlobalsFailed; } } // Create the source map if requested. 
llvh::Optional<SourceMapGenerator> sourceMapGen{}; if (cl::OutputSourceMap) { sourceMapGen = SourceMapGenerator{}; } Module M(context); sem::SemContext semCtx{}; if (context->getUseCJSModules()) { // Allow the IR generation function to populate inputSourceMaps to ensure // proper source map ordering. if (!generateIRForSourcesAsCJSModules( M, semCtx, declFileList, std::move(fileBufs), sourceMapGen ? &*sourceMapGen : nullptr)) { return ParsingFailed; } if (cl::DumpTarget < DumpIR) { return Success; } } else { if (sourceMapGen) { for (const auto &filename : cl::InputFilenames) { sourceMapGen->addSource(filename == "-" ? "<stdin>" : filename); } } auto &mainFileBuf = fileBufs[0][0]; std::unique_ptr<SourceMap> sourceMap{nullptr}; if (mainFileBuf.sourceMap) { SourceErrorManager sm; sourceMap = SourceMapParser::parse(*mainFileBuf.sourceMap, sm); if (!sourceMap) { // parse() returns nullptr on failure and reports its own errors. return InputFileError; } } auto sourceMapTranslator = std::make_shared<SourceMapTranslator>(context->getSourceErrorManager()); context->getSourceErrorManager().setTranslator(sourceMapTranslator); ESTree::NodePtr ast = parseJS( context, semCtx, std::move(mainFileBuf.file), std::move(sourceMap), sourceMapTranslator); if (!ast) { return ParsingFailed; } if (cl::DumpTarget < DumpIR) { return Success; } generateIRFromESTree(ast, &M, declFileList, {}); } // Bail out if there were any errors. We can't ensure that the module is in // a valid state. if (auto N = context->getSourceErrorManager().getErrorCount()) { llvh::errs() << "Emitted " << N << " errors. exiting.\n"; return ParsingFailed; } // Run custom optimization pipeline. 
if (!cl::CustomOptimize.empty()) { std::vector<std::string> opts( cl::CustomOptimize.begin(), cl::CustomOptimize.end()); if (!runCustomOptimizationPasses(M, opts)) { llvh::errs() << "Invalid custom optimizations selected.\n\n" << PassManager::getCustomPassText(); return InvalidFlags; } } else { switch (cl::OptimizationLevel) { case cl::OptLevel::O0: runNoOptimizationPasses(M); break; case cl::OptLevel::Og: runDebugOptimizationPasses(M); break; case cl::OptLevel::OMax: runFullOptimizationPasses(M); break; } } // Bail out if there were any errors during optimization. if (auto N = context->getSourceErrorManager().getErrorCount()) { llvh::errs() << "Emitted " << N << " errors. exiting.\n"; return OptimizationFailed; } // In dbg builds, verify the module before we emit bytecode. if (cl::VerifyIR) { bool failedVerification = verifyModule(M, &llvh::errs()); if (failedVerification) { M.dump(); return VerificationFailed; } assert(!failedVerification && "Module verification failed!"); } if (cl::DumpTarget == DumpIR) { M.dump(); return Success; } #ifndef NDEBUG if (cl::DumpTarget == ViewCFG) { M.viewGraph(); return Success; } #endif BytecodeGenerationOptions genOptions{cl::DumpTarget}; genOptions.optimizationEnabled = cl::OptimizationLevel > cl::OptLevel::Og; genOptions.prettyDisassemble = cl::Pretty; genOptions.basicBlockProfiling = cl::BasicBlockProfiling; // The static builtin setting should be set correctly after command line // options parsing and js parsing. Set the bytecode header flag here. genOptions.staticBuiltinsEnabled = context->getStaticBuiltinOptimization(); genOptions.padFunctionBodiesPercent = cl::PadFunctionBodiesPercent; // If the user requests to output a source map, then do not also emit debug // info into the bytecode. genOptions.stripDebugInfoSection = cl::OutputSourceMap || cl::DebugInfoLevel == cl::DebugLevel::g0; genOptions.stripFunctionNames = cl::StripFunctionNames; // If the dump target is None, return bytecode in an executable form. 
if (cl::DumpTarget == Execute) { assert( !sourceMapGen && "validateFlags() should enforce no source map output for execution"); return generateBytecodeForExecution(M, genOptions); } BaseBytecodeMap baseBytecodeMap; if (cl::BytecodeFormat == cl::BytecodeFormatKind::HBC && !cl::BaseBytecodeFile.empty()) { if (!readBaseBytecodeMap( baseBytecodeMap, cl::BaseBytecodeFile, context->getAllocator())) { return InputFileError; } } CompileResult result{Success}; StringRef base = cl::BytecodeOutputFilename; if (context->getSegments().size() < 2) { OutputStream fileOS{llvh::outs()}; if (!base.empty() && !fileOS.open(base, F_None)) { return OutputFileError; } auto result = generateBytecodeForSerialization( fileOS.os(), M, genOptions, sourceHash, llvh::None, sourceMapGen ? sourceMapGen.getPointer() : nullptr, baseBytecodeMap); if (result.status != Success) { return result; } if (!fileOS.close()) return OutputFileError; } else { OutputStream manifestOS{llvh::nulls()}; if (!base.empty() && !cl::BytecodeManifestFilename.empty()) { llvh::SmallString<32> manifestPath = llvh::sys::path::parent_path(base); llvh::sys::path::append(manifestPath, cl::BytecodeManifestFilename); if (!manifestOS.open(manifestPath, F_Text)) return OutputFileError; } JSONEmitter manifest{manifestOS.os(), /* pretty */ true}; manifest.openArray(); for (const auto segment : context->getSegments()) { std::string filename = base.str(); if (segment != 0) { filename += "." + std::to_string(segment); } std::string flavor = "hbc-seg-" + std::to_string(segment); OutputStream fileOS{llvh::outs()}; if (!base.empty() && !fileOS.open(filename, F_None)) { return OutputFileError; } auto segResult = generateBytecodeForSerialization( fileOS.os(), M, genOptions, sourceHash, segment, sourceMapGen ? sourceMapGen.getPointer() : nullptr, baseBytecodeMap); if (segResult.status != Success) { return segResult; } if (!fileOS.close()) return OutputFileError; // Add to the manifest. 
manifest.openDict(); manifest.emitKeyValue("resource", llvh::sys::path::filename(base)); manifest.emitKeyValue("flavor", flavor); manifest.emitKeyValue("location", llvh::sys::path::filename(filename)); manifest.closeDict(); } manifest.closeArray(); if (!manifestOS.close()) { return OutputFileError; } result = Success; } // Output the source map if requested. if (cl::OutputSourceMap) { OutputStream OS; if (!OS.open(base.str() + ".map", F_Text)) return OutputFileError; sourceMapGen->outputAsJSON(OS.os()); if (!OS.close()) return OutputFileError; } return result; } /// Print the Hermes version to the stream \p s, outputting the \p vmStr (which /// may be empty). /// \param features when true, print the list of enabled features. void printHermesVersion( llvh::raw_ostream &s, const char *vmStr = "", bool features = true) { s << "Hermes JavaScript compiler" << vmStr << ".\n" #ifdef HERMES_RELEASE_VERSION << " Hermes release version: " << HERMES_RELEASE_VERSION << "\n" #endif << " HBC bytecode version: " << hermes::hbc::BYTECODE_VERSION << "\n" << "\n"; if (features) { s << " Features:\n" #ifdef HERMES_ENABLE_DEBUGGER << " Debugger\n" #endif << " Zip file input\n"; } } } // namespace namespace hermes { namespace driver { void printHermesCompilerVMVersion(llvh::raw_ostream &s) { printHermesVersion(s, " and Virtual Machine"); } void printHermesCompilerVersion(llvh::raw_ostream &s) { printHermesVersion(s); } OutputFormatKind outputFormatFromCommandLineOptions() { return cl::DumpTarget; } CompileResult compileFromCommandLineOptions() { #if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS) if (cl::PrintStats) hermes::EnableStatistics(); #endif // Set up and validate flags. setFlagDefaults(); if (!validateFlags()) return InvalidFlags; // Load input files. SegmentTable fileBufs{}; // Allocator for the metadata table. ::hermes::parser::JSLexer::Allocator metadataAlloc; // Resolution table in metadata, null if none could be read. 
std::unique_ptr<Context::ResolutionTable> resolutionTable = nullptr; // Segment IDs in metadata. std::vector<uint32_t> segments; // Attempt to open the first file as a Zip file. struct zip_t *zip = zip_open(cl::InputFilenames[0].data(), 0, 'r'); if (llvh::sys::fs::is_directory(cl::InputFilenames[0]) || zip) { ::hermes::parser::JSONObject *metadata = readInputFilenamesFromDirectoryOrZip( cl::InputFilenames[0], fileBufs, segments, metadataAlloc, zip); if (zip) { zip_close(zip); } if (!metadata) { return InputFileError; } resolutionTable = readResolutionTable(metadata); } else { // If we aren't reading from a dir or a zip, we have only one segment. segments.push_back(0); uint32_t nextModuleID = 0; ModuleIDsTable moduleIDs; SegmentTableEntry entry{}; for (const std::string &filename : cl::InputFilenames) { auto fileBuf = memoryBufferFromFile(filename, true); if (!fileBuf) return InputFileError; auto emplaceRes = moduleIDs.try_emplace(filename, nextModuleID); auto moduleID = emplaceRes.first->second; if (emplaceRes.second) { ++nextModuleID; } entry.push_back({moduleID, std::move(fileBuf), nullptr}); } // Read input source map if available. if (!cl::InputSourceMap.empty()) { // TODO: support multiple JS sources from command line. if (cl::InputFilenames.size() != 1) { llvh::errs() << "Error: only support single js file for input source map." 
<< '\n'; return InvalidFlags; } assert(entry.size() == 1 && "Can't have more than one entries."); entry[0].sourceMap = memoryBufferFromFile(cl::InputSourceMap, /*stdinOk*/ false); } fileBufs.emplace(0, std::move(entry)); } if (cl::BytecodeMode) { assert( fileBufs.size() == 1 && fileBufs[0].size() == 1 && "validateFlags() should enforce exactly one bytecode input file"); return processBytecodeFile(std::move(fileBufs[0][0].file)); } else { std::shared_ptr<Context> context = createContext(std::move(resolutionTable), std::move(segments)); return processSourceFiles(context, std::move(fileBufs)); } } } // namespace driver } // namespace hermes #undef DEBUG_TYPE

lib/CompilerDriver/CompilerDriver.cpp (1,696 lines of code) (raw):