/*
   +----------------------------------------------------------------------+
   | HipHop for PHP                                                       |
   +----------------------------------------------------------------------+
   | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com)  |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,     |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.php.net/license/3_01.txt                                  |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
*/

#include "hphp/compiler/package.h"

#include <exception>
#include <fstream>
#include <map>
#include <memory>
#include <set>
#include <sys/stat.h>
#include <sys/types.h>
#include <utility>
#include <vector>

#include <boost/filesystem.hpp>

#include <folly/String.h>
#include <folly/portability/Dirent.h>
#include <folly/portability/Unistd.h>

#include "hphp/compiler/analysis/analysis_result.h"
#include "hphp/compiler/option.h"
#include "hphp/hhvm/process-init.h"
#include "hphp/runtime/base/coeffects-config.h"
#include "hphp/runtime/base/execution-context.h"
#include "hphp/runtime/base/file-util-defs.h"
#include "hphp/runtime/base/file-util.h"
#include "hphp/runtime/base/program-functions.h"
#include "hphp/runtime/vm/as.h"
#include "hphp/runtime/vm/func-emitter.h"
#include "hphp/runtime/vm/unit-emitter.h"
#include "hphp/runtime/vm/unit-parser.h"
#include "hphp/util/exception.h"
#include "hphp/util/extern-worker.h"
#include "hphp/util/hash.h"
#include "hphp/util/logger.h"
#include "hphp/util/process.h"
#include "hphp/util/timer.h"
#include "hphp/zend/zend-string.h"

using namespace HPHP;
using namespace extern_worker;

namespace fs = boost::filesystem;

///////////////////////////////////////////////////////////////////////////////

const StaticString s_EntryPoint("__EntryPoint");

///////////////////////////////////////////////////////////////////////////////

// Configuration for parse workers. This should contain any runtime
// options which can affect HackC (or the interface to it).
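//
// The option list itself is stamped out in several places below via the
// UNITCACHEFLAGS() X-macro: each `#define R(Opt)` supplies the per-option
// expansion, which keeps make(), apply(), serde(), and the member
// declarations in sync from a single list.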
struct Package::Config {
  Config() = default;

  static Config make() {
    Config c;
    #define R(Opt) c.Opt = RO::Opt;
    UNITCACHEFLAGS()
    #undef R
    c.EvalAbortBuildOnCompilerError = RO::EvalAbortBuildOnCompilerError;
    c.EvalAbortBuildOnVerifyError = RO::EvalAbortBuildOnVerifyError;
    c.IncludeRoots = RO::IncludeRoots;
    c.coeffects = CoeffectsConfig::exportForParse();
    return c;
  }

  void apply() const {
    #define R(Opt) RO::Opt = Opt;
    UNITCACHEFLAGS()
    #undef R
    RO::EvalAbortBuildOnCompilerError = EvalAbortBuildOnCompilerError;
    RO::EvalAbortBuildOnVerifyError = EvalAbortBuildOnVerifyError;
    RO::IncludeRoots = IncludeRoots;
    CoeffectsConfig::importForParse(coeffects);
  }

  template <typename SerDe> void serde(SerDe& sd) {
    #define R(Opt) sd(Opt);
    UNITCACHEFLAGS()
    #undef R
    sd(EvalAbortBuildOnCompilerError)
      (EvalAbortBuildOnVerifyError)
      (IncludeRoots)
      (coeffects);
  }

private:
  #define R(Opt) decltype(RuntimeOption::Opt) Opt;
  UNITCACHEFLAGS()
  #undef R
  bool EvalAbortBuildOnCompilerError;
  bool EvalAbortBuildOnVerifyError;
  decltype(RO::IncludeRoots) IncludeRoots;
  CoeffectsConfig coeffects;
};

///////////////////////////////////////////////////////////////////////////////

Package::AsyncState::AsyncState()
  : m_executor{
      "HPHPcWorker",
      0,
      size_t(Option::ParserThreadCount <= 0 ? 1 : Option::ParserThreadCount),
      [] {
        hphp_thread_init();
        g_context.getCheck();
      },
      [] { hphp_thread_exit(); },
      std::chrono::minutes{15}
    }
  , m_client{m_executor.sticky(), makeOptions()}
  , m_config{
      [this] { return m_client.store(Config::make()); },
      m_executor.sticky()
    }
  , m_repoOptions{m_client}
{
}

Options Package::AsyncState::makeOptions() {
  Options options;
  options
    .setUseCase(Option::ExternWorkerUseCase)
    .setUseSubprocess(Option::ExternWorkerForceSubprocess
                      ? Options::UseSubprocess::Always
                      : Options::UseSubprocess::Fallback)
    .setCacheExecs(Option::ExternWorkerUseExecCache)
    .setCleanup(Option::ExternWorkerCleanup)
    .setUseEdenFS(RuntimeOption::EvalUseEdenFS);
  if (Option::ExternWorkerTimeoutSecs > 0) {
    options.setTimeout(std::chrono::seconds{Option::ExternWorkerTimeoutSecs});
  }
  if (!Option::ExternWorkerWorkingDir.empty()) {
    options.setWorkingDir(Option::ExternWorkerWorkingDir);
  }
  return options;
}

///////////////////////////////////////////////////////////////////////////////

Package::Package(const char* root, bool parseOnDemand)
  : m_parseFailed{false}
  , m_parseOnDemand{parseOnDemand}
  , m_cacheHits{0}
  , m_readFiles{0}
  , m_storedFiles{0}
  , m_total{0}
{
  m_root = FileUtil::normalizeDir(root);
  m_ar = std::make_shared<AnalysisResult>();
  m_fileCache = std::make_shared<FileCache>();
}

void Package::createAsyncState() {
  assertx(!m_async);
  m_async = std::make_unique<AsyncState>();
}
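// Lifecycle note: parse() asserts that createAsyncState() has already
// been called, and clearAsyncState() below may hand back a thread that
// performs the teardown; the caller is presumably expected to join it
// once the results are no longer needed.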
Optional<std::thread> Package::clearAsyncState() {
  if (!m_async) return std::nullopt;

  if (!Option::ParserAsyncCleanup) {
    // If we don't want to cleanup asynchronously, do so now.
    m_async.reset();
    return std::nullopt;
  }

  // All the thread does is reset the unique_ptr to run the dtor.
  return std::thread{
    [a = std::move(m_async)] () mutable { a.reset(); }
  };
}

void Package::addInputList(const std::string& listFileName) {
  assert(!listFileName.empty());
  auto const f = fopen(listFileName.c_str(), "r");
  if (f == nullptr) {
    throw Exception("Unable to open %s: %s",
                    listFileName.c_str(),
                    folly::errnoStr(errno).c_str());
  }
  char fileName[PATH_MAX];
  while (fgets(fileName, sizeof(fileName), f)) {
    int len = strlen(fileName);
    // Guard against a zero-length read (e.g. an embedded NUL) before
    // stripping the trailing newline.
    if (len > 0 && fileName[len - 1] == '\n') fileName[len - 1] = '\0';
    len = strlen(fileName);
    if (len) {
      if (FileUtil::isDirSeparator(fileName[len - 1])) {
        addDirectory(fileName);
      } else {
        addSourceFile(fileName);
      }
    }
  }
  fclose(f);
}

void Package::addStaticFile(const std::string& fileName) {
  assert(!fileName.empty());
  m_extraStaticFiles.insert(fileName);
}

void Package::addStaticDirectory(const std::string& path) {
  m_staticDirectories.insert(path);
}

void Package::addDirectory(const std::string& path) {
  m_directories.emplace(path);
}

void Package::addSourceFile(const std::string& fileName) {
  if (fileName.empty()) return;
  auto canonFileName =
    FileUtil::canonicalize(String(fileName)).toCppString();
  m_filesToParse.emplace(std::move(canonFileName), true);
}

std::shared_ptr<FileCache> Package::getFileCache() {
  for (auto const& dir : m_directories) {
    std::vector<std::string> files;
    FileUtil::find(files, m_root, dir, /* php */ false,
                   &Option::PackageExcludeStaticDirs,
                   &Option::PackageExcludeStaticFiles);
    Option::FilterFiles(files, Option::PackageExcludeStaticPatterns);
    for (auto& file : files) {
      auto const rpath = file.substr(m_root.size());
      if (!m_fileCache->fileExists(rpath.c_str())) {
        Logger::Verbose("saving %s", file.c_str());
        m_fileCache->write(rpath.c_str(), file.c_str());
      }
    }
  }
  for (auto const& dir : m_staticDirectories) {
    std::vector<std::string> files;
    FileUtil::find(files, m_root, dir, /* php */ false);
    for (auto& file : files) {
      auto const rpath = file.substr(m_root.size());
      if (!m_fileCache->fileExists(rpath.c_str())) {
        Logger::Verbose("saving %s", file.c_str());
        m_fileCache->write(rpath.c_str(), file.c_str());
      }
    }
  }
  for (auto const& file : m_extraStaticFiles) {
    if (!m_fileCache->fileExists(file.c_str())) {
      auto const fullpath = m_root + file;
      Logger::Verbose("saving %s", fullpath.c_str());
      m_fileCache->write(file.c_str(), fullpath.c_str());
    }
  }
  for (auto const& pair : m_discoveredStaticFiles) {
    auto const file = pair.first.c_str();
    if (!m_fileCache->fileExists(file)) {
      const char *fullpath = pair.second.c_str();
      Logger::Verbose("saving %s", fullpath[0] ? fullpath : file);
      if (fullpath[0]) {
        m_fileCache->write(file, fullpath);
      } else {
        m_fileCache->write(file);
      }
    }
  }
  return m_fileCache;
}

///////////////////////////////////////////////////////////////////////////////

namespace {

std::unique_ptr<UnitEmitter>
createSymlinkWrapper(const std::string& fileName,
                     const std::string& targetPath,
                     std::unique_ptr<UnitEmitter> origUE) {
  auto found = false;

  std::ostringstream ss;
  for (auto const& fe : origUE->fevec()) {
    auto const& attrs = fe->userAttributes;
    if (attrs.find(s_EntryPoint.get()) != attrs.end()) {
      found = true;
      std::string escapedName;
      folly::cEscape(fe->name->toCppString(), escapedName);
      ss << ".function{} [unique persistent "
            "\"__EntryPoint\"(\"\"\"y:0:{}\"\"\")] (4,7) <\"\" N > "
            "entrypoint$symlink$" << string_sha1(fileName) << "() {\n"
         << "  String \"" << targetPath << "\"\n"
         << "  ReqOnce\n"
         << "  PopC\n"
         << "  NullUninit\n"
         << "  NullUninit\n"
         << "  FCallFuncD <> 0 1 \"\" \"\" - \"\" \"" << escapedName << "\"\n"
         << "  PopC\n"
         << "  Null\n"
         << "  RetC\n"
         << "}\n\n";
      break;
    }
  }
  if (!found) return nullptr;

  auto const content = ss.str();
  return assemble_string(
    content.data(),
    content.size(),
    fileName.c_str(),
    SHA1{string_sha1(content)},
    Native::s_noNativeFuncs,
    false
  );
}

///////////////////////////////////////////////////////////////////////////////

// Metadata for a parse job. Just filename things that we need to
// resolve when we have the whole source tree available.
struct FileMeta {
  FileMeta() = default;
  FileMeta(std::string f, Optional<std::string> o)
    : m_filename{std::move(f)}, m_targetPath{std::move(o)} {}

  // The (relative) filename of the file
  std::string m_filename;
  // If the file is a symlink, what its target is
  Optional<std::string> m_targetPath;

  template <typename SerDe> void serde(SerDe& sd) {
    sd(m_filename)
      (m_targetPath);
  }
};

// Wraps a unique_ptr<UnitEmitter> as a return value from a parse job,
// and provides its serialization logic.
struct UnitEmitterWrapper {
  UnitEmitterWrapper() = default;
  /* implicit */ UnitEmitterWrapper(std::unique_ptr<UnitEmitter> ue)
    : m_ue{std::move(ue)} {}

  std::unique_ptr<UnitEmitter> m_ue;

  template <typename SerDe> void serde(SerDe& sd) {
    if constexpr (SerDe::deserializing) {
      assertx(!m_ue);

      bool present;
      sd(present);
      if (present) {
        SHA1 sha1;
        const StringData* filepath;
        sd(sha1);
        sd(filepath);

        auto ue = std::make_unique<UnitEmitter>(
          sha1, SHA1{}, Native::s_noNativeFuncs, false
        );
        ue->m_filepath = makeStaticString(filepath);
        ue->serde(sd, false);
        m_ue = std::move(ue);
      }
    } else {
      if (m_ue) {
        sd(true);
        sd(m_ue->sha1());
        sd(m_ue->m_filepath);
        m_ue->serde(sd, false);
      } else {
        sd(false);
      }
    }
  }
};
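// The wire format produced by the serde above is: a "present" bool,
// then (only if present) the unit's SHA1, its filepath, and finally
// the UnitEmitter body via UnitEmitter::serde(sd, false). The
// deserializing branch mirrors this exactly.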
// Metadata about the UnitEmitter obtained during parsing. This is
// returned separately from the UnitEmitter, since we need this to
// find new files for parse on-demand, and we can avoid loading the
// entire UnitEmitter just to get this.
struct ParseMeta {
  // Symbols present in the unit. This will be used to find new files
  // for parse on-demand.
  SymbolRefs m_symbol_refs;
  // If not empty, parsing resulted in an ICE and configuration
  // indicated that this should be fatal.
  std::string m_abort;

  template <typename SerDe> void serde(SerDe& sd) {
    sd(m_symbol_refs)
      (m_abort);
  }
};

// Extern-worker job for parsing a source file into a UnitEmitter
// (and some other metadata).
struct ParseJob {
  static std::string name() { return "hphpc-parse"; }

  static void init(const Package::Config& config) {
    rds::local::init();

    Hdf hdf;
    IniSetting::Map ini = IniSetting::Map::object;
    RO::Load(ini, hdf);

    config.apply();
    Logger::LogLevel = Logger::LogError;

    // Inhibit extensions and systemlib from being initialized. It
    // takes a while and we don't need it.
    register_process_init(true);
    hphp_process_init(true);

    // Don't use the unit emitter's caching here; we're relying on
    // extern-worker to do that for us.
    g_unit_emitter_cache_hook = nullptr;

    // This is a lie, but a lot of stuff breaks if you don't set it to
    // true (when false, everything assumes you're parsing systemlib).
    SystemLib::s_inited = true;
  }
  static void fini() {
    hphp_process_exit();
    rds::local::fini();
  }

  static Multi<ParseMeta, UnitEmitterWrapper>
  makeOutput(std::unique_ptr<UnitEmitter> ue) {
    if (!ue) return std::make_tuple(ParseMeta{}, UnitEmitterWrapper{});
    auto symbolRefs = std::move(ue->m_symbol_refs);
    return std::make_tuple(ParseMeta{std::move(symbolRefs)}, std::move(ue));
  }

  static Multi<ParseMeta, UnitEmitterWrapper> run(
      const std::string& content,
      const FileMeta& meta,
      const RepoOptionsFlags& repoOptions) {
    auto const& fileName = meta.m_filename;

    try {
      if (RO::EvalAllowHhas) {
        if (fileName.size() > 5 &&
            !fileName.compare(fileName.size() - 5,
                              std::string::npos,
                              ".hhas")) {
          auto ue = assemble_string(
            content.data(),
            content.size(),
            fileName.c_str(),
            SHA1{string_sha1(content)},
            Native::s_noNativeFuncs
          );
          if (meta.m_targetPath) {
            ue = createSymlinkWrapper(
              fileName, *meta.m_targetPath, std::move(ue)
            );
            if (!ue) {
              // If the symlink contains no EntryPoint we don't do
              // anything, but it is still a success.
              return makeOutput(nullptr);
            }
          }
          return makeOutput(std::move(ue));
        }
      }

      LazyUnitContentsLoader loader{
        SHA1{mangleUnitSha1(string_sha1(content), fileName, repoOptions)},
        content,
        repoOptions
      };

      auto const mode =
        RO::EvalAbortBuildOnCompilerError ? CompileAbortMode::AllErrors :
        RO::EvalAbortBuildOnVerifyError   ? CompileAbortMode::VerifyErrors :
                                            CompileAbortMode::OnlyICE;

      auto uc = UnitCompiler::create(
        loader,
        fileName.c_str(),
        Native::s_noNativeFuncs,
        false,
        false
      );
      assertx(uc);

      std::unique_ptr<UnitEmitter> ue;
      try {
        auto cacheHit = false;
        ue = uc->compile(cacheHit, mode);
        // We disabled UnitCompiler caching, so we shouldn't have any
        // hits.
        assertx(!cacheHit);
      } catch (const CompilerAbort& exn) {
        return std::make_tuple(ParseMeta{{}, exn.what()}, UnitEmitterWrapper{});
      }

      if (ue) {
        if (!ue->m_ICE && meta.m_targetPath) {
          ue = createSymlinkWrapper(
            fileName, *meta.m_targetPath, std::move(ue)
          );
          if (!ue) {
            // If the symlink contains no EntryPoint we don't do
            // anything, but it is still a success.
            return makeOutput(nullptr);
          }
        }
        return makeOutput(std::move(ue));
      } else {
        throw Error{
          folly::sformat(
            "Unable to compile using {} compiler: {}",
            uc->getName(),
            fileName
          )
        };
      }
    } catch (const std::exception& exn) {
      throw Error{
        folly::sformat("While parsing `{}`: {}", fileName, exn.what())
      };
    }
  }
};

Job<ParseJob> g_parseJob;

}

///////////////////////////////////////////////////////////////////////////////

/*
 * File grouping:
 *
 * Since every invocation of an extern-worker worker has some fixed
 * overhead, we want to parse multiple files per invocation. We also
 * want to leverage any caching that extern-worker has for job
 * execution. Since we assume that source files will change over time,
 * we don't want to group too many files together (if one file
 * changes, we'll have to reparse all of them in that group).
 * Furthermore, to maximize cache effectiveness, we want to group
 * files together in a deterministic way. Finally, there may be
 * different "subsections" of the source tree, which are only parsed
 * depending on the input file configuration (for example, some
 * builds may discard test directories and some might not). Again, we
 * want to maximize caching across these different "flavors" of builds
 * and try to avoid grouping together files from these different
 * subsets.
 *
 * We utilize the following scheme to try to accomplish all
 * this. First we define a group size (Option::ParserGroupSize). This
 * is the number of files (on average) we'll group together in one
 * job. Input files are discovered by walking directories
 * recursively. We proceed bottom up. For every directory, we first
 * process its sub-directories. Each sub-directory processed returns
 * the groups it has already created (each containing roughly
 * Option::ParserGroupSize files), along with any "left over" files
 * which have not been grouped. These results are all aggregated
 * together, and any files in the current directory are added to the
 * ungrouped set. If the number of files in the ungrouped set exceeds
 * Option::ParserDirGroupSizeLimit, then we attempt to group all of
 * those files.
 *
 * Grouping is done by hashing the files' names, and then using
 * consistent_hash to assign them to one of N buckets (where N is the
 * number of files divided by Option::ParserGroupSize, rounded up). The
 * consistent hashing ensures that a minimal amount of disruption
 * happens when we add/remove files. (Adding one file will change
 * exactly one bucket, etc.)
 *
 * If we grouped the files, they are returned to the parent directory
 * as groups (along with any groups from sub-directories). Otherwise
 * the files are returned as ungrouped and the process repeats in the
 * parent.
 *
 * The idea behind Option::ParserDirGroupSizeLimit is to try to
 * partition the source tree into distinct chunks and only group
 * files within those chunks. So, if you don't compile one of those
 * chunks (say, because you're not compiling tests), it won't affect
 * the files in other chunks. Otherwise, if that test code were mixed
 * in with the rest of the groups, they'd all miss the cache and have
 * to be rerun. This is a heuristic, but in practice it seems to work
 * well.
 *
 * Once you reach the top level, any remaining ungrouped files (along
 * with any top level files added in by config) are grouped together.
 *
 * Before parsing, we sort all of the groups by their summed file
 * size. We want to start parsing larger groups first because they'll
 * probably take the longest.
 */
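/*
 * A quick worked example of the bucket math (illustrative numbers,
 * not necessarily the defaults): with Option::ParserGroupSize = 100
 * and 250 ungrouped files, groupFiles() below creates
 * (250 + 99) / 100 = 3 new buckets and consistent-hashes each file
 * name into one of them. Adding or removing a single file then
 * perturbs at most one bucket, so the other buckets keep hitting the
 * extern-worker cache.
 */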
// Given the path of a directory, find all (relevant) files in that
// directory (and sub-directories), and attempt to group them.
coro::Task<Package::GroupResult> Package::groupDirectories(std::string path) {
  // We're not going to be blocking on I/O here, so make sure we're
  // running on the thread pool.
  HPHP_CORO_RESCHEDULE_ON_CURRENT_EXECUTOR;

  GroupResult result;
  std::vector<coro::Task<GroupResult>> dirs;

  FileUtil::find(
    m_root, path, /* php */ true,
    [&] (const std::string& name, bool dir, size_t size) {
      if (!dir) {
        if (Option::PackageExcludeFiles.count(name) ||
            Option::IsFileExcluded(name, Option::PackageExcludePatterns)) {
          return false;
        }

        if (!name.empty()) {
          auto canonFileName =
            FileUtil::canonicalize(String(name)).toCppString();
          if (m_parsedFiles.emplace(std::move(canonFileName), true).second) {
            result.m_ungrouped.emplace_back(FileAndSize{name, size});
          }
        }
        return true;
      }
      if (Option::PackageExcludeDirs.count(name)) return false;

      if (path == name ||
          (name.size() == path.size() + 1 &&
           name.back() == FileUtil::getDirSeparator() &&
           name.compare(0, path.size(), path) == 0)) {
        // find immediately calls us back with a canonicalized version
        // of path; we want to ignore that, and let it proceed to
        // iterate the directory.
        return true;
      }

      // Process the directory as a new job
      dirs.emplace_back(groupDirectories(name));

      // Don't iterate the directory in this job.
      return false;
    }
  );

  // Coalesce the sub results
  for (auto& sub : HPHP_CORO_AWAIT(coro::collectRange(std::move(dirs)))) {
    result.m_grouped.insert(
      result.m_grouped.end(),
      std::make_move_iterator(sub.m_grouped.begin()),
      std::make_move_iterator(sub.m_grouped.end())
    );
    result.m_ungrouped.insert(
      result.m_ungrouped.end(),
      std::make_move_iterator(sub.m_ungrouped.begin()),
      std::make_move_iterator(sub.m_ungrouped.end())
    );
  }

  // Have we gathered enough files to assign them to groups?
  if (result.m_ungrouped.size() >= Option::ParserDirGroupSizeLimit) {
    groupFiles(result.m_grouped, std::move(result.m_ungrouped));
    assertx(result.m_ungrouped.empty());
  }
  HPHP_CORO_MOVE_RETURN(result);
}

// Group sets of files together using consistent hashing
void Package::groupFiles(ParseGroups& groups,
                         FileAndSizeVec files) {
  if (files.empty()) return;

  assertx(Option::ParserGroupSize > 0);
  // Number of buckets
  auto const numNew =
    (files.size() + (Option::ParserGroupSize - 1)) / Option::ParserGroupSize;

  auto const origSize = groups.size();
  groups.resize(origSize + numNew);

  // Assign to buckets
  for (auto& [file, size] : files) {
    auto const idx = consistent_hash(
      hash_string_cs(file.c_str(), file.native().size()),
      numNew
    );
    assertx(idx < numNew);
    groups[origSize + idx].m_files.emplace_back(std::move(file));
    groups[origSize + idx].m_size += size;
  }

  // We could (though unlikely) have empty buckets. Remove them so we
  // don't have to deal with this when parsing.
  groups.erase(
    std::remove_if(
      groups.begin() + origSize,
      groups.end(),
      [] (const ParseGroup& g) { return g.m_files.empty(); }
    ),
    groups.end()
  );

  // Keep the order of the files within the bucket deterministic
  for (size_t i = origSize; i < groups.size(); ++i) {
    std::sort(groups[i].m_files.begin(), groups[i].m_files.end());
  }
}
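// Determinism matters here: a group's (sorted) file list is part of
// the job's input, so a stable bucket assignment plus a stable
// within-bucket order is what lets an unchanged group hit
// extern-worker's execution cache on the next build.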
// Parse all of the files in the given group, returning a vector of
// "ondemand" files obtained from that parsing.
coro::Task<Package::FileAndSizeVec> Package::parseGroups(ParseGroups groups) {
  if (groups.empty()) HPHP_CORO_RETURN(FileAndSizeVec{});

  // Parse the groups from highest combined file size to lowest. The
  // larger groups will probably take longer to compile, so we want to
  // start those earliest.
  std::sort(
    groups.begin(),
    groups.end(),
    [] (const ParseGroup& a, const ParseGroup& b) {
      if (a.m_size != b.m_size) return b.m_size < a.m_size;
      if (a.m_files.size() != b.m_files.size()) {
        return b.m_files.size() < a.m_files.size();
      }
      return a.m_files < b.m_files;
    }
  );

  // Kick off the parsing. Each group gets its own sticky ticket (so
  // earlier groups will get scheduling priority over later ones).
  std::vector<coro::TaskWithExecutor<FileAndSizeVec>> tasks;
  for (auto& group : groups) {
    tasks.emplace_back(
      parseGroup(std::move(group))
        .scheduleOn(m_async->m_executor.sticky())
    );
  }

  // Gather the on-demand files and return them
  FileAndSizeVec ondemand;
  for (auto& paths : HPHP_CORO_AWAIT(coro::collectRange(std::move(tasks)))) {
    ondemand.insert(
      ondemand.end(),
      std::make_move_iterator(paths.begin()),
      std::make_move_iterator(paths.end())
    );
  }

  HPHP_CORO_MOVE_RETURN(ondemand);
}

// The actual parse loop. Find the initial set of inputs (from
// configuration), parse them, gather on-demand files, then repeat the
// process until we have no new files to parse. Everything "under"
// this call works asynchronously.
void Package::parseAll() {
  auto work = coro::invoke(
    [this] () -> coro::Task<void> {
      // Find the initial set of groups
      auto groups = HPHP_CORO_AWAIT(coro::invoke(
        [&] () -> coro::Task<ParseGroups> {
          Timer timer{Timer::WallTime, "finding inputs"};

          std::vector<coro::Task<GroupResult>> tasks;
          for (auto& dir : m_directories) {
            tasks.emplace_back(groupDirectories(std::move(dir)));
          }

          // Gather together all top level files
          GroupResult top;
          for (auto& result :
               HPHP_CORO_AWAIT(coro::collectRange(std::move(tasks)))) {
            top.m_grouped.insert(
              top.m_grouped.end(),
              std::make_move_iterator(result.m_grouped.begin()),
              std::make_move_iterator(result.m_grouped.end())
            );
            top.m_ungrouped.insert(
              top.m_ungrouped.end(),
              std::make_move_iterator(result.m_ungrouped.begin()),
              std::make_move_iterator(result.m_ungrouped.end())
            );
          }

          // If there's any ungrouped files left over, group those now
          groupFiles(top.m_grouped, std::move(top.m_ungrouped));
          assertx(top.m_ungrouped.empty());

          // Finally add in any files explicitly added via
          // configuration and group them.
          FileAndSizeVec extraFiles;
          for (auto& file : m_filesToParse) {
            if (!m_parsedFiles.insert(file).second) continue;
            extraFiles.emplace_back(FileAndSize{std::move(file.first), 0});
          }
          groupFiles(top.m_grouped, std::move(extraFiles));

          HPHP_CORO_RETURN(std::move(top.m_grouped));
        }));

      // Parse the "main" round and get any ondemand files
      FileAndSizeVec ondemand;
      {
        Timer timer{Timer::WallTime, "parsing inputs"};
        ondemand = HPHP_CORO_AWAIT(parseGroups(std::move(groups)));
      }

      if (ondemand.empty()) HPHP_CORO_RETURN_VOID;

      Timer timer{Timer::WallTime, "parsing on-demand"};
      // We have ondemand files, so keep parsing until we have nothing
      // more to parse.
      do {
        assertx(groups.empty());
        groupFiles(groups, std::move(ondemand));
        ondemand = HPHP_CORO_AWAIT(parseGroups(std::move(groups)));
      } while (!ondemand.empty());

      HPHP_CORO_RETURN_VOID;
    }
  ).scheduleOn(m_async->m_executor.sticky());
  coro::wait(std::move(work));
}

bool Package::parse() {
  assertx(m_async);

  Logger::FInfo(
    "parsing using {} threads using {}{}",
    m_async->m_executor.numThreads(),
    m_async->m_client.implName(),
    coro::using_coros ? "" : " (coros disabled!)"
  );

  HphpSession _{Treadmill::SessionKind::CompilerEmit};

  // Treat any symbol refs from systemlib as if they were part of the
  // original Package.
  for (auto& ue : SystemLib::claimRegisteredUnitEmitters()) {
    FileAndSizeVec ondemand;
    resolveOnDemand(ondemand, ue->m_symbol_refs, true);
    for (auto const& p : ondemand) addSourceFile(p.m_path);
    addUnitEmitter(std::move(ue));
  }

  parseAll();
  return !m_parseFailed.load();
}

// Parse a group using extern-worker, hand off the UnitEmitter
// obtained, and return any on-demand files from the parsing.
coro::Task<Package::FileAndSizeVec> Package::parseGroup(ParseGroup group) {
  using namespace folly::gen;

  // Make sure we're running on the thread we should be
  HPHP_CORO_RESCHEDULE_ON_CURRENT_EXECUTOR;

  try {
    // First build the inputs for the job
    std::vector<folly::fs::path> paths;
    std::vector<FileMeta> metas;
    std::vector<coro::Task<Ref<RepoOptionsFlags>>> options;
    paths.reserve(group.m_files.size());
    metas.reserve(group.m_files.size());
    options.reserve(group.m_files.size());
    for (auto& fileName : group.m_files) {
      assertx(!fileName.empty());

      std::string fullPath;
      if (FileUtil::isDirSeparator(fileName.native().front())) {
        fullPath = fileName;
      } else {
        fullPath = m_root + fileName.native();
      }

      struct stat sb;
      auto const doStat = [&] {
        if (lstat(fullPath.c_str(), &sb)) {
          if (fullPath.find(' ') == std::string::npos) {
            Logger::Error("Unable to stat file %s", fullPath.c_str());
          }
          return false;
        }
        if ((sb.st_mode & S_IFMT) == S_IFDIR) {
          Logger::Error("Unable to parse directory: %s", fullPath.c_str());
          return false;
        }
        return true;
      }();
      if (!doStat) {
        Logger::FError("Fatal: Unable to stat/parse {}", fileName.native());
        m_parseFailed.store(true);
        continue;
      }

      if (!m_extraStaticFiles.count(fileName)) {
        m_discoveredStaticFiles.emplace(
          fileName,
          Option::CachePHPFile ? fullPath : ""
        );
      }

      Optional<std::string> targetPath;
      if (S_ISLNK(sb.st_mode)) {
        auto const target = fs::canonical(fullPath);
        targetPath.emplace(fs::relative(target, m_root).native());
      }

      // Most files will have the same RepoOptions, so we cache them
      auto const& repoOptions = RepoOptions::forFile(fullPath.data()).flags();
      options.emplace_back(
        m_async->m_repoOptions.get(
          repoOptions.cacheKeySha1(),
          repoOptions,
          HPHP_CORO_CURRENT_EXECUTOR
        )
      );

      paths.emplace_back(std::move(fullPath));
      metas.emplace_back(std::move(fileName), std::move(targetPath));
    }

    if (paths.empty()) {
      assertx(metas.empty());
      assertx(options.empty());
      HPHP_CORO_RETURN(FileAndSizeVec{});
    }

    // Free up some memory before awaiting
    decltype(group.m_files){}.swap(group.m_files);

    auto const workItems = paths.size();

    // Store the inputs and get their refs
    size_t readFiles = 0;
    size_t storedFiles = 0;
    auto [fileRefs, metaRefs, optionRefs, configRef] =
      HPHP_CORO_AWAIT(coro::collect(
        m_async->m_client.storeFile(
          std::move(paths),
          &readFiles,
          &storedFiles
        ),
        m_async->m_client.storeMulti(std::move(metas)),
        coro::collectRange(std::move(options)),
        *m_async->m_config
      ));

    assertx(fileRefs.size() == workItems);
    assertx(metaRefs.size() == workItems);
    assertx(optionRefs.size() == workItems);
    assertx(readFiles <= workItems);
    assertx(storedFiles <= workItems);
    m_readFiles += readFiles;
    m_storedFiles += storedFiles;

    // "Tuplize" the input refs (so they're in the format that
    // extern-worker expects).
    std::vector<decltype(g_parseJob)::InputsT> inputs;
    inputs.reserve(workItems);
    for (size_t i = 0; i < workItems; ++i) {
      inputs.emplace_back(
        std::move(fileRefs[i]),
        std::move(metaRefs[i]),
        std::move(optionRefs[i])
      );
    }
    decltype(fileRefs){}.swap(fileRefs);
    decltype(metaRefs){}.swap(metaRefs);
    decltype(optionRefs){}.swap(optionRefs);
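    // Everything the job needs is now staged in extern-worker:
    // per-file content, metadata, and RepoOptions refs (one tuple per
    // work item), plus the shared Config ref, which is passed
    // separately below and presumably feeds ParseJob::init() on each
    // worker.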
    // Run the job. This does the parsing.
    bool cached = false;
    auto outputRefs = HPHP_CORO_AWAIT(m_async->m_client.exec(
      g_parseJob,
      std::make_tuple(*configRef),
      std::move(inputs),
      &cached
    ));
    assertx(outputRefs.size() == workItems);

    // Load the outputs
    auto outputs =
      HPHP_CORO_AWAIT(m_async->m_client.load(std::move(outputRefs)));
    assertx(outputs.size() == workItems);

    if (cached) m_cacheHits += workItems;
    m_total += workItems;

    // Process the outputs
    FileAndSizeVec ondemand;
    for (auto& [meta, wrapper] : outputs) {
      // The Unit had an ICE and we're configured to treat that as a
      // fatal error. Here is where we die on it.
      if (!meta.m_abort.empty()) {
        fprintf(stderr, "%s", meta.m_abort.c_str());
        _Exit(1);
      }
      // If we produced an UnitEmitter, hand it off for whatever
      // processing we need to do with it.
      if (wrapper.m_ue) addUnitEmitter(std::move(wrapper.m_ue));
      // Resolve any symbol refs into files to parse ondemand
      resolveOnDemand(ondemand, meta.m_symbol_refs);
    }

    HPHP_CORO_MOVE_RETURN(ondemand);
  } catch (const Exception& e) {
    Logger::FError(
      "Fatal: An unexpected exception was thrown while parsing: {}",
      e.getMessage()
    );
    m_parseFailed.store(true);
  } catch (const Error& e) {
    Logger::FError("Extern worker error while parsing: {}", e.what());
    m_parseFailed.store(true);
  } catch (const std::exception& e) {
    Logger::FError(
      "Fatal: An unexpected exception was thrown while parsing: {}",
      e.what()
    );
    m_parseFailed.store(true);
  } catch (...) {
    Logger::Error("Fatal: An unexpected exception was thrown while parsing");
    m_parseFailed.store(true);
  }

  HPHP_CORO_RETURN(FileAndSizeVec{});
}

void Package::addUnitEmitter(std::unique_ptr<UnitEmitter> ue) {
  if (m_ar->program().get()) {
    HHBBC::add_unit_to_program(ue.get(), *m_ar->program());
  } else {
    m_ar->addHhasFile(std::move(ue));
  }
}

///////////////////////////////////////////////////////////////////////////////

void Package::resolveOnDemand(FileAndSizeVec& out,
                              const SymbolRefs& symbolRefs,
                              bool report) {
  if (!m_parseOnDemand) return;

  auto const& onPath = [&] (const std::string& path) {
    auto rpath = [&] {
      if (path.compare(0, m_root.length(), m_root) == 0) {
        return path.substr(m_root.length());
      }
      return path;
    }();
    if (rpath.empty()) return;

    if (Option::PackageExcludeFiles.count(rpath) > 0) return;
    if (Option::IsFileExcluded(rpath, Option::PackageExcludePatterns)) return;

    auto canon =
      FileUtil::canonicalize(String(std::move(rpath))).toCppString();
    assertx(!canon.empty());

    // Only parse a file once. This ensures we eventually run out
    // of things to parse.
    if (report || m_parsedFiles.emplace(canon, true).second) {
      auto const absolute = [&] {
        if (FileUtil::isDirSeparator(canon.front())) {
          return canon;
        } else {
          return m_root + canon;
        }
      }();

      struct stat sb;
      if (stat(absolute.c_str(), &sb)) {
        Logger::FError("Unable to stat {}", absolute);
        m_parseFailed.store(true);
        return;
      }
      out.emplace_back(FileAndSize{std::move(canon), (size_t)sb.st_size});
    }
  };

  auto const onMap = [&] (auto const& syms, auto const& m) {
    for (auto const& sym : syms) {
      auto const it = m.find(sym);
      if (it == m.end()) continue;
      onPath(Option::AutoloadRoot + it->second);
    }
  };

  for (auto const& [kind, syms] : symbolRefs) {
    switch (kind) {
      case SymbolRef::Include:
        for (auto const& name : syms) onPath(name);
        break;
      case SymbolRef::Class:
        onMap(syms, Option::AutoloadClassMap);
        break;
      case SymbolRef::Function:
        onMap(syms, Option::AutoloadFuncMap);
        break;
      case SymbolRef::Constant:
        onMap(syms, Option::AutoloadConstMap);
        break;
    }
  }
}

///////////////////////////////////////////////////////////////////////////////