glean/lang/clang/preprocessor.cpp (177 lines of code) (raw):

/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include "glean/lang/clang/preprocessor.h"

#include <llvm/Config/llvm-config.h>

namespace {

using namespace facebook::glean::clangx;
using namespace facebook::glean::cpp;

// Preprocessor callbacks that record include, macro definition/undefinition
// and macro use events into a ClangDB.
struct PPCallbacks final : public clang::PPCallbacks {
  explicit PPCallbacks(ClangDB* d) : db(*d) {}

  // Returns the Pp::Macro fact keyed by the spelling of the identifier token
  // 'name'.
  Fact<Pp::Macro> macro(const clang::Token& name) {
    return db.fact<Pp::Macro>(
        static_cast<std::string>(name.getIdentifierInfo()->getName()));
  }

  // Builds a token range that covers only the macro name token. Begin and end
  // are the same location, which macroUsed interprets as a single-token range
  // and extends by name.getLength().
  //
  // Do not use name.getEndLoc() as the end here: it already points one past
  // the name, and macroUsed would treat it as the start of a one-character
  // closing token and add one more character to the range.
  static clang::SourceRange nameTokenRange(const clang::Token& name) {
    return clang::SourceRange(name.getLocation(), name.getLocation());
  }

  // clang::PPCallbacks overrides

  // On entering a file, reports it to the DB together with the include
  // directive (if any) remembered by InclusionDirective, then forgets that
  // directive. Other change reasons are ignored.
  void FileChanged(
      clang::SourceLocation loc,
      FileChangeReason reason,
      clang::SrcMgr::CharacteristicKind,
      clang::FileID) override {
    if (reason == clang::PPCallbacks::EnterFile) {
      db.enterFile(loc, last_include);
      last_include.reset();
    }
  }

  // Reports a skipped file to the DB together with the remembered include
  // directive, then forgets that directive. The signature depends on the LLVM
  // version being built against.
#if LLVM_VERSION_MAJOR >= 11
  void FileSkipped(
      const clang::FileEntryRef& entry,
      const clang::Token&,
      clang::SrcMgr::CharacteristicKind) override {
    db.skipFile(last_include, &entry.getFileEntry());
    last_include.reset();
  }
#else
  void FileSkipped(
      const clang::FileEntry& entry,
      const clang::Token&,
      clang::SrcMgr::CharacteristicKind) override {
    db.skipFile(last_include, &entry);
    last_include.reset();
  }
#endif

  // Remembers the most recent include directive (location of the '#', range
  // of the file name and the resolved file) so that the following
  // FileChanged/FileSkipped callback can attach it to the file event.
  void InclusionDirective(
      clang::SourceLocation hashLoc,
      const clang::Token&,
      clang::StringRef,
      bool,
      clang::CharSourceRange filenameRange,
      const clang::FileEntry *file,
      clang::StringRef,
      clang::StringRef,
      const clang::Module *
#if LLVM_VERSION_MAJOR >= 8
      ,clang::SrcMgr::CharacteristicKind
#endif
      ) override {
    last_include = ClangDB::Include{hashLoc, filenameRange, file};
  }

  // '#ifdef NAME' is recorded as a non-expanding use of NAME whose range is
  // exactly the name token.
  void Ifdef(
      clang::SourceLocation,
      const clang::Token& name,
      const clang::MacroDefinition& def) override {
    macroUsed(name, def, nameTokenRange(name), false);
  }

  // '#ifndef NAME' is recorded as a non-expanding use of NAME whose range is
  // exactly the name token.
  void Ifndef(
      clang::SourceLocation,
      const clang::Token& name,
      const clang::MacroDefinition& def) override {
    macroUsed(name, def, nameTokenRange(name), false);
  }

  // Records a Pp::Define fact for the macro at the location of its name and
  // emits the corresponding preprocessor event.
  void MacroDefined(
      const clang::Token& name,
      const clang::MacroDirective *) override {
    auto src = db.srcRange(name.getLocation());
    auto def = db.fact<Pp::Define>(macro(name), src.range);
    db.ppevent(Cxx::PPEvent::define(def), src);
  }

  // Records a Pp::Undef fact for the macro at the location of its name and
  // emits the corresponding preprocessor event.
  void MacroUndefined(
      const clang::Token& name,
      const clang::MacroDefinition&,
      const clang::MacroDirective *) override {
    auto src = db.srcRange(name.getLocation());
    auto undef = db.fact<Pp::Undef>(macro(name), src.range);
    db.ppevent(Cxx::PPEvent::undef(undef), src);
  }

  // Records a Pp::Use fact for a use of the macro 'name' and emits the
  // corresponding preprocessor event.
  //
  //   name   - the macro name token
  //   def    - the macro's definition; its definition location is attached to
  //            the fact when macro info is available
  //   range  - *token* range of the use: getBegin() is the start of the first
  //            token and getEnd() is the start of the last token. If both are
  //            equal the range is taken to be just the name token, otherwise
  //            the last token is assumed to be a closing parenthesis.
  //   expand - stored in the fact; true for actual expansions, false for
  //            uses such as #ifdef/#ifndef/defined
  void macroUsed(
      const clang::Token& name,
      const clang::MacroDefinition& def,
      clang::SourceRange range,
      bool expand) {
#define PROFILE_macroUsed 0

#if PROFILE_macroUsed
    using Clock = std::chrono::steady_clock;
    static std::chrono::microseconds time = std::chrono::microseconds::zero();
    static size_t count = 0;
    const auto start = Clock::now();
#endif

    // Getting the location is expensive and there are a lot fewer macro
    // definition sites than there are macro expansions so let's cache those
    // locations.
    folly::Optional<Src::Loc> defloc;
    if (auto info = def.getMacroInfo()) {
      defloc = folly::get_optional(macros, info);
      if (!defloc.has_value()) {
        defloc = db.srcLoc(info->getDefinitionLoc());
        macros.insert({info, defloc.value()});
      }
    }

    // We absolutely don't want to let Clang resolve nested macro expansion
    // ranges here (via db.srcRange -> getExpansionRange) as doing so turned out
    // to be horrendously expensive. Instead, when we see a top-level expansion
    // (the range isn't isMacroID) we resolve it ourselves and store it in
    // 'expansion'. Subsequent nested expansions must be part of this top-level
    // expansion so we just use that range. There is a subtlety with how we
    // handle expansions in macro arguments, see comments below. This is a
    // massive win in performance - at one time, this function accounted for
    // >40% of the running time in Strobelight (cf. T59197014).
    const ClangDB::SourceRange src =
      (range.getBegin().isMacroID() && expansion.has_value())
        // This is part of the current top-level expansion, just use its
        // range.
        ? expansion.value()
        // Manually convert this to a CharSourceRange and call the more
        // efficient immediateSourceRange rather than srcRange.
        : db.immediateSrcRange(
            clang::CharSourceRange::getCharRange(
              {
                // getBegin points at the start of the first token
                range.getBegin(),
                // getEnd points at the start of the last token.
                range.getEnd().getLocWithOffset(
                  // Skip over the last token to make this a proper (exclusive)
                  // char range.
                  range.getBegin() == range.getEnd()
                    // The macro expansion range is only one token which must be
                    // the macro name.
                    ? name.getLength()
                    // Multiple tokens which means the last token must be the
                    // closing parenthesis.
                    : 1)
              }));

    if (range.getBegin().isMacroID() && !expansion.has_value()) {
      // This really shouldn't happen.
      LOG(ERROR) << "Unexpected nested macro expansion at "
        << range.printToString(db.sourceManager());
    }

    // Don't update expansion if this is an expansion of a macro argument.
    // Consider:
    //
    // #define ONE 1
    // #define TWO 2
    // #define FOO ONE
    // #define MACRO(x) x+TWO
    // int y = MACRO(FOO);
    //
    // Here, we get the following calls:
    //
    // MACRO(...) - the outer expansion (not isMacroID)
    // FOO - argument (not isMacroID)
    // ONE - definition of FOO (isMacroID)
    // TWO - definition of MACRO(x) (isMacroID)
    //
    // We set expansion in the MACRO(...) call but if we then update it in
    // the FOO call, we'd assign the (nested) expansion of TWO to the range
    // of FOO. Instead, we don't update and assign the expansions of both
    // ONE and TWO (but not FOO!) to the range of MACRO(FOO). This is arguably
    // slightly less wrong. There doesn't seem to be an easy way to do better
    // without actually resolving the MacroID ranges. Perhaps we could hook
    // into the lexer somehow.
    if (!range.getBegin().isMacroID()
          && (!expansion.has_value()
                || src.file != expansion->file
                || src.span.start + src.span.length
                    > expansion->span.start + expansion->span.length)) {
      expansion = src;
    }

    // Resolve the name range manually, too. For top-level expansions, it's
    // the (inclusive for now) range of the name token. For nested expansions
    // the current schema is broken anyway - we assign the range of the
    // top-level expansion (it should be spelling file + range).
    const auto name_r = name.getLocation().isMacroID()
      ? src
      : db.immediateSrcRange(
          clang::CharSourceRange::getCharRange({
            name.getLocation(), name.getEndLoc()
          }));

    auto use = db.fact<Pp::Use>(
      macro(name),
      Src::ByteRange{name_r.span.start, name_r.span.start + name_r.span.length},
      maybe(defloc),
      expand,
      src.range);
    db.ppevent(Cxx::PPEvent::use(use), src);

#if PROFILE_macroUsed
    const auto end = Clock::now();
    time += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
    ++count;
    if ((count % 10000) == 0) {
      LOG(INFO) << "macroUsed " << time.count() << "us (" << count << ")";
    }
#endif
  }

  // A macro expansion: recorded as an expanding use over the given range.
  void MacroExpands(
      const clang::Token& name,
      const clang::MacroDefinition& def,
      clang::SourceRange range,
      const clang::MacroArgs *) override {
    macroUsed(name, def, range, true);
  }

  // A 'defined' check: recorded as a non-expanding use over the given range.
  void Defined(
      const clang::Token& name,
      const clang::MacroDefinition& def,
      clang::SourceRange range) override {
    macroUsed(name, def, range, false);
  }

  // Database receiving all facts and events.
  ClangDB& db;

  // The include directive seen most recently and not yet consumed by
  // FileChanged/FileSkipped.
  folly::Optional<ClangDB::Include> last_include;

  // The range of the current top-level macro expansion (see comments in
  // macroUsed).
  folly::Optional<ClangDB::SourceRange> expansion;

  // Cached locations of macro definitions (see macroUsed).
  folly::F14FastMap<clang::MacroInfo *, Src::Loc> macros;
};

}

namespace facebook {
namespace glean {
namespace clangx {

// Creates the preprocessor callbacks that write into 'db'. 'db' is
// dereferenced immediately and kept by reference.
std::unique_ptr<clang::PPCallbacks> newPPCallbacks(ClangDB* db) {
  return std::make_unique<PPCallbacks>(db);
}

}
}
}