glean/lang/clang/db.cpp (318 lines of code) (raw):
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
#include "glean/lang/clang/db.h"
#include "glean/lang/clang/path.h"
namespace facebook {
namespace glean {
namespace clangx {
namespace {
// Resolves 'path' against the optional 'subdir'.
//
// An absolute 'path' is returned as is, and so is any 'path' when no 'subdir'
// is set. Only a relative 'path' combined with a set 'subdir' yields
// 'subdir'/'path'.
std::filesystem::path subpath(
    const folly::Optional<std::string>& subdir,
    clang::StringRef path) {
  std::filesystem::path candidate(static_cast<std::string>(path));
  if (!candidate.is_absolute() && subdir) {
    return std::filesystem::path(subdir.value()) / candidate;
  }
  return candidate;
}
}
// Produces the Src::File fact for a Clang file entry and, when Clang can
// provide the file's memory buffer, a Src::FileLines fact describing the
// length of every line. A warning is logged when no buffer is available.
Fact<Src::File> ClangDB::fileFromEntry(
    const clang::FileEntry& entry) {
  // A Clang file always has a Name and *may* have a RealPathName (which
  // appears to be Name with symlinks resolved). For fbcode sources the
  // RealPathName is usually the one we want (it could be "folly/File.h"
  // while Name is the symlink under "buck-out"). For other files Name tends
  // to be preferable (e.g., "third-party-buck/.../basic_ios.h" rather than
  // "/mnt/gvfs/.../basic_ios.h"). betterPath picks between the two.
  //
  // TODO: Do we want to resolve symlinks ourselves instead of using
  // tryGetRealPathName?
  auto path = goodPath(root, subpath(subdir, entry.getName()));
  const auto real_name = entry.tryGetRealPathName();
  if (!real_name.empty()) {
    path = betterPath(goodPath(root, subpath(subdir, real_name)), path);
  }
  if (path_prefix.has_value()) {
    path = std::filesystem::path(path_prefix.value()) / path;
  }
  const auto file = batch.fact<Src::File>(path.native());

  // Define FileLines from the file's contents, if we can get at them.
#if LLVM_VERSION_MAJOR >= 12
  auto bufferOpt = sourceManager().getMemoryBufferForFileOrNone(&entry);
  if (bufferOpt.hasValue()) {
    auto buffer = &(bufferOpt.getValue());
#else
  bool invalid = false;
  auto buffer = sourceManager().getMemoryBufferForFile(&entry, &invalid);
  if (buffer != nullptr && !invalid) {
#endif
    std::vector<uint64_t> line_lengths;
    bool saw_unicode_or_tabs = false;
    uint64_t current_length = 0;

    const auto first = buffer->getBufferStart();
    const auto last = first + buffer->getBufferSize();
    for (auto cursor = first; cursor != last; ++cursor) {
      const auto ch = *cursor;
      ++current_length;
      if (ch == '\n') {
        // NOTE: The terminating '\n' is counted as part of the line so that
        // sum(line_lengths) == file size.
        line_lengths.push_back(current_length);
        current_length = 0;
      } else if (ch == '\t' || (ch & 0x80) != 0) {
        // A tab, or a byte with the high bit set (i.e. non-ASCII).
        saw_unicode_or_tabs = true;
      }
    }

    // True when the last byte was '\n' (or the buffer is empty), i.e. there
    // is no unterminated final line left to record.
    const bool no_partial_last_line = current_length == 0;
    if (!no_partial_last_line) {
      line_lengths.push_back(current_length);
    }
    batch.fact<Src::FileLines>(
        file, line_lengths, no_partial_last_line, saw_unicode_or_tabs);
  } else {
    LOG(WARNING) << "couldn't get MemoryBuffer for " << path.native();
  }
  return file;
}
// Appends a preprocessor event to the event list of the file that 'range'
// belongs to. The event is dropped when 'range' has no associated file data.
void ClangDB::ppevent(
    PrePPEvent event,
    SourceRange range) {
  if (!range.file) {
    return;
  }
  range.file->events.push_back(std::move(event));
}
void ClangDB::include(
const Include& inc,
Fact<Src::File> file,
folly::Optional<clang::FileID> id) {
auto full_range = inc.name;
full_range.setBegin(inc.hash);
const auto range = srcRange(full_range);
const auto name_range = srcRange(inc.name);
const auto include = fact<Pp::Include>(
file,
Src::ByteRange{
name_range.span.start,
name_range.span.start + name_range.span.length},
range.range);
ppevent(
id
? PrePPEvent(PreInclude{include, id.value()})
: PrePPEvent(
Cxx::PPEvent::include_(Cxx::IncludeTrace{include, nothing()})),
range);
}
// Called when the file containing 'loc' is entered. If that file is a
// physical file, fresh FileData is registered for its FileID. If 'inc'
// describes the include directive that brought this very file in, the
// include is recorded as well.
void ClangDB::enterFile(
    clang::SourceLocation loc, folly::Optional<Include> inc) {
  auto id = sourceManager().getFileID(loc);
  auto physical = physicalFile(id);
  if (!physical) {
    return;
  }

  file_data.push_back(
      FileData{id, physical.value(), {}, {}, {}, folly::none});
  files.insert({id, &file_data.back()});

  // Only record the include if its target entry is the file just entered.
  if (!inc || inc->entry == nullptr) {
    return;
  }
  if (sourceManager().getFileEntryForID(id) == inc->entry) {
    include(inc.value(), physical.value(), id);
  }
}
// Called when an included file is skipped rather than entered. The include
// is still recorded (without a FileID) provided 'inc' is set and its
// non-null target entry is exactly 'entry'.
void ClangDB::skipFile(
    folly::Optional<Include> inc, const clang::FileEntry *entry) {
  const bool refers_to_entry =
      inc && inc->entry != nullptr && inc->entry == entry;
  if (!refers_to_entry) {
    return;
  }
  include(inc.value(), fileFromEntry(*entry), folly::none);
}
// Records a cross-reference from the source range 'r' to 'target' on the
// file that contains 'r'. Nothing is recorded when 'r' does not map to a
// file with file data.
//
// The reference is flagged as local when 'loc' is given and resolves to the
// same file data as 'r'.
void ClangDB::xref(
    clang::SourceRange r,
    folly::Optional<clang::SourceLocation> loc,
    Cxx::XRefTarget target) {
  auto use = srcRange(r);
  if (!use.file) {
    return;
  }
  const bool is_local = loc && srcRange(loc.value()).file == use.file;
  use.file->xrefs.push_back(CrossRef{use.span, is_local, target});
}
// Narrows 'range' to the single token at its begin location. Ranges that
// begin inside a macro are returned unchanged.
clang::SourceRange ClangDB::rangeOfToken(clang::SourceRange range) const {
  const auto start = range.getBegin();
  if (start.isMacroID()) {
    return range;
  }
  // An offset of 1 is passed to getLocForEndOfToken; the resulting location
  // becomes the end of the returned range.
  const auto end = clang::Lexer::getLocForEndOfToken(
      start,
      1,
      sourceManager(),
      compilerInstance.getLangOpts());
  return clang::SourceRange(start, end);
}
// Maps both endpoints of 'range' to their spelling locations.
clang::SourceRange ClangDB::spellingRange(clang::SourceRange range) const {
  const auto spelled_begin = sourceManager().getSpellingLoc(range.getBegin());
  const auto spelled_end = sourceManager().getSpellingLoc(range.getEnd());
  return clang::SourceRange(spelled_begin, spelled_end);
}
// Returns the source text covered by 'range', treated as a token range.
clang::StringRef ClangDB::srcText(clang::SourceRange range) const {
  // For a range that starts in a macro, look for the text at the macro
  // definition (spelling location) instead.
  const clang::SourceRange text_range =
      range.getBegin().isMacroID() ? spellingRange(range) : range;
  return clang::Lexer::getSourceText(
      clang::CharSourceRange::getTokenRange(text_range),
      sourceManager(),
      compilerInstance.getLangOpts());
}
// Converts a Clang location into a Src::Loc made of the file, begin line and
// begin column of the location's source range.
Src::Loc ClangDB::srcLoc(clang::SourceLocation loc) {
  const auto resolved = srcRange(loc);
  const auto& where = resolved.range;
  return Src::Loc{where.file, where.lineBegin, where.columnBegin};
}
// Converts a Clang CharSourceRange into a ClangDB::SourceRange consisting of
//   - the FileData registered in 'files' for the FileID of the range's begin
//     location (nullptr if none is registered),
//   - the byte span [begin_offset, end_offset) within that file, and
//   - a Src::Range with the file fact plus begin/end line and column.
//
// If the end of the range is not in the same FileID as its begin, the span
// is empty (length 0) at the begin offset.
ClangDB::SourceRange ClangDB::immediateSrcRange(
clang::CharSourceRange range) {
const auto [file_id, begin_offset] =
sourceManager().getDecomposedLoc(range.getBegin());
auto end_loc =
range.isTokenRange()
// In token ranges, getEnd points to the first character of the last
// token so skip it.
? clang::Lexer::getLocForEndOfToken(
range.getEnd(),
0,
sourceManager(),
compilerInstance.getLangOpts())
// In char ranges, it already points past the end of the range.
: range.getEnd();
// TODO: What should we do if it's invalid?
// For now an invalid end location is replaced by the begin location.
if (!end_loc.isValid()) {
end_loc = range.getBegin();
}
unsigned end_offset;
if (!sourceManager().isInFileID(end_loc, file_id, &end_offset)) {
// FIXME: what *should* we do if the end of the range is in a different
// file?
end_offset = begin_offset;
}
// NOTE(review): the subtraction below is unsigned, so if this assertion is
// compiled out and end_offset < begin_offset the span length would wrap
// around - confirm that such ranges cannot reach this point.
assert(end_offset >= begin_offset);
const auto data = folly::get_default(files, file_id, nullptr);
// end_offset is one past the end of the range; the end line/column below are
// computed for the last character inside the range instead (or for the begin
// offset when the range is empty).
const unsigned last_char_offset =
end_offset > begin_offset ? end_offset - 1 : begin_offset;
return SourceRange{
data,
Src::ByteSpan{begin_offset, end_offset - begin_offset},
Src::Range {
// Use the fact stored in the file data when there is one, otherwise obtain
// it via file().
data ? data->fact : file(file_id),
// FIXME: This is quite expensive and not always used. We should do this
// on demand.
sourceManager().getLineNumber(file_id, begin_offset),
sourceManager().getColumnNumber(file_id, begin_offset),
sourceManager().getLineNumber(file_id, last_char_offset),
sourceManager().getColumnNumber(file_id, last_char_offset)
}
};
}
namespace {
// Turns a list of absolute byte spans into sorted, duplicate-free relative
// spans: each output span stores its start as the distance from the start of
// the previous span (the first one relative to offset 0) together with its
// length.
Src::ByteSpans byteSpans(std::vector<Src::ByteSpan> v) {
  std::sort(v.begin(), v.end());
  const auto unique_end = std::unique(v.begin(), v.end());
  v.erase(unique_end, v.end());

  std::vector<Src::RelByteSpan> relative;
  relative.reserve(v.size());
  size_t previous_start = 0;
  for (const auto& absolute : v) {
    // Holds because the spans were sorted above.
    assert(absolute.start >= previous_start);
    relative.push_back(Src::RelByteSpan{
        absolute.start - previous_start,
        absolute.length});
    previous_start = absolute.start;
  }
  return relative;
}
// Byte spans of all references, grouped by the target they refer to.
using RefMap = std::map<Cxx::XRefTarget, std::vector<Src::ByteSpan>>;

// Consumes 'map' and produces one Cxx::FixedXRef per target, with the
// target's spans converted by byteSpans(). The result is ordered by the
// spans ('ranges') of each entry; 'map' is left empty.
std::vector<Cxx::FixedXRef> finishRefs(RefMap&& map) {
  std::vector<Cxx::FixedXRef> result;
  result.reserve(map.size());
  for (auto& [target, spans] : map) {
    result.push_back(Cxx::FixedXRef{target, byteSpans(std::move(spans))});
  }
  map.clear();

  const auto by_ranges = [](const auto& lhs, const auto& rhs) {
    return lhs.ranges < rhs.ranges;
  };
  std::sort(result.begin(), result.end(), by_ranges);
  return result;
}
}
// Emits the facts accumulated for the translation unit:
//   - a Buck::TranslationUnit fact for the main file,
//   - for every entry of file_data: a Cxx::FileXRefMap / Cxx::FileXRefs pair
//     (only when the file has cross-references), a Cxx::Declarations fact, a
//     Cxx::PPTrace fact and a Cxx::Trace fact (stored in the entry's 'trace'),
//   - a Cxx::TranslationUnitXRefs fact, and
//   - a Cxx::TranslationUnitTrace fact if the main file has a trace.
// The per-file xrefs, declarations and events buffers are released as soon as
// they have been turned into facts.
void ClangDB::finish() {
// Empties 'vec' by swapping it with a default-constructed container that is
// destroyed at the end of the lambda, taking the old contents with it.
auto release = [](auto& vec) {
typename std::decay<decltype(vec)>::type tmp;
tmp.swap(vec);
};
const auto main_id = sourceManager().getMainFileID();
auto tunit = fact<Buck::TranslationUnit>(
file(main_id),
locator,
maybe(platform)
);
std::vector<Fact<Cxx::FileXRefs>> tunitXRefs;
// Files are visited in reverse order of file_data.
// NOTE(review): presumably because enterFile() appends entries as files are
// entered, so an included file comes after its includer and needs its 'trace'
// set (end of this loop body) before the includer resolves its PreInclude
// events below - confirm.
for (auto& file : folly::range(file_data.rbegin(), file_data.rend())) {
auto& xrefs = file.xrefs;
if (!xrefs.empty()) {
// Group the spans by target, separately for local and external references.
RefMap locals;
RefMap externals;
for (const auto& xref : xrefs) {
if (xref.local) {
locals[xref.target].push_back(xref.span);
} else {
externals[xref.target].push_back(xref.span);
}
}
auto local_refs = finishRefs(std::move(locals));
auto external_refs = finishRefs(std::move(externals));
// External references are split into two parallel vectors: the spans go into
// the FileXRefMap fact, the targets into the FileXRefs fact.
std::vector<Cxx::XRefTarget> external_targets;
std::vector<Src::ByteSpans> external_spans;
external_targets.reserve(external_refs.size());
external_spans.reserve(external_refs.size());
for (auto& ext : external_refs) {
external_targets.push_back(ext.target);
external_spans.push_back(std::move(ext.ranges));
}
auto xmap = fact<Cxx::FileXRefMap>(
file.fact,
std::move(local_refs),
std::move(external_spans));
auto fileXRefs = fact<Cxx::FileXRefs>(
xmap,
std::move(external_targets));
tunitXRefs.push_back(fileXRefs);
}
release(xrefs);
// Declarations are sorted and then reduced to the second component of each
// element.
auto& decls = file.declarations;
std::sort(decls.begin(), decls.end());
auto decl_trace = fact<Cxx::Declarations>(
folly::gen::from(decls)
| folly::gen::mapped([](const auto& x) { return x.second; })
| folly::gen::as<std::vector>());
release(decls);
// Converts a queued event into a Cxx::PPEvent: a Cxx::PPEvent is passed
// through unchanged, a PreInclude becomes an include event whose trace is
// looked up via the included file's FileData.
auto resolve = [&](const auto &x) {
struct Resolver : boost::static_visitor<Cxx::PPEvent> {
auto operator()(const Cxx::PPEvent& x) const {
return x;
}
auto operator()(const PreInclude& x) const {
// The trace stays unset (and a warning is logged) if the included file has
// no FileData or its trace has not been produced yet.
folly::Optional<Fact<Cxx::Trace>> trace;
if (auto p = folly::get_default(db->files, x.file, nullptr)) {
if (p->trace) {
trace = p->trace.value();
} else {
LOG(WARNING) << "unresolved include";
}
} else {
LOG(WARNING) << "unknown include";
}
return Cxx::PPEvent::include_(
Cxx::IncludeTrace{x.include, maybe(trace)});
}
ClangDB *db;
};
Resolver r;
r.db = this;
return boost::apply_visitor(r, x);
};
auto pp_trace = fact<Cxx::PPTrace>(
file.fact,
folly::gen::from(file.events)
| folly::gen::mapped(resolve)
| folly::gen::as<std::vector>());
release(file.events);
// Storing the trace makes it available to files processed later in this loop.
file.trace = fact<Cxx::Trace>(file.fact, decl_trace, pp_trace);
}
fact<Cxx::TranslationUnitXRefs>(tunit, std::move(tunitXRefs));
// Attach the main file's trace to the translation unit, if there is one.
if (auto p = folly::get_default(files, main_id, nullptr)) {
if (p->trace) {
fact<Cxx::TranslationUnitTrace>(tunit, p->trace.value());
} else {
LOG(WARNING) << "translation unit has no trace";
}
} else {
LOG(WARNING) << "translation unit has no file data";
}
}
}
}
}