Fact ClangDB::fileFromEntry()

in glean/lang/clang/db.cpp [31:391]


// Produces the Src::File fact for a Clang file entry and, when the file's
// contents can be obtained from the source manager, a Src::FileLines fact
// describing the length of each line.
//
// The path starts out as the one derived from the entry's Name; if the entry
// has a non-empty real path name, betterPath chooses between the path derived
// from it and the Name-derived one. When path_prefix is set it is prepended.
//
// Returns the Src::File fact whether or not FileLines could be produced; a
// missing buffer is only logged as a warning.
Fact<Src::File> ClangDB::fileFromEntry(
    const clang::FileEntry& entry) {
  // Clang files have a Name and *maybe* a RealPathName (which seems to be
  // Name with symlinks resolved). For fbcode sources, RealPathName tends to
  // be what we want for sources (RealPathName could be "folly/File.h" and
  // Name would be the symlink under "buck-out"). For other things, we tend
  // to want Name (e.g., "third-party-buck/.../basic_ios.h" rather than
  // "/mnt/gvfs/.../basic_ios.h").
  //
  // TODO: Do we want to resolve symlinks ourselves instead of using
  // tryGetRealPathName?
  auto path = goodPath(root, subpath(subdir, entry.getName()));
  auto real = entry.tryGetRealPathName();
  if (!real.empty()) {
    path = betterPath(goodPath(root, subpath(subdir, real)), path);
  }
  if (path_prefix.has_value()) {
     path = std::filesystem::path(path_prefix.value()) / path;
  }
  const auto file = batch.fact<Src::File>(path.native());

  // define FileLines
  //
  // The two preprocessor branches below open the *same* `if` block using
  // different SourceManager APIs; the shared body follows the #endif and the
  // matching `else` logs the failure.
  #if LLVM_VERSION_MAJOR >= 12
  auto bufferOpt = sourceManager().getMemoryBufferForFileOrNone(&entry);
  if (bufferOpt.hasValue()) {
    auto buffer = &(bufferOpt.getValue());
  #else
  bool invalid = false;
  auto buffer = sourceManager().getMemoryBufferForFile(&entry, &invalid);
  if (buffer != nullptr && !invalid) {
  #endif
    std::vector<uint64_t> lengths;
    bool hasUnicodeOrTabs = false;
    auto p = buffer->getBufferStart();
    const auto n = buffer->getBufferSize();
    // Number of bytes seen so far in the current (unterminated) line.
    uint64_t len = 0;
    for (size_t i = 0; i < n; ++i) {
      const auto c = *p;
      ++p;
      ++len;
      if (c == '\n') {
        // NOTE: We include the terminating '\n' in the length to ensure that
        // sum(lengths) == file size.
        lengths.push_back(len);
        len = 0;
      } else if (c == '\t' || (c&0x80) != 0) {
        // A tab, or a byte with the high bit set (i.e. not plain ASCII).
        hasUnicodeOrTabs = true;
      }
    }
    // A final line that is not terminated by '\n' still gets an entry.
    if (len != 0) {
      lengths.push_back(len);
    }
    // len is only reset by a '\n', so len==0 here means the buffer is empty
    // or its last byte was a newline.
    batch.fact<Src::FileLines>(file, lengths, len==0, hasUnicodeOrTabs);
  } else {
    LOG(WARNING) << "couldn't get MemoryBuffer for " << path.native();
  }

  return file;
}

// Appends a preprocessor event to the event list of the file that `range`
// belongs to. If the range carries no file data the event is dropped.
void ClangDB::ppevent(
    PrePPEvent event,
    SourceRange range) {
  if (!range.file) {
    return;
  }
  range.file->events.push_back(std::move(event));
}

void ClangDB::include(
    const Include& inc,
    Fact<Src::File> file,
    folly::Optional<clang::FileID> id) {
  auto full_range = inc.name;
  full_range.setBegin(inc.hash);
  const auto range = srcRange(full_range);
  const auto name_range = srcRange(inc.name);
  const auto include = fact<Pp::Include>(
    file,
    Src::ByteRange{
      name_range.span.start,
      name_range.span.start + name_range.span.length},
    range.range);
  ppevent(
    id
      ? PrePPEvent(PreInclude{include, id.value()})
      : PrePPEvent(
          Cxx::PPEvent::include_(Cxx::IncludeTrace{include, nothing()})),
    range);
}

// Called when a file is entered at `loc`. If the location maps to a physical
// file, a fresh FileData record is appended and indexed by its FileID; when
// the entry was caused by the given include directive (the directive's file
// entry matches the entered file), the include is recorded as well.
void ClangDB::enterFile(
    clang::SourceLocation loc, folly::Optional<Include> inc) {
  const auto file_id = sourceManager().getFileID(loc);
  auto physical = physicalFile(file_id);
  if (!physical) {
    return;
  }

  file_data.push_back(
    FileData{file_id, physical.value(), {}, {}, {}, folly::none});
  files.insert({file_id, &file_data.back()});

  const bool entered_by_include =
    inc && inc->entry != nullptr &&
    sourceManager().getFileEntryForID(file_id) == inc->entry;
  if (entered_by_include) {
    include(inc.value(), physical.value(), file_id);
  }
}

// Called when an included file is skipped rather than entered. The include is
// still recorded (with no FileID) provided the directive has a file entry and
// it is the same entry that was skipped.
void ClangDB::skipFile(
    folly::Optional<Include> inc, const clang::FileEntry *entry) {
  if (!inc || inc->entry == nullptr || inc->entry != entry) {
    return;
  }
  include(inc.value(), fileFromEntry(*entry), folly::none);
}

// Records a cross-reference from source range `r` to `target` in the file
// data of the file containing `r`. The reference is flagged as local when
// `loc` is present and resolves to that same file data. Ranges without file
// data are ignored.
void ClangDB::xref(
    clang::SourceRange r,
    folly::Optional<clang::SourceLocation> loc,
    Cxx::XRefTarget target) {
  auto use = srcRange(r);
  if (!use.file) {
    return;
  }
  const bool same_file = loc && srcRange(loc.value()).file == use.file;
  use.file->xrefs.push_back(CrossRef{use.span, same_file, target});
}

// Narrows `range` to the single token at its beginning. Ranges that begin
// inside a macro expansion are returned unchanged.
clang::SourceRange ClangDB::rangeOfToken(clang::SourceRange range) const {
  const auto first = range.getBegin();
  if (first.isMacroID()) {
    return range;
  }
  // Offset 1 asks the lexer for a location one character before the position
  // just past the token.
  const auto last = clang::Lexer::getLocForEndOfToken(
    first, 1, sourceManager(), compilerInstance.getLangOpts());
  return clang::SourceRange(first, last);
}

// Maps both endpoints of `range` to their spelling locations.
clang::SourceRange ClangDB::spellingRange(clang::SourceRange range) const {
  const auto spelled_begin = sourceManager().getSpellingLoc(range.getBegin());
  const auto spelled_end = sourceManager().getSpellingLoc(range.getEnd());
  return clang::SourceRange(spelled_begin, spelled_end);
}

// Returns the source text covered by `range`, treating it as a token range.
// When the range begins in a macro, the text is looked up at the macro
// definition (spelling location) instead.
clang::StringRef ClangDB::srcText(clang::SourceRange range) const {
  const auto lookup =
    range.getBegin().isMacroID() ? spellingRange(range) : range;
  return clang::Lexer::getSourceText(
    clang::CharSourceRange::getTokenRange(lookup),
    sourceManager(),
    compilerInstance.getLangOpts());
}

// Converts a Clang location to a Src::Loc made of the file, line and column
// at which the corresponding source range begins.
Src::Loc ClangDB::srcLoc(clang::SourceLocation loc) {
  const auto resolved = srcRange(loc);
  const auto& where = resolved.range;
  return Src::Loc{where.file, where.lineBegin, where.columnBegin};
}

// Converts a Clang character/token range into a ClangDB::SourceRange holding
// the file data (if any is registered for the file), the byte span within the
// file, and the line/column range.
//
// The end of the range falls back to the beginning when it is invalid or lies
// in a different file, yielding an empty span. The reported end line/column
// are those of the last character in the span (or of the start if the span is
// empty).
ClangDB::SourceRange ClangDB::immediateSrcRange(
    clang::CharSourceRange range) {
  const auto [file_id, begin_offset] =
    sourceManager().getDecomposedLoc(range.getBegin());

  // In char ranges, getEnd already points past the end of the range.
  clang::SourceLocation end_loc = range.getEnd();
  if (range.isTokenRange()) {
    // In token ranges, getEnd points to the first character of the last
    // token so skip it.
    end_loc = clang::Lexer::getLocForEndOfToken(
      range.getEnd(),
      0,
      sourceManager(),
      compilerInstance.getLangOpts());
  }

  // TODO: What should we do if it's invalid?
  if (!end_loc.isValid()) {
    end_loc = range.getBegin();
  }

  unsigned end_offset = 0;
  const bool end_in_same_file =
    sourceManager().isInFileID(end_loc, file_id, &end_offset);
  if (!end_in_same_file) {
    // FIXME: what *should* we do if the end of the range is in a different
    // file?
    end_offset = begin_offset;
  }
  // The span length below is an unsigned subtraction and relies on this.
  assert(end_offset >= begin_offset);

  const auto file_info = folly::get_default(files, file_id, nullptr);

  unsigned last_char_offset = begin_offset;
  if (end_offset > begin_offset) {
    last_char_offset = end_offset - 1;
  }

  // Use the registered file's fact when there is one, otherwise look the
  // file up by id.
  const auto file_fact = file_info ? file_info->fact : file(file_id);

  // FIXME: This is quite expensive and not always used. We should do this
  // on demand.
  const auto line_begin = sourceManager().getLineNumber(file_id, begin_offset);
  const auto column_begin =
    sourceManager().getColumnNumber(file_id, begin_offset);
  const auto line_end =
    sourceManager().getLineNumber(file_id, last_char_offset);
  const auto column_end =
    sourceManager().getColumnNumber(file_id, last_char_offset);

  return SourceRange{
    file_info,
    Src::ByteSpan{begin_offset, end_offset - begin_offset},
    Src::Range {file_fact, line_begin, column_begin, line_end, column_end}
  };
}

namespace {

// Converts a list of absolute byte spans into sorted, de-duplicated spans
// whose start is relative to the start of the preceding span (the first one
// is relative to offset 0).
Src::ByteSpans byteSpans(std::vector<Src::ByteSpan> v) {
  std::sort(v.begin(), v.end());
  const auto duplicates_begin = std::unique(v.begin(), v.end());
  v.erase(duplicates_begin, v.end());

  std::vector<Src::RelByteSpan> relative;
  relative.reserve(v.size());

  size_t previous_start = 0;
  for (const auto& absolute : v) {
    // Sorted input means starts never decrease.
    assert(absolute.start >= previous_start);
    relative.push_back(
      Src::RelByteSpan{absolute.start - previous_start, absolute.length});
    previous_start = absolute.start;
  }
  return relative;
}

// Cross-reference targets mapped to the absolute byte spans that refer to
// them.
using RefMap = std::map<Cxx::XRefTarget, std::vector<Src::ByteSpan>>;

// Consumes `map`, turning each target's spans into relative byte spans (see
// byteSpans), and returns the resulting references ordered by their ranges.
// The map is left empty.
std::vector<Cxx::FixedXRef> finishRefs(RefMap&& map) {
  std::vector<Cxx::FixedXRef> result;
  result.reserve(map.size());
  for (auto& [target, spans] : map) {
    result.push_back(Cxx::FixedXRef{target, byteSpans(std::move(spans))});
  }
  map.clear();

  const auto by_ranges = [](const auto& lhs, const auto& rhs) {
    return lhs.ranges < rhs.ranges;
  };
  std::sort(result.begin(), result.end(), by_ranges);
  return result;
}


}

// Emits the facts that summarise the whole translation unit.
//
// In order:
//   1. a Buck::TranslationUnit fact for the main file;
//   2. for every recorded file (visited in reverse order of file_data):
//      cross-reference facts (Cxx::FileXRefMap / Cxx::FileXRefs) if the file
//      has any xrefs, a Cxx::Declarations fact, a Cxx::PPTrace fact and the
//      combining Cxx::Trace fact, which is stored in the file's `trace`;
//   3. a Cxx::TranslationUnitXRefs fact listing the per-file xref facts;
//   4. a Cxx::TranslationUnitTrace fact if the main file has a trace.
//
// Per-file xrefs, declarations and events are released as soon as they have
// been turned into facts.
void ClangDB::finish() {
  // Empties `vec` by swapping it with a default-constructed container; the
  // old contents are destroyed together with `tmp`.
  auto release = [](auto& vec) {
    typename std::decay<decltype(vec)>::type tmp;
    tmp.swap(vec);
  };

  const auto main_id = sourceManager().getMainFileID();
  auto tunit = fact<Buck::TranslationUnit>(
    file(main_id),
    locator,
    maybe(platform)
  );

  std::vector<Fact<Cxx::FileXRefs>> tunitXRefs;

  // NOTE(review): file_data is walked back to front. Presumably this is so
  // that files entered later (i.e. included files) get their `trace` set
  // before the including file resolves its PreInclude events below — confirm.
  for (auto& file : folly::range(file_data.rbegin(), file_data.rend())) {
    auto& xrefs = file.xrefs;
    if (!xrefs.empty()) {
      // Group spans by target, separately for local and external references.
      RefMap locals;
      RefMap externals;

      for (const auto& xref : xrefs) {
        if (xref.local) {
          locals[xref.target].push_back(xref.span);
        } else {
          externals[xref.target].push_back(xref.span);
        }
      }

      auto local_refs = finishRefs(std::move(locals));
      auto external_refs = finishRefs(std::move(externals));

      // Split the external references into two parallel vectors: the spans
      // go into the FileXRefMap fact, the targets (in the same order) into
      // the FileXRefs fact.
      std::vector<Cxx::XRefTarget> external_targets;
      std::vector<Src::ByteSpans> external_spans;
      external_targets.reserve(external_refs.size());
      external_spans.reserve(external_refs.size());
      for (auto& ext : external_refs) {
        external_targets.push_back(ext.target);
        external_spans.push_back(std::move(ext.ranges));
      }

      auto xmap = fact<Cxx::FileXRefMap>(
        file.fact,
        std::move(local_refs),
        std::move(external_spans));
      auto fileXRefs = fact<Cxx::FileXRefs>(
        xmap,
        std::move(external_targets));
      tunitXRefs.push_back(fileXRefs);
    }
    release(xrefs);

    // Declarations are sorted as pairs; only the second component of each
    // pair is emitted in the fact.
    auto& decls = file.declarations;
    std::sort(decls.begin(), decls.end());
    auto decl_trace = fact<Cxx::Declarations>(
      folly::gen::from(decls)
        | folly::gen::mapped([](const auto& x) { return x.second; })
        | folly::gen::as<std::vector>());
    release(decls);

    // Turns a recorded PrePPEvent into a final Cxx::PPEvent.
    auto resolve = [&](const auto &x) {
      struct Resolver : boost::static_visitor<Cxx::PPEvent> {
        // Already a final event: pass it through unchanged.
        auto operator()(const Cxx::PPEvent& x) const {
          return x;
        }

        // Deferred include: attach the included file's trace if that file is
        // known and already has one; otherwise warn and emit the include
        // without a trace.
        auto operator()(const PreInclude& x) const {
          folly::Optional<Fact<Cxx::Trace>> trace;
          if (auto p = folly::get_default(db->files, x.file, nullptr)) {
            if (p->trace) {
              trace = p->trace.value();
            } else {
              LOG(WARNING) << "unresolved include";
            }
          } else {
            LOG(WARNING) << "unknown include";
          }
          return Cxx::PPEvent::include_(
            Cxx::IncludeTrace{x.include, maybe(trace)});
        }

        ClangDB *db;
      };
      Resolver r;
      r.db = this;
      return boost::apply_visitor(r, x);
    };
    auto pp_trace = fact<Cxx::PPTrace>(
      file.fact,
      folly::gen::from(file.events)
        | folly::gen::mapped(resolve)
        | folly::gen::as<std::vector>());
    release(file.events);
    // Remember the trace so that files processed later in this loop can
    // refer to it from their include events.
    file.trace = fact<Cxx::Trace>(file.fact, decl_trace, pp_trace);
  }

  fact<Cxx::TranslationUnitXRefs>(tunit, std::move(tunitXRefs));

  // Link the translation unit to the main file's trace, if there is one.
  if (auto p = folly::get_default(files, main_id, nullptr)) {
    if (p->trace) {
      fact<Cxx::TranslationUnitTrace>(tunit, p->trace.value());
    } else {
      LOG(WARNING) << "translation unit has no trace";
    }
  } else {
    LOG(WARNING) << "translation unit has no file data";
  }
}


}