int commit_interleaver::index_parent_tree_items()

in src/commit_interleaver.h [465:575]


// Walk the top-level entries of \p parent's tree and decide, per directory,
// whether parent index \p p should be recorded as the parent that supplies it.
//
// The chosen parent index is written to parent_for_d[d] when directory d is in
// dirs.active_dirs, and to \p inactive_p otherwise.  Whenever \p p is recorded
// its bit is also set in \p contributed.  Directories present in
// \p source_dirs are never touched.  \p revs is indexed by parent index and
// holds the signed revs used to arbitrate between parents for inactive
// directories.
//
// Returns 1 if cache.ls_tree() reports a non-zero result, the result of
// error() for an entry that cannot be mapped to a directory or that is a
// non-tree where a tree is required, and 0 otherwise.
int commit_interleaver::index_parent_tree_items(
    int head_p, int p, dir_mask source_dirs, bool source_includes_root,
    int &inactive_p, sha1_ref parent, git_tree &tree,
    std::array<int, dir_mask::max_size> &parent_for_d,
    std::bitset<max_parents> &contributed, const std::vector<int> &revs) {
  assert(!parent->is_zeros());

  // Load the listing for the parent's tree.
  tree.sha1 = parent;
  if (cache.ls_tree(tree))
    return 1;

  // Make the current parent the owner of a slot and mark it as contributing.
  const auto claim = [&](int &slot) {
    slot = p;
    contributed.set(p);
  };

  // Strip the sign from a stored rev.
  const auto magnitude = [](int srev) { return srev < 0 ? -srev : srev; };

  for (int i = 0; i < tree.num_items; ++i) {
    auto &entry = tree.items[i];
    const bool is_non_tree = entry.type != git_tree::item_type::tree;

    // Optimization: when this source contributes the monorepo root, non-tree
    // entries need no directory lookup at all.
    if (source_includes_root && is_non_tree)
      continue;

    const int d = dirs.find_dir(entry.name);
    if (d == -1)
      return error("no monorepo root to claim undeclared directory '" +
                   std::string(entry.name) + "' in " + parent->to_string());

    // Only the root directory may be backed by something other than a tree.
    if (!dirs.list[d].is_root && is_non_tree)
      return error("invalid non-tree for directory '" +
                   std::string(entry.name) + "' in " + parent->to_string());

    // The base commit takes priority, even when it has not yet been seen in
    // a first-parent commit.
    //
    // TODO: add a test where the base directory is possibly inactive because
    // non-first-parent commits get mapped ahead of time.
    if (source_dirs.test(d))
      continue;

    // Active directories each have their own slot; all inactive ones share
    // a single slot.
    const bool is_active = dirs.active_dirs.test(d);
    int &owner = is_active ? parent_for_d[d] : inactive_p;

    // This parent already owns the slot (e.g., a second object belonging to
    // the monorepo root): nothing to do.
    if (owner == p)
      continue;

    // Nobody owns the slot yet: the first parent with content gets it.
    if (owner == -1) {
      claim(owner);
      continue;
    }

    // From here on another parent already owns the slot, so this one cannot
    // be the first parent processed.
    if (head_p == -1) {
      assert(p > 0);
    } else {
      assert(p != head_p);
    }

    // For tracked directories the first parent processed (the head, if there
    // is one) always wins.
    //
    // TODO: add a testcase where a side-history commit (i.e., head_p is -1)
    // has a second parent with a higher rev than the first parent and
    // different content for a tracked directory; confirm that the first
    // parent's version of the directory is used.
    //
    // FIXME: this is not enough to make the following situation sane:
    //
    //  - branch A is LLVM upstream
    //  - branch B tracks llvm and clang; is downstream of A
    //  - branch C tracks llvm (only)   ; is downstream of B
    //  - branch C sometimes merges directly from A
    //
    // The clang seen in branch C will flip, seemingly arbitrarily, between
    // the version from A and the version from B depending on the most recent
    // merge.  Ideally C would always take the most recent B for its clang,
    // but there is currently no way to tell the cases apart.  One idea is to
    // annotate the LLVM svnbaserev with a branch depth, extending the notion
    // that a negative svnbaserev outranks a positive one.
    if (is_active)
      continue;

    // Arbitrate using revs.  They are stored signed: a negative value means
    // the parent itself is not a commit from upstream LLVM, a positive value
    // means that it is.
    const int incumbent_srev = revs[owner];
    const int challenger_srev = revs[p];
    const int incumbent_rev = magnitude(incumbent_srev);
    const int challenger_rev = magnitude(challenger_srev);

    // A strictly newer base SVN revision takes over.
    const bool strictly_newer = challenger_rev > incumbent_rev;

    // On a tie, downstream content is preferred and otherwise the earlier
    // parent is kept; so the slot only changes hands when the incumbent is
    // upstream and the challenger is downstream.
    const bool downstream_breaks_tie = challenger_rev == incumbent_rev &&
                                       incumbent_srev > 0 &&
                                       challenger_srev < 0;

    if (strictly_newer || downstream_breaks_tie)
      claim(owner);
  }
  return 0;
}