in eden/fs/inodes/TreeInode.cpp [2201:2532]
Future<Unit> TreeInode::computeDiff(
folly::Synchronized<TreeInodeState>::LockedPtr contentsLock,
DiffContext* context,
RelativePathPiece currentPath,
shared_ptr<const Tree> tree,
std::unique_ptr<GitIgnoreStack> ignore,
bool isIgnored) {
XDCHECK(isIgnored || ignore != nullptr)
<< "the ignore stack is required if this directory is not ignored";
// A list of entries that have been removed
std::vector<const TreeEntry*> removedEntries;
// A list of untracked files
std::vector<PathComponent> untrackedFiles;
// A list of ignored files
std::vector<PathComponent> ignoredFiles;
// A list of modified files
std::vector<PathComponent> modifiedFiles;
std::vector<std::unique_ptr<DeferredDiffEntry>> deferredEntries;
auto self = inodePtrFromThis();
// Grab the contents_ lock, and loop to find children that might be
// different. In this first pass we primarily build the list of children to
// examine, but we wait until after we release our contents_ lock to actually
// examine any children InodeBase objects.
std::vector<IncompleteInodeLoad> pendingLoads;
{
// Move the contents lock into a variable inside this scope so it
// will be released at the end of this scope.
//
// Even though diffing conceptually seems like a read-only operation, we
// need a write lock since we may have to load child inodes, affecting
// their entry state.
//
// This lock is held while we are processing the child
// entries of the tree. The lock is released once we have constructed all of
// the needed `DeferredDiffEntry` objects, and then these are manually ran,
// which themselves call `diff()` with their internal `TreeInode`. This is
// done so we are not recursively holding the `contents` lock, which could
// lead to deadlock.
auto contents = std::move(contentsLock);
auto processUntracked = [&](PathComponentPiece name, DirEntry* inodeEntry) {
bool entryIgnored = isIgnored;
auto fileType = inodeEntry->isDirectory() ? GitIgnore::TYPE_DIR
: GitIgnore::TYPE_FILE;
auto entryPath = currentPath + name;
if (!isIgnored) {
auto ignoreStatus = ignore->match(entryPath, fileType);
if (ignoreStatus == GitIgnore::HIDDEN) {
// Completely skip over hidden entries.
// This is used for reserved directories like .hg and .eden
XLOG(DBG9) << "diff: hidden entry: " << entryPath;
return;
}
entryIgnored = (ignoreStatus == GitIgnore::EXCLUDE);
}
if (inodeEntry->isDirectory()) {
if (!entryIgnored || context->listIgnored) {
if (auto childPtr = inodeEntry->getInodePtr()) {
deferredEntries.emplace_back(
DeferredDiffEntry::createUntrackedEntryFromInodeFuture(
context,
entryPath,
std::move(childPtr),
ignore.get(),
entryIgnored));
} else if (inodeEntry->isMaterialized()) {
auto inodeFuture = self->loadChildLocked(
contents->entries,
name,
*inodeEntry,
pendingLoads,
context->getFetchContext());
deferredEntries.emplace_back(
DeferredDiffEntry::createUntrackedEntryFromInodeFuture(
context,
entryPath,
std::move(inodeFuture),
ignore.get(),
entryIgnored));
} else {
// This entry is present locally but not in the source control tree.
// The current Inode is not materialized so do not load inodes and
// instead use the source control differ.
// Collect this future to complete with other
// deferred entries.
deferredEntries.emplace_back(DeferredDiffEntry::createAddedScmEntry(
context,
entryPath,
inodeEntry->getHash(),
ignore.get(),
entryIgnored));
}
}
} else {
if (!entryIgnored) {
XLOG(DBG8) << "diff: untracked file: " << entryPath;
context->callback->addedFile(entryPath);
} else if (context->listIgnored) {
XLOG(DBG9) << "diff: ignored file: " << entryPath;
context->callback->ignoredFile(entryPath);
} else {
// Don't bother reporting this ignored file since
// listIgnored is false.
}
}
};
auto processRemoved = [&](const TreeEntry& scmEntry) {
if (scmEntry.isTree()) {
deferredEntries.emplace_back(DeferredDiffEntry::createRemovedScmEntry(
context, currentPath + scmEntry.getName(), scmEntry.getHash()));
} else {
XLOG(DBG5) << "diff: removed file: "
<< currentPath + scmEntry.getName();
context->callback->removedFile(currentPath + scmEntry.getName());
}
};
auto processBothPresent = [&](const TreeEntry& scmEntry,
DirEntry* inodeEntry) {
// We only need to know the ignored status if this is a directory.
// If this is a regular file on disk and in source control, then it
// is always included since it is already tracked in source control.
bool entryIgnored = isIgnored;
auto entryPath = currentPath + scmEntry.getName();
if (!isIgnored && (inodeEntry->isDirectory() || scmEntry.isTree())) {
auto fileType = inodeEntry->isDirectory() ? GitIgnore::TYPE_DIR
: GitIgnore::TYPE_FILE;
auto ignoreStatus = ignore->match(entryPath, fileType);
if (ignoreStatus == GitIgnore::HIDDEN) {
// This is rather unexpected. We don't expect to find entries in
// source control using reserved hidden names.
// Treat this as ignored for now.
entryIgnored = true;
} else if (ignoreStatus == GitIgnore::EXCLUDE) {
entryIgnored = true;
} else {
entryIgnored = false;
}
}
if (inodeEntry->getInode()) {
// This inode is already loaded.
auto childInodePtr = inodeEntry->getInodePtr();
deferredEntries.emplace_back(DeferredDiffEntry::createModifiedEntry(
context,
entryPath,
scmEntry,
std::move(childInodePtr),
ignore.get(),
entryIgnored));
} else if (inodeEntry->isMaterialized()) {
// This inode is not loaded but is materialized.
// We'll have to load it to confirm if it is the same or different.
auto inodeFuture = self->loadChildLocked(
contents->entries,
scmEntry.getName(),
*inodeEntry,
pendingLoads,
context->getFetchContext());
deferredEntries.emplace_back(
DeferredDiffEntry::createModifiedEntryFromInodeFuture(
context,
entryPath,
scmEntry,
std::move(inodeFuture),
ignore.get(),
entryIgnored));
} else if (
// Eventually the mode will come from inode metadata storage,
// not from the directory entry. However, any source-control-visible
// metadata changes will cause the inode to be materialized, and
// the previous path will be taken.
treeEntryTypeFromMode(inodeEntry->getInitialMode()) ==
scmEntry.getType() &&
inodeEntry->getHash() == scmEntry.getHash()) {
// This file or directory is unchanged. We can skip it.
XLOG(DBG9) << "diff: unchanged unloaded file: " << entryPath;
} else if (inodeEntry->isDirectory()) {
// This is a modified directory. Since it is not materialized we can
// directly compare the source control objects.
// Collect this future to complete with other deferred entries.
deferredEntries.emplace_back(DeferredDiffEntry::createModifiedScmEntry(
context,
entryPath,
scmEntry.getHash(),
inodeEntry->getHash(),
ignore.get(),
entryIgnored));
} else if (scmEntry.isTree()) {
// This used to be a directory in the source control state,
// but is now a file or symlink. Report the new file, then add a
// deferred entry to report the entire source control Tree as
// removed.
if (entryIgnored) {
if (context->listIgnored) {
XLOG(DBG6) << "diff: directory --> ignored file: " << entryPath;
context->callback->ignoredFile(entryPath);
}
} else {
XLOG(DBG6) << "diff: directory --> untracked file: " << entryPath;
context->callback->addedFile(entryPath);
}
deferredEntries.emplace_back(DeferredDiffEntry::createRemovedScmEntry(
context, entryPath, scmEntry.getHash()));
} else {
// This file corresponds to a different blob hash, or has a
// different mode.
//
// Ideally we should be able to assume that the file is
// modified--if two blobs have different hashes we should be able
// to assume that their contents are different. Unfortunately this
// is not the case for now with our mercurial blob IDs, since the
// mercurial blob data includes the path name and past history
// information.
//
// TODO: Once we build a new backing store and can replace our
// janky hashing scheme for mercurial data, we should be able just
// immediately assume the file is different here, without checking.
if (treeEntryTypeFromMode(inodeEntry->getInitialMode()) !=
scmEntry.getType()) {
// The mode is definitely modified
XLOG(DBG5) << "diff: file modified due to mode change: " << entryPath;
context->callback->modifiedFile(entryPath);
} else {
// TODO: Hopefully at some point we will track file sizes in the
// parent TreeInode::Entry and the TreeEntry. Once we have file
// sizes, we could check for differing file sizes first, and
// avoid loading the blob if they are different.
deferredEntries.emplace_back(DeferredDiffEntry::createModifiedEntry(
context, entryPath, scmEntry, inodeEntry->getHash()));
}
}
};
// Walk through the source control tree entries and our inode entries to
// look for differences.
//
// This code relies on the fact that the source control entries and our
// inode entries are both sorted in the same order.
vector<TreeEntry> emptyEntries;
const auto& scEntries = tree ? tree->getTreeEntries() : emptyEntries;
auto& inodeEntries = contents->entries;
size_t scIdx = 0;
auto inodeIter = inodeEntries.begin();
while (true) {
if (scIdx >= scEntries.size()) {
if (inodeIter == inodeEntries.end()) {
// All Done
break;
}
// This entry is present locally but not in the source control tree.
processUntracked(inodeIter->first, &inodeIter->second);
++inodeIter;
} else if (inodeIter == inodeEntries.end()) {
// This entry is present in the old tree but not the old one.
processRemoved(scEntries[scIdx]);
++scIdx;
} else {
auto compare = comparePathComponent(
scEntries[scIdx].getName(),
inodeIter->first,
context->caseSensitive);
if (compare == CompareResult::BEFORE) {
processRemoved(scEntries[scIdx]);
++scIdx;
} else if (compare == CompareResult::AFTER) {
processUntracked(inodeIter->first, &inodeIter->second);
++inodeIter;
} else {
const auto& scmEntry = scEntries[scIdx];
auto* inodeEntry = &inodeIter->second;
++scIdx;
++inodeIter;
processBothPresent(scmEntry, inodeEntry);
}
}
}
}
// Finish setting up any load operations we started while holding the
// contents_ lock above.
for (auto& load : pendingLoads) {
load.finish();
}
// Now process all of the deferred work.
vector<Future<Unit>> deferredFutures;
for (auto& entry : deferredEntries) {
deferredFutures.push_back(entry->run());
}
// Wait on all of the deferred entries to complete.
// Note that we explicitly move-capture the deferredFutures vector into this
// callback, to ensure that the DeferredDiffEntry objects do not get
// destroyed before they complete.
return folly::collectAll(deferredFutures)
.toUnsafeFuture()
.thenValue([self = std::move(self),
currentPath = RelativePath{std::move(currentPath)},
context,
// Capture ignore to ensure it remains valid until all of our
// children's diff operations complete.
ignore = std::move(ignore),
deferredJobs = std::move(deferredEntries)](
vector<folly::Try<Unit>> results) {
// Call diffError() for any jobs that failed.
for (size_t n = 0; n < results.size(); ++n) {
auto& result = results[n];
if (result.hasException()) {
XLOG(WARN) << "exception processing diff for "
<< deferredJobs[n]->getPath() << ": "
<< folly::exceptionStr(result.exception());
context->callback->diffError(
deferredJobs[n]->getPath(), result.exception());
}
}
// Report success here, even if some of our deferred jobs failed.
// We will have reported those errors to the callback already, and so we
// don't want our parent to report a new error at our path.
return makeFuture();
});
}