in watchman/root/iothread.cpp [419:574]
/**
 * Crawl the single directory named by `pending.path`.
 *
 * The directory is (re)watched and opened via the watcher, every entry that
 * is new, previously non-existent, or covered by a recursive / stat-all
 * request is handed to processPath(), and any previously known entry that
 * was not observed during the scan is queued on `coll` for re-examination.
 *
 * @param root            the watched root that owns this directory
 * @param view            the view database holding the directory tree
 * @param coll            pending collection that receives follow-up work
 * @param pending         the change being serviced (path, timestamp, flags)
 * @param pendingCookies  forwarded unchanged to processPath()
 */
void InMemoryView::crawler(
    const std::shared_ptr<Root>& root,
    ViewDatabase& view,
    PendingChanges& coll,
    const PendingChange& pending,
    std::vector<w_string>& pendingCookies) {
  bool recursive = pending.flags.contains(W_PENDING_RECURSIVE);
  const bool statAll = pending.flags.contains(W_PENDING_NONRECURSIVE_SCAN);

  auto dir = view.resolveDir(pending.path, true);
  const auto& dirPath = pending.path;

  // Root replacement detection.
  // stat.cpp performs the general inode comparison for ordinary files, but
  // the root never gets a watchman_file node and so never goes through
  // InMemoryView::statPath (doing so would fault), hence this special case.
  // Some filesystems (eg: BTRFS) do not deliver inotify events for things
  // like subvolume deletes; the root has been observed to be replaced with
  // no notification at all, which left cookie sync broken forever.
  if (dirPath == root->root_path) {
    try {
      auto rootInfo = fileSystem_.getFileInformation(
          dirPath.c_str(), root->case_sensitive);
      const bool inodeDiffers = rootInfo.ino != view.getRootInode();

      // The root inode starts out as 0 and is filled in lazily below, so a
      // mismatch against 0 is simply the first crawl (which is already a
      // full crawl). A mismatch against a real inode means the root still
      // exists but is a different directory: force a recrawl to resync.
      if (inodeDiffers && view.getRootInode() != 0) {
        root->scheduleRecrawl(
            "root was replaced and we didn't get notified by the kernel");
        return;
      }
      if (inodeDiffers) {
        recursive = true;
        view.setRootInode(rootInfo.ino);
      }
    } catch (const std::system_error& err) {
      handle_open_errno(
          *root, dir, pending.now, "getFileInformation", err.code());
      view.markDirDeleted(*watcher_, dir, getClock(pending.now), true);
      return;
    }
  }

  logf(
      DBG,
      "opendir({}) recursive={} stat_all={}\n",
      dirPath,
      recursive,
      statAll);

  // Watching and opening are a single watcher operation because the order
  // in which the two must happen differs from watcher to watcher.
  std::unique_ptr<DirHandle> dirHandle;
  try {
    dirHandle = watcher_->startWatchDir(root, dir, dirPath.c_str());
  } catch (const std::system_error& err) {
    logf(DBG, "startWatchDir({}) threw {}\n", dirPath, err.what());
    handle_open_errno(*root, dir, pending.now, "opendir", err.code());
    view.markDirDeleted(*watcher_, dir, getClock(pending.now), true);
    return;
  }

  if (dir->files.empty()) {
    // Nothing is known about this dir yet: pre-size the hash table(s) to
    // avoid collisions and re-hashing during the initial crawl.
    uint32_t linkCount = 0;
#ifndef _WIN32
    struct stat dirStat;
    const int dirFd = dirHandle->getFd();
    if (dirFd != -1 && fstat(dirFd, &dirStat) == 0) {
      linkCount = static_cast<uint32_t>(dirStat.st_nlink);
    }
#endif
    // st_nlink is usually the number of sub-directories + 2 (for "." and
    // ".."); a value below 2 means the filesystem doesn't follow that
    // convention. The value is passed through untouched as a hint and the
    // hash table implementation rounds it up to the next power of 2.
    const auto filesPerDirHint = static_cast<uint32_t>(
        root->config.getInt("hint_num_files_per_dir", 64));
    apply_dir_size_hint(dir, linkCount, filesPerDirHint);
  }

  // Mark phase of delete detection: every file currently believed to exist
  // is a deletion candidate until the directory scan sees it again.
  for (auto& entry : dir->files) {
    auto known = entry.second.get();
    if (!known->exists) {
      continue;
    }
    known->maybe_deleted = true;
  }

  try {
    while (const DirEntry* entry = dirHandle->readDir()) {
      // Skip the self and parent links.
      const bool isSelfOrParent = entry->d_name[0] == '.' &&
          (strcmp(entry->d_name, ".") == 0 ||
           strcmp(entry->d_name, "..") == 0);
      if (isSelfOrParent) {
        continue;
      }

      w_string childName(entry->d_name, W_STRING_BYTE);
      struct watchman_file* known = dir->getChildFile(childName);
      if (known) {
        // Seen on disk, so it is no longer a deletion candidate.
        known->maybe_deleted = false;
      }

      // Only entries that are new to us, or that the request explicitly
      // covers (stat-all or recursive), need to be analyzed.
      const bool isNew = !known || !known->exists;
      if (!isNew && !statAll && !recursive) {
        continue;
      }

      auto childPath = dir->getFullPathToChild(childName);

      PendingFlags childFlags;
      if (isNew || recursive) {
        childFlags.set(W_PENDING_RECURSIVE);
      }
      if (pending.flags & W_PENDING_IS_DESYNCED) {
        childFlags.set(W_PENDING_IS_DESYNCED);
      }

      logf(
          DBG,
          "in crawler calling processPath on {} oldflags={} newflags={}\n",
          childPath,
          pending.flags.asRaw(),
          childFlags.asRaw());

      PendingChange childPending{
          std::move(childPath), pending.now, childFlags};
      processPath(root, view, coll, childPending, entry, pendingCookies);
    }
  } catch (const std::system_error& exc) {
    // The scan was cut short; put this directory back on the pending list
    // so that it gets looked at again.
    log(ERR,
        "Error while reading dir ",
        dirPath,
        ": ",
        exc.what(),
        ", re-adding to pending list to re-assess\n");
    coll.add(dirPath, pending.now, {});
  }
  dirHandle.reset();

  // Sweep phase: whatever is still flagged maybe_deleted was not seen by
  // the scan above, so queue it to be re-processed shortly. During a
  // recursive crawl, known sub-directories are queued as well.
  const auto requeueFlags = recursive ? W_PENDING_RECURSIVE : PendingFlags{};
  for (auto& entry : dir->files) {
    auto known = entry.second.get();
    if (!known->exists) {
      continue;
    }
    const bool vanished = known->maybe_deleted;
    if (vanished || (known->stat.isDir() && recursive)) {
      coll.add(dir, known->getName().data(), pending.now, requeueFlags);
    }
  }
}