void InMemoryView::crawler()

in watchman/root/iothread.cpp [419:574]


/**
 * Crawl the single directory named by `pending.path`.
 *
 * The function asks the watcher to start watching and open the directory,
 * reads its entries, and hands every entry that needs examination to
 * processPath().  Entries that were already known and existing but were not
 * seen during this read are queued on `coll` so they get re-examined, as are
 * known existing sub-directories when the crawl is recursive.
 *
 * When `pending.path` is the root path, the root inode is compared with the
 * one remembered in `view`; a mismatch against a previously recorded inode
 * schedules a recrawl and returns without crawling.
 *
 * @param root            Root being crawled; supplies root_path,
 *                        case_sensitive, config, and scheduleRecrawl().
 * @param view            View database; provides the dir node for
 *                        `pending.path` and stores the root inode.
 * @param coll            Pending collection that receives follow-up work.
 * @param pending         The change being processed: path, timestamp (`now`)
 *                        and flags.
 * @param pendingCookies  Not inspected here; forwarded to processPath().
 *
 * std::system_error thrown by getFileInformation(), startWatchDir() or
 * readDir() is caught inside this function; other exception types are not
 * handled here.
 */
void InMemoryView::crawler(
    const std::shared_ptr<Root>& root,
    ViewDatabase& view,
    PendingChanges& coll,
    const PendingChange& pending,
    std::vector<w_string>& pendingCookies) {
  // `recursive` is not const: it is forced on below the first time the root
  // inode is recorded.
  bool recursive = pending.flags.contains(W_PENDING_RECURSIVE);
  // When set, every entry read from the directory is passed to processPath(),
  // even if it is already known and existing (see the condition in the
  // readDir loop).
  bool stat_all = pending.flags.contains(W_PENDING_NONRECURSIVE_SCAN);

  // NOTE(review): the meaning of the `true` argument is not visible here;
  // presumably "create the node if missing" -- confirm against resolveDir.
  auto dir = view.resolveDir(pending.path, true);

  // Detect root directory replacement.
  // The inode number check is handled more generally by the sister code
  // in stat.cpp.  We need to special case it for the root because we never
  // generate a watchman_file node for the root and thus never call
  // InMemoryView::statPath (we'll fault if we do!).
  // Ideally the kernel would have given us a signal when we've been replaced
  // but some filesystems (eg: BTRFS) do not emit appropriate inotify events
  // for things like subvolume deletes.  We've seen situations where the
  // root has been replaced and we got no notifications at all and this has
  // left the cookie sync mechanism broken forever.
  if (pending.path == root->root_path) {
    try {
      auto st = fileSystem_.getFileInformation(
          pending.path.c_str(), root->case_sensitive);
      if (st.ino != view.getRootInode()) {
        // If it still exists and the inode doesn't match, then we need
        // to force recrawl to make sure we're in sync.
        // We're lazily initializing the rootInode to 0 here, so we don't
        // need to do this the first time through (we're already crawling
        // everything in that case).
        if (view.getRootInode() != 0) {
          root->scheduleRecrawl(
              "root was replaced and we didn't get notified by the kernel");
          return;
        }
        // First time through: remember the inode and crawl recursively.
        recursive = true;
        view.setRootInode(st.ino);
      }
    } catch (const std::system_error& err) {
      // Could not stat the root: report the error, mark the dir node as
      // deleted and give up on this crawl.
      // NOTE(review): the trailing `true` passed to markDirDeleted is
      // presumably "recursive" -- confirm against its declaration.
      handle_open_errno(
          *root, dir, pending.now, "getFileInformation", err.code());
      view.markDirDeleted(*watcher_, dir, getClock(pending.now), true);
      return;
    }
  }

  auto& path = pending.path;

  logf(
      DBG, "opendir({}) recursive={} stat_all={}\n", path, recursive, stat_all);

  /* Start watching and open the dir for crawling.
   * Whether we open the dir prior to watching or after is watcher specific,
   * so the operations are rolled together in our abstraction */
  std::unique_ptr<DirHandle> osdir;

  try {
    osdir = watcher_->startWatchDir(root, dir, path.c_str());
  } catch (const std::system_error& err) {
    // Same recovery as the root stat failure above: report, mark the dir
    // node deleted, and stop.
    logf(DBG, "startWatchDir({}) threw {}\n", path, err.what());
    handle_open_errno(*root, dir, pending.now, "opendir", err.code());
    view.markDirDeleted(*watcher_, dir, getClock(pending.now), true);
    return;
  }

  // Only size the containers when no files are recorded for this dir yet.
  if (dir->files.empty()) {
    // Pre-size our hash(es) if we can, so that we can avoid collisions
    // and re-hashing during initial crawl
    uint32_t num_dirs = 0;
#ifndef _WIN32
    // On Windows this block is compiled out and num_dirs stays 0.
    struct stat st;
    int dfd = osdir->getFd();
    if (dfd != -1 && fstat(dfd, &st) == 0) {
      num_dirs = (uint32_t)st.st_nlink;
    }
#endif
    // st.st_nlink is usually number of dirs + 2 (., ..).
    // If it is less than 2 then it doesn't follow that convention.
    // We just pass it through for the dir size hint and the hash
    // table implementation will round that up to the next power of 2
    apply_dir_size_hint(
        dir,
        num_dirs,
        uint32_t(root->config.getInt("hint_num_files_per_dir", 64)));
  }

  /* flag for delete detection */
  // Every file currently believed to exist is tentatively marked; the flag
  // is cleared again below for each name that readDir() actually returns.
  for (auto& it : dir->files) {
    auto file = it.second.get();
    if (file->exists) {
      file->maybe_deleted = true;
    }
  }

  try {
    while (const DirEntry* dirent = osdir->readDir()) {
      // Don't follow parent/self links
      // (the first-character test skips the strcmp calls for most names)
      if (dirent->d_name[0] == '.' &&
          (!strcmp(dirent->d_name, ".") || !strcmp(dirent->d_name, ".."))) {
        continue;
      }

      // Queue it up for analysis if the file is newly existing
      w_string name(dirent->d_name, W_STRING_BYTE);
      struct watchman_file* file = dir->getChildFile(name);
      if (file) {
        // Seen on disk, so it is not a deletion candidate.
        file->maybe_deleted = false;
      }
      // Process the entry when it is unknown, known but previously marked as
      // not existing, or when the caller requested a full or recursive scan.
      if (!file || !file->exists || stat_all || recursive) {
        auto full_path = dir->getFullPathToChild(name);

        // Flags for the child are built from scratch; only RECURSIVE and
        // IS_DESYNCED are carried over.
        PendingFlags newFlags;
        // New or previously non-existing entries are always processed
        // recursively, regardless of how this crawl was requested.
        if (recursive || !file || !file->exists) {
          newFlags.set(W_PENDING_RECURSIVE);
        }
        if (pending.flags & W_PENDING_IS_DESYNCED) {
          newFlags.set(W_PENDING_IS_DESYNCED);
        }

        logf(
            DBG,
            "in crawler calling processPath on {} oldflags={} newflags={}\n",
            full_path,
            pending.flags.asRaw(),
            newFlags.asRaw());

        // full_path is moved from here; it must not be used after this line.
        PendingChange full_pending{std::move(full_path), pending.now, newFlags};
        processPath(root, view, coll, full_pending, dirent, pendingCookies);
      }
    }
  } catch (const std::system_error& exc) {
    // A failure while reading (or while processing an entry, if that throws
    // std::system_error) stops the loop.  The directory is re-queued with
    // empty flags and control falls through to the sweep below, so entries
    // that were not reached keep maybe_deleted set and get queued there too.
    log(ERR,
        "Error while reading dir ",
        path,
        ": ",
        exc.what(),
        ", re-adding to pending list to re-assess\n");
    coll.add(path, pending.now, {});
  }
  // Drop the directory handle before queuing follow-up work.
  osdir.reset();

  // Anything still in maybe_deleted is actually deleted.
  // Arrange to re-process it shortly
  // Known, existing sub-directories are also queued when crawling
  // recursively, even if they were seen in the listing above.
  for (auto& it : dir->files) {
    auto file = it.second.get();
    if (file->exists &&
        (file->maybe_deleted || (file->stat.isDir() && recursive))) {
      coll.add(
          dir,
          file->getName().data(),
          pending.now,
          recursive ? W_PENDING_RECURSIVE : PendingFlags{});
    }
  }
}