in samza-core/src/main/java/org/apache/samza/storage/blobstore/util/DirDiffUtil.java [78:145]
/**
 * Builds a predicate that tests whether a local directory has the same contents as a remote
 * directory index.
 *
 * <p>The predicate computes a {@link DirDiff} between the two directories and reports them as the
 * same only if all of the following hold:
 * <ol>
 *   <li>no files were removed or added, other than files whose names are in {@code filesToIgnore};</li>
 *   <li>no sub-directories were removed or added ({@code filesToIgnore} is not applied to
 *       sub-directory names);</li>
 *   <li>every retained sub-directory is itself the same, checked recursively with the same
 *       settings.</li>
 * </ol>
 * All checks are always run and each kind of difference is logged, so a single invocation
 * reports every difference found instead of stopping at the first one.
 *
 * @param filesToIgnore names of files that may differ without making the directories unequal
 * @param compareLargeFileChecksums forwarded to {@code DirDiffUtil.areSameFile} for the files of
 *     this directory and of all retained sub-directories
 * @param compareFileOwners forwarded to {@code DirDiffUtil.areSameFile} for the files of this
 *     directory and of all retained sub-directories
 * @return a predicate returning {@code true} iff the local and remote directory are the same
 */
public BiPredicate<File, DirIndex> areSameDir(Set<String> filesToIgnore, boolean compareLargeFileChecksums, boolean compareFileOwners) {
  return (localDir, remoteDir) -> {
    String localDirPath = localDir.getAbsolutePath();
    // the root dir index is displayed as "root" in log messages
    String remoteDirName = remoteDir.getDirName().equals(DirIndex.ROOT_DIR_NAME) ? "root" : remoteDir.getDirName();
    LOG.debug("Creating diff between local dir: {} and remote dir: {} for comparison.",
        localDirPath, remoteDirName);
    DirDiff dirDiff = DirDiffUtil.getDirDiff(localDir, remoteDir,
        DirDiffUtil.areSameFile(compareLargeFileChecksums, compareFileOwners));

    // deliberately not short-circuiting below: every kind of difference gets logged
    boolean same = true;

    List<String> filesRemoved = dirDiff.getFilesRemoved().stream()
        .map(FileIndex::getFileName)
        .filter(name -> !filesToIgnore.contains(name))
        .collect(Collectors.toList());
    if (!filesRemoved.isEmpty()) {
      same = false;
      LOG.error("Local directory: {} is missing files that are present in remote snapshot: {}",
          localDirPath, StringUtils.join(filesRemoved, ", "));
    }

    List<DirIndex> subDirsRemoved = dirDiff.getSubDirsRemoved();
    if (!subDirsRemoved.isEmpty()) {
      same = false;
      List<String> missingSubDirs = subDirsRemoved.stream().map(DirIndex::getDirName).collect(Collectors.toList());
      LOG.error("Local directory: {} is missing sub-dirs that are present in remote snapshot: {}",
          localDirPath, StringUtils.join(missingSubDirs, ", "));
    }

    List<String> filesAdded = dirDiff.getFilesAdded().stream()
        .map(File::getName)
        .filter(name -> !filesToIgnore.contains(name))
        .collect(Collectors.toList());
    if (!filesAdded.isEmpty()) {
      same = false;
      LOG.error("Local directory: {} has additional files that are not present in remote snapshot: {}",
          localDirPath, StringUtils.join(filesAdded, ", "));
    }

    List<DirDiff> subDirsAdded = dirDiff.getSubDirsAdded();
    if (!subDirsAdded.isEmpty()) {
      same = false;
      List<String> addedDirs = subDirsAdded.stream().map(DirDiff::getDirName).collect(Collectors.toList());
      LOG.error("Local directory: {} has additional sub-dirs that are not present in remote snapshot: {}",
          localDirPath, StringUtils.join(addedDirs, ", "));
    }

    // dir diff calculation already ensures that all retained files are equal (by definition)
    // recursively test that all retained sub-dirs are equal as well
    Map<String, DirIndex> remoteSubDirs = new HashMap<>();
    for (DirIndex subDir : remoteDir.getSubDirsPresent()) {
      remoteSubDirs.put(subDir.getDirName(), subDir);
    }

    // Built once per test (not once per sub-dir), and inside the lambda so that the recursion
    // stays lazy. The caller's compareLargeFileChecksums flag is forwarded so that files in
    // sub-directories are compared with the same settings as files in the top-level directory.
    BiPredicate<File, DirIndex> subDirPredicate =
        areSameDir(filesToIgnore, compareLargeFileChecksums, compareFileOwners);
    for (DirDiff subDirRetained : dirDiff.getSubDirsRetained()) {
      String localSubDirName = subDirRetained.getDirName();
      File localSubDirFile = Paths.get(localDirPath, localSubDirName).toFile();
      DirIndex remoteSubDir = remoteSubDirs.get(localSubDirName);
      if (!subDirPredicate.test(localSubDirFile, remoteSubDir)) {
        LOG.debug("Local sub-dir: {} and remote sub-dir: {} are not same.",
            localSubDirFile.getAbsolutePath(), remoteSubDir.getDirName());
        same = false;
      }
    }

    LOG.debug("Local dir: {} and remote dir: {} are {}the same.",
        localDirPath, remoteDirName, same ? "" : "not ");
    return same;
  };
}