in spark-job/src/main/java/org/apache/cassandra/diff/DiffJob.java [231:243]
/**
 * Builds the list of splits the job will work through.
 *
 * <p>The splits are first calculated from {@code config.splits()} and
 * {@code config.buckets()}. If the configuration carries a non-empty set of
 * specific tokens whose modifier is {@code ACCEPT}, the list is narrowed via
 * {@code getSplitsForTokens}. The resulting list is shuffled in place before
 * it is returned.
 *
 * @param config      job configuration supplying the split count, bucket count
 *                    and the optional specific-token settings
 * @param tokenHelper helper passed through to {@code calculateSplits}
 * @return the selected splits, in randomized order
 */
private static List<Split> getSplits(JobConfiguration config, TokenHelper tokenHelper) {
    logger.info("Initializing splits");
    final List<Split> allSplits = calculateSplits(config.splits(), config.buckets(), tokenHelper);
    logger.info("All Splits: {}", allSplits);

    final List<Split> selected;
    if (config.specificTokens().isEmpty()
        || config.specificTokens().modifier != SpecificTokens.Modifier.ACCEPT) {
        // No ACCEPT-style token restriction is configured: keep every split.
        selected = allSplits;
    } else {
        selected = getSplitsForTokens(config.specificTokens().tokens, allSplits);
        logger.info("Splits for specific tokens ONLY: {}", selected);
    }

    // Randomize the order so the work is spread across the workers; this
    // matters when only part of a cluster is being compared.
    // NOTE(review): shuffling in place assumes the list is modifiable - confirm
    // for both calculateSplits and getSplitsForTokens.
    Collections.shuffle(selected);
    return selected;
}