in spark-job/src/main/java/org/apache/cassandra/diff/DiffJob.java [246:267]
/**
 * Partitions the full token range [min, max] of the given partitioner into
 * {@code numSplits} contiguous, non-overlapping splits. Splits are assigned to
 * buckets round-robin ({@code index % numBuckets}) so the journal info can be
 * sharded across C* partitions.
 *
 * @param numSplits   number of splits to produce; must be >= 1
 * @param numBuckets  number of journal buckets to distribute splits across
 * @param tokenHelper supplies the partitioner's min/max token values
 * @return list of exactly {@code numSplits} splits covering [min, max]
 * @throws IllegalArgumentException if {@code numSplits < 1}
 */
static List<Split> calculateSplits(int numSplits, int numBuckets, TokenHelper tokenHelper) {
    if (numSplits < 1)
        throw new IllegalArgumentException("numSplits must be >= 1, got " + numSplits);
    List<Split> splits = new ArrayList<>(numSplits);
    BigInteger minToken = tokenHelper.min();
    BigInteger maxToken = tokenHelper.max();
    // Degenerate case: a single split spans the whole range. Without this
    // guard the fall-through code would emit two splits both with index 0,
    // the second having start (max + 1) > end (max).
    if (numSplits == 1) {
        splits.add(new Split(0, 0, minToken, maxToken));
        return splits;
    }
    BigInteger totalTokens = maxToken.subtract(minToken);
    BigInteger segmentSize = totalTokens.divide(BigInteger.valueOf(numSplits));
    // First split starts exactly at minToken (no +1 adjustment); every later
    // split starts one past the previous end so ranges never overlap.
    splits.add(new Split(0, 0, minToken, minToken.add(segmentSize)));
    BigInteger prev = minToken.add(segmentSize);
    for (int i = 1; i < numSplits - 1; i++) {
        BigInteger next = prev.add(segmentSize);
        // add ONE to avoid split overlap with the previous split's end
        splits.add(new Split(i, i % numBuckets, prev.add(BigInteger.ONE), next));
        prev = next;
    }
    // Pin the final split's end to maxToken so the truncation from integer
    // division of segmentSize never leaves the tail of the range uncovered.
    splits.add(new Split(numSplits - 1, (numSplits - 1) % numBuckets, prev.add(BigInteger.ONE), maxToken));
    return splits;
}