in priam/src/main/java/com/netflix/priam/identity/token/TokenRetrieverUtils.java [35:112]
/**
 * Infers the ownership information of {@code token} by asking other instances for the token
 * information they report, and checking that those instances agree with each other.
 *
 * <p>Only instances that do not hold {@code token} themselves and that live in {@code dc} are
 * queried. Between one and three of them (depending on how many are eligible) must return equal
 * {@link TokenInformation} for the result to be marked {@code GOOD}. The first disagreement
 * ends the search with status {@code MISMATCH}; in that case the returned token information is
 * the first answer if it is flagged live, otherwise the conflicting answer. If too few
 * instances could be queried successfully the status is {@code UNREACHABLE_NODES}.
 *
 * @param allIds all known instances of the cluster
 * @param token the token whose ownership is being inferred; instances holding this token are
 *     never queried
 * @param dc the data center to restrict the queries to; a {@code null} value matches nothing
 * @return the inferred ownership together with its status; the token information is
 *     {@code null} when no instance could be queried successfully
 */
public static InferredTokenOwnership inferTokenOwnerFromGossip(
        ImmutableSet<PriamInstance> allIds, String token, String dc) {
    // Avoid using dead instance who we are trying to replace (duh!!)
    // Avoid other regions instances to avoid communication over public ip address.
    // The DC comparison is null-safe so that an instance without a DC is simply skipped
    // instead of aborting the whole inference with a NullPointerException.
    // The result is collected into an explicit ArrayList because it is shuffled in place
    // below and Collectors.toList() gives no guarantee that its list is mutable.
    List<PriamInstance> eligibleInstances =
            allIds.stream()
                    .filter(priamInstance -> !token.equalsIgnoreCase(priamInstance.getToken()))
                    .filter(
                            priamInstance ->
                                    dc != null && dc.equalsIgnoreCase(priamInstance.getDC()))
                    .collect(Collectors.toCollection(java.util.ArrayList::new));

    // We want to get IP from min 1, max 3 instances to ensure we are not relying on
    // gossip of a single instance.
    // Good idea to shuffle so we are not talking to same instances every time.
    Collections.shuffle(eligibleInstances);

    // Potential issue could be when you have about 50% of your cluster C* DOWN or
    // trying to be replaced.
    // Think of a major disaster hitting your cluster. In that scenario chances of
    // instance hitting DOWN C* are much much higher.
    // In such a case you should rely on {@link CassandraConfig#setReplacedIp}.
    int noOfInstancesGossipShouldMatch = Math.max(1, Math.min(3, eligibleInstances.size()));

    // While it is ideal to check all the nodes in the ring to see if they agree on
    // the IP to be replaced, in large clusters it may affect the startup
    // performance. So we pick three random hosts from the ring and see if they all
    // agree on the IP to be replaced. If not, we don't replace.
    InferredTokenOwnership inferredTokenOwnership = new InferredTokenOwnership();
    int matchedGossipInstances = 0;
    int reachableInstances = 0;
    for (PriamInstance instance : eligibleInstances) {
        logger.info("Finding down nodes from ip[{}]; token[{}]", instance.getHostIP(), token);
        try {
            TokenInformation tokenInformation =
                    getTokenInformation(instance.getHostIP(), token);
            reachableInstances++;

            // The first successful answer becomes the reference every later answer is
            // compared against.
            if (inferredTokenOwnership.getTokenInformation() == null) {
                inferredTokenOwnership.setTokenInformation(tokenInformation);
            }

            if (inferredTokenOwnership.getTokenInformation().equals(tokenInformation)) {
                matchedGossipInstances++;
                if (matchedGossipInstances == noOfInstancesGossipShouldMatch) {
                    inferredTokenOwnership.setTokenInformationStatus(
                            InferredTokenOwnership.TokenInformationStatus.GOOD);
                    return inferredTokenOwnership;
                }
            } else {
                // Mismatch in the gossip information from Cassandra.
                inferredTokenOwnership.setTokenInformationStatus(
                        InferredTokenOwnership.TokenInformationStatus.MISMATCH);
                logger.info(
                        "There is a mismatch in the status information reported by Cassandra. TokenInformation1: {}, TokenInformation2: {}",
                        inferredTokenOwnership.getTokenInformation(),
                        tokenInformation);
                // Prefer the answer that reports the token as live: keep the reference if
                // it is live, otherwise fall back to the conflicting answer.
                inferredTokenOwnership.setTokenInformation(
                        inferredTokenOwnership.getTokenInformation().isLive
                                ? inferredTokenOwnership.getTokenInformation()
                                : tokenInformation);
                return inferredTokenOwnership;
            }
        } catch (GossipParseException e) {
            // Best effort: a failing instance is skipped and the next one is tried. Log
            // which instance failed and keep the exception so the cause is not lost.
            logger.warn(
                    "Unable to get token information from ip[{}]; token[{}]",
                    instance.getHostIP(),
                    token,
                    e);
        }
    }

    // If we are not able to reach at least minimum required instances.
    if (reachableInstances < noOfInstancesGossipShouldMatch) {
        inferredTokenOwnership.setTokenInformationStatus(
                InferredTokenOwnership.TokenInformationStatus.UNREACHABLE_NODES);
        logger.info(
                "Unable to find enough instances where gossip match. Required: [{}]",
                noOfInstancesGossipShouldMatch);
    }
    return inferredTokenOwnership;
}