in solr/core/src/java/org/apache/solr/handler/admin/ZookeeperStatusHandler.java [111:272]
protected Map<String, Object> getZkStatus(String zkHost, ZkDynamicConfig zkDynamicConfig) {
  // Builds a status report for the ZooKeeper servers Solr is configured to talk to.
  //
  // zkHost          - the ZK connection string; parsed via ZkDynamicConfig.fromZkConnectString
  // zkDynamicConfig - the dynamic ensemble config, or null/empty when it is not available
  //
  // The returned map carries the keys "ensembleSize", "zkHost", "details" (one map per server),
  // "dynamicReconfig", "status", optionally "mode" ("standalone" or "ensemble") and, when
  // anything was found to be wrong, "errors" (a list of human readable messages).
  final ZkDynamicConfig hostsFromConnectionString = ZkDynamicConfig.fromZkConnectString(zkHost);
  final ZkDynamicConfig zookeepers;
  final boolean dynamicReconfig;
  final List<String> errors = new ArrayList<>();
  String status = STATUS_NA;
  if (zkDynamicConfig == null || zkDynamicConfig.size() == 0) {
    // Fallback to parsing zkHost for older zk servers without support for dynamic reconfiguration
    dynamicReconfig = false;
    zookeepers = hostsFromConnectionString;
  } else {
    dynamicReconfig = true;
    // Normalise both host lists to lower-cased "host:port" strings so they can be compared.
    List<String> connStringHosts =
        hostsFromConnectionString.getServers().stream()
            .map(h -> h.resolveClientPortAddress().toLowerCase(Locale.ROOT) + ":" + h.clientPort)
            .sorted()
            .collect(Collectors.toList());
    // A dynamic config entry without a client port borrows the port of the first server in the
    // connection string.
    List<String> dynamicHosts =
        zkDynamicConfig.getServers().stream()
            .map(
                h ->
                    h.resolveClientPortAddress().toLowerCase(Locale.ROOT)
                        + ":"
                        + (h.clientPort != null
                            ? h.clientPort
                            : hostsFromConnectionString.getServers().get(0).clientPort))
            .sorted()
            .collect(Collectors.toList());
    // Only warn when the dynamic config contains a host that the connection string lacks.
    if (!connStringHosts.containsAll(dynamicHosts)) {
      errors.add(
          "Your ZK connection string ("
              + connStringHosts.size()
              + " hosts) is different from the "
              + "dynamic ensemble config ("
              + dynamicHosts.size()
              + " hosts). Solr does not currently support "
              + "dynamic reconfiguration and will only be able to connect to the zk hosts in your connection string.");
      status = STATUS_YELLOW;
    }
    if (zkDynamicConfig.getServers().get(0).clientPort != null) {
      // If we have dynamic config with client ports, use this list to iterate servers
      zookeepers = zkDynamicConfig;
    } else {
      // Use list from connection string since client port is missing on dynamic config from ZK
      zookeepers = hostsFromConnectionString;
    }
  }

  final Map<String, Object> zkStatus = new HashMap<>();
  // Typed as a list of maps so the "ok" count below needs no cast to a concrete Map class.
  final List<Map<String, Object>> details = new ArrayList<>();
  int standalone = 0;
  int followers = 0;
  int reportedFollowers = 0;
  int leaders = 0;
  zkStatus.put("ensembleSize", zookeepers.size());
  zkStatus.put("zkHost", zkHost);

  // Query every server and tally the role each one reports.
  for (ZkDynamicConfig.Server zk : zookeepers.getServers()) {
    final String zkClientHostPort = zk.resolveClientPortAddress() + ":" + zk.clientPort;
    try {
      Map<String, Object> stat = monitorZookeeper(zkClientHostPort);
      // Per-server errors are hoisted into the top-level error list and stripped from the detail.
      if (stat.containsKey("errors")) {
        errors.addAll((List<String>) stat.get("errors"));
        stat.remove("errors");
      }
      details.add(stat);
      String state = String.valueOf(stat.get("zk_server_state"));
      if ("follower".equals(state) || "observer".equals(state)) {
        // Observers are counted together with followers.
        followers++;
      } else if ("leader".equals(state)) {
        leaders++;
        // Values are parsed as floats and truncated to int.
        // NOTE(review): presumably because some ZK versions report these as decimals - confirm.
        reportedFollowers =
            Math.max(
                (int) Float.parseFloat((String) stat.getOrDefault("zk_followers", "0")),
                (int) Float.parseFloat((String) stat.getOrDefault("zk_synced_followers", "0")));
      } else if ("standalone".equals(state)) {
        standalone++;
      }
      if (zk.role != null) {
        stat.put("role", zk.role);
      }
    } catch (SolrException se) {
      // An unreachable/misbehaving server is recorded as a not-ok detail entry instead of
      // failing the whole status request.
      log.warn("Failed talking to zookeeper {}", zkClientHostPort, se);
      errors.add(se.getMessage());
      Map<String, Object> stat = new HashMap<>();
      stat.put("host", zkClientHostPort);
      stat.put("ok", false);
      status = STATUS_YELLOW;
      details.add(stat);
    }
  }
  zkStatus.put("details", details);

  // Boolean.TRUE.equals treats a missing or non-boolean "ok" value as "not ok" rather than
  // throwing on unboxing, and works for any Map implementation returned by monitorZookeeper.
  final int numOk =
      (int) details.stream().filter(m -> Boolean.TRUE.equals(m.get("ok"))).count();
  zkStatus.put("dynamicReconfig", dynamicReconfig);

  // Consistency checks; each failed check escalates the status and appends an error message.
  if (followers + leaders > 0 && standalone > 0) {
    status = STATUS_RED;
    errors.add("The zk nodes do not agree on their mode, check details");
  }
  if (standalone > 1) {
    status = STATUS_RED;
    errors.add("Only one zk allowed in standalone mode");
  }
  if (leaders > 1) {
    zkStatus.put("mode", "ensemble");
    status = STATUS_RED;
    errors.add("Only one leader allowed, got " + leaders);
  }
  if (followers > 0 && leaders == 0) {
    zkStatus.put("mode", "ensemble");
    status = STATUS_RED;
    errors.add("We do not have a leader");
  }
  if (leaders > 0 && followers != reportedFollowers) {
    zkStatus.put("mode", "ensemble");
    status = STATUS_RED;
    errors.add(
        "Leader reports "
            + reportedFollowers
            + " followers, but we only found "
            + followers
            + ". Please check zkHost configuration");
  }
  if (followers + leaders == 0 && (standalone == 1 || zookeepers.size() == 1)) {
    zkStatus.put("mode", "standalone");
  }
  if (followers + leaders > 0 && (zookeepers.size()) % 2 == 0) {
    // An even-sized ensemble is only a warning; never downgrade an existing red status.
    if (!STATUS_RED.equals(status)) {
      status = STATUS_YELLOW;
    }
    errors.add("We have an even number of zookeepers which is not recommended");
  }
  if (followers + leaders > 0 && standalone == 0) {
    zkStatus.put("mode", "ensemble");
  }
  if (numOk == 0) {
    status = STATUS_RED;
  }
  // Only when no earlier check has set a status is it derived from the share of healthy nodes:
  // all ok -> green, a strict majority ok -> yellow, otherwise red.
  if (status.equals(STATUS_NA)) {
    if (numOk == zookeepers.size()) {
      status = STATUS_GREEN;
    } else if (numOk < zookeepers.size() && numOk > zookeepers.size() / 2) {
      status = STATUS_YELLOW;
      errors.add("Some zookeepers are down: " + numOk + "/" + zookeepers.size());
    } else {
      status = STATUS_RED;
      errors.add(
          "Mismatch in number of zookeeper nodes live. numOK="
              + numOk
              + ", expected "
              + zookeepers.size());
    }
  }
  zkStatus.put("status", status);
  if (!errors.isEmpty()) {
    zkStatus.put("errors", errors);
  }
  return zkStatus;
}