in hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/logs/container/utils/ContainerDatanodeDatabase.java [375:480]
/**
 * Prints a one-line replication/health verdict for a container, derived from
 * the latest state recorded for each of its datanodes.
 *
 * <p>Each entry's state string is matched case-insensitively against the
 * constant names of {@code ReplicationManagerReport.HealthState} first and
 * {@code HddsProtos.LifeCycleState} second. Entries whose state is
 * {@code null} or matches neither enum are still counted as replicas but are
 * not placed in any state bucket.
 *
 * <p>Verdicts are evaluated in this order:
 * <ol>
 *   <li>more than one distinct BCSID among CLOSED replicas: mismatched
 *       replication;</li>
 *   <li>CLOSED replicas exist and every CLOSED timestamp is strictly greater
 *       than every OPEN/CLOSING/QUASI_CLOSED timestamp: the CLOSED replica
 *       count is compared with {@code DEFAULT_REPLICATION_FACTOR};</li>
 *   <li>otherwise: quasi-closed-stuck, unhealthy, open-unhealthy, and finally
 *       a comparison of the non-DELETED replica count with
 *       {@code DEFAULT_REPLICATION_FACTOR}.</li>
 * </ol>
 *
 * @param containerID ID of the container, used only in the printed message
 * @param latestPerDatanode entries to analyze; only the map values are read
 */
private void analyzeContainerHealth(Long containerID,
    Map<String, DatanodeContainerInfo> latestPerDatanode) {
  // Constant names of both enums, so a state string can be tested before valueOf() is called.
  Set<String> lifeCycleStates = new HashSet<>();
  for (HddsProtos.LifeCycleState state : HddsProtos.LifeCycleState.values()) {
    lifeCycleStates.add(state.name());
  }
  Set<String> healthStates = new HashSet<>();
  for (ReplicationManagerReport.HealthState state : ReplicationManagerReport.HealthState.values()) {
    healthStates.add(state.name());
  }

  Set<String> unhealthyReplicas = new HashSet<>();
  Set<String> closedReplicas = new HashSet<>();
  Set<String> openReplicas = new HashSet<>();
  Set<String> quasiclosedReplicas = new HashSet<>();
  Set<String> deletedReplicas = new HashSet<>();
  Set<Long> bcsids = new HashSet<>();
  Set<String> datanodeIds = new HashSet<>();
  List<String> closedTimestamps = new ArrayList<>();
  List<String> otherTimestamps = new ArrayList<>();

  for (DatanodeContainerInfo entry : latestPerDatanode.values()) {
    String datanodeId = entry.getDatanodeId();
    String state = entry.getState();
    long bcsid = entry.getBcsid();
    String stateTimestamp = entry.getTimestamp();
    datanodeIds.add(datanodeId);

    // Upper-case once, with a fixed locale: enum names are ASCII, and the default-locale
    // variant would turn "i" into a dotted capital I under e.g. a Turkish locale, so states
    // such as "closing" or "quasi_closed" would no longer match. A null state is mapped to
    // an empty string, which matches neither enum and is therefore left unclassified.
    String normalizedState = state == null ? "" : state.toUpperCase(java.util.Locale.ROOT);

    if (healthStates.contains(normalizedState)) {
      ReplicationManagerReport.HealthState healthState =
          ReplicationManagerReport.HealthState.valueOf(normalizedState);
      // Of the health states only UNHEALTHY is tracked; the others are ignored here.
      if (healthState == ReplicationManagerReport.HealthState.UNHEALTHY) {
        unhealthyReplicas.add(datanodeId);
      }
    } else if (lifeCycleStates.contains(normalizedState)) {
      HddsProtos.LifeCycleState lifeCycleState = HddsProtos.LifeCycleState.valueOf(normalizedState);
      switch (lifeCycleState) {
      case OPEN:
        openReplicas.add(datanodeId);
        otherTimestamps.add(stateTimestamp);
        break;
      case CLOSING:
        otherTimestamps.add(stateTimestamp);
        break;
      case CLOSED:
        closedReplicas.add(datanodeId);
        bcsids.add(bcsid);
        closedTimestamps.add(stateTimestamp);
        break;
      case QUASI_CLOSED:
        quasiclosedReplicas.add(datanodeId);
        otherTimestamps.add(stateTimestamp);
        break;
      case DELETED:
        deletedReplicas.add(datanodeId);
        break;
      default:
        break;
      }
    }
  }

  int closedCount = closedReplicas.size();
  // Timestamps are compared as plain strings.
  // NOTE(review): this assumes their format sorts chronologically - confirm.
  // When there are no OPEN/CLOSING/QUASI_CLOSED timestamps the inner allMatch is vacuously true.
  boolean allClosedNewer = closedCount > 0 && closedTimestamps.stream()
      .allMatch(ct -> otherTimestamps.stream().allMatch(ot -> ct.compareTo(ot) > 0));

  if (bcsids.size() > 1) {
    out.println("Container " + containerID + " has MISMATCHED REPLICATION as there are multiple" +
        " CLOSED containers with varying BCSIDs.");
  } else if (closedCount == DEFAULT_REPLICATION_FACTOR && allClosedNewer) {
    out.println("Container " + containerID + " has enough replicas.");
  } else if (closedCount > DEFAULT_REPLICATION_FACTOR && allClosedNewer) {
    out.println("Container " + containerID + " is OVER-REPLICATED.");
  } else if (closedCount < DEFAULT_REPLICATION_FACTOR && closedCount != 0 && allClosedNewer) {
    out.println("Container " + containerID + " is UNDER-REPLICATED.");
  } else {
    // Reached when there is no CLOSED replica, or some non-CLOSED timestamp is not older
    // than every CLOSED one.
    int replicaCount = datanodeIds.size();
    if (!quasiclosedReplicas.isEmpty()
        && closedReplicas.isEmpty()
        && quasiclosedReplicas.size() >= DEFAULT_REPLICATION_FACTOR) {
      out.println("Container " + containerID + " might be QUASI_CLOSED_STUCK.");
    } else if (!unhealthyReplicas.isEmpty()) {
      out.println("Container " + containerID + " has UNHEALTHY replicas.");
    } else if (!openReplicas.isEmpty() &&
        (replicaCount - openReplicas.size()) > 0) {
      // Some, but not all, of the distinct datanodes report OPEN.
      out.println("Container " + containerID + " might be OPEN_UNHEALTHY.");
    } else if (replicaCount - deletedReplicas.size() < DEFAULT_REPLICATION_FACTOR) {
      out.println("Container " + containerID + " is UNDER-REPLICATED.");
    } else if (replicaCount - deletedReplicas.size() > DEFAULT_REPLICATION_FACTOR) {
      out.println("Container " + containerID + " is OVER-REPLICATED.");
    } else {
      out.println("Container " + containerID + " has enough replicas.");
    }
  }
}