in mysqloperator/controller/diagnose.py [0:0]
def find_group_partitions(online_pod_info: Dict[str, InstanceStatus],
                          pods: Set[MySQLPod], logger) -> Tuple[List[List[InstanceStatus]], List[Set[MySQLPod]]]:
    """Group the instances in online_pod_info into active and blocked partitions.

    Args:
        online_pod_info: per-instance status keyed by endpoint. Each value is
            read for `in_quorum`, `is_primary` and `peers` (a mapping of peer
            endpoint -> member state as seen by that instance).
        pods: all known pods; indexed here by their `endpoint` so that the
            peers of a blocked instance can be mapped back to pod objects.
        logger: object with an `info()` method, used to report an
            inconsistent group view before raising.

    Returns:
        A tuple `(active_partitions, blocked_partitions)`:
        - active_partitions: one list of instance statuses per PRIMARY that
          reports quorum (its ONLINE/RECOVERING peers). More than one entry
          means split-brain; none means no availability.
        - blocked_partitions: distinct sets of pods seen by instances that
          report no quorum, sorted by size, largest first.

    Raises:
        kopf.TemporaryError: if an instance with quorum lists an
            ONLINE/RECOVERING peer that is absent from online_pod_info, or if
            there are partitions with quorum but none of them has a PRIMARY.
        AssertionError: if an instance reporting no quorum is nevertheless a
            member of an active partition.
    """
    # List of group partitions that have quorum and can execute transactions.
    # If there's more than 1, then there's a split-brain. If there's none, then
    # we have no availability.
    active_partitions: List[List[InstanceStatus]] = []
    # Endpoints of the members of each active partition, index-aligned with
    # active_partitions. Partitions hold status objects, so membership of an
    # endpoint has to be checked against these sets instead.
    active_partition_endpoints: List[Set[str]] = []
    # List of group partitions that have no quorum and can't execute transactions.
    blocked_partitions: List[Set[MySQLPod]] = []

    all_pods = {pod.endpoint: pod for pod in pods}

    no_primary_active_partitions = []
    for ep, p in online_pod_info.items():
        if not p.in_quorum:
            continue

        # NOTE: open question kept from the original code - should
        # UNMANAGED / NOT_MANAGED peers be counted here as well?
        online_peers = [peer for peer, state in p.peers.items()
                        if state in ("ONLINE", "RECOVERING")]

        # Every peer this member considers part of the group must also have
        # reported its own status, otherwise the collected views disagree.
        missing = set(online_peers).difference(online_pod_info)
        if missing:
            logger.info(
                f"Group view of {ep} has {p.peers.keys()} but these are not ONLINE: {missing}")
            raise kopf.TemporaryError(
                "Cluster status results inconsistent", delay=5)

        part = [online_pod_info[peer] for peer in online_peers]
        if p.is_primary:
            active_partitions.append(part)
            active_partition_endpoints.append(set(online_peers))
        else:
            no_primary_active_partitions.append(part)

    if not active_partitions and no_primary_active_partitions:
        # it's possible for a group with quorum to not have a PRIMARY
        # for a short time if the PRIMARY is removed from the group
        raise kopf.TemporaryError(
            "Cluster has quorum but no PRIMARY", delay=10)

    def active_partition_with(endpoint):
        """Return the active partition that contains endpoint, or None."""
        for members, part in zip(active_partition_endpoints, active_partitions):
            if endpoint in members:
                return part
        return None

    for ep, p in online_pod_info.items():
        if p.in_quorum:
            continue

        # A member without quorum must not show up in a partition with quorum.
        # Raised explicitly (rather than via `assert`) so the check is not
        # stripped when running with -O.
        part = active_partition_with(ep)
        if part is not None:
            raise AssertionError(
                f"Inconsistent group view, {p} not expected to be in {part}")

        # Everything this member can still see forms its blocked partition.
        blocked = {all_pods[peer] for peer, state in p.peers.items()
                   if state not in ("(MISSING)", "UNREACHABLE")}
        # Members of the same blocked partition report the same set; keep one.
        if blocked not in blocked_partitions:
            blocked_partitions.append(blocked)

    # sort by partition size
    blocked_partitions.sort(key=len, reverse=True)

    return active_partitions, blocked_partitions