in hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java [2103:2334]
/**
 * Cross-checks a single region's presence in hbase:meta, on HDFS and on the region servers,
 * reports any inconsistency through {@code errors}, and applies the repair matching whichever
 * fix options ({@code shouldFixAssignments()}, {@code shouldFixMeta()},
 * {@code shouldFixSplitParents()}) are enabled.
 * <p>
 * Nothing is reported when the region is marked to skip checks, contains only HDFS edits, is
 * consistently deployed, belongs to a disabled table and is not deployed, or was modified
 * within the configured {@code timelag}. The remaining branches are evaluated in a fixed order
 * and the first matching one wins, so the order of the {@code else if} chain is significant.
 * @param key identifier of the region; only used in the NOT_IN_META_HDFS error message
 * @param hbi the collected meta / HDFS / deployment information for the region
 * @throws IOException          propagated from the file system, meta and admin calls
 * @throws KeeperException      propagated from the repair helpers
 * @throws InterruptedException propagated from the repair helpers
 */
private void checkRegionConsistency(final String key, final HbckRegionInfo hbi)
  throws IOException, KeeperException, InterruptedException {
  if (hbi.isSkipChecks()) {
    return;
  }
  String descriptiveName = hbi.toString();
  boolean inMeta = hbi.getMetaEntry() != null;
  // In case not checking HDFS, assume the region is on HDFS
  boolean inHdfs = !shouldCheckHdfs() || hbi.getHdfsRegionDir() != null;
  boolean hasMetaAssignment = inMeta && hbi.getMetaEntry().regionServer != null;
  boolean isDeployed = !hbi.getDeployedOn().isEmpty();
  boolean isMultiplyDeployed = hbi.getDeployedOn().size() > 1;
  boolean deploymentMatchesMeta = hasMetaAssignment && isDeployed && !isMultiplyDeployed
    && hbi.getMetaEntry().regionServer.equals(hbi.getDeployedOn().get(0));
  boolean splitParent = inMeta && hbi.getMetaEntry().getRegionInfo().isSplit()
    && hbi.getMetaEntry().getRegionInfo().isOffline();
  boolean shouldBeDeployed =
    inMeta && !isTableDisabled(hbi.getMetaEntry().getRegionInfo().getTable());
  boolean recentlyModified =
    inHdfs && hbi.getModTime() + timelag > EnvironmentEdgeManager.currentTime();

  // ========== First the healthy cases =============
  if (hbi.containsOnlyHdfsEdits()) {
    return;
  }
  if (inMeta && inHdfs && isDeployed && deploymentMatchesMeta && shouldBeDeployed) {
    return;
  }
  if (inMeta && inHdfs && !shouldBeDeployed && !isDeployed) {
    LOG.info("Region " + descriptiveName + " is in META, and in a disabled "
      + "table that is not deployed");
    return;
  }
  if (recentlyModified) {
    LOG.warn("Region " + descriptiveName + " was recently modified -- skipping");
    return;
  }

  // ========== Cases where the region is not in hbase:meta =============
  if (!inMeta && !inHdfs && !isDeployed) {
    // We shouldn't have record of this region at all then!
    assert false : "Entry for region with no data";
  } else if (!inMeta && !inHdfs && isDeployed) {
    errors.reportError(ERROR_CODE.NOT_IN_META_HDFS,
      "Region " + descriptiveName + ", key=" + key + ", not on HDFS or in hbase:meta but "
        + "deployed on " + Joiner.on(", ").join(hbi.getDeployedOn()));
    if (shouldFixAssignments()) {
      undeployRegions(hbi);
    }
  } else if (!inMeta && inHdfs && !isDeployed) {
    repairRegionOnlyOnHdfs(hbi, descriptiveName);
  } else if (!inMeta && inHdfs && isDeployed) {
    repairDeployedRegionMissingFromMeta(hbi, descriptiveName);

    // ========== Cases where the region is in hbase:meta =============
  } else if (inMeta && inHdfs && !isDeployed && splitParent) {
    handleUndeployedSplitParent(hbi, descriptiveName);
  } else if (inMeta && !inHdfs && !isDeployed) {
    errors.reportError(ERROR_CODE.NOT_IN_HDFS_OR_DEPLOYED, "Region " + descriptiveName
      + " found in META, but not in HDFS " + "or deployed on any region server.");
    if (shouldFixMeta()) {
      deleteMetaRegion(hbi);
    }
  } else if (inMeta && !inHdfs && isDeployed) {
    errors.reportError(ERROR_CODE.NOT_IN_HDFS,
      "Region " + descriptiveName + " found in META, but not in HDFS, " + "and deployed on "
        + Joiner.on(", ").join(hbi.getDeployedOn()));
    // We treat HDFS as ground truth. Any information in meta is transient
    // and equivalent data can be regenerated. So, lets unassign and remove
    // these problems from META.
    if (shouldFixAssignments()) {
      errors.print("Trying to fix unassigned region...");
      undeployRegions(hbi);
    }
    if (shouldFixMeta()) {
      // wait for it to complete
      deleteMetaRegion(hbi);
    }
  } else if (inMeta && inHdfs && !isDeployed && shouldBeDeployed) {
    errors.reportError(ERROR_CODE.NOT_DEPLOYED,
      "Region " + descriptiveName + " not deployed on any region server.");
    tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
  } else if (inMeta && inHdfs && isDeployed && !shouldBeDeployed) {
    errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED,
      "Region " + descriptiveName + " should not be deployed according "
        + "to META, but is deployed on " + Joiner.on(", ").join(hbi.getDeployedOn()));
    if (shouldFixAssignments()) {
      errors.print("Trying to close the region " + descriptiveName);
      setShouldRerun();
      HBaseFsckRepair.fixMultiAssignment(connection, hbi.getMetaEntry().getRegionInfo(),
        hbi.getDeployedOn());
    }
  } else if (inMeta && inHdfs && isMultiplyDeployed) {
    errors.reportError(ERROR_CODE.MULTI_DEPLOYED,
      "Region " + descriptiveName + " is listed in hbase:meta on region server "
        + hbi.getMetaEntry().regionServer + " but is multiply assigned to region servers "
        + Joiner.on(", ").join(hbi.getDeployedOn()));
    // If we are trying to fix the errors
    if (shouldFixAssignments()) {
      errors.print("Trying to fix assignment error...");
      setShouldRerun();
      HBaseFsckRepair.fixMultiAssignment(connection, hbi.getMetaEntry().getRegionInfo(),
        hbi.getDeployedOn());
    }
  } else if (inMeta && inHdfs && isDeployed && !deploymentMatchesMeta) {
    errors.reportError(ERROR_CODE.SERVER_DOES_NOT_MATCH_META,
      "Region " + descriptiveName + " listed in hbase:meta on region server "
        + hbi.getMetaEntry().regionServer + " but found on region server "
        + hbi.getDeployedOn().get(0));
    // If we are trying to fix the errors
    if (shouldFixAssignments()) {
      errors.print("Trying to fix assignment error...");
      setShouldRerun();
      HBaseFsckRepair.fixMultiAssignment(connection, hbi.getMetaEntry().getRegionInfo(),
        hbi.getDeployedOn());
      // inHdfs is assumed true when HDFS checks are disabled, so the HDFS region info may be
      // missing here; fall back to the meta entry that the reassignment above was issued for.
      RegionInfo hdfsRegion = hbi.getHdfsHRI();
      HBaseFsckRepair.waitUntilAssigned(admin,
        hdfsRegion != null ? hdfsRegion : hbi.getMetaEntry().getRegionInfo());
    }
  } else {
    errors.reportError(ERROR_CODE.UNKNOWN,
      "Region " + descriptiveName + " is in an unforeseen state:" + " inMeta=" + inMeta
        + " inHdfs=" + inHdfs + " isDeployed=" + isDeployed + " isMultiplyDeployed="
        + isMultiplyDeployed + " deploymentMatchesMeta=" + deploymentMatchesMeta
        + " shouldBeDeployed=" + shouldBeDeployed);
  }
}

/**
 * Handles a region that is on HDFS but neither listed in hbase:meta nor deployed: skips
 * already-merged regions, reports NOT_IN_META_OR_DEPLOYED, and, when meta fixing is enabled,
 * either deletes the region directory if it is a leftover of a failed split or re-adds the
 * region to hbase:meta and tries to assign it.
 */
private void repairRegionOnlyOnHdfs(HbckRegionInfo hbi, String descriptiveName)
  throws IOException, KeeperException, InterruptedException {
  if (hbi.isMerged()) {
    // This region has already been merged, the remaining hdfs file will be
    // cleaned by CatalogJanitor later
    hbi.setSkipChecks(true);
    LOG.info("Region " + descriptiveName
      + " got merge recently, its file(s) will be cleaned by CatalogJanitor later");
    return;
  }
  errors.reportError(ERROR_CODE.NOT_IN_META_OR_DEPLOYED, "Region " + descriptiveName
    + " on HDFS, but not listed in hbase:meta " + "or deployed on any region server");
  // restore region consistency of an adopted orphan
  if (!shouldFixMeta()) {
    return;
  }
  if (!hbi.isHdfsRegioninfoPresent()) {
    // The .regioninfo is reported absent in this branch, so name the region by its
    // descriptive name instead of the (presumably null) HDFS region info.
    LOG.error("Region " + descriptiveName + " could have been repaired"
      + " in table integrity repair phase if -fixHdfsOrphans was" + " used.");
    return;
  }
  if (deleteIfLeftoverSplitDaughter(hbi, hbi.getHdfsHRI())) {
    return;
  }
  patchMetaFromHdfsRegionInfo(hbi, "Trying to reassign region...");
}

/**
 * Looks for a live (neither split nor offline) region in hbase:meta whose key range covers
 * {@code hri} and which is the target of a reference file inside {@code hri}'s region
 * directory. On the first such match the region directory of {@code hri} is deleted from the
 * file system.
 * @return {@code true} if the region directory was deleted, {@code false} otherwise
 */
private boolean deleteIfLeftoverSplitDaughter(HbckRegionInfo hbi, RegionInfo hri)
  throws IOException {
  HbckTableInfo tableInfo = tablesInfo.get(hri.getTable());
  for (RegionInfo region : tableInfo.getRegionsFromMeta(this.regionInfoMap)) {
    boolean coversOrphan = Bytes.compareTo(region.getStartKey(), hri.getStartKey()) <= 0
      && (region.getEndKey().length == 0
        || Bytes.compareTo(region.getEndKey(), hri.getEndKey()) >= 0)
      && Bytes.compareTo(region.getStartKey(), hri.getEndKey()) <= 0;
    if (!coversOrphan) {
      continue;
    }
    if (region.isSplit() || region.isOffline()) {
      continue;
    }
    // File system access is deliberately kept inside the loop so that nothing is listed
    // unless a covering region was actually found.
    Path regionDir = hbi.getHdfsRegionDir();
    FileSystem fs = regionDir.getFileSystem(getConf());
    List<Path> familyDirs = FSUtils.getFamilyDirs(fs, regionDir);
    for (Path familyDir : familyDirs) {
      List<Path> referenceFilePaths = FSUtils.getReferenceFilePaths(fs, familyDir);
      for (Path referenceFilePath : referenceFilePaths) {
        Path parentRegionDir =
          StoreFileInfo.getReferredToFile(referenceFilePath).getParent().getParent();
        if (parentRegionDir.toString().endsWith(region.getEncodedName())) {
          LOG.warn(hri + " start and stop keys are in the range of " + region
            + ". The region might not be cleaned up from hdfs when region " + region
            + " split failed. Hence deleting from hdfs.");
          HRegionFileSystem.deleteRegionFromFileSystem(getConf(), fs, regionDir.getParent(),
            hri);
          return true;
        }
      }
    }
  }
  return false;
}

/**
 * Handles a region that is on HDFS and deployed but missing from hbase:meta: reports
 * NOT_IN_META, undeploys non-default replicas when assignment fixing is enabled, and re-adds
 * default replicas to hbase:meta when meta fixing is enabled.
 */
private void repairDeployedRegionMissingFromMeta(HbckRegionInfo hbi, String descriptiveName)
  throws IOException, KeeperException, InterruptedException {
  errors.reportError(ERROR_CODE.NOT_IN_META, "Region " + descriptiveName
    + " not in META, but deployed on " + Joiner.on(", ").join(hbi.getDeployedOn()));
  debugLsr(hbi.getHdfsRegionDir());
  if (hbi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
    // for replicas, this means that we should undeploy the region (we would have
    // gone over the primaries and fixed meta holes in first phase under
    // checkAndFixConsistency; we shouldn't get the condition !inMeta at
    // this stage unless unwanted replica)
    if (shouldFixAssignments()) {
      undeployRegionsForHbi(hbi);
    }
  }
  if (shouldFixMeta() && hbi.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
    if (!hbi.isHdfsRegioninfoPresent()) {
      LOG.error("This should have been repaired in table integrity repair phase");
      return;
    }
    patchMetaFromHdfsRegionInfo(hbi, "Trying to fix unassigned region...");
  }
}

/**
 * Re-creates the hbase:meta entry (plus replicas, per the table's region replication) from the
 * region's HDFS .regioninfo and then attempts to assign the region.
 * @param assignmentMsg message handed to {@code tryAssignmentRepair}
 */
private void patchMetaFromHdfsRegionInfo(HbckRegionInfo hbi, String assignmentMsg)
  throws IOException, KeeperException, InterruptedException {
  LOG.info("Patching hbase:meta with .regioninfo: " + hbi.getHdfsHRI());
  int numReplicas = admin.getDescriptor(hbi.getTableName()).getRegionReplication();
  HBaseFsckRepair.fixMetaHoleOnlineAndAddReplicas(getConf(), hbi.getHdfsHRI(),
    admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS)).getLiveServerMetrics().keySet(),
    numReplicas);
  tryAssignmentRepair(hbi, assignmentMsg);
}

/**
 * Handles a split parent that is in hbase:meta and on HDFS but not deployed. The region is
 * skipped when both daughters are known or when it is a non-default replica; otherwise
 * LINGERING_SPLIT_PARENT is reported and, if enabled, the split parent is reset.
 */
private void handleUndeployedSplitParent(HbckRegionInfo hbi, String descriptiveName)
  throws IOException, KeeperException, InterruptedException {
  // check whether this is an actual error, or just transient state where parent
  // is not cleaned
  if (hbi.getMetaEntry().splitA != null && hbi.getMetaEntry().splitB != null) {
    // check that split daughters are there
    HbckRegionInfo infoA = this.regionInfoMap.get(hbi.getMetaEntry().splitA.getEncodedName());
    HbckRegionInfo infoB = this.regionInfoMap.get(hbi.getMetaEntry().splitB.getEncodedName());
    if (infoA != null && infoB != null) {
      // we already processed or will process daughters. Move on, nothing to see here.
      hbi.setSkipChecks(true);
      return;
    }
  }
  // For Replica region, we need to do a similar check. If replica is not split successfully,
  // error is going to be reported against primary daughter region.
  if (hbi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
    LOG.info("Region " + descriptiveName + " is a split parent in META, in HDFS, "
      + "and not deployed on any region server. This may be transient.");
    hbi.setSkipChecks(true);
    return;
  }
  errors.reportError(ERROR_CODE.LINGERING_SPLIT_PARENT,
    "Region " + descriptiveName + " is a split parent in META, in HDFS, "
      + "and not deployed on any region server. This could be transient, "
      + "consider to run the catalog janitor first!");
  if (shouldFixSplitParents()) {
    setShouldRerun();
    resetSplitParent(hbi);
  }
}