private void checkRegionConsistency()

in hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java [2103:2334]


  /**
   * Checks one region for consistency between hbase:meta, HDFS and the region servers it is
   * deployed on. Every inconsistent combination is reported through {@code errors}; a repair is
   * only attempted when the corresponding {@code shouldFix...()} option is enabled.
   * <p>
   * The checks form a single ordered if/else chain, so the order matters: the healthy cases come
   * first, then recently modified regions are skipped, then the cases where the region is missing
   * from hbase:meta, then the cases where it is present in hbase:meta. Anything that matches none
   * of them is reported as {@code ERROR_CODE.UNKNOWN}.
   * @param key identifier of the region; only used in the NOT_IN_META_HDFS error message
   * @param hbi the collected hbase:meta / HDFS / deployment information of the region
   * @throws IOException          if one of the lookups or repairs performed here fails with an I/O
   *                              error
   * @throws KeeperException      declared for the repair helpers called here (presumably
   *                              ZooKeeper related -- confirm against those helpers)
   * @throws InterruptedException declared for the repair helpers called here (presumably raised
   *                              while waiting on an assignment -- confirm against those helpers)
   */
  private void checkRegionConsistency(final String key, final HbckRegionInfo hbi)
    throws IOException, KeeperException, InterruptedException {

    if (hbi.isSkipChecks()) {
      return;
    }
    String descriptiveName = hbi.toString();
    boolean inMeta = hbi.getMetaEntry() != null;
    // In case not checking HDFS, assume the region is on HDFS
    boolean inHdfs = !shouldCheckHdfs() || hbi.getHdfsRegionDir() != null;
    boolean hasMetaAssignment = inMeta && hbi.getMetaEntry().regionServer != null;
    boolean isDeployed = !hbi.getDeployedOn().isEmpty();
    boolean isMultiplyDeployed = hbi.getDeployedOn().size() > 1;
    // Only true when the region is deployed on exactly one server and that server is the one
    // recorded in hbase:meta.
    boolean deploymentMatchesMeta = hasMetaAssignment && isDeployed && !isMultiplyDeployed
      && hbi.getMetaEntry().regionServer.equals(hbi.getDeployedOn().get(0));
    boolean splitParent = inMeta && hbi.getMetaEntry().getRegionInfo().isSplit()
      && hbi.getMetaEntry().getRegionInfo().isOffline();
    boolean shouldBeDeployed =
      inMeta && !isTableDisabled(hbi.getMetaEntry().getRegionInfo().getTable());
    // Regions modified within the last "timelag" milliseconds are skipped further below.
    boolean recentlyModified =
      inHdfs && hbi.getModTime() + timelag > EnvironmentEdgeManager.currentTime();

    // ========== First the healthy cases =============
    if (hbi.containsOnlyHdfsEdits()) {
      return;
    }
    if (inMeta && inHdfs && isDeployed && deploymentMatchesMeta && shouldBeDeployed) {
      return;
    } else if (inMeta && inHdfs && !shouldBeDeployed && !isDeployed) {
      LOG.info("Region " + descriptiveName + " is in META, and in a disabled "
        + "tabled that is not deployed");
      return;
    } else if (recentlyModified) {
      LOG.warn("Region " + descriptiveName + " was recently modified -- skipping");
      return;
    }
    // ========== Cases where the region is not in hbase:meta =============
    else if (!inMeta && !inHdfs && !isDeployed) {
      // We shouldn't have record of this region at all then!
      // NOTE: this is a plain Java assert, so it only fires when assertions are enabled (-ea);
      // otherwise this state is silently ignored.
      assert false : "Entry for region with no data";
    } else if (!inMeta && !inHdfs && isDeployed) {
      errors.reportError(ERROR_CODE.NOT_IN_META_HDFS,
        "Region " + descriptiveName + ", key=" + key + ", not on HDFS or in hbase:meta but "
          + "deployed on " + Joiner.on(", ").join(hbi.getDeployedOn()));
      if (shouldFixAssignments()) {
        undeployRegions(hbi);
      }

    } else if (!inMeta && inHdfs && !isDeployed) {
      if (hbi.isMerged()) {
        // This region has already been merged, the remaining hdfs file will be
        // cleaned by CatalogJanitor later
        hbi.setSkipChecks(true);
        LOG.info("Region " + descriptiveName
          + " got merge recently, its file(s) will be cleaned by CatalogJanitor later");
        return;
      }
      errors.reportError(ERROR_CODE.NOT_IN_META_OR_DEPLOYED, "Region " + descriptiveName
        + " on HDFS, but not listed in hbase:meta " + "or deployed on any region server");
      // restore region consistency of an adopted orphan
      if (shouldFixMeta()) {
        if (!hbi.isHdfsRegioninfoPresent()) {
          // Identify the region by its descriptive name, like every other message in this method:
          // with no .regioninfo present the HDFS RegionInfo is presumably unavailable here and
          // would be logged as "null".
          LOG.error("Region " + descriptiveName + " could have been repaired"
            + " in table integrity repair phase if -fixHdfsOrphans was" + " used.");
          return;
        }

        RegionInfo hri = hbi.getHdfsHRI();
        HbckTableInfo tableInfo = tablesInfo.get(hri.getTable());

        // Before re-adding the region to hbase:meta, check whether it is a daughter left behind
        // in HDFS by a failed split: its key range lies within a region that is still live in
        // hbase:meta and one of its reference files points into that region's directory. If so,
        // the directory is deleted instead of being patched into hbase:meta.
        for (RegionInfo region : tableInfo.getRegionsFromMeta(this.regionInfoMap)) {
          if (
            Bytes.compareTo(region.getStartKey(), hri.getStartKey()) <= 0
              && (region.getEndKey().length == 0
                || Bytes.compareTo(region.getEndKey(), hri.getEndKey()) >= 0)
              && Bytes.compareTo(region.getStartKey(), hri.getEndKey()) <= 0
          ) {
            if (region.isSplit() || region.isOffline()) {
              continue;
            }
            Path regionDir = hbi.getHdfsRegionDir();
            FileSystem fs = regionDir.getFileSystem(getConf());
            List<Path> familyDirs = FSUtils.getFamilyDirs(fs, regionDir);
            for (Path familyDir : familyDirs) {
              List<Path> referenceFilePaths = FSUtils.getReferenceFilePaths(fs, familyDir);
              for (Path referenceFilePath : referenceFilePaths) {
                // Two levels up from the referred-to file: file -> family dir -> region dir.
                Path parentRegionDir =
                  StoreFileInfo.getReferredToFile(referenceFilePath).getParent().getParent();
                if (parentRegionDir.toString().endsWith(region.getEncodedName())) {
                  LOG.warn(hri + " start and stop keys are in the range of " + region
                    + ". The region might not be cleaned up from hdfs when region " + region
                    + " split failed. Hence deleting from hdfs.");
                  HRegionFileSystem.deleteRegionFromFileSystem(getConf(), fs, regionDir.getParent(),
                    hri);
                  return;
                }
              }
            }
          }
        }
        LOG.info("Patching hbase:meta with .regioninfo: " + hri);
        int numReplicas = admin.getDescriptor(hbi.getTableName()).getRegionReplication();
        HBaseFsckRepair.fixMetaHoleOnlineAndAddReplicas(getConf(), hri,
          admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS)).getLiveServerMetrics().keySet(),
          numReplicas);

        tryAssignmentRepair(hbi, "Trying to reassign region...");
      }

    } else if (!inMeta && inHdfs && isDeployed) {
      errors.reportError(ERROR_CODE.NOT_IN_META, "Region " + descriptiveName
        + " not in META, but deployed on " + Joiner.on(", ").join(hbi.getDeployedOn()));
      debugLsr(hbi.getHdfsRegionDir());
      if (hbi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
        // for replicas, this means that we should undeploy the region (we would have
        // gone over the primaries and fixed meta holes in first phase under
        // checkAndFixConsistency; we shouldn't get the condition !inMeta at
        // this stage unless unwanted replica)
        if (shouldFixAssignments()) {
          undeployRegionsForHbi(hbi);
        }
      }
      // Only the primary (default) replica is patched back into hbase:meta.
      if (shouldFixMeta() && hbi.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
        if (!hbi.isHdfsRegioninfoPresent()) {
          LOG.error("This should have been repaired in table integrity repair phase");
          return;
        }

        LOG.info("Patching hbase:meta with with .regioninfo: " + hbi.getHdfsHRI());
        int numReplicas = admin.getDescriptor(hbi.getTableName()).getRegionReplication();
        HBaseFsckRepair.fixMetaHoleOnlineAndAddReplicas(getConf(), hbi.getHdfsHRI(),
          admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS)).getLiveServerMetrics().keySet(),
          numReplicas);
        tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
      }

      // ========== Cases where the region is in hbase:meta =============
    } else if (inMeta && inHdfs && !isDeployed && splitParent) {
      // check whether this is an actual error, or just transient state where parent
      // is not cleaned
      if (hbi.getMetaEntry().splitA != null && hbi.getMetaEntry().splitB != null) {
        // check that split daughters are there
        HbckRegionInfo infoA = this.regionInfoMap.get(hbi.getMetaEntry().splitA.getEncodedName());
        HbckRegionInfo infoB = this.regionInfoMap.get(hbi.getMetaEntry().splitB.getEncodedName());
        if (infoA != null && infoB != null) {
          // we already processed or will process daughters. Move on, nothing to see here.
          hbi.setSkipChecks(true);
          return;
        }
      }

      // For Replica region, we need to do a similar check. If replica is not split successfully,
      // error is going to be reported against primary daughter region.
      if (hbi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
        LOG.info("Region " + descriptiveName + " is a split parent in META, in HDFS, "
          + "and not deployed on any region server. This may be transient.");
        hbi.setSkipChecks(true);
        return;
      }

      errors.reportError(ERROR_CODE.LINGERING_SPLIT_PARENT,
        "Region " + descriptiveName + " is a split parent in META, in HDFS, "
          + "and not deployed on any region server. This could be transient, "
          + "consider to run the catalog janitor first!");
      if (shouldFixSplitParents()) {
        setShouldRerun();
        resetSplitParent(hbi);
      }
    } else if (inMeta && !inHdfs && !isDeployed) {
      errors.reportError(ERROR_CODE.NOT_IN_HDFS_OR_DEPLOYED, "Region " + descriptiveName
        + " found in META, but not in HDFS " + "or deployed on any region server.");
      if (shouldFixMeta()) {
        deleteMetaRegion(hbi);
      }
    } else if (inMeta && !inHdfs && isDeployed) {
      errors.reportError(ERROR_CODE.NOT_IN_HDFS,
        "Region " + descriptiveName + " found in META, but not in HDFS, " + "and deployed on "
          + Joiner.on(", ").join(hbi.getDeployedOn()));
      // We treat HDFS as ground truth. Any information in meta is transient
      // and equivalent data can be regenerated. So, lets unassign and remove
      // these problems from META.
      if (shouldFixAssignments()) {
        errors.print("Trying to fix unassigned region...");
        undeployRegions(hbi);
      }
      if (shouldFixMeta()) {
        // wait for it to complete
        deleteMetaRegion(hbi);
      }
    } else if (inMeta && inHdfs && !isDeployed && shouldBeDeployed) {
      errors.reportError(ERROR_CODE.NOT_DEPLOYED,
        "Region " + descriptiveName + " not deployed on any region server.");
      tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
    } else if (inMeta && inHdfs && isDeployed && !shouldBeDeployed) {
      errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED,
        "Region " + descriptiveName + " should not be deployed according "
          + "to META, but is deployed on " + Joiner.on(", ").join(hbi.getDeployedOn()));
      if (shouldFixAssignments()) {
        errors.print("Trying to close the region " + descriptiveName);
        setShouldRerun();
        HBaseFsckRepair.fixMultiAssignment(connection, hbi.getMetaEntry().getRegionInfo(),
          hbi.getDeployedOn());
      }
    } else if (inMeta && inHdfs && isMultiplyDeployed) {
      // Must stay ahead of the !deploymentMatchesMeta branch below: a multiply deployed region
      // never "matches" hbase:meta, but deserves the more specific MULTI_DEPLOYED report.
      errors.reportError(ERROR_CODE.MULTI_DEPLOYED,
        "Region " + descriptiveName + " is listed in hbase:meta on region server "
          + hbi.getMetaEntry().regionServer + " but is multiply assigned to region servers "
          + Joiner.on(", ").join(hbi.getDeployedOn()));
      // If we are trying to fix the errors
      if (shouldFixAssignments()) {
        errors.print("Trying to fix assignment error...");
        setShouldRerun();
        HBaseFsckRepair.fixMultiAssignment(connection, hbi.getMetaEntry().getRegionInfo(),
          hbi.getDeployedOn());
      }
    } else if (inMeta && inHdfs && isDeployed && !deploymentMatchesMeta) {
      errors.reportError(ERROR_CODE.SERVER_DOES_NOT_MATCH_META,
        "Region " + descriptiveName + " listed in hbase:meta on region server "
          + hbi.getMetaEntry().regionServer + " but found on region server "
          + hbi.getDeployedOn().get(0));
      // If we are trying to fix the errors
      if (shouldFixAssignments()) {
        errors.print("Trying to fix assignment error...");
        setShouldRerun();
        HBaseFsckRepair.fixMultiAssignment(connection, hbi.getMetaEntry().getRegionInfo(),
          hbi.getDeployedOn());
        // inHdfs is only assumed when HDFS checks are disabled, so the HDFS-side RegionInfo may
        // be missing. Fall back to the hbase:meta RegionInfo, which exists because inMeta holds
        // and is the same one handed to fixMultiAssignment above.
        RegionInfo regionToAwait = hbi.getHdfsHRI();
        if (regionToAwait == null) {
          regionToAwait = hbi.getMetaEntry().getRegionInfo();
        }
        HBaseFsckRepair.waitUntilAssigned(admin, regionToAwait);
      }
    } else {
      errors.reportError(ERROR_CODE.UNKNOWN,
        "Region " + descriptiveName + " is in an unforeseen state:" + " inMeta=" + inMeta
          + " inHdfs=" + inHdfs + " isDeployed=" + isDeployed + " isMultiplyDeployed="
          + isMultiplyDeployed + " deploymentMatchesMeta=" + deploymentMatchesMeta
          + " shouldBeDeployed=" + shouldBeDeployed);
    }
  }