in hbase-hbck2/src/main/java/org/apache/hbase/hbck1/HBaseFsck.java [2165:2214]
/**
 * Collects the table directories to inspect and runs a {@code WorkItemHdfsDir} over each of
 * them, one after another, on the calling thread.
 * <p>
 * Which directories are used depends on {@code checkMetaOnly}:
 * <ul>
 * <li>not set: every path in {@code this.tableDirs} is stat'ed; a path whose status cannot be
 * read is logged at WARN and skipped;</li>
 * <li>set: only the meta table directory under the root dir is used, and a failure to read its
 * status is <em>not</em> caught here, so it propagates to the caller.</li>
 * </ul>
 * Before the directories are processed, a missing version file is reported as
 * {@code NO_VERSION_FILE}; if {@code shouldFixVersionFile()} returns true, a rerun is flagged
 * and {@code versionFileCreate} is invoked.
 *
 * @throws IOException if the filesystem cannot be obtained, if the meta table directory cannot
 *         be stat'ed in meta-only mode, or if any other call made here raises it
 * @throws InterruptedException propagated from the calls made here (presumably
 *         {@code WorkItemHdfsDir#call()}; confirm against that class)
 */
public void loadHdfsRegionDirs() throws IOException, InterruptedException {
  Path rootDir = CommonFSUtils.getRootDir(getConf());
  FileSystem fs = rootDir.getFileSystem(getConf());

  // List all tables from HDFS.
  // Deliberately not named "tableDirs": that would shadow the this.tableDirs field read below.
  List<FileStatus> tableDirStatuses = Lists.newArrayList();
  if (!checkMetaOnly) {
    for (Path tableDir : this.tableDirs) {
      try {
        tableDirStatuses.add(fs.getFileStatus(tableDir));
      } catch (IOException ioe) {
        // Best effort: one unreadable table directory must not stop the remaining ones.
        LOG.warn("Failed to get Table directory for included table: {}",
          CommonFSUtils.getTableName(tableDir), ioe);
      }
    }
  } else {
    // Not wrapped in try/catch: in meta-only mode a failure here is propagated to the caller.
    tableDirStatuses.add(fs.getFileStatus(
      CommonFSUtils.getTableDir(rootDir, TableName.META_TABLE_NAME)));
  }

  // Verify that version file exists
  if (!versionFileExists(fs, rootDir)) {
    errors.reportError(ErrorReporter.ERROR_CODE.NO_VERSION_FILE,
      "Version file does not exist under " + rootDir);
    if (shouldFixVersionFile()) {
      setShouldRerun();
      versionFileCreate(getConf(), fs, rootDir);
    }
  }

  // Avoid multithreading at table-level because already multithreaded internally at
  // region-level. Additionally multithreading at table-level can lead to deadlock
  // if there are many tables in the cluster. Since there are a limited # of threads
  // in the executor's thread pool and if we multithread at the table-level by putting
  // WorkItemHdfsDir callables into the executor, then we will have some threads in the
  // executor tied up solely in waiting for the tables' region-level calls to complete.
  // If there are enough tables then there will be no actual threads in the pool left
  // for the region-level callables to be serviced.
  for (FileStatus tableDirStatus : tableDirStatuses) {
    LOG.debug("Loading region dirs from {}", tableDirStatus.getPath());
    WorkItemHdfsDir item = new WorkItemHdfsDir(fs, errors, tableDirStatus);
    try {
      item.call();
    } catch (ExecutionException e) {
      // Keep going with the next table; the cause is passed last so it is logged as the
      // exception rather than substituted into the message.
      LOG.warn("Could not completely load table dir {}", tableDirStatus.getPath(),
        e.getCause());
    }
  }
}