in java/tools/src/java/org/apache/orc/tools/CheckTool.java [59:147]
/**
 * Entry point of the "check" tool: validates the command line, collects the
 * input files and runs the requested check against every stripe of each file.
 *
 * <p>The {@code type}, {@code column} and {@code values} options are required;
 * when one of them is missing or unsupported a message is written to stderr,
 * the usage text is printed and the method returns. The remaining arguments
 * are treated as root paths that are listed recursively; only files whose
 * name ends with ".orc" are kept unless {@code ignoreExtension} is set.
 *
 * @param conf configuration used to resolve file systems and to open readers
 * @param args raw command line arguments
 * @throws Exception if option parsing, file listing or reading a file fails
 */
public static void main(Configuration conf, String[] args) throws Exception {
  Options opts = createOptions();
  CommandLine cli = new DefaultParser().parse(opts, args);
  HelpFormatter formatter = new HelpFormatter();
  if (cli.hasOption('h')) {
    formatter.printHelp("check", opts);
    return;
  }

  String type = cli.getOptionValue("type");
  if (type == null ||
      (!type.equals(CHECK_TYPE_PREDICATE) &&
          !type.equals(CHECK_TYPE_STAT) &&
          !type.equals(CHECK_TYPE_BLOOM_FILTER))) {
    System.err.printf("type %s not support %n", type);
    formatter.printHelp("check", opts);
    return;
  }

  String column = cli.getOptionValue("column");
  if (column == null || column.isEmpty()) {
    System.err.println("column is null");
    formatter.printHelp("check", opts);
    return;
  }

  String[] values = cli.getOptionValues("values");
  if (values == null || values.length == 0) {
    System.err.println("values is null");
    formatter.printHelp("check", opts);
    return;
  }

  boolean ignoreExtension = cli.hasOption("ignoreExtension");

  // Expand every root argument into the files below it.
  List<Path> inputFiles = new ArrayList<>();
  String[] files = cli.getArgs();
  for (String root : files) {
    Path rootPath = new Path(root);
    FileSystem fs = rootPath.getFileSystem(conf);
    for (RemoteIterator<LocatedFileStatus> itr = fs.listFiles(rootPath, true); itr.hasNext(); ) {
      LocatedFileStatus status = itr.next();
      if (status.isFile() && (ignoreExtension || status.getPath().getName().endsWith(".orc"))) {
        inputFiles.add(status.getPath());
      }
    }
  }
  if (inputFiles.isEmpty()) {
    System.err.println("No files found.");
    System.exit(1);
  }

  // The requested check type does not change between files or stripes.
  boolean bloomFilterCheck = type.equals(CHECK_TYPE_BLOOM_FILTER);

  for (Path inputFile : inputFiles) {
    System.out.println("input file: " + inputFile);
    FileSystem fs = inputFile.getFileSystem(conf);
    // Both the file reader and the record reader are declared as resources so
    // that the record reader is closed too, including on the "column not
    // found" path and when a check throws.
    try (Reader reader = OrcFile.createReader(inputFile,
             OrcFile.readerOptions(conf).filesystem(fs));
         RecordReaderImpl rows = (RecordReaderImpl) reader.rows()) {
      TypeDescription schema = reader.getSchema();
      boolean[] includedColumns = OrcUtils.includeColumns(column, schema);

      // The first flagged entry is used as the id of the column to check.
      int colIndex = -1;
      for (int i = 0; i < includedColumns.length; i++) {
        if (includedColumns[i]) {
          colIndex = i;
          break;
        }
      }
      if (colIndex == -1) {
        System.err.printf("column: %s not found in file: %s%n", column, inputFile);
        continue;
      }

      // The column type depends only on the file schema, so resolve it once
      // per file instead of once per stripe.
      TypeDescription subtype = schema.findSubtype(colIndex);
      TypeDescription.Category columnCategory = subtype.getCategory();

      int stripeIndex = -1;
      for (StripeInformation stripe : reader.getStripes()) {
        ++stripeIndex;
        OrcProto.StripeFooter footer = rows.readStripeFooter(stripe);
        OrcProto.ColumnEncoding columnEncoding = footer.getColumns(colIndex);
        OrcIndex indices = rows.readRowIndex(stripeIndex, null, includedColumns);
        if (bloomFilterCheck) {
          checkBloomFilter(inputFile, reader, indices, stripeIndex,
              colIndex, column, columnEncoding, columnCategory, values);
        } else {
          checkStatOrPredicate(inputFile, reader, indices, stripeIndex,
              colIndex, column, columnEncoding, subtype, columnCategory, values, type);
        }
      }
    }
  }
}