in java/core/src/java/org/apache/orc/Reader.java [210:715]
/** Get the stripe statistics for the given encryption variant. */
List<StripeStatistics> getVariantStripeStatistics(EncryptionVariant variant
) throws IOException;
/**
* Options for creating a RecordReader.
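*
* <p>An illustrative sketch of typical use; the file path and the option
* values below are placeholders, not recommendations:</p>
* <pre>{@code
* Configuration conf = new Configuration();
* Reader reader = OrcFile.createReader(new Path("data.orc"),
*                                      OrcFile.readerOptions(conf));
* Reader.Options options = reader.options()
*     .useZeroCopy(false)
*     .rowBatchSize(1024);
* VectorizedRowBatch batch =
*     reader.getSchema().createRowBatch(options.getRowBatchSize());
* try (RecordReader rows = reader.rows(options)) {
*   while (rows.nextBatch(batch)) {
*     // process batch.size rows
*   }
* }
* }</pre>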
* @since 1.1.0
*/
class Options implements Cloneable {
private boolean[] include;
private long offset = 0;
private long length = Long.MAX_VALUE;
private int positionalEvolutionLevel;
private SearchArgument sarg = null;
private String[] columnNames = null;
private Boolean useZeroCopy = null;
private Boolean skipCorruptRecords = null;
private TypeDescription schema = null;
private String[] preFilterColumns = null;
private Consumer<OrcFilterContext> skipRowCallback = null;
private DataReader dataReader = null;
private Boolean tolerateMissingSchema = null;
private boolean forcePositionalEvolution;
private boolean isSchemaEvolutionCaseAware =
(boolean) OrcConf.IS_SCHEMA_EVOLUTION_CASE_SENSITIVE.getDefaultValue();
private boolean includeAcidColumns = true;
private boolean allowSARGToFilter = false;
private boolean useSelected = false;
private boolean allowPluginFilters = false;
private List<String> pluginAllowListFilters = null;
private int minSeekSize = (int) OrcConf.ORC_MIN_DISK_SEEK_SIZE.getDefaultValue();
private double minSeekSizeTolerance = (double) OrcConf.ORC_MIN_DISK_SEEK_SIZE_TOLERANCE
.getDefaultValue();
private int rowBatchSize = (int) OrcConf.ROW_BATCH_SIZE.getDefaultValue();
/**
* @since 1.1.0
*/
public Options() {
// PASS
}
/**
* @since 1.1.0
*/
public Options(Configuration conf) {
useZeroCopy = OrcConf.USE_ZEROCOPY.getBoolean(conf);
skipCorruptRecords = OrcConf.SKIP_CORRUPT_DATA.getBoolean(conf);
tolerateMissingSchema = OrcConf.TOLERATE_MISSING_SCHEMA.getBoolean(conf);
forcePositionalEvolution = OrcConf.FORCE_POSITIONAL_EVOLUTION.getBoolean(conf);
positionalEvolutionLevel = OrcConf.FORCE_POSITIONAL_EVOLUTION_LEVEL.getInt(conf);
isSchemaEvolutionCaseAware =
OrcConf.IS_SCHEMA_EVOLUTION_CASE_SENSITIVE.getBoolean(conf);
allowSARGToFilter = OrcConf.ALLOW_SARG_TO_FILTER.getBoolean(conf);
useSelected = OrcConf.READER_USE_SELECTED.getBoolean(conf);
allowPluginFilters = OrcConf.ALLOW_PLUGIN_FILTER.getBoolean(conf);
pluginAllowListFilters = OrcConf.PLUGIN_FILTER_ALLOWLIST.getStringAsList(conf);
minSeekSize = OrcConf.ORC_MIN_DISK_SEEK_SIZE.getInt(conf);
minSeekSizeTolerance = OrcConf.ORC_MIN_DISK_SEEK_SIZE_TOLERANCE.getDouble(conf);
rowBatchSize = OrcConf.ROW_BATCH_SIZE.getInt(conf);
}
/**
* Set the columns to read.
* @param include a boolean array indexed by column id (0 is the root
*                struct); a true entry means that column is read
* @return this
* @since 1.1.0
*/
public Options include(boolean[] include) {
this.include = include;
return this;
}
/**
* Set the range of bytes to read.
* @param offset the starting byte offset
* @param length the number of bytes to read
* @return this
* @since 1.1.0
*/
public Options range(long offset, long length) {
this.offset = offset;
this.length = length;
return this;
}
/**
* Set the schema ("schema on read") to use when reading the file.
* @param schema the type description to read the file with
* @return this
* @since 1.1.0
*/
public Options schema(TypeDescription schema) {
this.schema = schema;
return this;
}
/**
* Set a row level filter.
* This is an advanced feature that allows the caller to specify
* a list of columns that are read first and then a filter that
* is called to determine which rows, if any, should be read.
*
* Users should expect the batches that come from the reader
* to use the selected array set by their filter.
*
* Use cases for this are predicates that SearchArguments can't represent,
* such as relationships between columns (e.g. columnA == columnB).
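*
* <p>A minimal sketch of such a filter, assuming two non-null, top-level
* LONG columns named "a" and "b" (the names are placeholders) and an
* existing {@code Options} instance named {@code options}; it keeps only
* the rows where the two columns are equal:</p>
* <pre>{@code
* Consumer<OrcFilterContext> keepEqualRows = ctx -> {
*   LongColumnVector a = (LongColumnVector) ctx.findColumnVector("a")[0];
*   LongColumnVector b = (LongColumnVector) ctx.findColumnVector("b")[0];
*   int[] selected = ctx.getSelected();
*   int newSize = 0;
*   // simplified: assumes no prior selection and ignores null or
*   // repeating values
*   for (int row = 0; row < ctx.getSelectedSize(); ++row) {
*     if (a.vector[row] == b.vector[row]) {
*       selected[newSize++] = row;
*     }
*   }
*   ctx.setSelectedInUse(true);
*   ctx.setSelectedSize(newSize);
* };
* options.setRowFilter(new String[]{"a", "b"}, keepEqualRows);
* }</pre>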
* @param filterColumnNames the column names that are read before the filter
*                          is applied. Only top level columns in the
*                          reader's schema can be used here and they must
*                          not be duplicated.
* @param filterCallback a function callback to perform filtering during the call to
* RecordReader.nextBatch. This function should not reference
* any static fields nor modify the passed in ColumnVectors but
* should set the filter output using the selected array.
*
* @return this
* @since 1.7.0
*/
public Options setRowFilter(
String[] filterColumnNames, Consumer<OrcFilterContext> filterCallback) {
this.preFilterColumns = filterColumnNames;
this.skipRowCallback = filterCallback;
return this;
}
/**
* Set search argument for predicate push down.
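*
* <p>A minimal sketch of building a search argument with
* SearchArgumentFactory, assuming a LONG column named "x" (the name is a
* placeholder) and an existing {@code Options} instance named
* {@code options}:</p>
* <pre>{@code
* SearchArgument sarg = SearchArgumentFactory.newBuilder()
*     .startAnd()
*     .lessThan("x", PredicateLeaf.Type.LONG, 100L)
*     .end()
*     .build();
* options.searchArgument(sarg, new String[]{"x"});
* }</pre>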
* @param sarg the search argument
* @param columnNames the column names used by the search argument
* @return this
* @since 1.1.0
*/
public Options searchArgument(SearchArgument sarg, String[] columnNames) {
this.sarg = sarg;
this.columnNames = columnNames;
return this;
}
/**
* Set whether the search argument may also be applied as a row-level filter.
* @param allowSARGToFilter true to allow the search argument to filter rows
* @return this
* @since 1.7.0
*/
public Options allowSARGToFilter(boolean allowSARGToFilter) {
this.allowSARGToFilter = allowSARGToFilter;
return this;
}
/**
* Get allowSARGToFilter value.
* @return allowSARGToFilter
* @since 1.7.0
*/
public boolean isAllowSARGToFilter() {
return allowSARGToFilter;
}
/**
* Set whether to use zero copy from HDFS.
* @param value the new zero copy flag
* @return this
* @since 1.1.0
*/
public Options useZeroCopy(boolean value) {
this.useZeroCopy = value;
return this;
}
/**
* Set dataReader.
* @param value the new dataReader.
* @return this
* @since 1.1.0
*/
public Options dataReader(DataReader value) {
this.dataReader = value;
return this;
}
/**
* Set whether to skip corrupt records.
* @param value the new skip corrupt records flag
* @return this
* @since 1.1.0
*/
public Options skipCorruptRecords(boolean value) {
this.skipCorruptRecords = value;
return this;
}
/**
* Set whether to make a best effort to tolerate schema evolution for files
* which do not have an embedded schema because they were written with a
* pre-HIVE-4243 writer.
* @param value the new tolerance flag
* @return this
* @since 1.2.0
*/
public Options tolerateMissingSchema(boolean value) {
this.tolerateMissingSchema = value;
return this;
}
/**
* Set whether to force schema evolution to be positional instead of
* based on the column names.
* @param value force positional evolution
* @return this
* @since 1.3.0
*/
public Options forcePositionalEvolution(boolean value) {
this.forcePositionalEvolution = value;
return this;
}
/**
* Set number of levels to force schema evolution to be positional instead of
* based on the column names.
* @param value number of levels of positional schema evolution
* @return this
* @since 1.5.11
*/
public Options positionalEvolutionLevel(int value) {
this.positionalEvolutionLevel = value;
return this;
}
/**
* Set whether the comparison of field names during schema evolution is
* case sensitive.
* @param value true if field name comparison should be case sensitive
* @return this
* @since 1.5.0
*/
public Options isSchemaEvolutionCaseAware(boolean value) {
this.isSchemaEvolutionCaseAware = value;
return this;
}
/**
* Set whether the ACID metadata columns should be decoded; when false they
* will be set to {@code null}.
* @param includeAcidColumns true to decode the ACID metadata columns
* @return this
* @since 1.5.3
*/
public Options includeAcidColumns(boolean includeAcidColumns) {
this.includeAcidColumns = includeAcidColumns;
return this;
}
/**
* @since 1.1.0
*/
public boolean[] getInclude() {
return include;
}
/**
* @since 1.1.0
*/
public long getOffset() {
return offset;
}
/**
* @since 1.1.0
*/
public long getLength() {
return length;
}
/**
* @since 1.1.0
*/
public TypeDescription getSchema() {
return schema;
}
/**
* @since 1.1.0
*/
public SearchArgument getSearchArgument() {
return sarg;
}
/**
* @since 1.7.0
*/
public Consumer<OrcFilterContext> getFilterCallback() {
return skipRowCallback;
}
/**
* @since 1.7.0
*/
public String[] getPreFilterColumnNames() {
return preFilterColumns;
}
/**
* @since 1.1.0
*/
public String[] getColumnNames() {
return columnNames;
}
/**
* @since 1.1.0
*/
public long getMaxOffset() {
long result = offset + length;
if (result < 0) {
result = Long.MAX_VALUE;
}
return result;
}
/**
* @since 1.1.0
*/
public Boolean getUseZeroCopy() {
return useZeroCopy;
}
/**
* @since 1.1.0
*/
public Boolean getSkipCorruptRecords() {
return skipCorruptRecords;
}
/**
* @since 1.1.0
*/
public DataReader getDataReader() {
return dataReader;
}
/**
* @since 1.3.0
*/
public boolean getForcePositionalEvolution() {
return forcePositionalEvolution;
}
/**
* @since 1.5.11
*/
public int getPositionalEvolutionLevel() {
return positionalEvolutionLevel;
}
/**
* @since 1.5.0
*/
public boolean getIsSchemaEvolutionCaseAware() {
return isSchemaEvolutionCaseAware;
}
/**
* @since 1.5.3
*/
public boolean getIncludeAcidColumns() {
return includeAcidColumns;
}
/**
* @since 1.1.0
*/
@Override
public Options clone() {
try {
Options result = (Options) super.clone();
if (dataReader != null) {
result.dataReader = dataReader.clone();
}
return result;
} catch (CloneNotSupportedException e) {
throw new UnsupportedOperationException("uncloneable", e);
}
}
/**
* @since 1.1.0
*/
@Override
public String toString() {
StringBuilder buffer = new StringBuilder();
buffer.append("{include: ");
if (include == null) {
buffer.append("null");
} else {
buffer.append("[");
for (int i = 0; i < include.length; ++i) {
if (i != 0) {
buffer.append(", ");
}
buffer.append(include[i]);
}
buffer.append("]");
}
buffer.append(", offset: ");
buffer.append(offset);
buffer.append(", length: ");
buffer.append(length);
if (sarg != null) {
buffer.append(", sarg: ");
buffer.append(sarg);
}
if (schema != null) {
buffer.append(", schema: ");
schema.printToBuffer(buffer);
}
buffer.append(", includeAcidColumns: ").append(includeAcidColumns);
buffer.append(", allowSARGToFilter: ").append(allowSARGToFilter);
buffer.append(", useSelected: ").append(useSelected);
buffer.append("}");
return buffer.toString();
}
/**
* @since 1.2.0
*/
public boolean getTolerateMissingSchema() {
return tolerateMissingSchema != null ? tolerateMissingSchema :
(Boolean) OrcConf.TOLERATE_MISSING_SCHEMA.getDefaultValue();
}
/**
* @since 1.7.0
*/
public boolean useSelected() {
return useSelected;
}
/**
* @since 1.7.0
*/
public Options useSelected(boolean newValue) {
this.useSelected = newValue;
return this;
}
/** Get whether plugin filters are allowed. */
public boolean allowPluginFilters() {
return allowPluginFilters;
}
/**
 * Set whether ORC plugin filters may be applied during the read.
 * @param allowPluginFilters true to allow plugin filters
 * @return this
 */
public Options allowPluginFilters(boolean allowPluginFilters) {
this.allowPluginFilters = allowPluginFilters;
return this;
}
/** Get the allow-list of plugin filter names. */
public List<String> pluginAllowListFilters() {
return pluginAllowListFilters;
}
/**
 * Set the allow-list restricting which plugin filters may be applied.
 * @param allowLists the allowed plugin filter names
 * @return this
 */
public Options pluginAllowListFilters(String... allowLists) {
this.pluginAllowListFilters = Arrays.asList(allowLists);
return this;
}
/**
* @since 1.8.0
*/
public int minSeekSize() {
return minSeekSize;
}
/**
* @since 1.8.0
*/
public Options minSeekSize(int minSeekSize) {
this.minSeekSize = minSeekSize;
return this;
}
/**
* @since 1.8.0
*/
public double minSeekSizeTolerance() {
return minSeekSizeTolerance;
}
/**
* @since 1.8.0
*/
public Options minSeekSizeTolerance(double value) {
this.minSeekSizeTolerance = value;
return this;
}
/**
* @since 1.9.0
*/
public int getRowBatchSize() {
return rowBatchSize;
}
/**
* @since 1.9.0
*/
public Options rowBatchSize(int value) {
this.rowBatchSize = value;
return this;
}
}