hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java [108:161]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  public static ParquetColumnarRowSplitReader genPartColumnarRowReader(
      boolean utcTimestamp,
      boolean caseSensitive,
      Configuration conf,
      String[] fullFieldNames,
      DataType[] fullFieldTypes,
      Map<String, Object> partitionSpec,
      int[] selectedFields,
      int batchSize,
      Path path,
      long splitStart,
      long splitLength,
      FilterPredicate filterPredicate,
      UnboundRecordFilter recordFilter) throws IOException {

    ValidationUtils.checkState(Arrays.stream(selectedFields).noneMatch(x -> x == -1),
        "One or more specified columns do not exist in the Hudi table.");

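    // Names of the selected fields that are physically stored in the Parquet
    // file; partition columns are excluded because their values come from
    // partitionSpec rather than from the file.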
    List<String> selNonPartNames = Arrays.stream(selectedFields)
        .mapToObj(i -> fullFieldNames[i])
        .filter(n -> !partitionSpec.containsKey(n))
        .collect(Collectors.toList());

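    // Positions (in the full schema) of the selected fields that must actually
    // be read from Parquet.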
    int[] selParquetFields = Arrays.stream(selectedFields)
        .filter(i -> !partitionSpec.containsKey(fullFieldNames[i]))
        .toArray();

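    // Assembles the final batch from the Parquet read vectors: non-partition
    // columns are looked up by position in selNonPartNames, while partition
    // columns get constant vectors built from the values in partitionSpec.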
    ParquetColumnarRowSplitReader.ColumnBatchGenerator gen = readVectors -> {
      // create and initialize the row batch
      ColumnVector[] vectors = new ColumnVector[selectedFields.length];
      for (int i = 0; i < vectors.length; i++) {
        String name = fullFieldNames[selectedFields[i]];
        LogicalType type = fullFieldTypes[selectedFields[i]].getLogicalType();
        vectors[i] = createVector(readVectors, selNonPartNames, name, type, partitionSpec, batchSize);
      }
      return new VectorizedColumnBatch(vectors);
    };

    return new ParquetColumnarRowSplitReader(
        utcTimestamp,
        caseSensitive,
        conf,
        Arrays.stream(selParquetFields)
            .mapToObj(i -> fullFieldTypes[i].getLogicalType())
            .toArray(LogicalType[]::new),
        selNonPartNames.toArray(new String[0]),
        gen,
        batchSize,
        new org.apache.hadoop.fs.Path(path.toUri()),
        splitStart,
        splitLength,
        filterPredicate,
        recordFilter);
  }
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
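
For reference, a minimal caller sketch (hypothetical schema, partition value, and
file path; it also assumes the reader exposes the usual reachedEnd() /
nextRecord() / close() loop of Flink's columnar split readers):

  org.apache.hadoop.conf.Configuration hadoopConf = new org.apache.hadoop.conf.Configuration();
  String[] fullFieldNames = {"uuid", "name", "age", "dt"};
  DataType[] fullFieldTypes = {
      DataTypes.STRING(), DataTypes.STRING(), DataTypes.INT(), DataTypes.STRING()};
  Map<String, Object> partitionSpec = Collections.singletonMap("dt", "2024-01-01");
  int[] selectedFields = {0, 2, 3}; // project uuid, age and the partition column dt

  // org.apache.flink.core.fs.Path, as expected by genPartColumnarRowReader
  Path path = new Path("file:///tmp/hudi/dt=2024-01-01/part-0.parquet");
  long fileLen = path.getFileSystem().getFileStatus(path).getLen();

  ParquetColumnarRowSplitReader reader = ParquetSplitReaderUtil.genPartColumnarRowReader(
      true,        // utcTimestamp
      true,        // caseSensitive
      hadoopConf,
      fullFieldNames,
      fullFieldTypes,
      partitionSpec,
      selectedFields,
      2048,        // batchSize
      path,
      0L,          // splitStart: read the whole file as one split
      fileLen,     // splitLength
      null,        // no pushed-down FilterPredicate
      null);       // no UnboundRecordFilter
  try {
    while (!reader.reachedEnd()) {
      RowData row = reader.nextRecord();
      // consume the projected row ...
    }
  } finally {
    reader.close();
  }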



hudi-flink-datasource/hudi-flink1.17.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java [108:161]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  public static ParquetColumnarRowSplitReader genPartColumnarRowReader(
      boolean utcTimestamp,
      boolean caseSensitive,
      Configuration conf,
      String[] fullFieldNames,
      DataType[] fullFieldTypes,
      Map<String, Object> partitionSpec,
      int[] selectedFields,
      int batchSize,
      Path path,
      long splitStart,
      long splitLength,
      FilterPredicate filterPredicate,
      UnboundRecordFilter recordFilter) throws IOException {

    ValidationUtils.checkState(Arrays.stream(selectedFields).noneMatch(x -> x == -1),
        "One or more specified columns do not exist in the Hudi table.");

    List<String> selNonPartNames = Arrays.stream(selectedFields)
        .mapToObj(i -> fullFieldNames[i])
        .filter(n -> !partitionSpec.containsKey(n))
        .collect(Collectors.toList());

    int[] selParquetFields = Arrays.stream(selectedFields)
        .filter(i -> !partitionSpec.containsKey(fullFieldNames[i]))
        .toArray();

    ParquetColumnarRowSplitReader.ColumnBatchGenerator gen = readVectors -> {
      // create and initialize the row batch
      ColumnVector[] vectors = new ColumnVector[selectedFields.length];
      for (int i = 0; i < vectors.length; i++) {
        String name = fullFieldNames[selectedFields[i]];
        LogicalType type = fullFieldTypes[selectedFields[i]].getLogicalType();
        vectors[i] = createVector(readVectors, selNonPartNames, name, type, partitionSpec, batchSize);
      }
      return new VectorizedColumnBatch(vectors);
    };

    return new ParquetColumnarRowSplitReader(
        utcTimestamp,
        caseSensitive,
        conf,
        Arrays.stream(selParquetFields)
            .mapToObj(i -> fullFieldTypes[i].getLogicalType())
            .toArray(LogicalType[]::new),
        selNonPartNames.toArray(new String[0]),
        gen,
        batchSize,
        new org.apache.hadoop.fs.Path(path.toUri()),
        splitStart,
        splitLength,
        filterPredicate,
        recordFilter);
  }
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



hudi-flink-datasource/hudi-flink1.16.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java [108:161]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  public static ParquetColumnarRowSplitReader genPartColumnarRowReader(
      boolean utcTimestamp,
      boolean caseSensitive,
      Configuration conf,
      String[] fullFieldNames,
      DataType[] fullFieldTypes,
      Map<String, Object> partitionSpec,
      int[] selectedFields,
      int batchSize,
      Path path,
      long splitStart,
      long splitLength,
      FilterPredicate filterPredicate,
      UnboundRecordFilter recordFilter) throws IOException {

    ValidationUtils.checkState(Arrays.stream(selectedFields).noneMatch(x -> x == -1),
        "One or more specified columns do not exist in the Hudi table.");

    List<String> selNonPartNames = Arrays.stream(selectedFields)
        .mapToObj(i -> fullFieldNames[i])
        .filter(n -> !partitionSpec.containsKey(n))
        .collect(Collectors.toList());

    int[] selParquetFields = Arrays.stream(selectedFields)
        .filter(i -> !partitionSpec.containsKey(fullFieldNames[i]))
        .toArray();

    ParquetColumnarRowSplitReader.ColumnBatchGenerator gen = readVectors -> {
      // create and initialize the row batch
      ColumnVector[] vectors = new ColumnVector[selectedFields.length];
      for (int i = 0; i < vectors.length; i++) {
        String name = fullFieldNames[selectedFields[i]];
        LogicalType type = fullFieldTypes[selectedFields[i]].getLogicalType();
        vectors[i] = createVector(readVectors, selNonPartNames, name, type, partitionSpec, batchSize);
      }
      return new VectorizedColumnBatch(vectors);
    };

    return new ParquetColumnarRowSplitReader(
        utcTimestamp,
        caseSensitive,
        conf,
        Arrays.stream(selParquetFields)
            .mapToObj(i -> fullFieldTypes[i].getLogicalType())
            .toArray(LogicalType[]::new),
        selNonPartNames.toArray(new String[0]),
        gen,
        batchSize,
        new org.apache.hadoop.fs.Path(path.toUri()),
        splitStart,
        splitLength,
        filterPredicate,
        recordFilter);
  }
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java [108:161]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  public static ParquetColumnarRowSplitReader genPartColumnarRowReader(
      boolean utcTimestamp,
      boolean caseSensitive,
      Configuration conf,
      String[] fullFieldNames,
      DataType[] fullFieldTypes,
      Map<String, Object> partitionSpec,
      int[] selectedFields,
      int batchSize,
      Path path,
      long splitStart,
      long splitLength,
      FilterPredicate filterPredicate,
      UnboundRecordFilter recordFilter) throws IOException {

    ValidationUtils.checkState(Arrays.stream(selectedFields).noneMatch(x -> x == -1),
        "One or more specified columns do not exist in the Hudi table.");

    List<String> selNonPartNames = Arrays.stream(selectedFields)
        .mapToObj(i -> fullFieldNames[i])
        .filter(n -> !partitionSpec.containsKey(n))
        .collect(Collectors.toList());

    int[] selParquetFields = Arrays.stream(selectedFields)
        .filter(i -> !partitionSpec.containsKey(fullFieldNames[i]))
        .toArray();

    ParquetColumnarRowSplitReader.ColumnBatchGenerator gen = readVectors -> {
      // create and initialize the row batch
      ColumnVector[] vectors = new ColumnVector[selectedFields.length];
      for (int i = 0; i < vectors.length; i++) {
        String name = fullFieldNames[selectedFields[i]];
        LogicalType type = fullFieldTypes[selectedFields[i]].getLogicalType();
        vectors[i] = createVector(readVectors, selNonPartNames, name, type, partitionSpec, batchSize);
      }
      return new VectorizedColumnBatch(vectors);
    };

    return new ParquetColumnarRowSplitReader(
        utcTimestamp,
        caseSensitive,
        conf,
        Arrays.stream(selParquetFields)
            .mapToObj(i -> fullFieldTypes[i].getLogicalType())
            .toArray(LogicalType[]::new),
        selNonPartNames.toArray(new String[0]),
        gen,
        batchSize,
        new org.apache.hadoop.fs.Path(path.toUri()),
        splitStart,
        splitLength,
        filterPredicate,
        recordFilter);
  }
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java [108:161]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  public static ParquetColumnarRowSplitReader genPartColumnarRowReader(
      boolean utcTimestamp,
      boolean caseSensitive,
      Configuration conf,
      String[] fullFieldNames,
      DataType[] fullFieldTypes,
      Map<String, Object> partitionSpec,
      int[] selectedFields,
      int batchSize,
      Path path,
      long splitStart,
      long splitLength,
      FilterPredicate filterPredicate,
      UnboundRecordFilter recordFilter) throws IOException {

    ValidationUtils.checkState(Arrays.stream(selectedFields).noneMatch(x -> x == -1),
        "One or more specified columns do not exist in the Hudi table.");

    List<String> selNonPartNames = Arrays.stream(selectedFields)
        .mapToObj(i -> fullFieldNames[i])
        .filter(n -> !partitionSpec.containsKey(n))
        .collect(Collectors.toList());

    int[] selParquetFields = Arrays.stream(selectedFields)
        .filter(i -> !partitionSpec.containsKey(fullFieldNames[i]))
        .toArray();

    ParquetColumnarRowSplitReader.ColumnBatchGenerator gen = readVectors -> {
      // create and initialize the row batch
      ColumnVector[] vectors = new ColumnVector[selectedFields.length];
      for (int i = 0; i < vectors.length; i++) {
        String name = fullFieldNames[selectedFields[i]];
        LogicalType type = fullFieldTypes[selectedFields[i]].getLogicalType();
        vectors[i] = createVector(readVectors, selNonPartNames, name, type, partitionSpec, batchSize);
      }
      return new VectorizedColumnBatch(vectors);
    };

    return new ParquetColumnarRowSplitReader(
        utcTimestamp,
        caseSensitive,
        conf,
        Arrays.stream(selParquetFields)
            .mapToObj(i -> fullFieldTypes[i].getLogicalType())
            .toArray(LogicalType[]::new),
        selNonPartNames.toArray(new String[0]),
        gen,
        batchSize,
        new org.apache.hadoop.fs.Path(path.toUri()),
        splitStart,
        splitLength,
        filterPredicate,
        recordFilter);
  }
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



