private Stream computeColumnStatsFromMetadataTable()

in xtable-core/src/main/java/org/apache/xtable/hudi/HudiFileStatsExtractor.java [124:168]


  /**
   * Attaches column statistics read from the Hudi metadata table to each of the supplied files.
   *
   * <p>The input stream is fully consumed up front and indexed by the pair produced by {@code
   * getPartitionAndFileName} for each file's physical path. Column stats are then requested from
   * the metadata table once per entry of {@code nameFieldMap}, grouped per file, converted with
   * {@code getColumnStatFromHudiStat} and written onto a rebuilt copy of each file.
   *
   * @param metadataTable the metadata table queried for column stats
   * @param files the data files to enrich; consumed by this call
   * @param nameFieldMap the fields to fetch stats for, keyed by the column name passed to the
   *     metadata table
   * @return a lazily evaluated stream of rebuilt files carrying the converted column stats and a
   *     record count taken from {@code getMaxFromColumnStats} (0 when that result is empty); an
   *     empty stream when {@code files} had no elements
   */
  private Stream<InternalDataFile> computeColumnStatsFromMetadataTable(
      HoodieTableMetadata metadataTable,
      Stream<InternalDataFile> files,
      Map<String, InternalField> nameFieldMap) {
    // Index the files by their lookup key. Collectors.toMap without a merge function rejects
    // two files that resolve to the same key.
    Map<Pair<String, String>, InternalDataFile> dataFilesByKey =
        files.collect(
            Collectors.toMap(
                dataFile -> getPartitionAndFileName(dataFile.getPhysicalPath()),
                Function.identity()));
    if (dataFilesByKey.isEmpty()) {
      return Stream.empty();
    }

    List<Pair<String, String>> lookupKeys = new ArrayList<>(dataFilesByKey.keySet());
    // Upper bound on the number of stats a single file can accumulate: one per requested field.
    int fieldCount = nameFieldMap.size();

    // One metadata-table lookup per field, issued in parallel, then regrouped so that every file
    // key maps to the (field, stats) pairs found for it.
    Map<Pair<String, String>, List<Pair<InternalField, HoodieMetadataColumnStats>>> statsByFile =
        nameFieldMap.entrySet().parallelStream()
            .flatMap(
                entry -> {
                  String columnName = entry.getKey();
                  InternalField field = entry.getValue();
                  return metadataTable.getColumnStats(lookupKeys, columnName).entrySet().stream()
                      .map(
                          perFile ->
                              Pair.of(perFile.getKey(), Pair.of(field, perFile.getValue())));
                })
            .collect(
                Collectors.groupingBy(
                    Map.Entry::getKey,
                    Collectors.mapping(Map.Entry::getValue, CustomCollectors.toList(fieldCount))));

    return dataFilesByKey.entrySet().stream()
        .map(
            entry -> {
              InternalDataFile dataFile = entry.getValue();
              // Files with no stats in the metadata table get an empty stats list.
              List<Pair<InternalField, HoodieMetadataColumnStats>> hudiStats =
                  statsByFile.getOrDefault(entry.getKey(), Collections.emptyList());
              List<ColumnStat> convertedStats =
                  hudiStats.stream()
                      .map(stat -> getColumnStatFromHudiStat(stat.getLeft(), stat.getRight()))
                      .collect(CustomCollectors.toList(hudiStats.size()));
              long recordCount = getMaxFromColumnStats(convertedStats).orElse(0L);
              return dataFile.toBuilder()
                  .columnStats(convertedStats)
                  .recordCount(recordCount)
                  .build();
            });
  }