in parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnValueCollector.java [105:187]
void initBloomFilter(BloomFilterWriter bloomFilterWriter, ParquetProperties props) {
    this.bloomFilterWriter = bloomFilterWriter;
    if (bloomFilterWriter == null) {
        // No writer was supplied, so Bloom filtering is disabled for this column:
        // install a no-op filter that ignores inserts and never reports a match.
        this.bloomFilter = new BloomFilter() {
            @Override
            public void writeTo(OutputStream out) throws IOException {}

            @Override
            public void insertHash(long hash) {}

            @Override
            public boolean findHash(long hash) {
                return false;
            }

            @Override
            public int getBitsetSize() {
                return 0;
            }

            @Override
            public long hash(int value) {
                return 0;
            }

            @Override
            public long hash(long value) {
                return 0;
            }

            @Override
            public long hash(double value) {
                return 0;
            }

            @Override
            public long hash(float value) {
                return 0;
            }

            @Override
            public long hash(Binary value) {
                return 0;
            }

            @Override
            public long hash(Object value) {
                return 0;
            }

            @Override
            public HashStrategy getHashStrategy() {
                return null;
            }

            @Override
            public Algorithm getAlgorithm() {
                return null;
            }

            @Override
            public Compression getCompression() {
                return null;
            }
        };
        return;
    }
    int maxBloomFilterSize = props.getMaxBloomFilterBytes();
    OptionalLong ndv = props.getBloomFilterNDV(path);
    OptionalDouble fpp = props.getBloomFilterFPP(path);
    // If the user specified the column NDV, size the Bloom filter from it and the requested FPP.
    if (ndv.isPresent()) {
        int optimalNumOfBits = BlockSplitBloomFilter.optimalNumOfBits(ndv.getAsLong(), fpp.getAsDouble());
        this.bloomFilter = new BlockSplitBloomFilter(optimalNumOfBits / 8, maxBloomFilterSize);
    } else if (props.getAdaptiveBloomFilterEnabled(path)) {
        // Adaptive mode: maintain several candidate filters and keep the smallest one
        // that can still satisfy the requested FPP for the observed values.
        int numCandidates = props.getBloomFilterCandidatesCount(path);
        this.bloomFilter =
                new AdaptiveBlockSplitBloomFilter(maxBloomFilterSize, numCandidates, fpp.getAsDouble(), path);
    } else {
        // No NDV hint and no adaptive mode: fall back to a filter of the maximum configured size.
        this.bloomFilter = new BlockSplitBloomFilter(maxBloomFilterSize, maxBloomFilterSize);
    }
}
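
For reference, a minimal sketch of how the NDV branch above arrives at a filter size. It uses only the two APIs the method itself calls, BlockSplitBloomFilter.optimalNumOfBits and the BlockSplitBloomFilter(int, int) constructor; the NDV, FPP, and byte-cap values are hypothetical stand-ins for what ParquetProperties would normally supply for the column path:

// Hypothetical inputs standing in for props.getBloomFilterNDV(path),
// props.getBloomFilterFPP(path), and props.getMaxBloomFilterBytes().
long ndv = 1_000_000L;       // expected number of distinct values in the column
double fpp = 0.01;           // target false-positive probability
int maxBytes = 1024 * 1024;  // upper bound on the bitset size in bytes

// Same sizing as the ndv.isPresent() branch: optimal bit count -> bytes, capped at maxBytes.
int optimalBits = BlockSplitBloomFilter.optimalNumOfBits(ndv, fpp);
BloomFilter filter = new BlockSplitBloomFilter(optimalBits / 8, maxBytes);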