in src/parquet/column_writer.cc [662:719]
// Writes one mini-batch of levels and values into the current page buffers.
//
// num_values  number of level entries in this batch
// def_levels  definition levels; only read when the column's maximum
//             definition level is greater than zero
// rep_levels  repetition levels; only read when the column's maximum
//             repetition level is greater than zero
// values      the values to encode; may be null only when no entry of the
//             batch carries a value (checked with DCHECK, see PARQUET-780)
//
// Returns the number of entries of `values` that were handed to WriteValues.
inline int64_t TypedColumnWriter<DType>::WriteMiniBatch(int64_t num_values,
                                                        const int16_t* def_levels,
                                                        const int16_t* rep_levels,
                                                        const T* values) {
  // Without definition levels every entry carries a value, so start from
  // that assumption and recount only when definition levels are in use.
  int64_t present_count = num_values;
  const auto max_def_level = descr_->max_definition_level();
  if (max_def_level > 0) {
    // An entry carries a value only when it is defined at the maximum level.
    present_count = 0;
    for (int64_t idx = 0; idx < num_values; ++idx) {
      if (def_levels[idx] == max_def_level) {
        ++present_count;
      }
    }
    WriteDefinitionLevels(num_values, def_levels);
  }

  if (descr_->max_repetition_level() <= 0) {
    // No repetition levels: each entry is exactly one row.
    // NOTE(review): the cast narrows num_values to int; confirm against the
    // declared type of rows_written_ that this cannot truncate.
    rows_written_ += static_cast<int>(num_values);
  } else {
    // A row may span several entries; a repetition level of 0 marks the
    // start of a new row, so count those occurrences.
    for (int64_t idx = 0; idx < num_values; ++idx) {
      if (rep_levels[idx] == 0) {
        rows_written_++;
      }
    }
    WriteRepetitionLevels(num_values, rep_levels);
  }

  // PARQUET-780: a null values pointer is tolerated when nothing is written.
  if (present_count > 0) {
    DCHECK(nullptr != values) << "Values ptr cannot be NULL";
  }
  WriteValues(present_count, values);

  if (page_statistics_ != nullptr) {
    // Entries that did not carry a value are passed as the third argument
    // (presumably the null count -- verify against the statistics API).
    const int64_t absent_count = num_values - present_count;
    page_statistics_->Update(values, present_count, absent_count);
  }

  // Account for this batch before deciding whether the page is full.
  num_buffered_values_ += num_values;
  num_buffered_encoded_values_ += present_count;

  // Start a new data page once the encoder's size estimate reaches the
  // configured page size.
  const bool page_is_full =
      current_encoder_->EstimatedDataEncodedSize() >= properties_->data_pagesize();
  if (page_is_full) {
    AddDataPage();
  }

  // The dictionary size is only checked while dictionary encoding is still
  // in effect (no fallback has happened).
  if (has_dictionary_ && !fallback_) {
    CheckDictionarySizeLimit();
  }

  return present_count;
}