in src/parquet/column_writer.cc [722:790]
inline int64_t TypedColumnWriter<DType>::WriteMiniBatchSpaced(
    int64_t num_values, const int16_t* def_levels, const int16_t* rep_levels,
    const uint8_t* valid_bits, int64_t valid_bits_offset, const T* values,
    int64_t* num_spaced_written) {
  // Writes one mini batch: definition levels, repetition levels, then values.
  //
  // num_values         number of levels in this batch
  // def_levels         definition levels; only read when the column's maximum
  //                    definition level is greater than zero
  // rep_levels         repetition levels; only read when the column's maximum
  //                    repetition level is greater than zero
  // valid_bits,
  // valid_bits_offset  forwarded to WriteValuesSpaced() and to the page
  //                    statistics together with `values`
  // values             value buffer handed to the value writer
  // num_spaced_written out: number of levels at or above the minimal "spaced"
  //                    definition level (all levels if there are none)
  //
  // Returns the number of levels equal to the maximum definition level (all
  // levels when the column has no definition levels).
  const auto max_def_level = descr_->max_definition_level();
  const bool is_optional = descr_->schema_node()->is_optional();

  int64_t dense_count = 0;   // levels equal to the maximum definition level
  int64_t spaced_count = 0;  // levels at or above the spaced threshold

  if (max_def_level <= 0) {
    // No definition levels: the field is required and non-repeated, so every
    // level counts towards both totals.
    dense_count = num_values;
    spaced_count = num_values;
  } else {
    // Lowest definition level that still counts as a spaced value: one below
    // the maximum when the leaf node itself is optional, the maximum otherwise.
    const int16_t spaced_threshold =
        static_cast<int16_t>(max_def_level - (is_optional ? 1 : 0));
    for (int64_t idx = 0; idx < num_values; ++idx) {
      const int16_t level = def_levels[idx];
      dense_count += (level == max_def_level) ? 1 : 0;
      spaced_count += (level >= spaced_threshold) ? 1 : 0;
    }
    WriteDefinitionLevels(num_values, def_levels);
  }

  if (descr_->max_repetition_level() <= 0) {
    // Non-repeated field: there are no repetition levels and each level is
    // exactly one row.
    rows_written_ += static_cast<int>(num_values);
  } else {
    // A row may span several levels; a repetition level of 0 marks the start
    // of a new row.
    for (int64_t idx = 0; idx < num_values; ++idx) {
      rows_written_ += (rep_levels[idx] == 0) ? 1 : 0;
    }
    WriteRepetitionLevels(num_values, rep_levels);
  }

  if (!is_optional) {
    WriteValues(dense_count, values);
  } else {
    WriteValuesSpaced(spaced_count, valid_bits, valid_bits_offset, values);
  }
  *num_spaced_written = spaced_count;

  if (page_statistics_ != nullptr) {
    // Every level that did not reach the maximum definition level is reported
    // to the statistics as a null.
    const int64_t null_count = num_values - dense_count;
    page_statistics_->UpdateSpaced(values, valid_bits, valid_bits_offset, dense_count,
                                   null_count);
  }

  num_buffered_values_ += num_values;
  num_buffered_encoded_values_ += dense_count;

  // Emit a data page once the encoder's estimated size reaches the configured
  // page size.
  if (current_encoder_->EstimatedDataEncodedSize() >= properties_->data_pagesize()) {
    AddDataPage();
  }
  // The dictionary size limit is only checked while dictionary encoding is
  // active and has not fallen back.
  if (has_dictionary_ && !fallback_) {
    CheckDictionarySizeLimit();
  }

  return dense_count;
}