crates/paimon/src/spec/data_file.rs (68 lines of code) (raw):
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
use crate::spec::stats::BinaryTableStats;
use chrono::serde::ts_milliseconds::deserialize as from_millis;
use chrono::serde::ts_milliseconds::serialize as to_millis;
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use std::fmt::{Display, Formatter};
/// A [`BinaryRow`] with an arity of zero, i.e. the value produced by `BinaryRow::new(0)`.
pub const EMPTY_BINARY_ROW: BinaryRow = BinaryRow::new(0);
/// An implementation of InternalRow.
///
/// Only two integers describing the row layout are stored here. With
/// `rename_all = "camelCase"` they are (de)serialized as `arity` and
/// `nullBitsSizeInBytes`.
///
/// Impl Reference: <https://github.com/apache/paimon/blob/release-0.8.2/paimon-common/src/main/java/org/apache/paimon/data/BinaryRow.java>
#[derive(Debug, Eq, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct BinaryRow {
    /// Arity handed to [`BinaryRow::new`]; presumably the number of fields in the row.
    arity: i32,
    /// Size in bytes of the null-bit region, derived from `arity` in [`BinaryRow::new`].
    null_bits_size_in_bytes: i32,
}
impl BinaryRow {
    /// Width of the header that precedes the null bits.
    ///
    /// NOTE: despite the `_IN_BYTES` suffix, this value is consumed as a number of
    /// *bits*: [`Self::cal_bit_set_width_in_bytes`] adds it to the per-field bit count
    /// before rounding up to 64-bit words. The name is kept unchanged so existing
    /// callers keep compiling.
    pub const HEADER_SIZE_IN_BYTES: i32 = 8;

    /// Returns the size in bytes of the header plus one null bit per field,
    /// rounded up to a whole number of 8-byte words.
    pub const fn cal_bit_set_width_in_bytes(arity: i32) -> i32 {
        ((arity + 63 + Self::HEADER_SIZE_IN_BYTES) / 64) * 8
    }

    /// Returns the size in bytes of the fixed-length part of a row:
    /// the null-bit set followed by 8 bytes per field.
    pub const fn cal_fix_part_size_in_bytes(arity: i32) -> i32 {
        Self::cal_bit_set_width_in_bytes(arity) + 8 * arity
    }

    /// Creates the layout description of a row with `arity` fields.
    ///
    /// The null-bit region size is taken from [`Self::cal_bit_set_width_in_bytes`] so
    /// that it includes the header and is aligned to 8-byte words, consistent with
    /// [`Self::cal_fix_part_size_in_bytes`]. A plain `(arity + 7) / 8` would ignore
    /// both and yield, for example, `0` for an empty row instead of `8`.
    pub const fn new(arity: i32) -> Self {
        Self {
            arity,
            null_bits_size_in_bytes: Self::cal_bit_set_width_in_bytes(arity),
        }
    }
}
/// Metadata of a data file.
///
/// Every field carries an explicit `rename`, so the serialized field names are the
/// upper-case `_FIELD_NAME` strings listed below rather than camelCase.
///
/// Impl References: <https://github.com/apache/paimon/blob/release-0.8.2/paimon-core/src/main/java/org/apache/paimon/io/DataFileMeta.java>
#[derive(Debug, Eq, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct DataFileMeta {
    /// Name of the data file.
    #[serde(rename = "_FILE_NAME")]
    pub file_name: String,
    /// Size of the data file (presumably in bytes — confirm against the writer).
    #[serde(rename = "_FILE_SIZE")]
    pub file_size: i64,
    /// Total number of rows (including add & delete) in this file.
    #[serde(rename = "_ROW_COUNT")]
    pub row_count: i64,
    /// Minimum key of the file, kept as raw bytes and (de)serialized via `serde_bytes`.
    #[serde(rename = "_MIN_KEY", with = "serde_bytes")]
    pub min_key: Vec<u8>,
    /// Maximum key of the file, kept as raw bytes and (de)serialized via `serde_bytes`.
    #[serde(rename = "_MAX_KEY", with = "serde_bytes")]
    pub max_key: Vec<u8>,
    /// Statistics over the key columns.
    #[serde(rename = "_KEY_STATS")]
    pub key_stats: BinaryTableStats,
    /// Statistics over the value columns.
    #[serde(rename = "_VALUE_STATS")]
    pub value_stats: BinaryTableStats,
    /// Smallest sequence number contained in this file.
    #[serde(rename = "_MIN_SEQUENCE_NUMBER")]
    pub min_sequence_number: i64,
    /// Largest sequence number contained in this file.
    #[serde(rename = "_MAX_SEQUENCE_NUMBER")]
    pub max_sequence_number: i64,
    /// Identifier of the schema the file was written with.
    #[serde(rename = "_SCHEMA_ID")]
    pub schema_id: i64,
    /// Level of the file (presumably its LSM-tree level — confirm against the Java reference).
    #[serde(rename = "_LEVEL")]
    pub level: i32,
    /// Names of additional files associated with this data file.
    #[serde(rename = "_EXTRA_FILES")]
    pub extra_files: Vec<String>,
    /// Creation time, (de)serialized as a millisecond timestamp.
    #[serde(
        rename = "_CREATION_TIME",
        serialize_with = "to_millis",
        deserialize_with = "from_millis"
    )]
    pub creation_time: DateTime<Utc>,
    /// Number of delete rows; `rowCount = add_row_count + delete_row_count`.
    #[serde(rename = "_DELETE_ROW_COUNT")]
    pub delete_row_count: Option<i64>,
    /// File index filter bytes; if it is small, it is stored in the data file meta.
    ///
    /// `default` is required here: a field using `with = ...` is no longer treated as
    /// implicitly optional by serde, so without it a record lacking
    /// `_EMBEDDED_FILE_INDEX` would fail to deserialize instead of yielding `None`
    /// (unlike `delete_row_count` above, which gets that behaviour automatically).
    #[serde(rename = "_EMBEDDED_FILE_INDEX", with = "serde_bytes", default)]
    pub embedded_index: Option<Vec<u8>>,
}
impl Display for DataFileMeta {
    /// Writes a single-line `{name: value, ...}` summary of the metadata.
    ///
    /// Scalars, the file name and the creation time use their `Display` form. Byte
    /// buffers, statistics, lists and optional values use their `Debug` form, which is
    /// guaranteed to exist because the struct itself derives `Debug`.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        // Split across several `write!` calls purely to keep the format strings readable;
        // `{{` / `}}` emit the literal surrounding braces.
        write!(
            f,
            "{{fileName: {}, fileSize: {}, rowCount: {}, embeddedIndex: {:?}, ",
            self.file_name, self.file_size, self.row_count, self.embedded_index
        )?;
        write!(
            f,
            "minKey: {:?}, maxKey: {:?}, keyStats: {:?}, valueStats: {:?}, ",
            self.min_key, self.max_key, self.key_stats, self.value_stats
        )?;
        write!(
            f,
            "minSequenceNumber: {}, maxSequenceNumber: {}, schemaId: {}, level: {}, ",
            self.min_sequence_number, self.max_sequence_number, self.schema_id, self.level
        )?;
        write!(
            f,
            "extraFiles: {:?}, creationTime: {}, deleteRowCount: {:?}}}",
            self.extra_files, self.creation_time, self.delete_row_count
        )
    }
}
// No inherent methods are defined on `DataFileMeta` yet; this block is empty.
// NOTE(review): `#[allow(dead_code)]` has nothing to suppress while the block is empty —
// presumably a placeholder for future helpers; confirm before removing.
#[allow(dead_code)]
impl DataFileMeta {}