crates/paimon/src/spec/data_file.rs (68 lines of code) (raw):

// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. use crate::spec::stats::BinaryTableStats; use chrono::serde::ts_milliseconds::deserialize as from_millis; use chrono::serde::ts_milliseconds::serialize as to_millis; use chrono::{DateTime, Utc}; use serde::{Deserialize, Serialize}; use std::fmt::{Display, Formatter}; pub const EMPTY_BINARY_ROW: BinaryRow = BinaryRow::new(0); /// An implementation of InternalRow. /// /// Impl Reference: <https://github.com/apache/paimon/blob/release-0.8.2/paimon-common/src/main/java/org/apache/paimon/data/BinaryRow.java> #[derive(Debug, Eq, PartialEq, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub struct BinaryRow { arity: i32, null_bits_size_in_bytes: i32, } impl BinaryRow { pub const HEADER_SIZE_IN_BYTES: i32 = 8; pub const fn cal_bit_set_width_in_bytes(arity: i32) -> i32 { ((arity + 63 + Self::HEADER_SIZE_IN_BYTES) / 64) * 8 } pub const fn cal_fix_part_size_in_bytes(arity: i32) -> i32 { Self::cal_bit_set_width_in_bytes(arity) + 8 * arity } pub const fn new(arity: i32) -> Self { Self { arity, null_bits_size_in_bytes: (arity + 7) / 8, } } } /// Metadata of a data file. /// /// Impl References: <https://github.com/apache/paimon/blob/release-0.8.2/paimon-core/src/main/java/org/apache/paimon/io/DataFileMeta.java> #[derive(Debug, Eq, PartialEq, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub struct DataFileMeta { #[serde(rename = "_FILE_NAME")] pub file_name: String, #[serde(rename = "_FILE_SIZE")] pub file_size: i64, // row_count tells the total number of rows (including add & delete) in this file. #[serde(rename = "_ROW_COUNT")] pub row_count: i64, #[serde(rename = "_MIN_KEY", with = "serde_bytes")] pub min_key: Vec<u8>, #[serde(rename = "_MAX_KEY", with = "serde_bytes")] pub max_key: Vec<u8>, #[serde(rename = "_KEY_STATS")] pub key_stats: BinaryTableStats, #[serde(rename = "_VALUE_STATS")] pub value_stats: BinaryTableStats, #[serde(rename = "_MIN_SEQUENCE_NUMBER")] pub min_sequence_number: i64, #[serde(rename = "_MAX_SEQUENCE_NUMBER")] pub max_sequence_number: i64, #[serde(rename = "_SCHEMA_ID")] pub schema_id: i64, #[serde(rename = "_LEVEL")] pub level: i32, #[serde(rename = "_EXTRA_FILES")] pub extra_files: Vec<String>, #[serde( rename = "_CREATION_TIME", serialize_with = "to_millis", deserialize_with = "from_millis" )] pub creation_time: DateTime<Utc>, #[serde(rename = "_DELETE_ROW_COUNT")] // rowCount = add_row_count + delete_row_count. pub delete_row_count: Option<i64>, // file index filter bytes, if it is small, store in data file meta #[serde(rename = "_EMBEDDED_FILE_INDEX", with = "serde_bytes")] pub embedded_index: Option<Vec<u8>>, } impl Display for DataFileMeta { fn fmt(&self, _: &mut Formatter<'_>) -> std::fmt::Result { todo!() } } #[allow(dead_code)] impl DataFileMeta {}