be/src/olap/merger.h (58 lines of code) (raw):

// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #pragma once #include <vector> #include "common/status.h" #include "io/io_common.h" #include "olap/iterators.h" #include "olap/rowset/rowset_fwd.h" #include "olap/simple_rowid_conversion.h" #include "olap/tablet_fwd.h" namespace doris { class KeyBoundsPB; class RowIdConversion; class RowsetWriter; namespace segment_v2 { class SegmentWriter; } // namespace segment_v2 namespace vectorized { class RowSourcesBuffer; class VerticalBlockReader; }; // namespace vectorized class Merger { public: struct Statistics { // number of rows written to the destination rowset after merge int64_t output_rows = 0; int64_t merged_rows = 0; int64_t filtered_rows = 0; RowIdConversion* rowid_conversion = nullptr; }; // merge rows from `src_rowset_readers` and write into `dst_rowset_writer`. // return OK and set statistics into `*stats_output`. // return others on error static Status vmerge_rowsets(BaseTabletSPtr tablet, ReaderType reader_type, const TabletSchema& cur_tablet_schema, const std::vector<RowsetReaderSharedPtr>& src_rowset_readers, RowsetWriter* dst_rowset_writer, Statistics* stats_output); static Status vertical_merge_rowsets( BaseTabletSPtr tablet, ReaderType reader_type, const TabletSchema& tablet_schema, const std::vector<RowsetReaderSharedPtr>& src_rowset_readers, RowsetWriter* dst_rowset_writer, int64_t max_rows_per_segment, int64_t merge_way_num, Statistics* stats_output); // for vertical compaction static void vertical_split_columns(const TabletSchema& tablet_schema, std::vector<std::vector<uint32_t>>* column_groups, std::vector<uint32_t>* key_group_cluster_key_idxes); static Status vertical_compact_one_group( BaseTabletSPtr tablet, ReaderType reader_type, const TabletSchema& tablet_schema, bool is_key, const std::vector<uint32_t>& column_group, vectorized::RowSourcesBuffer* row_source_buf, const std::vector<RowsetReaderSharedPtr>& src_rowset_readers, RowsetWriter* dst_rowset_writer, int64_t max_rows_per_segment, Statistics* stats_output, std::vector<uint32_t> key_group_cluster_key_idxes, int64_t batch_size, CompactionSampleInfo* sample_info); // for segcompaction static Status vertical_compact_one_group(int64_t tablet_id, ReaderType reader_type, const TabletSchema& tablet_schema, bool is_key, const std::vector<uint32_t>& column_group, vectorized::RowSourcesBuffer* row_source_buf, vectorized::VerticalBlockReader& src_block_reader, segment_v2::SegmentWriter& dst_segment_writer, Statistics* stats_output, uint64_t* index_size, KeyBoundsPB& key_bounds, SimpleRowIdConversion* rowid_conversion); }; } // namespace doris