/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#pragma once

// NOTE(review): in this copy of the file everything that was written between
// angle brackets appears to be missing: the header names of the #include
// directives below and the template arguments of std::unique_ptr,
// std::shared_ptr, std::optional, std::vector and std::map further down.
// Restore them from the upstream revision; they are deliberately not guessed here.
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

namespace DB
{
namespace ErrorCodes
{
/// Error code defined elsewhere. Nothing visible in this header refers to it.
extern const int NOT_IMPLEMENTED;
}
}

namespace local_engine
{
/// Abstract description of one file entry (`substrait::ReadRel::LocalFiles::FileOrFiles`)
/// that is to be read. Concrete subclasses have to provide `createInputFormat()` and
/// `getFileFormat()`; every other virtual member has a default implementation below.
class FormatFile
{
public:
    /// Bundles an input format with a read buffer.
    /// NOTE(review): presumably `input` reads from `read_buffer`, which would be why
    /// they are kept together — not visible from this header, confirm in the subclasses.
    struct InputFormat
    {
    public:
        std::unique_ptr read_buffer;
        DB::InputFormatPtr input;
    };
    using InputFormatPtr = std::shared_ptr;

    /// Defined out of line; the parameter names mirror the protected members
    /// `context`, `file_info` and `read_buffer_builder`.
    FormatFile(
        DB::ContextPtr context_,
        const substrait::ReadRel::LocalFiles::FileOrFiles & file_info_,
        ReadBufferBuilderPtr read_buffer_builder_);

    /// Virtual so that subclasses can be destroyed through a base pointer.
    virtual ~FormatFile() = default;

    /// Create a new input format for reading this file.
    /// `header` is the block header passed on to the implementation.
    virtual InputFormatPtr createInputFormat(const DB::Block & header) = 0;

    /// Spark would split a large file into small segments and read them in different tasks.
    /// If this file doesn't support the split feature, only the task with offset 0 will generate data.
    /// The default implementation reports that splitting is not supported.
    virtual bool supportSplit() const { return false; }

    /// Try to get the number of rows from file metadata.
    /// The default implementation returns an empty optional.
    virtual std::optional getTotalRows() { return {}; }

    /// Get partition keys from file path.
    /// Returns a reference to the `partition_keys` member.
    inline const std::vector & getFilePartitionKeys() const { return partition_keys; }

    /// Returns a reference to the `partition_values` member.
    inline const std::map & getFilePartitionValues() const { return partition_values; }

    /// URI of the file, taken from `file_info.uri_file()`.
    virtual String getURIPath() const { return file_info.uri_file(); }

    /// Start offset of the range to read, taken from `file_info.start()`.
    virtual size_t getStartOffset() const { return file_info.start(); }

    /// Length of the range to read, taken from `file_info.length()`.
    virtual size_t getLength() const { return file_info.length(); }

    /// Name of the file format; must be provided by every subclass.
    virtual String getFileFormat() const = 0;

protected:
    DB::ContextPtr context;
    /// Stored by value (not as a reference to the constructor argument).
    substrait::ReadRel::LocalFiles::FileOrFiles file_info;
    ReadBufferBuilderPtr read_buffer_builder;
    /// NOTE(review): where these two are filled in is not visible in this header;
    /// presumably the constructor derives them from the file path — confirm.
    std::vector partition_keys;
    std::map partition_values;
    // std::optional filter;
    /// Not used by any code visible in this header.
    std::shared_ptr key_condition;
};
using FormatFilePtr = std::shared_ptr;
using FormatFiles = std::vector;

/// Factory helper for `FormatFile` objects.
class FormatFileUtil
{
public:
    /// Creates the `FormatFile` for `file`. Defined out of line, so how the concrete
    /// subclass is chosen is not visible from this header.
    static FormatFilePtr createFile(DB::ContextPtr context, ReadBufferBuilderPtr read_buffer_builder, const substrait::ReadRel::LocalFiles::FileOrFiles & file);
};
}