cpp-ch/local-engine/Storages/SubstraitSource/FormatFile.h (60 lines of code) (raw):
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <map>
#include <memory>
#include <optional>
#include <vector>
#include <Core/Block.h>
#include <IO/ReadBuffer.h>
#include <Interpreters/Context.h>
#include <Processors/Formats/IInputFormat.h>
#include <Storages/SubstraitSource/ReadBufferBuilder.h>
#include <Storages/SubstraitSource/SubstraitFileSourceStep.h>
#include <substrait/plan.pb.h>
#include <Parser/TypeParser.h>
/// Declaration only: `extern` without an initializer means the constant is
/// defined in another translation unit.
namespace DB::ErrorCodes
{
extern const int NOT_IMPLEMENTED;
}
namespace local_engine
{
/// Abstract base describing one file entry of a Substrait `ReadRel::LocalFiles`
/// and how to open it. Concrete subclasses supply the input format creation
/// and the format name.
class FormatFile
{
public:
    /// Pairs a read buffer with an input format.
    /// NOTE(review): presumably `input` reads from `read_buffer` - confirm in the implementations.
    struct InputFormat
    {
        /// Declared ahead of `input`, hence destroyed after it
        /// (members are destroyed in reverse declaration order).
        std::unique_ptr<DB::ReadBuffer> read_buffer;
        DB::InputFormatPtr input;
    };
    using InputFormatPtr = std::shared_ptr<InputFormat>;

    FormatFile(
        DB::ContextPtr context_, const substrait::ReadRel::LocalFiles::FileOrFiles & file_info_, ReadBufferBuilderPtr read_buffer_builder_);

    virtual ~FormatFile() = default;

    /// Create a new input format for reading this file.
    virtual InputFormatPtr createInputFormat(const DB::Block & header) = 0;

    /// Spark may split a large file into small segments and read them in different tasks.
    /// If this file doesn't support the split feature, only the task with offset 0 will generate data.
    /// The base implementation reports no split support.
    virtual bool supportSplit() const { return false; }

    /// Try to get the number of rows from file metadata.
    /// The base implementation returns an empty optional.
    virtual std::optional<size_t> getTotalRows() { return std::nullopt; }

    /// Get partition keys from file path.
    const std::vector<String> & getFilePartitionKeys() const { return partition_keys; }

    /// Get the partition key -> value mapping stored for this file.
    const std::map<String, String> & getFilePartitionValues() const { return partition_values; }

    /// URI of the file, as recorded in the Substrait file info.
    virtual String getURIPath() const { return file_info.uri_file(); }

    /// `start` field of the Substrait file info.
    virtual size_t getStartOffset() const { return file_info.start(); }

    /// `length` field of the Substrait file info.
    virtual size_t getLength() const { return file_info.length(); }

    /// Name of the file format; every concrete subclass must provide it.
    virtual String getFileFormat() const = 0;

protected:
    DB::ContextPtr context;
    /// Own copy of the Substrait file description (held by value, not by reference).
    substrait::ReadRel::LocalFiles::FileOrFiles file_info;
    ReadBufferBuilderPtr read_buffer_builder;
    std::vector<String> partition_keys;
    std::map<String, String> partition_values;
    /// Default-initialized to null; nothing in this header assigns it.
    std::shared_ptr<const DB::KeyCondition> key_condition;
};
/// Shared-ownership handle to a FormatFile (or a subclass of it).
using FormatFilePtr = std::shared_ptr<FormatFile>;
/// A sequence of FormatFile handles.
using FormatFiles = std::vector<FormatFilePtr>;
/// Static factory helper for FormatFile instances.
class FormatFileUtil
{
public:
    /// Build a FormatFile for the given Substrait file entry.
    /// NOTE(review): presumably selects the concrete subclass from the format
    /// recorded in `file` - confirm against the definition in the .cpp.
    static FormatFilePtr createFile(
        DB::ContextPtr context,
        ReadBufferBuilderPtr read_buffer_builder,
        const substrait::ReadRel::LocalFiles::FileOrFiles & file);
};
}