cpp/core/memory/ColumnarBatch.h (55 lines of code) (raw):
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <cstdint>
#include <iosfwd>
#include <memory>
#include <string>
#include <vector>
#include "arrow/c/bridge.h"
#include "arrow/c/helpers.h"
#include "arrow/record_batch.h"
#include "memory/MemoryManager.h"
#include "utils/ArrowStatus.h"
#include "utils/Exception.h"
namespace gluten {
/// Abstract base class for a batch of columnar data described by a column count and a row count.
///
/// Subclasses must provide the type tag, the byte size, and the export to the Arrow C data
/// interface structs (ArrowArray / ArrowSchema). All non-inline members are defined outside
/// this header.
class ColumnarBatch {
 public:
  /// @param numColumns number of columns of the batch.
  /// @param numRows number of rows of the batch.
  ColumnarBatch(int32_t numColumns, int32_t numRows);

  /// Virtual so that a derived batch can be destroyed through a ColumnarBatch pointer.
  virtual ~ColumnarBatch() = default;

  /// Column-count accessor. Presumably returns the value given to the constructor (defined out of line).
  int32_t numColumns() const;

  /// Row-count accessor. Presumably returns the value given to the constructor (defined out of line).
  int32_t numRows() const;

  /// @return a string chosen by the concrete subclass to identify its batch type.
  virtual std::string getType() const = 0;

  /// @return the size of the batch in bytes, as computed by the subclass.
  /// Not const-qualified, so it cannot be invoked on a const batch.
  virtual int64_t numBytes() = 0;

  /// @return the batch contents as an Arrow C data interface ArrowArray, held by shared_ptr.
  virtual std::shared_ptr<ArrowArray> exportArrowArray() = 0;

  /// @return the batch schema as an Arrow C data interface ArrowSchema, held by shared_ptr.
  virtual std::shared_ptr<ArrowSchema> exportArrowSchema() = 0;

  /// Overridable accessor; presumably reports exportNanos_ (definition is not in this header).
  virtual int64_t getExportNanos() const;

  // Serializes one single row to byte array that can be accessed as Spark-compatible unsafe row.
  /// @param rowId index of the row to serialize.
  virtual std::vector<char> toUnsafeRow(int32_t rowId) const;

  /// Stream-insertion operator; a friend so that it may read the private counters.
  friend std::ostream& operator<<(std::ostream& os, const ColumnarBatch& columnarBatch);

 private:
  int32_t numColumns_;
  int32_t numRows_;

 protected:
  /// Writable by subclasses. NOTE(review): presumably nanoseconds spent exporting - confirm in the .cpp.
  /// Default-initialized so the value is well-defined even if a constructor does not set it explicitly.
  int64_t exportNanos_{0};
};
/// ColumnarBatch implementation that keeps its data as an arrow::RecordBatch held by shared_ptr.
/// Declared final: it cannot be derived from.
class ArrowColumnarBatch final : public ColumnarBatch {
 public:
  /// @param batch the record batch to wrap. Explicit, so a shared_ptr never converts implicitly.
  explicit ArrowColumnarBatch(std::shared_ptr<arrow::RecordBatch> batch);

  // --- ColumnarBatch overrides, listed in the order the base class declares them ---

  /// @return the type tag of this batch kind (value defined out of line).
  std::string getType() const override;

  /// @return the byte size of this batch (computation defined out of line).
  int64_t numBytes() override;

  /// @return the data as an Arrow C data interface ArrowArray.
  std::shared_ptr<ArrowArray> exportArrowArray() override;

  /// @return the schema as an Arrow C data interface ArrowSchema.
  std::shared_ptr<ArrowSchema> exportArrowSchema() override;

  /// @param rowId index of the row to serialize.
  std::vector<char> toUnsafeRow(int32_t rowId) const override;

  // --- Accessor specific to this class ---

  /// @return a raw, non-owning pointer to a record batch; presumably the one held in batch_
  /// (definition is not in this header).
  arrow::RecordBatch* getRecordBatch() const;

 private:
  /// The wrapped Arrow record batch.
  std::shared_ptr<arrow::RecordBatch> batch_;
};
class ArrowCStructColumnarBatch final : public ColumnarBatch {
public:
ArrowCStructColumnarBatch(std::unique_ptr<ArrowSchema> cSchema, std::unique_ptr<ArrowArray> cArray);
~ArrowCStructColumnarBatch() override;
std::string getType() const override;
int64_t numBytes() override;
std::shared_ptr<ArrowSchema> exportArrowSchema() override;
std::shared_ptr<ArrowArray> exportArrowArray() override;
std::vector<char> toUnsafeRow(int32_t rowId) const override;
private:
std::shared_ptr<ArrowSchema> cSchema_ = std::make_shared<ArrowSchema>();
std::shared_ptr<ArrowArray> cArray_ = std::make_shared<ArrowArray>();
};
/// Factory returning a shared ColumnarBatch for the given row count.
/// Judging by the name, the result has no columns and reports `numRows` rows - the definition
/// is not in this header, so confirm against the implementation.
/// @param numRows row count for the created batch.
std::shared_ptr<ColumnarBatch> createZeroColumnBatch(int32_t numRows);
} // namespace gluten