python/pyfury/format/vectorized.pxi (27 lines of code) (raw):
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from libcpp.memory cimport shared_ptr
from libc.stdint cimport *
from pyfury.includes.libformat cimport CArrowWriter
from pyarrow.lib cimport CMemoryPool, CRecordBatch
from pyarrow.lib cimport Schema, MemoryPool, check_status
import pyarrow as pa
cimport pyarrow.lib as libpa
cdef class ArrowWriter:
cdef:
shared_ptr[CSchema] c_schema
CMemoryPool *c_pool
shared_ptr[CArrowWriter] c_arrow_writer
def __init__(self, Schema schema, MemoryPool pool=None):
self.c_schema = schema.sp_schema
if pool is None:
pool = pa.default_memory_pool()
self.c_pool = pool.pool
check_status(CArrowWriter.Make(
self.c_schema, self.c_pool, &self.c_arrow_writer))
def write(self, RowData row):
check_status(self.c_arrow_writer.get().Write(row.data))
def finish(self):
cdef shared_ptr[CRecordBatch] batch
check_status(self.c_arrow_writer.get().Finish(&batch))
return libpa.pyarrow_wrap_batch(batch)
def reset(self):
self.c_arrow_writer.get().Reset()