dev/benchmarks/python/array.py (54 lines of code) (raw):
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import nanoarrow as na
class CArrayBuilderSuite:
    """
    Benchmarks for building CArrays

    ``setup()`` creates the Python inputs and the pre-built schema/children
    that the ``time_*`` methods pass to the nanoarrow constructors.
    """

    def setup(self):
        """Create the inputs consumed by the ``time_*`` methods."""
        # 1,000,000 Python ints and 1,000,000 Python bools
        self.py_integers = list(range(int(1e6)))
        self.py_bools = [False, True, True, False] * int(1e6 // 4)

        # A struct schema with 10,000 int32 fields and a matching list of
        # children. List repetition stores 10,000 references to the same
        # child array, so only one int32 array is built here.
        self.wide_schema = na.c_schema(na.struct([na.int32()] * 10000))
        self.children = [na.c_array(self.py_integers, na.int32())] * 10000

    def time_build_c_array_int32(self):
        """Create an int32 array from 1,000,000 Python integers"""
        na.c_array(self.py_integers, na.int32())

    def time_build_c_array_bool(self):
        """Create a bool array from 1,000,000 Python booleans"""
        na.c_array(self.py_bools, na.bool_())

    def time_build_c_array_struct_wide(self):
        """Create a struct array with 10,000 columns"""
        # The length is an element count, so pass it as an int (like every
        # other count in this file) rather than a float literal. It is
        # taken from the data used to build the children so the two stay
        # in agreement.
        na.c_array_from_buffers(
            self.wide_schema,
            len(self.py_integers),
            [None],
            children=self.children,
        )
class ArrayIterationSuite:
    """Benchmarks for consuming an Array using various methods of iteration"""

    def setup(self):
        """Build the int32, string and struct arrays read by the ``time_*`` methods."""
        n_items = int(1e6)
        self.integers = na.Array(range(n_items), na.int32())

        # String array assembled directly from buffers: every element is a
        # fixed-width 7-byte slice of a repeated lowercase alphabet.
        chars_per_item = 7
        letters = b"abcdefghijklmnopqrstuvwxyz"
        total_chars = chars_per_item * n_items
        # One extra repetition guarantees the data buffer holds at least
        # total_chars bytes.
        string_data = letters * (total_chars // len(letters) + 1)
        # n_items + 1 offsets: 0, 7, 14, ..., total_chars
        string_offsets = na.c_buffer(
            range(0, total_chars + chars_per_item, chars_per_item), na.int32()
        )
        self.strings = na.Array(
            na.c_array_from_buffers(
                na.string(),
                length=n_items,
                buffers=[None, string_offsets, string_data],
            )
        )

        def int32_struct(n_rows, n_cols):
            # Struct array of n_cols int32 columns, each holding
            # 0..n_rows-1; the same child array object is reused for
            # every column.
            schema = na.struct([na.int32()] * n_cols)
            column = na.c_array(range(n_rows), na.int32())
            c_struct = na.c_array_from_buffers(
                schema,
                length=n_rows,
                buffers=[None],
                children=[column] * n_cols,
            )
            return na.Array(c_struct)

        # "long": many rows, few columns; "wide": few rows, many columns
        self.long_struct = int32_struct(10000, 100)
        self.wide_struct = int32_struct(100, 10000)

    def time_integers_to_list(self):
        """Consume an int32 array with 1,000,000 elements into a Python list"""
        list(self.integers.iter_py())

    def time_strings_to_list(self):
        """Consume a string array with 1,000,000 elements into a Python list"""
        list(self.strings.iter_py())

    def time_long_struct_to_dict_list(self):
        """Consume a struct array with 10,000 elements and 100 columns into a
        list of dictionaries
        """
        list(self.long_struct.iter_py())

    def time_long_struct_to_tuple_list(self):
        """Consume a struct array with 10,000 elements and 100 columns into a
        list of tuples
        """
        list(self.long_struct.iter_tuples())

    def time_wide_struct_to_dict_list(self):
        """Consume a struct array with 100 elements and 10,000 columns into a
        list of dictionaries
        """
        list(self.wide_struct.iter_py())

    def time_wide_struct_to_tuple_list(self):
        """Consume a struct array with 100 elements and 10,000 columns into a
        list of tuples
        """
        list(self.wide_struct.iter_tuples())