dev/benchmarks/generate-fixtures.py (51 lines of code) (raw):
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import os
import numpy as np
import pyarrow as pa
from pyarrow import ipc
def write_fixture(schema, batch_generator, fixture_name, fixtures_dir=None):
    """Write every batch from *batch_generator* to one IPC stream file.

    Args:
        schema: Schema handed to ``ipc.new_stream`` when the stream is opened.
        batch_generator: Iterable of batches; each one is passed to the
            stream writer's ``write_batch`` in iteration order.
        fixture_name: File name of the fixture, joined onto *fixtures_dir*.
        fixtures_dir: Directory that receives the file. When ``None``, the
            current working directory is used.
    """
    target_dir = os.getcwd() if fixtures_dir is None else fixtures_dir
    destination = os.path.join(target_dir, fixture_name)

    with ipc.new_stream(destination, schema) as writer:
        for record_batch in batch_generator:
            writer.write_batch(record_batch)
def write_fixture_float64(
    fixture_name,
    num_cols=10,
    num_batches=2,
    batch_size=65536,
    seed=1938,
    fixtures_dir=None,
):
    """Write a fixture whose columns are all randomly filled float64 arrays.

    Columns are named ``col0`` .. ``col{num_cols - 1}``. Every batch holds
    ``batch_size`` values per column, drawn from a NumPy generator created
    with ``np.random.default_rng(seed=seed)``.

    Args:
        fixture_name: File name of the fixture to write.
        num_cols: Number of float64 columns in the schema.
        num_batches: Number of record batches written to the stream.
        batch_size: Number of values generated per column in each batch.
        seed: Seed forwarded to ``np.random.default_rng``.
        fixtures_dir: Output directory, forwarded to ``write_fixture``.
    """
    rng = np.random.default_rng(seed=seed)

    # Build the column names once; the schema and every batch share them.
    column_names = [f"col{i}" for i in range(num_cols)]
    schema = pa.schema({name: pa.float64() for name in column_names})

    def gen_batches():
        for _ in range(num_batches):
            columns = []
            # One draw per column, in column order, so the sequence of
            # values pulled from the generator is stable for a given seed.
            for _ in column_names:
                columns.append(np.array(rng.random(batch_size)))
            yield pa.record_batch(columns, names=column_names)

    write_fixture(schema, gen_batches(), fixture_name, fixtures_dir=fixtures_dir)
if __name__ == "__main__":
    # Fixtures are written to a "fixtures" directory next to this script.
    this_dir = os.path.dirname(__file__)
    fixtures_dir = os.path.join(this_dir, "fixtures")

    # Create the output directory if needed. exist_ok=True avoids the
    # check-then-create race of a separate isdir() test followed by mkdir().
    os.makedirs(fixtures_dir, exist_ok=True)

    # (file name, num_cols, num_batches, batch_size) for each fixture:
    # a baseline shape, a many-batch single-column shape, and a
    # many-column single-batch shape.
    fixture_specs = (
        ("float64_basic.arrows", 10, 2, 65536),
        ("float64_long.arrows", 1, 20, 65536),
        ("float64_wide.arrows", 1280, 1, 1024),
    )

    for fixture_name, num_cols, num_batches, batch_size in fixture_specs:
        write_fixture_float64(
            fixture_name,
            num_cols=num_cols,
            num_batches=num_batches,
            batch_size=batch_size,
            fixtures_dir=fixtures_dir,
        )