arrow/benches/json_reader.rs (145 lines of code) (raw):

// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. use criterion::*; use arrow::datatypes::*; use arrow::util::bench_util::{ create_primitive_array, create_string_array, create_string_array_with_len, }; use arrow_array::RecordBatch; use arrow_json::{LineDelimitedWriter, ReaderBuilder}; use std::io::Cursor; use std::sync::Arc; #[allow(deprecated)] fn do_bench(c: &mut Criterion, name: &str, json: &str, schema: SchemaRef) { c.bench_function(name, |b| { b.iter(|| { let cursor = Cursor::new(black_box(json)); let builder = ReaderBuilder::new(schema.clone()).with_batch_size(64); let reader = builder.build(cursor).unwrap(); for next in reader { next.unwrap(); } }) }); } fn small_bench_primitive(c: &mut Criterion) { let schema = Arc::new(Schema::new(vec![ Field::new("c1", DataType::Utf8, true), Field::new("c2", DataType::Float64, true), Field::new("c3", DataType::UInt32, true), Field::new("c4", DataType::Boolean, true), ])); let json_content = r#" {"c1": "eleven", "c2": 6.2222222225, "c3": 5.0, "c4": false} {"c1": "twelve", "c2": -55555555555555.2, "c3": 3} {"c1": null, "c2": 3, "c3": 125, "c4": null} {"c2": -35, "c3": 100.0, "c4": true} {"c1": "fifteen", "c2": null, "c4": true} {"c1": "eleven", "c2": 6.2222222225, "c3": 5.0, "c4": false} {"c1": "twelve", "c2": -55555555555555.2, "c3": 3} {"c1": null, "c2": 3, "c3": 125, "c4": null} {"c2": -35, "c3": 100.0, "c4": true} {"c1": "fifteen", "c2": null, "c4": true} "#; do_bench(c, "small_bench_primitive", json_content, schema) } fn small_bench_primitive_with_utf8view(c: &mut Criterion) { let schema = Arc::new(Schema::new(vec![ Field::new("c1", DataType::Utf8View, true), Field::new("c2", DataType::Float64, true), Field::new("c3", DataType::UInt32, true), Field::new("c4", DataType::Boolean, true), ])); let json_content = r#" {"c1": "eleven", "c2": 6.2222222225, "c3": 5.0, "c4": false} {"c1": "twelve", "c2": -55555555555555.2, "c3": 3} {"c1": null, "c2": 3, "c3": 125, "c4": null} {"c2": -35, "c3": 100.0, "c4": true} {"c1": "fifteen", "c2": null, "c4": true} {"c1": "eleven", "c2": 6.2222222225, "c3": 5.0, "c4": false} {"c1": "twelve", "c2": -55555555555555.2, "c3": 3} {"c1": null, "c2": 3, "c3": 125, "c4": null} {"c2": -35, "c3": 100.0, "c4": true} {"c1": "fifteen", "c2": null, "c4": true} "#; do_bench( c, "small_bench_primitive_with_utf8view", json_content, schema, ) } fn large_bench_primitive(c: &mut Criterion) { let schema = Arc::new(Schema::new(vec![ Field::new("c1", DataType::Utf8, true), Field::new("c2", DataType::Int32, true), Field::new("c3", DataType::UInt32, true), Field::new("c4", DataType::Utf8, true), Field::new("c5", DataType::Utf8, true), Field::new("c6", DataType::Float32, true), ])); let c1 = Arc::new(create_string_array::<i32>(4096, 0.)); let c2 = Arc::new(create_primitive_array::<Int32Type>(4096, 0.)); let c3 = Arc::new(create_primitive_array::<UInt32Type>(4096, 0.)); let c4 = Arc::new(create_string_array_with_len::<i32>(4096, 0.2, 10)); let c5 = Arc::new(create_string_array_with_len::<i32>(4096, 0.2, 20)); let c6 = Arc::new(create_primitive_array::<Float32Type>(4096, 0.2)); let batch = RecordBatch::try_from_iter([ ("c1", c1 as _), ("c2", c2 as _), ("c3", c3 as _), ("c4", c4 as _), ("c5", c5 as _), ("c6", c6 as _), ]) .unwrap(); let mut out = Vec::with_capacity(1024); LineDelimitedWriter::new(&mut out).write(&batch).unwrap(); let json = std::str::from_utf8(&out).unwrap(); do_bench(c, "large_bench_primitive", json, schema) } fn small_bench_list(c: &mut Criterion) { let schema = Arc::new(Schema::new(vec![ Field::new( "c1", DataType::List(Arc::new(Field::new_list_field(DataType::Utf8, true))), true, ), Field::new( "c2", DataType::List(Arc::new(Field::new_list_field(DataType::Float64, true))), true, ), Field::new( "c3", DataType::List(Arc::new(Field::new_list_field(DataType::UInt32, true))), true, ), Field::new( "c4", DataType::List(Arc::new(Field::new_list_field(DataType::Boolean, true))), true, ), ])); let json = r#" {"c1": ["eleven"], "c2": [6.2222222225, -3.2, null], "c3": [5.0, 6], "c4": [false, true]} {"c1": ["twelve"], "c2": [-55555555555555.2, 12500000.0], "c3": [3, 4, 5]} {"c1": null, "c2": [3], "c3": [125, 127, 129], "c4": [null, false, true]} {"c2": [-35], "c3": [100.0, 200.0], "c4": null} {"c1": ["fifteen"], "c2": [null, 2.1, 1.5, -3], "c4": [true, false, null]} {"c1": ["fifteen"], "c2": [], "c4": [true, false, null]} {"c1": ["eleven"], "c2": [6.2222222225, -3.2, null], "c3": [5.0, 6], "c4": [false, true]} {"c1": ["twelve"], "c2": [-55555555555555.2, 12500000.0], "c3": [3, 4, 5]} {"c1": null, "c2": [3], "c3": [125, 127, 129], "c4": [null, false, true]} {"c2": [-35], "c3": [100.0, 200.0], "c4": null} {"c1": ["fifteen"], "c2": [null, 2.1, 1.5, -3], "c4": [true, false, null]} {"c1": ["fifteen"], "c2": [], "c4": [true, false, null]} "#; do_bench(c, "small_bench_list", json, schema) } fn criterion_benchmark(c: &mut Criterion) { small_bench_primitive(c); large_bench_primitive(c); small_bench_list(c); small_bench_primitive_with_utf8view(c); } criterion_group!(benches, criterion_benchmark); criterion_main!(benches);