in arrow-integration-test/src/lib.rs [1128:1329]
fn test_arrow_data_equality() {
let secs_tz = Some("Europe/Budapest".into());
let millis_tz = Some("America/New_York".into());
let micros_tz = Some("UTC".into());
let nanos_tz = Some("Africa/Johannesburg".into());
let schema = Schema::new(vec![
Field::new("bools-with-metadata-map", DataType::Boolean, true).with_metadata(
[("k".to_string(), "v".to_string())]
.iter()
.cloned()
.collect(),
),
Field::new("bools-with-metadata-vec", DataType::Boolean, true).with_metadata(
[("k2".to_string(), "v2".to_string())]
.iter()
.cloned()
.collect(),
),
Field::new("bools", DataType::Boolean, true),
Field::new("int8s", DataType::Int8, true),
Field::new("int16s", DataType::Int16, true),
Field::new("int32s", DataType::Int32, true),
Field::new("int64s", DataType::Int64, true),
Field::new("uint8s", DataType::UInt8, true),
Field::new("uint16s", DataType::UInt16, true),
Field::new("uint32s", DataType::UInt32, true),
Field::new("uint64s", DataType::UInt64, true),
Field::new("float32s", DataType::Float32, true),
Field::new("float64s", DataType::Float64, true),
Field::new("date_days", DataType::Date32, true),
Field::new("date_millis", DataType::Date64, true),
Field::new("time_secs", DataType::Time32(TimeUnit::Second), true),
Field::new("time_millis", DataType::Time32(TimeUnit::Millisecond), true),
Field::new("time_micros", DataType::Time64(TimeUnit::Microsecond), true),
Field::new("time_nanos", DataType::Time64(TimeUnit::Nanosecond), true),
Field::new("ts_secs", DataType::Timestamp(TimeUnit::Second, None), true),
Field::new(
"ts_millis",
DataType::Timestamp(TimeUnit::Millisecond, None),
true,
),
Field::new(
"ts_micros",
DataType::Timestamp(TimeUnit::Microsecond, None),
true,
),
Field::new(
"ts_nanos",
DataType::Timestamp(TimeUnit::Nanosecond, None),
true,
),
Field::new(
"ts_secs_tz",
DataType::Timestamp(TimeUnit::Second, secs_tz.clone()),
true,
),
Field::new(
"ts_millis_tz",
DataType::Timestamp(TimeUnit::Millisecond, millis_tz.clone()),
true,
),
Field::new(
"ts_micros_tz",
DataType::Timestamp(TimeUnit::Microsecond, micros_tz.clone()),
true,
),
Field::new(
"ts_nanos_tz",
DataType::Timestamp(TimeUnit::Nanosecond, nanos_tz.clone()),
true,
),
Field::new("utf8s", DataType::Utf8, true),
Field::new(
"lists",
DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))),
true,
),
Field::new(
"structs",
DataType::Struct(Fields::from(vec![
Field::new("int32s", DataType::Int32, true),
Field::new("utf8s", DataType::Utf8, true),
])),
true,
),
]);
let bools_with_metadata_map = BooleanArray::from(vec![Some(true), None, Some(false)]);
let bools_with_metadata_vec = BooleanArray::from(vec![Some(true), None, Some(false)]);
let bools = BooleanArray::from(vec![Some(true), None, Some(false)]);
let int8s = Int8Array::from(vec![Some(1), None, Some(3)]);
let int16s = Int16Array::from(vec![Some(1), None, Some(3)]);
let int32s = Int32Array::from(vec![Some(1), None, Some(3)]);
let int64s = Int64Array::from(vec![Some(1), None, Some(3)]);
let uint8s = UInt8Array::from(vec![Some(1), None, Some(3)]);
let uint16s = UInt16Array::from(vec![Some(1), None, Some(3)]);
let uint32s = UInt32Array::from(vec![Some(1), None, Some(3)]);
let uint64s = UInt64Array::from(vec![Some(1), None, Some(3)]);
let float32s = Float32Array::from(vec![Some(1.0), None, Some(3.0)]);
let float64s = Float64Array::from(vec![Some(1.0), None, Some(3.0)]);
let date_days = Date32Array::from(vec![Some(1196848), None, None]);
let date_millis = Date64Array::from(vec![
Some(167903550396207),
Some(29923997007884),
Some(30612271819236),
]);
let time_secs = Time32SecondArray::from(vec![Some(27974), Some(78592), Some(43207)]);
let time_millis =
Time32MillisecondArray::from(vec![Some(6613125), Some(74667230), Some(52260079)]);
let time_micros = Time64MicrosecondArray::from(vec![Some(62522958593), None, None]);
let time_nanos =
Time64NanosecondArray::from(vec![Some(73380123595985), None, Some(16584393546415)]);
let ts_secs = TimestampSecondArray::from(vec![None, Some(193438817552), None]);
let ts_millis =
TimestampMillisecondArray::from(vec![None, Some(38606916383008), Some(58113709376587)]);
let ts_micros = TimestampMicrosecondArray::from(vec![None, None, None]);
let ts_nanos = TimestampNanosecondArray::from(vec![None, None, Some(-6473623571954960143)]);
let ts_secs_tz = TimestampSecondArray::from(vec![None, Some(193438817552), None])
.with_timezone_opt(secs_tz);
let ts_millis_tz =
TimestampMillisecondArray::from(vec![None, Some(38606916383008), Some(58113709376587)])
.with_timezone_opt(millis_tz);
let ts_micros_tz =
TimestampMicrosecondArray::from(vec![None, None, None]).with_timezone_opt(micros_tz);
let ts_nanos_tz =
TimestampNanosecondArray::from(vec![None, None, Some(-6473623571954960143)])
.with_timezone_opt(nanos_tz);
let utf8s = StringArray::from(vec![Some("aa"), None, Some("bbb")]);
let value_data = Int32Array::from(vec![None, Some(2), None, None]);
let value_offsets = Buffer::from_slice_ref([0, 3, 4, 4]);
let list_data_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true)));
let list_data = ArrayData::builder(list_data_type)
.len(3)
.add_buffer(value_offsets)
.add_child_data(value_data.into_data())
.null_bit_buffer(Some(Buffer::from([0b00000011])))
.build()
.unwrap();
let lists = ListArray::from(list_data);
let structs_int32s = Int32Array::from(vec![None, Some(-2), None]);
let structs_utf8s = StringArray::from(vec![None, None, Some("aaaaaa")]);
let struct_data_type = DataType::Struct(Fields::from(vec![
Field::new("int32s", DataType::Int32, true),
Field::new("utf8s", DataType::Utf8, true),
]));
let struct_data = ArrayData::builder(struct_data_type)
.len(3)
.add_child_data(structs_int32s.into_data())
.add_child_data(structs_utf8s.into_data())
.null_bit_buffer(Some(Buffer::from([0b00000011])))
.build()
.unwrap();
let structs = StructArray::from(struct_data);
let record_batch = RecordBatch::try_new(
Arc::new(schema.clone()),
vec![
Arc::new(bools_with_metadata_map),
Arc::new(bools_with_metadata_vec),
Arc::new(bools),
Arc::new(int8s),
Arc::new(int16s),
Arc::new(int32s),
Arc::new(int64s),
Arc::new(uint8s),
Arc::new(uint16s),
Arc::new(uint32s),
Arc::new(uint64s),
Arc::new(float32s),
Arc::new(float64s),
Arc::new(date_days),
Arc::new(date_millis),
Arc::new(time_secs),
Arc::new(time_millis),
Arc::new(time_micros),
Arc::new(time_nanos),
Arc::new(ts_secs),
Arc::new(ts_millis),
Arc::new(ts_micros),
Arc::new(ts_nanos),
Arc::new(ts_secs_tz),
Arc::new(ts_millis_tz),
Arc::new(ts_micros_tz),
Arc::new(ts_nanos_tz),
Arc::new(utf8s),
Arc::new(lists),
Arc::new(structs),
],
)
.unwrap();
let mut file = File::open("data/integration.json").unwrap();
let mut json = String::new();
file.read_to_string(&mut json).unwrap();
let arrow_json: ArrowJson = serde_json::from_str(&json).unwrap();
// test schemas
assert!(arrow_json.schema.equals_schema(&schema));
// test record batch
assert_eq!(arrow_json.get_record_batches().unwrap()[0], record_batch);
}