in arrow-schema/src/ffi.rs [417:613]
fn try_from(c_schema: &FFI_ArrowSchema) -> Result<Self, ArrowError> {
let mut dtype = match c_schema.format() {
"n" => DataType::Null,
"b" => DataType::Boolean,
"c" => DataType::Int8,
"C" => DataType::UInt8,
"s" => DataType::Int16,
"S" => DataType::UInt16,
"i" => DataType::Int32,
"I" => DataType::UInt32,
"l" => DataType::Int64,
"L" => DataType::UInt64,
"e" => DataType::Float16,
"f" => DataType::Float32,
"g" => DataType::Float64,
"vz" => DataType::BinaryView,
"z" => DataType::Binary,
"Z" => DataType::LargeBinary,
"vu" => DataType::Utf8View,
"u" => DataType::Utf8,
"U" => DataType::LargeUtf8,
"tdD" => DataType::Date32,
"tdm" => DataType::Date64,
"tts" => DataType::Time32(TimeUnit::Second),
"ttm" => DataType::Time32(TimeUnit::Millisecond),
"ttu" => DataType::Time64(TimeUnit::Microsecond),
"ttn" => DataType::Time64(TimeUnit::Nanosecond),
"tDs" => DataType::Duration(TimeUnit::Second),
"tDm" => DataType::Duration(TimeUnit::Millisecond),
"tDu" => DataType::Duration(TimeUnit::Microsecond),
"tDn" => DataType::Duration(TimeUnit::Nanosecond),
"tiM" => DataType::Interval(IntervalUnit::YearMonth),
"tiD" => DataType::Interval(IntervalUnit::DayTime),
"tin" => DataType::Interval(IntervalUnit::MonthDayNano),
"+l" => {
let c_child = c_schema.child(0);
DataType::List(Arc::new(Field::try_from(c_child)?))
}
"+L" => {
let c_child = c_schema.child(0);
DataType::LargeList(Arc::new(Field::try_from(c_child)?))
}
"+s" => {
let fields = c_schema.children().map(Field::try_from);
DataType::Struct(fields.collect::<Result<_, ArrowError>>()?)
}
"+m" => {
let c_child = c_schema.child(0);
let map_keys_sorted = c_schema.map_keys_sorted();
DataType::Map(Arc::new(Field::try_from(c_child)?), map_keys_sorted)
}
"+r" => {
let c_run_ends = c_schema.child(0);
let c_values = c_schema.child(1);
DataType::RunEndEncoded(
Arc::new(Field::try_from(c_run_ends)?),
Arc::new(Field::try_from(c_values)?),
)
}
// Parametrized types, requiring string parse
other => {
match other.splitn(2, ':').collect::<Vec<&str>>().as_slice() {
// FixedSizeBinary type in format "w:num_bytes"
["w", num_bytes] => {
let parsed_num_bytes = num_bytes.parse::<i32>().map_err(|_| {
ArrowError::CDataInterface(
"FixedSizeBinary requires an integer parameter representing number of bytes per element".to_string())
})?;
DataType::FixedSizeBinary(parsed_num_bytes)
},
// FixedSizeList type in format "+w:num_elems"
["+w", num_elems] => {
let c_child = c_schema.child(0);
let parsed_num_elems = num_elems.parse::<i32>().map_err(|_| {
ArrowError::CDataInterface(
"The FixedSizeList type requires an integer parameter representing number of elements per list".to_string())
})?;
DataType::FixedSizeList(Arc::new(Field::try_from(c_child)?), parsed_num_elems)
},
// Decimal types in format "d:precision,scale" or "d:precision,scale,bitWidth"
["d", extra] => {
match extra.splitn(3, ',').collect::<Vec<&str>>().as_slice() {
[precision, scale] => {
let parsed_precision = precision.parse::<u8>().map_err(|_| {
ArrowError::CDataInterface(
"The decimal type requires an integer precision".to_string(),
)
})?;
let parsed_scale = scale.parse::<i8>().map_err(|_| {
ArrowError::CDataInterface(
"The decimal type requires an integer scale".to_string(),
)
})?;
DataType::Decimal128(parsed_precision, parsed_scale)
},
[precision, scale, bits] => {
let parsed_precision = precision.parse::<u8>().map_err(|_| {
ArrowError::CDataInterface(
"The decimal type requires an integer precision".to_string(),
)
})?;
let parsed_scale = scale.parse::<i8>().map_err(|_| {
ArrowError::CDataInterface(
"The decimal type requires an integer scale".to_string(),
)
})?;
match *bits {
"128" => DataType::Decimal128(parsed_precision, parsed_scale),
"256" => DataType::Decimal256(parsed_precision, parsed_scale),
_ => return Err(ArrowError::CDataInterface("Only 128- and 256- bit wide decimals are supported in the Rust implementation".to_string())),
}
}
_ => {
return Err(ArrowError::CDataInterface(format!(
"The decimal pattern \"d:{extra:?}\" is not supported in the Rust implementation"
)))
}
}
}
// DenseUnion
["+ud", extra] => {
let type_ids = extra.split(',').map(|t| t.parse::<i8>().map_err(|_| {
ArrowError::CDataInterface(
"The Union type requires an integer type id".to_string(),
)
})).collect::<Result<Vec<_>, ArrowError>>()?;
let mut fields = Vec::with_capacity(type_ids.len());
for idx in 0..c_schema.n_children {
let c_child = c_schema.child(idx as usize);
let field = Field::try_from(c_child)?;
fields.push(field);
}
if fields.len() != type_ids.len() {
return Err(ArrowError::CDataInterface(
"The Union type requires same number of fields and type ids".to_string(),
));
}
DataType::Union(UnionFields::new(type_ids, fields), UnionMode::Dense)
}
// SparseUnion
["+us", extra] => {
let type_ids = extra.split(',').map(|t| t.parse::<i8>().map_err(|_| {
ArrowError::CDataInterface(
"The Union type requires an integer type id".to_string(),
)
})).collect::<Result<Vec<_>, ArrowError>>()?;
let mut fields = Vec::with_capacity(type_ids.len());
for idx in 0..c_schema.n_children {
let c_child = c_schema.child(idx as usize);
let field = Field::try_from(c_child)?;
fields.push(field);
}
if fields.len() != type_ids.len() {
return Err(ArrowError::CDataInterface(
"The Union type requires same number of fields and type ids".to_string(),
));
}
DataType::Union(UnionFields::new(type_ids, fields), UnionMode::Sparse)
}
// Timestamps in format "tts:" and "tts:America/New_York" for no timezones and timezones resp.
["tss", ""] => DataType::Timestamp(TimeUnit::Second, None),
["tsm", ""] => DataType::Timestamp(TimeUnit::Millisecond, None),
["tsu", ""] => DataType::Timestamp(TimeUnit::Microsecond, None),
["tsn", ""] => DataType::Timestamp(TimeUnit::Nanosecond, None),
["tss", tz] => {
DataType::Timestamp(TimeUnit::Second, Some(Arc::from(*tz)))
}
["tsm", tz] => {
DataType::Timestamp(TimeUnit::Millisecond, Some(Arc::from(*tz)))
}
["tsu", tz] => {
DataType::Timestamp(TimeUnit::Microsecond, Some(Arc::from(*tz)))
}
["tsn", tz] => {
DataType::Timestamp(TimeUnit::Nanosecond, Some(Arc::from(*tz)))
}
_ => {
return Err(ArrowError::CDataInterface(format!(
"The datatype \"{other:?}\" is still not supported in Rust implementation"
)))
}
}
}
};
if let Some(dict_schema) = c_schema.dictionary() {
let value_type = Self::try_from(dict_schema)?;
dtype = DataType::Dictionary(Box::new(dtype), Box::new(value_type));
}
Ok(dtype)
}