fn try_from()

in arrow-schema/src/ffi.rs [417:613]


    fn try_from(c_schema: &FFI_ArrowSchema) -> Result<Self, ArrowError> {
        let mut dtype = match c_schema.format() {
            "n" => DataType::Null,
            "b" => DataType::Boolean,
            "c" => DataType::Int8,
            "C" => DataType::UInt8,
            "s" => DataType::Int16,
            "S" => DataType::UInt16,
            "i" => DataType::Int32,
            "I" => DataType::UInt32,
            "l" => DataType::Int64,
            "L" => DataType::UInt64,
            "e" => DataType::Float16,
            "f" => DataType::Float32,
            "g" => DataType::Float64,
            "vz" => DataType::BinaryView,
            "z" => DataType::Binary,
            "Z" => DataType::LargeBinary,
            "vu" => DataType::Utf8View,
            "u" => DataType::Utf8,
            "U" => DataType::LargeUtf8,
            "tdD" => DataType::Date32,
            "tdm" => DataType::Date64,
            "tts" => DataType::Time32(TimeUnit::Second),
            "ttm" => DataType::Time32(TimeUnit::Millisecond),
            "ttu" => DataType::Time64(TimeUnit::Microsecond),
            "ttn" => DataType::Time64(TimeUnit::Nanosecond),
            "tDs" => DataType::Duration(TimeUnit::Second),
            "tDm" => DataType::Duration(TimeUnit::Millisecond),
            "tDu" => DataType::Duration(TimeUnit::Microsecond),
            "tDn" => DataType::Duration(TimeUnit::Nanosecond),
            "tiM" => DataType::Interval(IntervalUnit::YearMonth),
            "tiD" => DataType::Interval(IntervalUnit::DayTime),
            "tin" => DataType::Interval(IntervalUnit::MonthDayNano),
            "+l" => {
                let c_child = c_schema.child(0);
                DataType::List(Arc::new(Field::try_from(c_child)?))
            }
            "+L" => {
                let c_child = c_schema.child(0);
                DataType::LargeList(Arc::new(Field::try_from(c_child)?))
            }
            "+s" => {
                let fields = c_schema.children().map(Field::try_from);
                DataType::Struct(fields.collect::<Result<_, ArrowError>>()?)
            }
            "+m" => {
                let c_child = c_schema.child(0);
                let map_keys_sorted = c_schema.map_keys_sorted();
                DataType::Map(Arc::new(Field::try_from(c_child)?), map_keys_sorted)
            }
            "+r" => {
                let c_run_ends = c_schema.child(0);
                let c_values = c_schema.child(1);
                DataType::RunEndEncoded(
                    Arc::new(Field::try_from(c_run_ends)?),
                    Arc::new(Field::try_from(c_values)?),
                )
            }
            // Parametrized types, requiring string parse
            other => {
                match other.splitn(2, ':').collect::<Vec<&str>>().as_slice() {
                    // FixedSizeBinary type in format "w:num_bytes"
                    ["w", num_bytes] => {
                        let parsed_num_bytes = num_bytes.parse::<i32>().map_err(|_| {
                            ArrowError::CDataInterface(
                                "FixedSizeBinary requires an integer parameter representing number of bytes per element".to_string())
                        })?;
                        DataType::FixedSizeBinary(parsed_num_bytes)
                    },
                    // FixedSizeList type in format "+w:num_elems"
                    ["+w", num_elems] => {
                        let c_child = c_schema.child(0);
                        let parsed_num_elems = num_elems.parse::<i32>().map_err(|_| {
                            ArrowError::CDataInterface(
                                "The FixedSizeList type requires an integer parameter representing number of elements per list".to_string())
                        })?;
                        DataType::FixedSizeList(Arc::new(Field::try_from(c_child)?), parsed_num_elems)
                    },
                    // Decimal types in format "d:precision,scale" or "d:precision,scale,bitWidth"
                    ["d", extra] => {
                        match extra.splitn(3, ',').collect::<Vec<&str>>().as_slice() {
                            [precision, scale] => {
                                let parsed_precision = precision.parse::<u8>().map_err(|_| {
                                    ArrowError::CDataInterface(
                                        "The decimal type requires an integer precision".to_string(),
                                    )
                                })?;
                                let parsed_scale = scale.parse::<i8>().map_err(|_| {
                                    ArrowError::CDataInterface(
                                        "The decimal type requires an integer scale".to_string(),
                                    )
                                })?;
                                DataType::Decimal128(parsed_precision, parsed_scale)
                            },
                            [precision, scale, bits] => {
                                let parsed_precision = precision.parse::<u8>().map_err(|_| {
                                    ArrowError::CDataInterface(
                                        "The decimal type requires an integer precision".to_string(),
                                    )
                                })?;
                                let parsed_scale = scale.parse::<i8>().map_err(|_| {
                                    ArrowError::CDataInterface(
                                        "The decimal type requires an integer scale".to_string(),
                                    )
                                })?;
                                match *bits {
                                    "128" => DataType::Decimal128(parsed_precision, parsed_scale),
                                    "256" => DataType::Decimal256(parsed_precision, parsed_scale),
                                    _ => return Err(ArrowError::CDataInterface("Only 128- and 256- bit wide decimals are supported in the Rust implementation".to_string())),
                                }
                            }
                            _ => {
                                return Err(ArrowError::CDataInterface(format!(
                                    "The decimal pattern \"d:{extra:?}\" is not supported in the Rust implementation"
                                )))
                            }
                        }
                    }
                    // DenseUnion
                    ["+ud", extra] => {
                        let type_ids = extra.split(',').map(|t| t.parse::<i8>().map_err(|_| {
                            ArrowError::CDataInterface(
                                "The Union type requires an integer type id".to_string(),
                            )
                        })).collect::<Result<Vec<_>, ArrowError>>()?;
                        let mut fields = Vec::with_capacity(type_ids.len());
                        for idx in 0..c_schema.n_children {
                            let c_child = c_schema.child(idx as usize);
                            let field = Field::try_from(c_child)?;
                            fields.push(field);
                        }

                        if fields.len() != type_ids.len() {
                            return Err(ArrowError::CDataInterface(
                                "The Union type requires same number of fields and type ids".to_string(),
                            ));
                        }

                        DataType::Union(UnionFields::new(type_ids, fields), UnionMode::Dense)
                    }
                    // SparseUnion
                    ["+us", extra] => {
                        let type_ids = extra.split(',').map(|t| t.parse::<i8>().map_err(|_| {
                            ArrowError::CDataInterface(
                                "The Union type requires an integer type id".to_string(),
                            )
                        })).collect::<Result<Vec<_>, ArrowError>>()?;
                        let mut fields = Vec::with_capacity(type_ids.len());
                        for idx in 0..c_schema.n_children {
                            let c_child = c_schema.child(idx as usize);
                            let field = Field::try_from(c_child)?;
                            fields.push(field);
                        }

                        if fields.len() != type_ids.len() {
                            return Err(ArrowError::CDataInterface(
                                "The Union type requires same number of fields and type ids".to_string(),
                            ));
                        }

                        DataType::Union(UnionFields::new(type_ids, fields), UnionMode::Sparse)
                    }

                    // Timestamps in format "tts:" and "tts:America/New_York" for no timezones and timezones resp.
                    ["tss", ""] => DataType::Timestamp(TimeUnit::Second, None),
                    ["tsm", ""] => DataType::Timestamp(TimeUnit::Millisecond, None),
                    ["tsu", ""] => DataType::Timestamp(TimeUnit::Microsecond, None),
                    ["tsn", ""] => DataType::Timestamp(TimeUnit::Nanosecond, None),
                    ["tss", tz] => {
                        DataType::Timestamp(TimeUnit::Second, Some(Arc::from(*tz)))
                    }
                    ["tsm", tz] => {
                        DataType::Timestamp(TimeUnit::Millisecond, Some(Arc::from(*tz)))
                    }
                    ["tsu", tz] => {
                        DataType::Timestamp(TimeUnit::Microsecond, Some(Arc::from(*tz)))
                    }
                    ["tsn", tz] => {
                        DataType::Timestamp(TimeUnit::Nanosecond, Some(Arc::from(*tz)))
                    }
                    _ => {
                        return Err(ArrowError::CDataInterface(format!(
                            "The datatype \"{other:?}\" is still not supported in Rust implementation"
                        )))
                    }
                }
            }
        };

        if let Some(dict_schema) = c_schema.dictionary() {
            let value_type = Self::try_from(dict_schema)?;
            dtype = DataType::Dictionary(Box::new(dtype), Box::new(value_type));
        }

        Ok(dtype)
    }