fn visit_list()

in parquet/src/arrow/schema/complex.rs [388:522]


    /// Converts a LIST-annotated parquet group into a [`ParquetField`].
    ///
    /// The group must have exactly one `REPEATED` child. Three layouts of that
    /// child are distinguished:
    ///
    /// * a repeated primitive, which is itself the (required) element and is
    ///   handed to `visit_primitive`;
    /// * a repeated group that is itself the (required) element and is handed to
    ///   `visit_struct` — either because its field count is not one, or because it
    ///   is a one-field group that is not a list, has no single repeated child,
    ///   and is named `array` or `<list name>_tuple`;
    /// * otherwise, a repeated wrapper group whose only field is the element,
    ///   which is dispatched with the repetition level and definition level each
    ///   raised by one.
    ///
    /// When `context.data_type` is present it must be a `List`, `LargeList` or
    /// `FixedSizeList`; the data type of its child field is passed down as the
    /// hint for the element. In the wrapper-group layout the hint's variant also
    /// selects the list variant of the resulting arrow type (defaulting to `List`).
    ///
    /// # Errors
    ///
    /// Returns an error if `list_type` is primitive, does not have exactly one
    /// child, that child is not `REPEATED`, `list_type` is itself `REPEATED`, or
    /// the arrow type hint is not a list type. Errors produced while visiting the
    /// element are returned unchanged.
    fn visit_list(
        &mut self,
        list_type: &TypePtr,
        context: VisitorContext,
    ) -> Result<Option<ParquetField>> {
        // `get_fields` is only meaningful for groups, so reject primitives first.
        if list_type.is_primitive() {
            return Err(arrow_err!(
                "{:?} is a list type and can't be processed as primitive.",
                list_type
            ));
        }

        let children = list_type.get_fields();
        let child_count = children.len();
        if child_count != 1 {
            return Err(arrow_err!(
                "list type must have a single child, found {}",
                child_count
            ));
        }

        let repeated_field = &children[0];
        if get_repetition(repeated_field) != Repetition::REPEATED {
            return Err(arrow_err!("List child must be repeated"));
        }

        // An optional list adds a definition level of its own; a required one does not.
        let nullable = match list_type.get_basic_info().repetition() {
            Repetition::REQUIRED => false,
            Repetition::OPTIONAL => true,
            Repetition::REPEATED => return Err(arrow_err!("List type cannot be repeated")),
        };
        let list_def_level = if nullable {
            context.def_level + 1
        } else {
            context.def_level
        };

        // Child field of the arrow list type supplied as a hint, if there is one.
        let arrow_field = match &context.data_type {
            None => None,
            Some(DataType::List(f))
            | Some(DataType::LargeList(f))
            | Some(DataType::FixedSizeList(f, _)) => Some(f.as_ref()),
            Some(other) => {
                return Err(arrow_err!(
                    "incompatible arrow schema, expected list got {}",
                    other
                ))
            }
        };

        // Data type hint for the element; built on demand by whichever path is taken.
        let element_hint = || arrow_field.map(|f| f.data_type().clone());

        if repeated_field.is_primitive() {
            // The repeated field is not a group, so its type is the element type and
            // elements are required:
            //
            // required/optional group my_list (LIST) {
            //   repeated int32 element;
            // }
            //
            let element_context = VisitorContext {
                rep_level: context.rep_level,
                def_level: list_def_level,
                data_type: element_hint(),
            };

            let visited = self.visit_primitive(repeated_field, element_context)?;
            // visit_primitive will infer a non-nullable list, so apply the real nullability.
            return Ok(visited.map(|mut field| {
                field.nullable = nullable;
                field
            }));
        }

        // Decide whether the repeated group is itself the element (a struct or one-tuple).
        let items = repeated_field.get_fields();

        // A one-field group named `array` or `<list name>_tuple` is the element type,
        // but only when the group is not annotated as a list and its single child is
        // not `repeated`. Kept lazy so it is evaluated only for one-field groups.
        let is_legacy_one_tuple = || {
            let name = repeated_field.name();
            !repeated_field.is_list()
                && !repeated_field.has_single_repeated_child()
                && (name == "array" || name == format!("{}_tuple", list_type.name()))
        };

        if items.len() != 1 || is_legacy_one_tuple() {
            // A group with multiple fields, or a legacy one-tuple: the repeated group
            // is the element type and elements are required.
            let element_context = VisitorContext {
                rep_level: context.rep_level,
                def_level: list_def_level,
                data_type: element_hint(),
            };

            let visited = self.visit_struct(repeated_field, element_context)?;
            return Ok(visited.map(|mut field| {
                field.nullable = nullable;
                field
            }));
        }

        // Regular layout: the repeated group wraps exactly one element field, so the
        // element sits one repetition level and one definition level deeper.
        let item_type = &items[0];
        let rep_level = context.rep_level + 1;
        let def_level = list_def_level + 1;

        let item_context = VisitorContext {
            rep_level,
            def_level,
            data_type: element_hint(),
        };

        let item = match self.dispatch(item_type, item_context)? {
            Some(item) => item,
            None => return Ok(None),
        };

        let item_field = Arc::new(convert_field(item_type, &item, arrow_field));

        // Use the arrow type hint to choose the list variant (and thus index size).
        let arrow_type = match context.data_type {
            Some(DataType::FixedSizeList(_, len)) => DataType::FixedSizeList(item_field, len),
            Some(DataType::LargeList(_)) => DataType::LargeList(item_field),
            _ => DataType::List(item_field),
        };

        Ok(Some(ParquetField {
            rep_level,
            def_level,
            nullable,
            arrow_type,
            field_type: ParquetFieldType::Group {
                children: vec![item],
            },
        }))
    }