def from_arrow_schema()

in petastorm/unischema.py [0:0]


    def from_arrow_schema(cls, parquet_dataset, omit_unsupported_fields=False):
        """
        Infer a unischema object from the schema of a parquet dataset. This is useful for datasets of only
        scalars which need no special encoding/decoding.

        The arrow schema is read from the parquet metadata of the first piece of ``parquet_dataset``. The
        resulting unischema lists the dataset partition columns first, followed by the columns of the arrow
        schema.

        We do not set codec field in the generated fields since all parquet fields are out-of-the-box supported
        by pyarrow and we do not need perform any custom decoding.

        Handling of columns that can not be represented:

        - A list column whose elements are lists or structs is always skipped with a warning.
        - Any other column type rejected by the arrow-to-numpy type conversion raises ``ValueError``, unless
          ``omit_unsupported_fields`` is ``True``, in which case the column is skipped with a warning.

        :param parquet_dataset: A parquet dataset object exposing ``pieces``, ``fs`` and ``partitions``
          attributes.
        :param omit_unsupported_fields: :class:`Boolean`. If ``True``, columns of an unsupported type are
          skipped with a warning instead of raising an exception.
        :return: A :class:`Unischema` object named ``'inferred_schema'``.
        :raises RuntimeError: If a partition column is neither of a string type nor int64.
        :raises ValueError: If a column has an unsupported type and ``omit_unsupported_fields`` is ``False``.
        """
        meta = compat_get_metadata(parquet_dataset.pieces[0], parquet_dataset.fs.open)
        arrow_schema = meta.schema.to_arrow_schema()
        unischema_fields = []

        # ``partitions`` may be None when the dataset carries no partition information; treat that the same
        # as an empty set of partitions instead of failing with a TypeError while iterating.
        partitions = parquet_dataset.partitions
        if partitions is None:
            partitions = ()

        for partition in partitions:
            partition_type = partition.dictionary.type
            # Partition values are text: arrow reports them as binary on Python 2 and as string on Python 3.
            if (pa.types.is_binary(partition_type) and six.PY2) or \
                    (pa.types.is_string(partition_type) and six.PY3):
                numpy_dtype = np.str_
            elif pa.types.is_int64(partition_type):
                numpy_dtype = np.int64
            else:
                raise RuntimeError(('Expected partition type to be one of currently supported types: string or int64. '
                                    'Got {}').format(partition_type))

            # Partition columns are scalar (shape ``()``), have no codec and are declared non-nullable.
            unischema_fields.append(UnischemaField(partition.name, numpy_dtype, (), None, False))

        for column_name in arrow_schema.names:
            arrow_field = compat_schema_field(arrow_schema, column_name)
            field_type = arrow_field.type
            field_shape = ()
            if isinstance(field_type, ListType):
                if isinstance(field_type.value_type, (ListType, pyStructType)):
                    warnings.warn('[ARROW-1644] Ignoring unsupported structure %r for field %r'
                                  % (field_type, column_name))
                    continue
                # A flat list column becomes a one dimensional field of unknown length.
                field_shape = (None,)
            try:
                np_type = _numpy_and_codec_from_arrow_type(field_type)
            except ValueError:
                if not omit_unsupported_fields:
                    raise
                warnings.warn('Column %r has an unsupported field %r. Ignoring...'
                              % (column_name, field_type))
                continue
            unischema_fields.append(UnischemaField(column_name, np_type, field_shape, None, arrow_field.nullable))
        return Unischema('inferred_schema', unischema_fields)