in petastorm/unischema.py [0:0]
def create_schema_view(self, fields):
    """Creates a new instance of the schema using a subset of fields.

    Fields can be either UnischemaField objects or regular expression patterns.

    If one of the UnischemaField objects does not exist in this schema, an error is raised.

    The example returns a schema, with field_1 and any other field matching ``other.*$`` pattern.

    >>> SomeSchema.create_schema_view(
    >>>     [SomeSchema.field_1,
    >>>      'other.*$'])

    :param fields: A list (or any iterable) of UnischemaField objects and/or regular expressions
    :return: a new view of the original schema containing only the supplied fields
    :raises ValueError: if an element of ``fields`` is neither a string nor a UnischemaField-like
      tuple exposing a ``name`` attribute, or if a UnischemaField refers to a field name that is
      not part of this schema
    """
    # Split the fields parameter into regex pattern strings and UnischemaField objects in a single
    # pass, so that the argument is iterated only once.
    regex_patterns = []
    unischema_field_objects = []
    for field in fields:
        if isinstance(field, string_types):
            regex_patterns.append(field)
        # We can not check the type against UnischemaField directly: pickling introduces an artifact
        # where depickled fields are of type collections.UnischemaField rather than
        # petastorm.unischema.UnischemaField. UnischemaField is a tuple with a ``name`` attribute,
        # and both properties are invariant to pickling, so we check for those instead. Requiring
        # ``name`` also rejects plain tuples here, instead of failing later with an AttributeError.
        elif isinstance(field, tuple) and hasattr(field, 'name'):
            unischema_field_objects.append(field)
        else:
            raise ValueError('Elements of "fields" must be either a string (regular expressions) or '
                             'an instance of UnischemaField class.')

    # For fields that are specified as instances of UnischemaField: make sure that this schema
    # contains fields with these names.
    exact_field_names = [field.name for field in unischema_field_objects]
    unknown_field_names = set(exact_field_names) - set(self.fields.keys())
    if unknown_field_names:
        raise ValueError('field {} does not belong to the schema {}'.format(unknown_field_names, self))

    # Do not use instances of Unischema fields passed as an argument as it could contain codec/shape
    # info that is different from the one stored in this schema object
    exact_fields = [self._fields[name] for name in exact_field_names]

    # Explicitly requested fields come first, followed by whatever the regex patterns resolve to.
    view_fields = exact_fields + match_unischema_fields(self, regex_patterns)

    return Unischema('{}_view'.format(self._name), view_fields)