in petastorm/transform.py [0:0]
def transform_schema(schema, transform_spec):
    """Creates a post-transform schema given a pre-transform schema and a transform_spec with mutation instructions.

    The mutations are applied in the following order:

    1. Fields named in ``transform_spec.removed_fields`` or in ``transform_spec.edit_fields`` are dropped
       from the pre-transform schema.
    2. One new field (with ``codec=None``) is appended for every entry of ``transform_spec.edit_fields``.
    3. If ``transform_spec.selected_fields`` is not None, only the fields named there are kept, ordered
       by the position of their name in ``selected_fields``.

    A warning is issued when ``removed_fields`` names fields that are not in the pre-transform schema, or
    when ``selected_fields`` names fields that are not present after steps 1 and 2. Such names are ignored.

    :param schema: A pre-transform schema
    :param transform_spec: a TransformSpec object with mutation instructions.
    :return: A post-transform schema
    """
    removed_fields = set(transform_spec.removed_fields)
    unknown_field_names = removed_fields - set(schema.fields.keys())
    if unknown_field_names:
        # Sort the names so the warning text does not depend on set iteration order.
        warnings.warn('remove_fields specified some field names that are not part of the schema. '
                      'These field names will be ignored "{}". '.format(', '.join(sorted(unknown_field_names))))

    # An edited field replaces the original field of the same name, so the original is excluded here
    # and the edited version is appended below.
    exclude_fields = {f[0] for f in transform_spec.edit_fields} | removed_fields
    fields = [v for k, v in schema.fields.items() if k not in exclude_fields]

    # Each edit_fields entry is indexed as (name, numpy_dtype, shape, nullable).
    for field_to_edit in transform_spec.edit_fields:
        edited_unischema_field = UnischemaField(name=field_to_edit[0], numpy_dtype=field_to_edit[1],
                                                shape=field_to_edit[2], codec=None, nullable=field_to_edit[3])
        fields.append(edited_unischema_field)

    selected_fields = transform_spec.selected_fields
    if selected_fields is not None:
        # Map every selected name to the position of its first occurrence. This keeps the ordering
        # identical to list.index() while making membership tests and sort keys constant-time lookups.
        position_by_name = {}
        for position, name in enumerate(selected_fields):
            position_by_name.setdefault(name, position)

        unknown_field_names = set(position_by_name) - {f.name for f in fields}
        if unknown_field_names:
            # Sort the names so the warning text does not depend on set iteration order.
            warnings.warn('selected_fields specified some field names that are not part of the schema. '
                          'These field names will be ignored "{}". '.format(', '.join(sorted(unknown_field_names))))

        fields = sorted((f for f in fields if f.name in position_by_name),
                        key=lambda f: position_by_name[f.name])

    return Unischema(schema._name + '_transformed', fields)