def _sanitize_field_tf_types()

in petastorm/tf_utils.py [0:0]


def _sanitize_field_tf_types(sample):
    """Casts/promotes field values of types unknown to TF to types that TF can represent.

    Conversions currently implemented:
      - ``Decimal`` to string (normalized, i.e. with trailing zeros stripped)
      - ``np.datetime64`` arrays to int64, as nanoseconds since unix epoch
      - uint16 arrays to int32
      - uint32 arrays to int64
      - non-empty bytes/unicode string arrays to (nested) python lists
      - object arrays whose first element is a ``datetime.date`` to int64 nanoseconds since unix epoch

    All other values are passed through unchanged.

    :param sample: a named tuple (any object that exposes ``_asdict()`` and whose class accepts the fields
      as keyword arguments)
    :return: an object of the same type as the input with values casted to types supported by Tensorflow
    :raises RuntimeError: if any of the fields is ``None``
    """
    next_sample_dict = sample._asdict()

    # Only values of existing keys are replaced inside the loop (the set of keys never changes), hence it is
    # safe to assign into the dictionary while iterating over it.
    for k, v in next_sample_dict.items():
        if v is None:
            raise RuntimeError('Encountered "{}"=None. Tensorflow does not support None values as a tensor. '
                               'Consider filtering out these rows using a predicate.'.format(k))

        if isinstance(v, Decimal):
            # Normalizing decimals to get rid of the trailing zeros (makes testing easier).
            # NOTE: normalize() may switch to an exponent notation, e.g. Decimal('100') becomes '1E+2'.
            next_sample_dict[k] = str(v.normalize())
            continue

        if not isinstance(v, np.ndarray):
            # All remaining conversions apply to numpy arrays only.
            continue

        if np.issubdtype(v.dtype, np.datetime64):
            # Convert to nanoseconds from POSIX epoch
            since_epoch = v - np.datetime64('1970-01-01T00:00:00.0')
            next_sample_dict[k] = since_epoch.astype('timedelta64[ns]').astype(np.int64)
        elif v.dtype == np.uint16:
            next_sample_dict[k] = v.astype(np.int32)
        elif v.dtype == np.uint32:
            next_sample_dict[k] = v.astype(np.int64)
        elif v.dtype.type in (np.bytes_, np.str_):
            # np.str_ is used instead of np.unicode_: the latter alias was removed in numpy 2.0.
            # Empty string arrays are deliberately left as numpy arrays.
            if v.size != 0:
                next_sample_dict[k] = v.tolist()
        elif v.dtype.kind == 'O' and v.ndim > 0 and len(v) > 0 and isinstance(v[0], datetime.date):
            # Pyarrow 0.12.1 started returning python datetime.date when parquet column is a DateType() column.
            # Convert values in such column into nsec from epoch int64.
            # The ndim/len guards prevent an IndexError when peeking at the first element of an empty array.
            next_sample_dict[k] = _date_to_nsec_from_epoch_vectorized(v)

    # Construct object of the same type as the input
    return sample.__class__(**next_sample_dict)