in petastorm/unischema.py [0:0]
def _numpy_and_codec_from_arrow_type(field_type):
    """Return the numpy type (or ``Decimal``) that corresponds to a pyarrow data type.

    Despite the name, only the type is returned; no codec is produced by this function.
    For a list type the function recurses into ``field_type.value_type``, so the result
    describes a single list element rather than the list itself.

    :param field_type: A pyarrow data type object, as accepted by the ``pyarrow.types.is_*`` predicates.
    :return: A numpy scalar type, or ``Decimal`` for arrow decimal types.
    :raises ValueError: If ``field_type`` (or the innermost element type of a list) is not one of the
      types handled below.
    """
    # Function-scope import: pyarrow is loaded when this function runs, not when the module is imported.
    from pyarrow import types

    if types.is_int8(field_type):
        return np.int8
    if types.is_int16(field_type):
        return np.int16
    if types.is_int32(field_type):
        return np.int32
    if types.is_int64(field_type):
        return np.int64
    if types.is_string(field_type):
        # np.unicode_ was removed in NumPy 2.0; on Python 3 np.str_ is the very same type,
        # so this keeps working on both NumPy 1.x and 2.x.
        return np.str_
    if types.is_boolean(field_type):
        return np.bool_
    if types.is_float32(field_type):
        return np.float32
    if types.is_float64(field_type):
        return np.float64
    if types.is_decimal(field_type):
        return Decimal
    if types.is_binary(field_type) or types.is_fixed_size_binary(field_type):
        # np.string_ was removed in NumPy 2.0; np.bytes_ is the type it was an alias of.
        return np.bytes_
    if types.is_date(field_type) or types.is_timestamp(field_type):
        return np.datetime64
    if types.is_list(field_type):
        # A list is described by the type of its elements (nested lists unwrap recursively).
        return _numpy_and_codec_from_arrow_type(field_type.value_type)

    raise ValueError('Cannot auto-create unischema due to unsupported column type {}'.format(field_type))