in petastorm/unischema.py [0:0]
def _numpy_to_spark_mapping():
    """Returns a mapping from numpy to pyspark.sql type.

    Caches the mapping dictionary in order to avoid instantiation of multiple
    objects in each call.
    """
    # Refer to the attribute of the function we use to cache the map using a name in the variable instead of a 'dot'
    # notation to avoid copy/paste/typo mistakes
    cache_attr_name = 'cached_numpy_to_pyspark_types_map'
    if not hasattr(_numpy_to_spark_mapping, cache_attr_name):
        # Imported lazily so that merely importing this module does not require pyspark.
        import pyspark.sql.types as T
        # NOTE: np.string_ and np.unicode_ were removed in NumPy 2.0. Their canonical
        # names np.bytes_ and np.str_ are the very same objects on NumPy 1.x, so using
        # them keeps the mapping identical on old NumPy while staying importable on
        # NumPy >= 2.0. (np.str_ and np.unicode_ were aliases, so the original literal
        # also contained a duplicate key.)
        setattr(_numpy_to_spark_mapping, cache_attr_name,
                {
                    np.int8: T.ByteType(),
                    # Unsigned types map to the next wider signed Spark type,
                    # presumably because Spark SQL has no unsigned integer types.
                    np.uint8: T.ShortType(),
                    np.int16: T.ShortType(),
                    np.uint16: T.IntegerType(),
                    np.int32: T.IntegerType(),
                    np.int64: T.LongType(),
                    np.float32: T.FloatType(),
                    np.float64: T.DoubleType(),
                    np.bytes_: T.StringType(),
                    np.str_: T.StringType(),
                    np.bool_: T.BooleanType(),
                })
    return getattr(_numpy_to_spark_mapping, cache_attr_name)