in torcharrow/dtypes.py [0:0]
def __post_init__(self):
    """Validate field nullability and cache a generated NamedTuple type.

    A nullable struct requires every field dtype to be nullable as well.
    After validation, a ``typing.NamedTuple`` type with one entry per field
    is built and stored on the instance as ``_py_type``.  The generated type
    name embeds the class-level ``_py_type_id`` counter, which is incremented
    afterwards.

    Field names are sanitised into identifiers NamedTuple accepts: empty
    names become ``f_<index>``, characters outside ``[a-zA-Z0-9_]`` become
    ``_``, names starting with a digit or underscore as well as Python
    keywords get an ``f_`` prefix, and names that still collide after
    sanitising receive a numeric ``_<n>`` suffix.

    Raises:
        TypeError: if the struct is nullable but one of its fields is not.
    """
    # Local import: only needed here, keeps the file's import block untouched.
    import keyword

    if self.nullable:
        for f in self.fields:
            if not f.dtype.nullable:
                raise TypeError(
                    f"nullable structs require each field (like {f.name}) to be nullable as well."
                )

    def fix_name(name, idx):
        # Anonymous Row
        if name == "":
            return "f_" + str(idx)
        # Remove invalid characters for NamedTuple.  The substitution is
        # one-for-one, so the result is never empty here.
        name = re.sub("[^a-zA-Z0-9_]", "_", name)
        # NamedTuple rejects leading digits/underscores and Python keywords.
        if name[0].isdigit() or name[0] == "_" or keyword.iskeyword(name):
            name = "f_" + name
        return name

    # Sanitising can map distinct field names onto the same identifier
    # (e.g. "a-b" and "a_b"); NamedTuple raises on duplicates, so later
    # occurrences get a numeric suffix.  Names that do not collide are
    # left exactly as fix_name produced them.
    taken = set()
    py_fields = []
    for idx, f in enumerate(self.fields):
        base = fix_name(f.name, idx)
        candidate = base
        suffix = 0
        while candidate in taken:
            suffix += 1
            candidate = f"{base}_{suffix}"
        taken.add(candidate)
        py_fields.append((candidate, f.dtype.py_type))

    # Cache the type instance; the object.__setattr__ hack is needed due to
    # the frozen dataclass.  _py_type is not a declared dataclass field so
    # that it does not participate in the equality check.
    object.__setattr__(
        self,
        "_py_type",
        ty.NamedTuple(
            "TorchArrowGeneratedStruct_" + str(type(self)._py_type_id),
            py_fields,
        ),
    )
    type(self)._py_type_id += 1