in tzrec/datasets/data_parser.py [0:0]
def dump_parsed_inputs(self, input_data: Dict[str, torch.Tensor]) -> pa.Array:
    """Dump parsed inputs into a string array for debug.

    Formats every row of the parsed batch as a
    ``"name:value | name:value ..."`` string, where multi-value features
    are joined with ``","`` and sequence steps with the feature's
    ``sequence_delim``.

    Args:
        input_data (Dict[str, torch.Tensor]): parsed feature tensors keyed
            by ``"{name}.values"`` and, for sparse/sequence features,
            ``"{name}.lengths"``.

    Returns:
        pa.Array: one formatted debug string per input row.
    """

    def _split_by_lengths(values, lengths):
        # Yield per-row slices of a flat value array described by a
        # row-lengths tensor. `tolist()` converts the lengths to Python
        # ints once, so the loop does plain int arithmetic instead of
        # accumulating 0-d tensors (which also forces device syncs for
        # CUDA lengths).
        offset = 0
        for ll in lengths.tolist():
            yield values[offset : offset + ll]
            offset += ll

    feature_rows = defaultdict(dict)
    for f in self._features:
        if f.is_sparse:
            lengths = input_data[f"{f.name}.lengths"]
            values = input_data[f"{f.name}.values"].cpu().numpy()
            # pyre-ignore [16]
            sep = f.sequence_delim if f.is_sequence else ","
            for i, cur_v in enumerate(_split_by_lengths(values, lengths)):
                feature_rows[i][f.name] = sep.join(cur_v.astype(str))
        elif f.is_sequence:
            # Dense sequence: each row slice is 2-D — join the inner
            # (embedding/value) dim with "," and steps with the delimiter.
            lengths = input_data[f"{f.name}.lengths"]
            values = input_data[f"{f.name}.values"].cpu().numpy()
            for i, cur_v in enumerate(_split_by_lengths(values, lengths)):
                feature_rows[i][f.name] = f.sequence_delim.join(
                    map(",".join, cur_v.astype(str))
                )
        else:
            # Plain dense feature: one fixed-size row per sample.
            values = input_data[f"{f.name}.values"].cpu().numpy()
            for i, cur_v in enumerate(values):
                feature_rows[i][f.name] = ",".join(cur_v.astype(str))

    # feature_rows is keyed by dense row index 0..n-1, so iterating the
    # range preserves the original batch order.
    result = [
        " | ".join(f"{k}:{v}" for k, v in feature_rows[i].items())
        for i in range(len(feature_rows))
    ]
    return pa.array(result)