in torchdata/datapipes/iter/util/rows2columnar.py [0:0]
def __iter__(self) -> Iterator[Dict]:
    """Yield one column-oriented mapping per batch of rows.

    Every item produced by ``self.source_datapipe`` is treated as a batch,
    i.e. an iterable of rows. Each row is either a ``dict`` or a positional
    sequence:

    * ``dict`` rows: when ``self.column_names`` is non-empty it acts as a
      filter and only those keys are collected; otherwise every key of
      the row is collected.
    * any other row: the value at position ``i`` is stored under
      ``self.column_names[i]``.

    Yields:
        A ``defaultdict(list)`` mapping each column name to the list of
        values gathered for it from the batch, in row order.

    Raises:
        KeyError: a ``dict`` row lacks one of the requested column names.
        IndexError: a positional row holds more values than there are
            column names (which includes the case of no column names).
    """
    # Loop-invariant: look the attribute up once instead of per row.
    column_names = self.column_names
    for batch in self.source_datapipe:
        columnar = defaultdict(list)
        for row in batch:
            if isinstance(row, dict):
                if column_names:
                    # column_names is used as a filter on the row's keys.
                    for name in column_names:
                        # Deliberately unguarded: a missing key raises
                        # KeyError, which is the expected behavior.
                        columnar[name].append(row[name])
                else:
                    for key, value in row.items():
                        columnar[key].append(value)
            else:
                for position, value in enumerate(row):
                    try:
                        name = column_names[position]
                    except IndexError:
                        # Same exception type as before, but say what is
                        # actually wrong instead of "index out of range".
                        raise IndexError(
                            f"row value at position {position} has no matching "
                            f"column name: {len(column_names)} column name(s) "
                            "provided; column_names must cover every value of "
                            "a non-dict row"
                        ) from None
                    columnar[name].append(value)
        yield columnar