in smallpond/logical/dataset.py [0:0]
def merge(datasets: "List[ParquetDataSet]") -> "ParquetDataSet":
    """Combine several parquet datasets into a single ``ParquetDataSet``.

    The merged dataset is built as follows:

    * ``paths`` is the concatenation of every input's ``absolute_paths``,
      in input order, and ``root_dir`` is ``None``.
    * ``recursive`` and ``union_by_name`` are true if they are true for
      any input.
    * ``columns`` and ``generated_columns`` are taken from the first input
      only; the other inputs are not checked for agreement.
    * If at least one input has ``_resolved_row_ranges`` set, the merged
      dataset's ``_resolved_row_ranges`` is the concatenation of every
      input's ``resolved_row_ranges``, in input order.

    Args:
        datasets: Non-empty list of ``ParquetDataSet`` instances.

    Returns:
        A new ``ParquetDataSet`` covering the files of all inputs.

    Raises:
        AssertionError: If any element is not a ``ParquetDataSet``.
        IndexError: If ``datasets`` is empty.
    """
    assert all(isinstance(dataset, ParquetDataSet) for dataset in datasets)
    if not datasets:
        # An empty list passes the assert above vacuously and previously
        # failed on `datasets[0]` with a bare "list index out of range".
        # The exception type is kept as IndexError so existing callers see
        # the same failure class, now with an actionable message.
        raise IndexError("merge() requires at least one dataset, got an empty list")

    first = datasets[0]
    merged = ParquetDataSet(
        paths=[path for source in datasets for path in source.absolute_paths],
        root_dir=None,
        recursive=any(source.recursive for source in datasets),
        columns=first.columns,
        generated_columns=first.generated_columns,
        union_by_name=any(source.union_by_name for source in datasets),
    )
    # merge row ranges if any dataset has resolved row ranges
    # NOTE(review): the public `resolved_row_ranges` attribute is read here
    # (not `_resolved_row_ranges`), presumably so inputs without cached
    # ranges resolve theirs on access -- confirm against its definition.
    if any(source._resolved_row_ranges is not None for source in datasets):
        merged._resolved_row_ranges = [
            row_range
            for source in datasets
            for row_range in source.resolved_row_ranges
        ]
    return merged