in 6-Pipelines/config/evaluate.py [0:0]
def get_dataset(dir_path: str, dataset_name: str) -> pd.DataFrame:
    """Load every file in *dir_path* as a header-less CSV and concatenate them.

    Args:
        dir_path: Directory whose entries are all read with ``pd.read_csv``.
        dataset_name: Name of the channel the directory belongs to; used
            only to make the error message more helpful.

    Returns:
        A single DataFrame with the rows of all files stacked in file-name
        order. Columns are integer-labelled (``header=None``) and each
        file's own row index is kept as-is (no re-indexing on concat).

    Raises:
        ValueError: If *dir_path* contains no entries.
    """
    # os.listdir returns entries in arbitrary order; sort so the row order
    # of the concatenated frame is reproducible across runs and platforms.
    files = [os.path.join(dir_path, name) for name in sorted(os.listdir(dir_path))]
    if not files:
        # Report the directory that was searched (not the empty file list)
        # so the message actually tells the user where the data was expected.
        raise ValueError(('There are no files in {}.\n' +
                          'This usually indicates that the channel ({}) was incorrectly specified,\n' +
                          'the data specification in S3 was incorrectly specified or the role specified\n' +
                          'does not have permission to access the data.').format(dir_path, dataset_name))
    raw_data = [pd.read_csv(file, header=None) for file in files]
    return pd.concat(raw_data)