def get_dataset()

in 6-Pipelines/config/evaluate.py [0:0]


import os

import pandas as pd


def get_dataset(dir_path, dataset_name) -> pd.DataFrame:
    # Collect every file in the directory; each one is expected to be a headerless CSV.
    files = [os.path.join(dir_path, file) for file in os.listdir(dir_path)]
    if len(files) == 0:
        raise ValueError(('There are no files in {}.\n' +
                          'This usually indicates that the channel ({}) was incorrectly specified,\n' +
                          'the data specification in S3 was incorrectly specified or the role specified\n' +
                          'does not have permission to access the data.').format(dir_path, dataset_name))
    # Read each CSV without a header row and stack them into a single DataFrame.
    raw_data = [pd.read_csv(file, header=None) for file in files]
    df = pd.concat(raw_data)
    return df
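
A minimal usage sketch, assuming the evaluation data is mounted at a local path such as
/opt/ml/processing/test under a channel named "test" (both are illustrative; the real values
come from this pipeline's processing-input configuration):

# Hypothetical paths and channel name, for illustration only.
test_df = get_dataset('/opt/ml/processing/test', 'test')

# With header=None, pandas assigns integer column labels; by convention
# column 0 often holds the target and the remaining columns the features.
y_test = test_df.iloc[:, 0]
X_test = test_df.iloc[:, 1:]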