def _download_file()

in src/preprocess.py [0:0]


    def _download_file(self, index, bucket, key):
        """Download one CSV object from S3 and load it into a DataFrame.

        The object is saved to ``{self._base_dir}/data/{index}.csv``, parsed
        with pandas, and the local copy is deleted again before returning.

        Args:
            index: Value used to name the temporary local file
                (``{index}.csv``).
            bucket: Name of the S3 bucket to download from.
            key: Key of the object inside ``bucket``.

        Returns:
            The DataFrame produced by ``pd.read_csv``. The file is read
            without a header row; columns are named
            ``feature_columns_names + [label_column]`` and typed using the
            merge of ``feature_columns_dtype`` and ``label_column_dtype``.

        Raises:
            Errors from the S3 download or from ``pd.read_csv`` are not
            caught here and propagate to the caller.
        """
        data_dir = f"{self._base_dir}/data"
        pathlib.Path(data_dir).mkdir(parents=True, exist_ok=True)

        self._logger.info("Downloading data from bucket: %s, key: %s", bucket, key)
        fn = f"{data_dir}/{index}.csv"
        s3 = boto3.resource("s3")
        s3.Bucket(bucket).download_file(key, fn)

        self._logger.debug("Reading raw input data.")
        try:
            df = pd.read_csv(
                fn,
                header=None,
                names=feature_columns_names + [label_column],
                dtype=DataProcessor.merge_two_dicts(feature_columns_dtype, label_column_dtype),
            )
        finally:
            # Always remove the downloaded copy, even when parsing fails, so
            # a bad input file does not leave stale data on disk.
            os.unlink(fn)
        return df