in app/python/process.py [0:0]
def download_raw_data():
    """
    Fetch the raw data object from Cloud Storage into a fresh temporary file.

    The object named by RAW_DATA_FILE is read from the bucket named by
    RAW_DATA_BUCKET and written to ``raw_data.csv`` inside a newly created
    temporary directory. This function does not remove that directory.

    Returns:
        Path of the local file holding the downloaded data.

    Raises:
        ValueError: if RAW_DATA_BUCKET or PROCESSED_DATA_BUCKET is None.
    """
    logging.info(" download_raw_data: start downloading data")

    # Both bucket settings must be present before any work is done; the raw
    # bucket is checked first so its error takes precedence.
    required_settings = (
        ("RAW_DATA_BUCKET", RAW_DATA_BUCKET),
        ("PROCESSED_DATA_BUCKET", PROCESSED_DATA_BUCKET),
    )
    for setting_name, setting_value in required_settings:
        if setting_value is None:
            raise ValueError(f"{setting_name} required")

    # Each call gets its own scratch directory to hold the download.
    scratch_dir = tempfile.mkdtemp()
    local_path = scratch_dir + "/raw_data.csv"

    logging.info(f" download_raw_data: processing from {RAW_DATA_BUCKET}")

    client = google.cloud.storage.Client()
    source_bucket = client.get_bucket(RAW_DATA_BUCKET)
    source_blob = source_bucket.blob(RAW_DATA_FILE)
    source_blob.download_to_filename(local_path)

    logging.info(f" download_raw_data: downloaded data to {local_path}")
    return local_path