src/util/storage.py (29 lines of code) (raw):

"""Helpers for moving files between local disk and Google Cloud Storage."""

import glob
import os
import posixpath
import tempfile

from google.cloud import storage

import pandas as pd


def download_bucket_to_file(bucket_name, blob_path, destination_file_name):
    """Download a single blob to a local file.

    Args:
        bucket_name: Name of the GCS bucket.
        blob_path: Object name (key) of the blob inside the bucket.
        destination_file_name: Local path the blob is written to.
    """
    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(blob_path)
    blob.download_to_filename(destination_file_name)


def download_bucket_to_csv(bucket_name, blob_path):
    """Download a CSV blob and return its contents as a pandas DataFrame.

    Args:
        bucket_name: Name of the GCS bucket.
        blob_path: Object name of the CSV blob.

    Returns:
        pandas.DataFrame parsed from the downloaded CSV.
    """
    # Use a unique temp file instead of a hard-coded "temp_path.csv" so
    # concurrent calls cannot clobber each other, and clean it up afterwards
    # instead of leaving it behind in the working directory.
    fd, dest = tempfile.mkstemp(suffix=".csv")
    os.close(fd)  # mkstemp returns an open descriptor; we only need the path.
    try:
        download_bucket_to_file(bucket_name, blob_path, dest)
        return pd.read_csv(dest)
    finally:
        os.remove(dest)


def upload_directory(dir_path: str, bucket_name: str, destination_path: str, depth=0):
    """Upload the files in *dir_path* to gs://<bucket_name>/<destination_path>.

    Recurses into subdirectories up to *depth* levels deep; folders beyond
    that are skipped with a message (matching the original behavior, the
    default depth of 0 uploads only the top-level files).

    Args:
        dir_path: Local directory to scan.
        bucket_name: Name of the destination GCS bucket.
        destination_path: Object-name prefix inside the bucket.
        depth: How many levels of subdirectories to recurse into.
    """
    print(f"Searching {dir_path}")
    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)
    for local_file in glob.glob(f"{dir_path}/**"):
        base_name = os.path.basename(local_file)
        if os.path.isfile(local_file):
            # GCS object names always use "/" separators; os.path.join would
            # produce "\" on Windows, so join with posixpath instead.
            remote_path = posixpath.join(destination_path, base_name)
            blob = bucket.blob(remote_path)
            print(f"Uploading {local_file} to {remote_path}")
            blob.upload_from_filename(local_file)
        else:
            if depth > 0:
                upload_directory(
                    f"{dir_path}/{base_name}",
                    bucket_name,
                    f"{destination_path}/{base_name}",
                    depth - 1,
                )
            else:
                print(f"Not uploading deeper folder {local_file}")