in components/doc-deletion/src/doc_deletion_main.py [0:0]
def delete_doc_from_gcs(storage_client: storage.Client, gcs_uri: str):
    """Delete the object identified by a ``gs://<bucket>/<object>`` URI.

    A missing object is not treated as an error: the ``NotFound`` raised by
    the delete call is caught and logged as a warning. Any other exception
    raised by the delete call propagates to the caller.

    Args:
        storage_client: Client used to obtain the bucket handle.
        gcs_uri: URI of the form ``gs://<bucket>/<object path>``.

    Raises:
        ValueError: If ``gcs_uri`` is not a complete ``gs://`` URI with a
            non-empty bucket and a non-empty object path. ``ValueError`` is a
            subclass of ``Exception``, so callers catching ``Exception``
            are unaffected.
    """
    # fullmatch (rather than match) forces the whole string to be consumed:
    # "." does not match a newline, so a prefix-only match would silently
    # truncate the object path at the first newline and target the wrong
    # object. The "+" quantifiers reject an empty bucket or object name.
    matches = re.fullmatch(r"gs://([^/]+)/(.+)", gcs_uri)
    if not matches:
        raise ValueError(f"Could not parse output GCS destination: {gcs_uri}")

    # The bucket is everything up to the first "/", the rest is the object.
    gcs_bucket, gcs_path = matches.groups()
    logger.info(f"Deleting document {gcs_path} from GCS bucket {gcs_bucket}")

    bucket = storage_client.bucket(gcs_bucket)
    try:
        bucket.blob(gcs_path).delete()
    except google.api_core.exceptions.NotFound:
        # The object is already gone, which is the desired end state.
        logger.warning(f"GCS Object {gcs_path} was already deleted.")