def delete_doc_from_gcs()

in components/doc-deletion/src/doc_deletion_main.py [0:0]


def delete_doc_from_gcs(storage_client: storage.Client, gcs_uri: str):
    """Delete the object identified by a ``gs://`` URI from Cloud Storage.

    A missing object is not treated as an error: if the delete call reports
    ``NotFound``, a warning is logged and the function returns normally.

    Args:
        storage_client: Client used to obtain the bucket handle.
        gcs_uri: Object location in the form ``gs://<bucket>/<object path>``.

    Raises:
        ValueError: If ``gcs_uri`` is not of that form, including when the
            bucket name or the object path is empty.
    """
    # fullmatch + non-empty groups: the whole URI must be consumed, so a
    # malformed value is rejected here instead of being silently truncated
    # (``.`` stops at a newline) or handed to the client with an empty
    # bucket/object name.
    matches = re.fullmatch(r"gs://([^/]+)/(.+)", gcs_uri)
    if not matches:
        # ValueError is a subclass of Exception, so existing callers that
        # catch Exception keep working.
        raise ValueError(f"Could not parse output GCS destination: {gcs_uri}")

    gcs_bucket, gcs_path = matches.groups()
    logger.info(f"Deleting document {gcs_path} from GCS bucket {gcs_bucket}")
    bucket = storage_client.bucket(gcs_bucket)
    try:
        bucket.blob(gcs_path).delete()
    except google.api_core.exceptions.NotFound:
        # Nothing left to delete; warn and carry on rather than failing.
        logger.warning(f"GCS Object {gcs_path} was already deleted.")