def download_pdf_gcs()

in microservices/extraction_service/src/utils/utils_functions.py [0:0]


def download_pdf_gcs(bucket_name=None, gcs_uri=None, file_to_download=None,
                     output_filename=None) -> "storage.Blob":
  """
    Function takes a path of an object/file stored in GCS bucket, builds
            a blob handle for it and, when output_filename is given,
            downloads the file to that local path

    Args:
        bucket_name (str): bucket name from where file to be downloaded.
            If omitted it is taken from gcs_uri.
        gcs_uri (str): GCS object/file path, Ex: gs://X/Y/ABC.txt
            Required when bucket_name or file_to_download is omitted.
        file_to_download (str): gcs file path excluding bucket name.
            Ex: if file is stored in X bucket under the folder Y with
            filename ABC.txt
            then file_to_download = Y/ABC.txt
            If omitted it is taken from gcs_uri.
        output_filename (str): local path to write the downloaded file to.
            When empty/None nothing is downloaded.
    Return:
        blob: the blob object created for the requested file. It is
            returned whether or not the content was downloaded.
    Raises:
        ValueError: if the bucket name or the object path can neither be
            taken from the arguments nor derived from gcs_uri.
  """
  # "gs://X/Y/ABC.txt".split("/") -> ["gs:", "", "X", "Y", "ABC.txt"]
  uri_parts = gcs_uri.split("/") if gcs_uri is not None else []
  if bucket_name is None:
    if len(uri_parts) < 3 or not uri_parts[2]:
      raise ValueError(
          "Either bucket_name or a gcs_uri of the form "
          "gs://<bucket>/<object> must be provided")
    bucket_name = uri_parts[2]
  # if file to download is not provided it can be extracted from the GCS URI
  if file_to_download is None:
    file_to_download = "/".join(uri_parts[3:])
  # fail early, before any client is created, if no object was identified
  if not file_to_download:
    raise ValueError(
        "Either file_to_download or a gcs_uri of the form "
        "gs://<bucket>/<object> must be provided")
  storage_client = storage.Client()
  bucket = storage_client.get_bucket(bucket_name)
  blob = bucket.blob(file_to_download)
  # save file, if output path provided
  if output_filename:
    with open(output_filename, "wb") as file_obj:
      blob.download_to_file(file_obj)
  return blob