def download_pdf_gcs()

in microservices/classification_service/src/utils/classification/download_pdf_gcs.py [0:0]


def download_pdf_gcs(bucket_name=None,
                     gcs_uri=None,
                     file_to_download=None,
                     output_filename='gcs.pdf') -> str:
  """
  Download an object stored in a GCS bucket to a local file.

  The object can be identified either by a full ``gcs_uri`` or by an
  explicit ``bucket_name`` / ``file_to_download`` pair. Explicitly passed
  values take precedence over the ones derived from ``gcs_uri``.

  Args:
      bucket_name (str): bucket the object is downloaded from. When
        omitted it is taken from ``gcs_uri``. Defaults to None.
      gcs_uri (str): GCS object path of the form
        ``gs://<bucket>/<object path>``. Defaults to None.
      file_to_download (str): object path inside the bucket,
        e.g. ``Y/ABC.txt``. When omitted it is taken from ``gcs_uri``.
        Defaults to None.
      output_filename (str): local path the object is written to
        (relative paths resolve against the current working directory).
        Defaults to 'gcs.pdf'.

  Returns:
      str: path of the local file the object was written to
      (i.e. ``output_filename``).

  Raises:
      ValueError: if the bucket or the object path can be determined
        neither from the explicit arguments nor from ``gcs_uri``.
  """
  if gcs_uri is not None:
    # 'gs://bucket/a/b.pdf'.split('/') -> ['gs:', '', 'bucket', 'a', 'b.pdf']
    uri_parts = gcs_uri.split('/')
    if bucket_name is None:
      bucket_name = uri_parts[2]
    # if file to download is not provided it can be extracted from the GCS URI
    if file_to_download is None:
      file_to_download = '/'.join(uri_parts[3:])

  # Fail early with a clear message, before any call to GCS is made.
  if bucket_name is None:
    raise ValueError('Either bucket_name or gcs_uri must be provided.')
  if file_to_download is None:
    raise ValueError('Either file_to_download or gcs_uri must be provided.')

  storage_client = storage.Client()
  bucket = storage_client.get_bucket(bucket_name)
  blob = bucket.blob(file_to_download)

  with open(output_filename, 'wb') as file_obj:
    blob.download_to_file(file_obj)

  return output_filename