def get_job_gcs_bucket_folder()

in dags/map_reproducibility/utils/common_utils.py [0:0]


def get_job_gcs_bucket_folder(job_name, bucket_name=BUCKET_NAME):
  """
  Get the GCS bucket folder for a specific job.

  Lists ``gs://<bucket_name>/maxtext/`` with ``gcloud storage ls`` and keeps
  only the entries that ``grep`` matches against ``job_name``.

  Args:
      job_name (str): The job name to search for (used as the grep pattern)
      bucket_name (str): The name of the GCS bucket

  Returns:
      str | None: The stripped output of the lookup command, i.e. the
      ``gs://`` path of the matching folder. If several folders match, the
      result holds one path per line. ``None`` is returned when ``job_name``
      is empty or when the command exits with a non-zero status (for
      example, grep found no match).
  """
  # Imported locally so this function does not depend on a file-level import.
  import shlex

  if not job_name:
    # An empty grep pattern would match every folder in the listing, so
    # report the lookup as failed instead of returning unrelated paths.
    print("Error finding bucket folder: job_name is empty")
    return None

  gcs_location = f"gs://{bucket_name}/maxtext/"
  # The command runs through a shell because of the pipe, so both
  # interpolated values are quoted to keep spaces and shell metacharacters
  # from being interpreted. "--" stops grep from reading a pattern that
  # starts with "-" as an option.
  bucket_folder_cmd = (
      f"gcloud storage ls {shlex.quote(gcs_location)}"
      f" | grep -- {shlex.quote(str(job_name))}"
  )
  print(f"bucket_folder_cmd: {bucket_folder_cmd}")

  try:
    bucket_folder = (
        subprocess.check_output(bucket_folder_cmd, shell=True).decode().strip()
    )
  except subprocess.CalledProcessError as e:
    # The pipeline's exit status is grep's: non-zero when nothing matched.
    print(f"Error finding bucket folder: {e}")
    return None

  # Build a browser link to the folder by dropping the gs:// scheme.
  bucket_folder_prefix_removed = bucket_folder.removeprefix("gs://")
  pantheon_bucket_link = (
      "https://pantheon.corp.google.com/storage/browser/"
      + bucket_folder_prefix_removed
  )
  print(f"BUCKET PANTHEON LINK: {pantheon_bucket_link}")
  return bucket_folder