def find_xprof_gcs_path()

in dags/map_reproducibility/utils/common_utils.py [0:0]


def find_xprof_gcs_path(gcs_path):
  """
  Find the first .xplane.pb file listed under the specified GCS path.

  The path is split into a bucket name and an object prefix, the blobs under
  that prefix are listed, and the first blob whose name ends with
  ".xplane.pb" is returned as a full gs:// URI.

  NOTE(review): only the first match, in the order returned by
  `bucket.list_blobs`, is used; no dates or timestamps are compared. If
  callers rely on receiving the profile from the most recent run when several
  exist under the prefix, confirm that the listing order provides that.

  Args:
      gcs_path (str): Full GCS path in the format gs://bucket-name/folder/path/

  Returns:
      str | None: gs:// URI of the matching .xplane.pb file, or None when no
      blob under the prefix ends with ".xplane.pb".
  """
  # "bucket/folder/path/" -> ("bucket", "/", "folder/path/"); a path with no
  # "/" after the bucket yields an empty prefix, i.e. the whole bucket.
  bucket_name, _, prefix = gcs_path.removeprefix("gs://").partition("/")
  print(f"Bucket name: {bucket_name}")

  storage_client = storage.Client()
  bucket = storage_client.get_bucket(bucket_name)

  print(f"Prefix: {prefix}")
  # Consume the listing lazily: the search stops at the first match, so there
  # is no need to materialise every blob under the prefix up front.
  xplane_pb_file = next(
      (
          blob.name
          for blob in bucket.list_blobs(prefix=prefix)
          if blob.name.endswith(".xplane.pb")
      ),
      None,
  )

  if xplane_pb_file is None:
    print(f"No .xplane.pb file found in {gcs_path}")
    return None

  full_xplane_pb_file = f"gs://{bucket_name}/{xplane_pb_file}"
  print(f"Found .xplane.pb file: {full_xplane_pb_file}")
  return full_xplane_pb_file