in dags/map_reproducibility/utils/common_utils.py [0:0]
def find_xprof_gcs_path(gcs_path: str):
    """Return the GCS URI of the first .xplane.pb object under a GCS path.

    The path is split into a bucket name and an object prefix, the objects
    under that prefix are listed, and the first one whose name ends with
    ".xplane.pb" is returned as a full "gs://" URI.

    NOTE(review): an earlier description of this function said it picks the
    file from the "latest date blob". No date or recency comparison is
    performed here -- the result is simply the first match in the order the
    bucket listing yields objects. Confirm whether callers rely on that
    order before changing the selection rule.

    Args:
        gcs_path (str): Full GCS path in the format
            gs://bucket-name/folder/path/. A path with no "/" after the
            bucket name is treated as having an empty prefix.

    Returns:
        str | None: "gs://<bucket>/<object name>" for the first matching
        object, or None when no object under the prefix ends with
        ".xplane.pb".
    """
    # Everything before the first "/" is the bucket; the remainder (possibly
    # empty) is the object prefix.
    bucket_name, _, prefix = gcs_path.removeprefix("gs://").partition("/")
    print(f"Bucket name: {bucket_name}")

    storage_client = storage.Client()
    bucket = storage_client.get_bucket(bucket_name)
    print(f"Prefix: {prefix}")

    # Consume the listing lazily: stopping at the first match avoids pulling
    # the remaining result pages for prefixes that hold many objects.
    xplane_pb_file = next(
        (
            blob.name
            for blob in bucket.list_blobs(prefix=prefix)
            if blob.name.endswith(".xplane.pb")
        ),
        None,
    )

    if xplane_pb_file is None:
        print(f"No .xplane.pb file found in {gcs_path}")
        return None

    full_xplane_pb_file = f"gs://{bucket_name}/{xplane_pb_file}"
    print(f"Found .xplane.pb file: {full_xplane_pb_file}")
    return full_xplane_pb_file