in xlml/utils/metric.py [0:0]
def get_gcs_file_location_with_regex(file_location: str) -> str:
  """Return the GCS location of the first object matching a file name regex.

  The input has the form
  `gs://<your_bucket>/<path>/<your_file_name_regex>`. The bucket name and the
  directory part of the path must be literal: the directory (everything
  before the last `/`) is used verbatim as the listing prefix. Only the final
  path component is intended to be a regex.

  The whole path is compiled as one pattern and applied with `re.match`
  against each full object name, so the match is anchored at the start of the
  name but not at its end.

  Args:
    file_location: File location regex in the form of
      `gs://<your_bucket>/<path>/<your_file_name_regex>`.

  Returns:
    `gs://<bucket>/<object name>` for the first listed object whose name
    matches, or an empty string (after logging a warning) if none match.
  """
  storage_client = storage.Client()
  url = urlparse(file_location)
  bucket_name = url.netloc

  # urlparse ends `url.path` at the first "?" or "#" because it treats them as
  # the query / fragment delimiters. Both are ordinary regex characters
  # (e.g. `.*?`, `(abc)?`), so re-attach whatever was split off; otherwise the
  # pattern would be silently truncated.
  delimiter = re.search(r"[?#]", file_location)
  split_off_tail = file_location[delimiter.start():] if delimiter else ""
  file_path = (url.path + split_off_tail).strip("/")

  file_path_regex = re.compile(file_path)
  # Everything before the last "/" is the literal directory to list under.
  prefix = "/".join(file_path.split("/")[:-1])

  # Filter lazily so iteration stops at the first match instead of collecting
  # every object name under the prefix up front.
  blob_names = (
      blob.name
      for blob in storage_client.list_blobs(bucket_name, prefix=prefix)
  )
  first_match = next(filter(file_path_regex.match, blob_names), None)
  if first_match is None:
    logging.warning("No objects matched supplied regex: %s", file_location)
    return ""
  return f"gs://{bucket_name}/{first_match}"