def get_gcs_file_location_with_regex()

in xlml/utils/metric.py [0:0]


def get_gcs_file_location_with_regex(file_location: str) -> str:
  """
  Find the first GCS object whose name matches a file-name regex.

  The location has the form
  `gs://<your_bucket>/<path>/<your_file_name_regex>`. Only the last path
  segment (the file name) may be a regex: the bucket name is used verbatim,
  and the directory part is passed verbatim as the listing prefix, so regex
  metacharacters there will not behave as a pattern.

  The whole object path (directory plus file-name regex) is compiled as one
  pattern and applied with `re.match`, i.e. it is anchored at the start of
  the object name but not at its end. Append `$` to the pattern if a full
  match is required.

  Args:
    file_location: File location regex in the form of
        `gs://<your_bucket>/<path>/<your_file_name_regex>`.

  Returns:
    The `gs://` location of the first listed object that fits the given
    regex, or an empty string if no object matches (a warning is logged in
    that case).
  """
  storage_client = storage.Client()

  # Split the location by hand instead of with urlparse(): urlparse treats
  # `?` and `#` as query/fragment delimiters, which would silently truncate
  # any regex using them (e.g. `.*?`, `(?i)`, `v1?`).
  _, _, bucket_and_path = file_location.partition("://")
  bucket_name, _, raw_path = bucket_and_path.partition("/")
  file_path = raw_path.strip("/")
  file_path_regex = re.compile(file_path)

  # Everything before the last "/" is the literal directory used to narrow
  # the listing; it is empty when the object sits at the bucket root.
  prefix = file_path.rpartition("/")[0]

  # Consume the listing lazily so iteration stops at the first match rather
  # than collecting every object name under the prefix first.
  blob_names = (
      blob.name
      for blob in storage_client.list_blobs(bucket_name, prefix=prefix)
  )
  first_match = next(filter(file_path_regex.match, blob_names), None)

  if first_match is None:
    logging.warning("No objects matched supplied regex: %s", file_location)
    return ""

  return f"gs://{bucket_name}/{first_match}"