dataflux_pytorch/lightning/path_utils.py (31 lines of code) (raw):

from pathlib import Path
from typing import Union, Tuple


def process_input_path(path: Union[str, Path]) -> str:
    """Return *path* as a string, restoring the ``//`` after a URL scheme.

    Args:
        path: A plain string, returned unchanged, or a ``pathlib.Path``.

    Returns:
        The string form of *path*. For a ``Path`` whose string form contains
        ``":/"``, the first occurrence is rewritten to ``"://"``; a ``Path``
        without ``":/"`` is returned as ``str(path)``.

    Raises:
        TypeError: If *path* is neither a ``str`` nor a ``pathlib.Path``.
    """
    if isinstance(path, str):
        return path
    if isinstance(path, Path):
        # When casting from Path object to string, it considers cloud URLs as
        # Network URLs and gets rid of the second slash ("gs://b" -> "gs:/b").
        text = str(path)
        # Split only at the FIRST ":/" so object names that themselves contain
        # ":/" do not break the unpacking.
        scheme, separator, rest = text.partition(":/")
        if not separator:
            # No scheme present: nothing to restore, hand back the path as-is.
            return text
        return f"{scheme}://{rest}"
    raise TypeError(
        "path argument must be of type string or pathlib.Path object")


def parse_gcs_path(path: Union[str, Path]) -> Tuple[str, str]:
    """Split a ``gs://`` or ``gcs://`` path into ``(bucket_name, prefix)``.

    Args:
        path: The path to parse, as a string or ``pathlib.Path``.

    Returns:
        A tuple of the bucket name and everything after the first ``/`` that
        follows it. The prefix is ``""`` when nothing follows the bucket name.

    Raises:
        ValueError: If *path* is falsy, does not start with ``gcs://`` or
            ``gs://``, or has an empty bucket name.
        TypeError: If *path* is neither a ``str`` nor a ``pathlib.Path``.
    """
    if not path:
        raise ValueError("Path cannot be empty")
    input_path = process_input_path(path)
    if not input_path.startswith(("gcs://", "gs://")):
        raise ValueError("Path needs to begin with gcs:// or gs://")
    # The scheme check above guarantees "//" is present, so the remainder is
    # everything after the scheme separator.
    _, _, remainder = input_path.partition("//")
    # First path component is the bucket; the rest (possibly empty) is the
    # object prefix.
    bucket_name, _, prefix = remainder.partition("/")
    if not bucket_name:
        raise ValueError("Bucket name must be non-empty")
    return bucket_name, prefix