in src/datasets/utils/py_utils.py [0:0]
def convert_file_size_to_int(size: Union[int, str]) -> int:
    """
    Converts a size expressed as a string with digits and a unit (like `"50MB"`) to an integer (in bytes).

    Binary units (`KiB`, `MiB`, `GiB`, `TiB`, `PiB`) use powers of 1024 and decimal units
    (`KB`, `MB`, `GB`, `TB`, `PB`) use powers of 1000. Unit matching is case-insensitive, except
    that a decimal unit ending in a lowercase `b` (like `"8Kb"`) is read as bits and divided by 8
    (integer division).

    Args:
        size (`int` or `str`): The size to convert. Will be directly returned if an `int`.

    Returns:
        `int`: The size in bytes.

    Raises:
        `ValueError`: If `size` is a string that does not end with a supported unit, or whose
            numeric part is not a valid integer.

    Example:
    ```py
    >>> convert_file_size_to_int("1MiB")
    1048576
    ```
    """
    if isinstance(size, int):
        return size

    binary_units = {"PIB": 2**50, "TIB": 2**40, "GIB": 2**30, "MIB": 2**20, "KIB": 2**10}
    decimal_units = {"PB": 10**15, "TB": 10**12, "GB": 10**9, "MB": 10**6, "KB": 10**3}
    error_message = f"`size={size}` is not in a valid format. Use an integer followed by the unit, e.g., '5GB'."

    normalized = size.upper()
    for suffix, multiplier in {**binary_units, **decimal_units}.items():
        if not normalized.endswith(suffix):
            continue
        try:
            quantity = int(size[: -len(suffix)])
        except ValueError as err:
            # Report a malformed number the same way as an unknown unit instead of
            # leaking the bare "invalid literal for int()" message.
            raise ValueError(error_message) from err
        total = quantity * multiplier
        # Only decimal units distinguish bits (lowercase trailing "b") from bytes.
        if suffix in decimal_units and size.endswith("b"):
            return total // 8
        return total

    raise ValueError(error_message)