in src/sagemaker_xgboost_container/data_utils.py [0:0]
def get_size(data_path, is_pipe=False):
    """Measure how many bytes of data live at ``data_path``.

    :param data_path: Path to a single file, or to a directory that is walked recursively
    :param is_pipe: Boolean flag telling whether the data is read in pipe mode
    :return: 1 if ``is_pipe`` is set and ``<data_path>_0`` exists, 0 if ``data_path``
        does not exist, the file size if it is a file, otherwise the summed size of
        every file found under the directory
    :raises exc.UserError: If a file whose name starts with "." is found while walking
        a directory
    """
    # In pipe mode only the presence of the "<data_path>_0" entry is checked;
    # the value 1 is a "data is present" marker, not a byte count.
    pipe_found = is_pipe and os.path.exists(f"{data_path}_0")
    if pipe_found:
        logging.info(f"Pipe path {data_path} found.")
        return 1

    if not os.path.exists(data_path):
        logging.info(f"Path {data_path} does not exist!")
        return 0

    if os.path.isfile(data_path):
        return os.path.getsize(data_path)

    # Directory: add up every file in the tree, refusing hidden files.
    byte_count = 0
    for parent_dir, _subdirs, filenames in os.walk(data_path):
        for filename in filenames:
            if filename.startswith("."):
                raise exc.UserError("Hidden file found in the data path! Remove that before training.")
            byte_count += os.path.getsize(os.path.join(parent_dir, filename))
    return byte_count