def get_size()

in src/sagemaker_xgboost_container/data_utils.py [0:0]


def get_size(data_path, is_pipe=False):
    """Compute the size in bytes of the data located at data_path.

    :param data_path: Path to a single file, or to a directory that is walked recursively
    :param is_pipe: Boolean to indicate if data is being read in pipe mode
    :return: 1 if is_pipe is set and "<data_path>_0" exists; 0 if data_path does not exist;
             the file size if data_path is a file; otherwise the summed size of every file
             found under the directory
    :raises exc.UserError: if a file whose name starts with "." is found while walking a directory
    """
    # In pipe mode only the "<data_path>_0" path is probed; its presence is
    # reported as a nominal size of 1 rather than measured.
    pipe_found = is_pipe and os.path.exists(f"{data_path}_0")
    if pipe_found:
        logging.info(f"Pipe path {data_path} found.")
        return 1

    if not os.path.exists(data_path):
        logging.info(f"Path {data_path} does not exist!")
        return 0

    # A single file is measured directly; the hidden-file check below is
    # applied only to files discovered while walking a directory.
    if os.path.isfile(data_path):
        return os.path.getsize(data_path)

    size_in_bytes = 0
    for parent_dir, _subdirs, file_names in os.walk(data_path):
        for name in file_names:
            if name.startswith("."):
                raise exc.UserError("Hidden file found in the data path! Remove that before training.")
            size_in_bytes += os.path.getsize(os.path.join(parent_dir, name))
    return size_in_bytes