in src/sagemaker_xgboost_container/data_utils.py [0:0]
def _get_file_mode_files_path(data_path: Union[List[str], str]) -> List[str]:
"""
:param data_path: Either directory or file
"""
# In file mode, we create a temp directory with symlink to all input files or
# directories to meet XGB's assumption that all files are in the same directory.
logging.info("File path {} of input files".format(data_path))
# Create a directory with symlinks to input files.
files_path = "/tmp/sagemaker_xgboost_input_data"
shutil.rmtree(files_path, ignore_errors=True)
os.mkdir(files_path)
if isinstance(data_path, list):
for path in data_path:
_make_symlinks_from_a_folder_with_warning(files_path, path)
else:
if not os.path.exists(data_path):
logging.info("File path {} does not exist!".format(data_path))
return None
elif os.path.isdir(data_path) or os.path.isfile(data_path):
# traverse all sub-dirs to load all training data
_make_symlinks_from_a_folder_with_warning(files_path, data_path)
else:
exc.UserError("Unknown input files path: {}".format(data_path))
return files_path