in src/sagemaker_xgboost_container/data_utils.py [0:0]
def _get_csv_dmatrix_file_mode(files_path, csv_weights):
    """Get Data Matrix from CSV data in file mode.

    Infer the delimiter of data from first line of first data file.

    :param files_path: File path where CSV formatted training data resides, either directory or file
    :param csv_weights: 1 if instance weights are in second column of CSV data; else 0
    :return: xgb.DMatrix
    :raises exc.UserError: if ``files_path`` is a directory containing no regular files,
        or if XGBoost fails to load the CSV data
    """
    if os.path.isfile(files_path):
        # Open the file path as given. Joining it onto itself only works when the
        # path is absolute (os.path.join drops earlier components in that case);
        # a relative path would be duplicated into a non-existent location.
        sample_file_path = files_path
    else:
        # Take the first regular file in os.listdir order; sub-directories are skipped.
        candidate_paths = (os.path.join(files_path, name) for name in os.listdir(files_path))
        sample_file_path = next((path for path in candidate_paths if os.path.isfile(path)), None)
        if sample_file_path is None:
            # Surface an empty channel as a user-facing error rather than an IndexError.
            raise exc.UserError("No data files found in directory: {}".format(files_path))

    # Only the first line is needed to infer the delimiter.
    with open(sample_file_path) as read_file:
        sample_csv_line = read_file.readline()
    delimiter = _get_csv_delimiter(sample_csv_line)

    # The URI points at files_path itself (file or directory), not at the sampled file.
    data_uri = "{}?format=csv&label_column=0&delimiter={}".format(files_path, delimiter)
    if csv_weights == 1:
        data_uri += "&weight_column=1"

    try:
        dmatrix = xgb.DMatrix(data_uri)
    except Exception as e:
        # Keep the original exception chained so the root cause stays in the traceback.
        raise exc.UserError("Failed to load csv data with exception:\n{}".format(e)) from e
    return dmatrix