in src/sagemaker_xgboost_container/data_utils.py [0:0]
def _validate_libsvm_format(file_path):
    """Run a lightweight LIBSVM sanity check on a data file.

    XGBoost expects each line to follow the LIBSVM layout:
        <label>(:<instance weight>) <index>:<value> <index>:<value> <index>:<value> ...

    Lines are scanned in order. The scan ends successfully at the first line for which
    ``_get_num_valid_libsvm_features`` reports more than one feature, and fails at the
    first line for which it reports a negative count. If the file is exhausted without
    either outcome, a warning is logged and the file is accepted.

    Note: this is intentionally not a comprehensive file check, as XGBoost will run its
    own data validation.

    :param file_path: path of the data file to check
    :raises exc.UserError: if a line is reported as invalid (negative feature count)
    """
    with open(file_path, "r", errors="ignore") as libsvm_file:
        for raw_line in libsvm_file:
            feature_count = _get_num_valid_libsvm_features(raw_line)

            if feature_count < 0:
                # A negative count is treated as an invalid line; report only a short
                # snippet of the line and the final path component of the file.
                error_msg = _get_invalid_libsvm_error_msg(
                    line_snippet=raw_line[:50],
                    file_name=file_path.split("/")[-1],
                )
                raise exc.UserError(error_msg)

            # NOTE(review): the threshold is "> 1", so a line reported with exactly one
            # feature keeps the scan going - confirm against the helper's contract.
            if feature_count > 1:
                # Stop after the first line accepted as LIBSVM with features.
                return

    # Reached only when no line above triggered a return or an error.
    short_name = file_path.split("/")[-1]
    warning_template = (
        "File {} is not an invalid LIBSVM file but has no features. Accepting simple validation."
    )
    logging.warning(warning_template.format(short_name))