def _get_num_valid_libsvm_features()

in src/sagemaker_xgboost_container/data_utils.py [0:0]


def _get_num_valid_libsvm_features(libsvm_line):
    """Get number of valid LIBSVM features.

    XGBoost expects the following LIBSVM format:
        <label>(:<instance weight>) <index>:<value> <index>:<value> <index>:<value> ...

    :param libsvm_line:
    :return: -1 if the line is not a valid LIBSVM line; otherwise, return number of correctly formatted features
    """
    split_line = libsvm_line.split(" ")
    num_sparse_features = 0

    if not _is_valid_libsvm_label(split_line[0]):
        logging.error("{} does not follow LIBSVM label format <label>(:<weight>).".format(split_line[0]))
        return -1

    if len(split_line) > 1:
        for idx in range(1, len(split_line)):
            if ":" not in split_line[idx]:
                return -1
            else:
                libsvm_feature_contents = split_line[1].split(":")
                if len(libsvm_feature_contents) != 2:
                    return -1
                else:
                    num_sparse_features += 1
        return num_sparse_features
    else:
        return 0