def get_content_type()

in src/sagemaker_xgboost_container/data_utils.py [0:0]


def get_content_type(content_type_cfg_val):
    """Get content type from data config.

    Assumes that training and validation data have the same content type.

    ['libsvm', 'text/libsvm ;charset=utf8', 'text/x-libsvm'] will return 'libsvm'
    ['csv', 'text/csv', 'text/csv; label_size=1'] will return 'csv'

    :param content_type_cfg_val
    :return: Parsed content type
    """
    if content_type_cfg_val is None:
        return LIBSVM
    else:
        # cgi.parse_header extracts all arguments after ';' as key-value pairs
        # e.g. cgi.parse_header('text/csv;label_size=1;charset=utf8') returns
        # the tuple ('text/csv', {'label_size': '1', 'charset': 'utf8'})
        content_type, params = cgi.parse_header(content_type_cfg_val.lower())

        if content_type in [CSV, _content_types.CSV]:
            # CSV content type allows a label_size parameter
            # that should be 1 for XGBoost
            if params and "label_size" in params and params["label_size"] != "1":
                msg = (
                    "{} is not an accepted csv ContentType. "
                    "Optional parameter label_size must be equal to 1".format(content_type_cfg_val)
                )
                raise exc.UserError(msg)
            return CSV
        elif content_type in [LIBSVM, xgb_content_types.LIBSVM, xgb_content_types.X_LIBSVM]:
            return LIBSVM
        elif content_type in [PARQUET, xgb_content_types.X_PARQUET]:
            return PARQUET
        elif content_type in [RECORDIO_PROTOBUF, xgb_content_types.X_RECORDIO_PROTOBUF]:
            return RECORDIO_PROTOBUF
        else:
            raise exc.UserError(_get_invalid_content_type_error_msg(content_type_cfg_val))