def _get_csv_dmatrix_file_mode()

in src/sagemaker_xgboost_container/data_utils.py [0:0]


def _get_csv_dmatrix_file_mode(files_path, csv_weights):
    """Get Data Matrix from CSV data in file mode.

    The delimiter is inferred from the first line of one data file: ``files_path``
    itself when it is a regular file, otherwise the first regular file returned by
    ``os.listdir(files_path)``.

    :param files_path: File path where CSV formatted training data resides, either directory or file
    :param csv_weights: 1 if instance weights are in second column of CSV data; else 0
    :return: xgb.DMatrix
    :raises exc.UserError: if ``files_path`` is a directory containing no regular files,
        or if xgboost fails to load the CSV data
    """
    if os.path.isfile(files_path):
        # Use the path as-is. Joining it onto itself only works for absolute
        # paths; a relative path would be duplicated ("a/b.csv/a/b.csv").
        sample_path = files_path
    else:
        entries = (os.path.join(files_path, name) for name in os.listdir(files_path))
        # Stop at the first regular file instead of listing and filtering everything.
        sample_path = next((entry for entry in entries if os.path.isfile(entry)), None)
        if sample_path is None:
            raise exc.UserError("No CSV data files found in directory: {}".format(files_path))

    with open(sample_path) as read_file:
        sample_csv_line = read_file.readline()
    delimiter = _get_csv_delimiter(sample_csv_line)

    # Column 0 is always the label; column 1 is the weight only when requested.
    uri = "{}?format=csv&label_column=0&delimiter={}".format(files_path, delimiter)
    if csv_weights == 1:
        uri += "&weight_column=1"

    try:
        dmatrix = xgb.DMatrix(uri)
    except Exception as e:
        # Surface any loader failure as a user-facing error, keeping the cause attached.
        raise exc.UserError("Failed to load csv data with exception:\n{}".format(e)) from e

    return dmatrix