utils_nlp/dataset/bbc_hindi.py [140:161]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # Clamp an over-large testing ratio to 1.0 and reject a negative one.
    if test_sample_ratio > 1.0:
        test_sample_ratio = 1.0
        logging.warning("Setting the testing sample ratio to 1.0")
    elif test_sample_ratio < 0:
        # Build the message once so the log entry and the exception always agree.
        error_message = "Invalid testing sample ratio: {}".format(test_sample_ratio)
        logging.error(error_message)
        raise ValueError(error_message)

    # Down-sample only when a ratio below 1.0 was requested; the sampled rows
    # are renumbered by resetting the index.
    # NOTE(review): train_sample_ratio is presumably validated earlier in this
    # function (not visible in this excerpt) - confirm.
    if train_sample_ratio < 1.0:
        train_df = train_df.sample(frac=train_sample_ratio).reset_index(drop=True)
    if test_sample_ratio < 1.0:
        test_df = test_df.sample(frac=test_sample_ratio).reset_index(drop=True)

    # Overwrite the label column of each split with the values produced by
    # label_encoder.transform (the encoder is created outside this excerpt).
    train_labels = label_encoder.transform(train_df[label_col])
    train_df[label_col] = train_labels
    test_labels = label_encoder.transform(test_df[label_col])
    test_df[label_col] = test_labels

    processor = Processor(model_name=model_name, to_lower=to_lower, cache_dir=cache_dir)

    # Build the training dataset from the sampled, label-encoded dataframe.
    train_dataset = processor.dataset_from_dataframe(
        df=train_df, text_col=text_col, label_col=label_col, max_len=max_len,
    )
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -


utils_nlp/dataset/multinli.py [224:245]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # Clamp an over-large testing ratio to 1.0 and reject a negative one.
    if test_sample_ratio > 1.0:
        test_sample_ratio = 1.0
        logging.warning("Setting the testing sample ratio to 1.0")
    elif test_sample_ratio < 0:
        # Build the message once so the log entry and the exception always agree.
        error_message = "Invalid testing sample ratio: {}".format(test_sample_ratio)
        logging.error(error_message)
        raise ValueError(error_message)

    # Down-sample only when a ratio below 1.0 was requested; the sampled rows
    # are renumbered by resetting the index.
    # NOTE(review): train_sample_ratio is presumably validated earlier in this
    # function (not visible in this excerpt) - confirm.
    if train_sample_ratio < 1.0:
        train_df = train_df.sample(frac=train_sample_ratio).reset_index(drop=True)
    if test_sample_ratio < 1.0:
        test_df = test_df.sample(frac=test_sample_ratio).reset_index(drop=True)

    # Overwrite the label column of each split with the values produced by
    # label_encoder.transform (the encoder is created outside this excerpt).
    train_labels = label_encoder.transform(train_df[label_col])
    train_df[label_col] = train_labels
    test_labels = label_encoder.transform(test_df[label_col])
    test_df[label_col] = test_labels

    processor = Processor(model_name=model_name, to_lower=to_lower, cache_dir=cache_dir)

    # Build the training dataset from the sampled, label-encoded dataframe.
    train_dataset = processor.dataset_from_dataframe(
        df=train_df, text_col=text_col, label_col=label_col, max_len=max_len,
    )
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -