def compute()

in training/NeuralPatternMatchingTraining.py [0:0]
168 lines of code
32 McCabe index (conditional complexity)

def compute(model_string, dataset_string, highlighted_set_path, non_highlighted_set_path, validation_set_path, test_set_path, no_Lfs, train_size, boostrap_split, train_split, validation_split, unlabelled_split,
            results_folder, score_to_optimize, dim_target=2, rationale_noise=0):

    if dataset_string == 'NREHatespeechDataset':
        dataset_class = NREHatespeechDataset
    elif dataset_string == 'NRESpouseDataset':
        dataset_class = NRESpouseDataset
    elif dataset_string == 'NREMovieReviewDataset':
        dataset_class = NREMovieReviewDataset
    else:
        raise

    highlighted_set = dataset_class(highlighted_set_path, rationale_noise=rationale_noise)
    non_highlighted_set = dataset_class(non_highlighted_set_path, rationale_noise=rationale_noise)

    test_set = dataset_class(test_set_path)

    all_train_dataset = ConcatDataset((highlighted_set, non_highlighted_set))

    train_set = Subset(all_train_dataset, train_split)

    if validation_set_path is not None:
        validation_set = dataset_class(validation_set_path)
        # Validation split is not interesting if we have an explicit validation set
        unlabelled_set = Subset(all_train_dataset, validation_split + unlabelled_split)
    else:
        validation_set = Subset(all_train_dataset, validation_split)
        unlabelled_set = Subset(all_train_dataset, unlabelled_split)

    best_vl_scores = {'accuracy': 0.,
                      'precision': 0.,
                      'recall': 0.,
                      'f1': 0.}

    te_scores = {
        'best_params': {},
        'best_vl_scores': {},
        'test_scores': {'accuracy': 0,
                        'precision': 0,
                        'recall': 0,
                        'f1': 0}
    }

    if 'fasttext' in highlighted_set_path:
        embedding_dim = 300
    else:
        embedding_dim = 768

    for hpb in [float(np.exp(1)), 5, 10]:
        for lr in [1e-2]:
            for l1 in [1e-2, 1e-3]:
                for l2 in [1e-3, 1e-4]:
                    for no_prototypes in [5, 10]:
                        for num_epochs in [500]:
                            for gating_param in [10, 100]:


                                models = bagging(model_string, no_Lfs, train_set, \
                                                 lr=lr, l1_coeff=l1, l2=l2, max_epochs=num_epochs, \
                                                 no_prototypes=no_prototypes, embedding_dim=embedding_dim,
                                                 gating_param=gating_param, batch_size=32,
                                                 save_path=None, highlights_pow_base=hpb)

                                # Compute prediction for each NRE
                                for dataset_type, dataset in [('train', train_set), ('validation', validation_set),
                                                              ('test', test_set), ('unlabelled', unlabelled_set)]:

                                    if dataset_type == 'unlabelled' and no_Lfs == 1:
                                        continue

                                    # IMPORTANT: SHUFFLE MUST STAY FALSE (SEE TARGETS LATER)
                                    loader = DataLoader(dataset, batch_size=256, collate_fn=custom_collate, shuffle=False,
                                                        num_workers=0)
                                    predictions, _ = compute_predictions_for_DP(models, loader,
                                                                                save_path=Path(results_folder,
                                                                                               'stored_results',
                                                                                               f'predictions_{train_size}_size_{dataset_type}_{no_Lfs}_rules_bootstrap_{boostrap_split}.torch'))

                                # Compute and store targets on different files
                                for dataset, dataset_type in [(train_set, 'train'), (validation_set, 'validation'),
                                                              (test_set, 'test')]:

                                    dataset_loader = DataLoader(dataset, batch_size=256, collate_fn=custom_collate,
                                                                shuffle=False,
                                                                num_workers=2)

                                    all_targets = None
                                    for _, _, _, targets, _ in dataset_loader:
                                        targets, _ = targets

                                        if all_targets is None:
                                            all_targets = targets
                                        else:
                                            all_targets = torch.cat((all_targets, targets), dim=0)

                                    if not os.path.exists(Path(results_folder, 'stored_results')):
                                        os.makedirs(Path(results_folder, 'stored_results'))

                                    torch.save(all_targets,
                                               Path(results_folder, 'stored_results', f'all_targets_{dataset_type}_{train_size}_size_{no_Lfs}_rules_bootstrap_{boostrap_split}.torch'))

                                all_targets_valid = torch.load(
                                    Path(results_folder, 'stored_results', f'all_targets_validation_{train_size}_size_{no_Lfs}_rules_bootstrap_{boostrap_split}.torch'))
                                all_targets_test = torch.load(
                                    Path(results_folder, 'stored_results', f'all_targets_test_{train_size}_size_{no_Lfs}_rules_bootstrap_{boostrap_split}.torch'))

                                # For Data Programming
                                all_targets_valid_score = np.copy(all_targets_valid)  # It will be used to compute scores
                                all_targets_valid[all_targets_valid == 0] = 2
                                targets_valid = all_targets_valid.numpy()

                                all_targets_test_score = np.copy(all_targets_test)  # It will be used to compute scores
                                all_targets_test[all_targets_test == 0] = 2

                                train_predictions = torch.load(Path(results_folder, 'stored_results',
                                                                    f'predictions_{train_size}_size_train_{no_Lfs}_rules_bootstrap_{boostrap_split}.torch'),
                                                               map_location='cpu')

                                if no_Lfs != 1:
                                    unlabelled_predictions = torch.load(Path(results_folder, 'stored_results',
                                                                         f'predictions_{train_size}_size_unlabelled_{no_Lfs}_rules_bootstrap_{boostrap_split}.torch'),
                                                                    map_location='cpu')

                                valid_predictions = torch.load(Path(results_folder, 'stored_results',
                                                                    f'predictions_{train_size}_size_validation_{no_Lfs}_rules_bootstrap_{boostrap_split}.torch'),
                                                               map_location='cpu')

                                test_predictions = torch.load(Path(results_folder, 'stored_results',
                                                                   f'predictions_{train_size}_size_test_{no_Lfs}_rules_bootstrap_{boostrap_split}.torch'),
                                                              map_location='cpu')

                                train_predictions = train_predictions.cpu().reshape(-1, dim_target)

                                if no_Lfs != 1:
                                    unlabelled_predictions = unlabelled_predictions.cpu().reshape(-1, dim_target)

                                valid_predictions = valid_predictions.cpu().reshape(-1, dim_target)
                                test_predictions = test_predictions.cpu().reshape(-1, dim_target)

                                for threshold in [0.01, 0.05]:

                                    Ls_train = process_outputs(train_predictions, no_Lfs, threshold)
                                    if no_Lfs != 1:
                                        Ls_unlabelled = process_outputs(unlabelled_predictions, no_Lfs, threshold)
                                    Ls_valid = process_outputs(valid_predictions, no_Lfs, threshold)
                                    Ls_test = process_outputs(test_predictions, no_Lfs, threshold)

                                    if no_Lfs != 1:
                                        # Concatenate train and "unlabelled" data
                                        Ls_dataset = np.concatenate((Ls_train, Ls_unlabelled), axis=0)

                                        search_space = {
                                            'n_epochs': [100, 500],
                                            'lr': {'range': [0.01, 0.001], 'scale': 'log'},
                                            'show_plots': True,
                                        }

                                        tuner = RandomSearchTuner(LabelModelNoSeed)  # , seed=123)

                                        # ------------ DANGER ZONE: be careful here! ------------ #

                                        # Train on train+unlabelled because it is unsupervised (exploit unlabelled data), and "optimize" on
                                        # small validation set

                                        label_aggregator = tuner.search(
                                            search_space,
                                            train_args=[Ls_dataset],
                                            X_dev=Ls_valid, Y_dev=targets_valid.squeeze(),
                                            max_search=10, verbose=False, metric=score_to_optimize,
                                            shuffle=False
                                            # Leave it False, ow gen_splits generates different splits compared to linear baseline
                                        )

                                        Y_vl = label_aggregator.predict(Ls_valid)
                                        Y_test = label_aggregator.predict(Ls_test)

                                    # ------------ END OF DANGER ZONE ------------ #

                                    else:
                                        Y_vl = Ls_valid[:, 0]
                                        Y_test = Ls_test[:, 0]

                                    Y_vl[Y_vl == 2] = 0
                                    Y_test[Y_test == 2] = 0

                                    vl_pr = precision_score(all_targets_valid_score, Y_vl) * 100
                                    vl_rec = recall_score(all_targets_valid_score, Y_vl) * 100
                                    vl_acc = accuracy_score(all_targets_valid_score, Y_vl) * 100
                                    vl_f1 = f1_score(all_targets_valid_score, Y_vl) * 100

                                    te_pr = precision_score(all_targets_test_score, Y_test) * 100
                                    te_rec = recall_score(all_targets_test_score, Y_test) * 100
                                    te_acc = accuracy_score(all_targets_test_score, Y_test) * 100
                                    te_f1 = f1_score(all_targets_test_score, Y_test) * 100

                                    vl_scores = {'accuracy': float(vl_acc),
                                                 'precision': float(vl_pr),
                                                 'recall': float(vl_rec),
                                                 'f1': float(vl_f1)
                                                 }

                                    if vl_scores[score_to_optimize] > best_vl_scores[score_to_optimize]:
                                        best_vl_scores = deepcopy(vl_scores)
                                        best_params = deepcopy(
                                            {'learning_rate': lr, 'l1': l1, 'l2': l2,
                                             'train_split': train_split,
                                             'validation_split': validation_split,
                                             'no_prototypes': no_prototypes,
                                             'gating_param': gating_param,
                                             'threshold': threshold,
                                             'error_multiplier': hpb,
                                             'epochs': num_epochs})

                                        te_scores['best_params'] = best_params
                                        te_scores['best_vl_scores'] = best_vl_scores
                                        te_scores['test_scores'] = {'accuracy': float(te_acc),
                                                                    'precision': float(te_pr),
                                                                    'recall': float(te_rec),
                                                                    'f1': float(te_f1)}

    print(f'Best VL scores found is {best_vl_scores}')
    print(f'Best TE scores found is {te_scores["test_scores"]}')
    print(f'End of model assessment for train size {train_size} and {no_Lfs} rules, test results are {te_scores}')

    if not os.path.exists(results_folder):
        os.makedirs(results_folder)

    with open(
            Path(results_folder, f'NRE_size_{train_size}_rules_{no_Lfs}_test_results_bootstrap_{boostrap_split}.json'), 'w') as f:
        json.dump(te_scores, f)