in training/NeuralPatternMatchingTraining.py [0:0]
import numpy as np
import torch
from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import StepLR

# Model classes, custom_collate, trainLF and SingleRunWeightedRandomSampler
# are project-local and assumed to be imported from elsewhere in the package.


def bagging(model_string, M, train_set, lr=1e-4, l1_coeff=1e-2, l2=1e-1, max_epochs=50, no_prototypes=3,
            gating_param=100, batch_size=32, embedding_dim=768, dim_target=2, highlights_pow_base=np.exp(1),
            save_path=None, device='cpu'):
    """Train an ensemble of M labeling functions (LFs) via bagging and return them as a list."""
    # Resolve the concrete LF class from its string identifier
    if model_string == 'NeuralPM':
        model_class = NeuralPM
    elif model_string == 'NeuralPMSpouse':
        model_class = NeuralPMSpouse
    elif model_string == 'NeuralPMwoHighlights':  # Ablation #1
        model_class = NeuralPMwoHighlights
    elif model_string == 'NeuralPMSpousewoHighlights':  # Ablation #1
        model_class = NeuralPMSpousewoHighlights
    elif model_string == 'NeuralPMNoLogic':
        model_class = NeuralPMNoLogic
    elif model_string == 'NeuralPMOnlyCosine':
        model_class = NeuralPMOnlyCosine
    elif model_string == 'NeuralPMSpouseNoLogic':
        model_class = NeuralPMSpouseNoLogic
    elif model_string == 'NeuralPMSpouseOnlyCosine':
        model_class = NeuralPMSpouseOnlyCosine
    else:
        raise ValueError(f"Unknown model_string: {model_string!r}")
    # Number of training samples
    N = len(train_set)
    models = []
    # Initialize sample weights uniformly. They are not modified inside this
    # loop, so every iteration samples from the same distribution unless the
    # weights are updated elsewhere.
    weights = torch.ones(N)
    for i in range(M):
        # Normalize the weights into sampling probabilities
        p = weights / torch.sum(weights)
        # Give zero-weight samples a small chance of being drawn
        p[p == 0] = 1 / N
        if i > 1:
            # Later LFs train on a weighted bootstrap sample of the training
            # set, drawn with replacement
            sampler = SingleRunWeightedRandomSampler(weights=p, num_samples=N, replacement=True)
            train_loader = DataLoader(train_set, batch_size=batch_size, collate_fn=custom_collate,
                                      sampler=sampler, pin_memory=False, num_workers=0)
        else:
            # The first two LFs (i = 0 and i = 1) see all examples through a
            # plain shuffled loader
            train_loader = DataLoader(train_set, batch_size=batch_size, collate_fn=custom_collate,
                                      shuffle=True, pin_memory=False, num_workers=0)
        # Instantiate the LF
        model = model_class(embedding_dim, dim_target, num_prototypes=no_prototypes, gating_param=gating_param,
                            highlights_pow_base=highlights_pow_base)
        # L2 regularization is applied through Adam's weight_decay
        optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=l2)
        # Decay the learning rate by a factor of gamma every 100 scheduler steps
        scheduler = StepLR(optimizer, step_size=100, gamma=0.9)
        # Move the model and its non-parameter tensors to the target device
        model.to(device)
        model.gating_param = model.gating_param.to(device)
        model.a_or = model.a_or.to(device)
        model.two = model.two.to(device)
        # Train the new LF; the L1 penalty is weighted by l1_coeff
        epochs = trainLF(train_loader, model, l1_coeff, optimizer, scheduler, max_epochs)
        # Append the trained model to the ensemble
        models.append(model)
    return models
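

# ---------------------------------------------------------------------------
# Illustrative sketch, not part of the original implementation.
# SingleRunWeightedRandomSampler is defined elsewhere in this repository; as
# a rough mental model only, a sampler drawing a single weighted bootstrap
# could look like the hypothetical class below (assumption: one multinomial
# draw with replacement, mirroring torch's WeightedRandomSampler).
from torch.utils.data import Sampler


class _WeightedBootstrapSamplerSketch(Sampler):
    """Hypothetical stand-in: yields num_samples dataset indices drawn with
    replacement according to the given per-sample weights."""

    def __init__(self, weights, num_samples):
        self.weights = torch.as_tensor(weights, dtype=torch.double)
        self.num_samples = num_samples

    def __iter__(self):
        # A single weighted draw over the whole dataset
        idx = torch.multinomial(self.weights, self.num_samples, replacement=True)
        return iter(idx.tolist())

    def __len__(self):
        return self.num_samples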
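

# Minimal usage sketch for bagging(), assuming `train_set` is a Dataset whose
# items custom_collate can batch; the hyperparameter values are illustrative:
#
#   models = bagging('NeuralPM', M=5, train_set=train_set,
#                    device='cuda' if torch.cuda.is_available() else 'cpu')
#   # The M trained LFs can then be ensembled, e.g. by averaging outputs.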