def evaluate()

in infersent_comp/train_nli.py [0:0]


def evaluate(epoch, eval_type='valid', final_eval=False):
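    # switch the model to evaluation mode so dropout / batch-norm layers
    # behave deterministically during scoring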
    nli_net.eval()
    correct = 0.
    global val_acc_best, lr, stop_training, adam_stop
    test_prediction = []

    if eval_type == 'valid':
        print('\nVALIDATION : Epoch {0}'.format(epoch))

    s1 = valid['s1'] if eval_type == 'valid' else test['s1']
    s2 = valid['s2'] if eval_type == 'valid' else test['s2']
    target = valid['label'] if eval_type == 'valid' else test['label']

    for i in range(0, len(s1), params.batch_size):
        # prepare batch: get_batch is expected to return a padded
        # (max_len, batch_size, word_emb_dim) tensor of word vectors together
        # with the true sentence lengths used by the encoder
        s1_batch, s1_len = get_batch(s1[i:i + params.batch_size], word_vec, params.word_emb_dim)
        s2_batch, s2_len = get_batch(s2[i:i + params.batch_size], word_vec, params.word_emb_dim)
        # Variable is a legacy autograd wrapper; on PyTorch >= 0.4 it simply
        # returns the tensor it wraps
        s1_batch, s2_batch = Variable(s1_batch.to(device)), Variable(s2_batch.to(device))
        tgt_batch = Variable(torch.LongTensor(target[i:i + params.batch_size])).to(device)

        # model forward
        output = nli_net((s1_batch, s1_len), (s2_batch, s2_len))

        pred = output.data.max(1)[1]
        correct += pred.long().eq(tgt_batch.data.long()).cpu().sum().item()

        if eval_type == 'test':
            test_prediction.extend([inv_label[p] for p in pred.long().to('cpu').numpy()])

    # compute accuracy over the whole split, log it, then handle model
    # saving / lr scheduling below
    eval_acc = round(100 * correct / len(s1), 2)
    if final_eval:
        print('finalgrep : accuracy {0} : {1}'.format(eval_type, eval_acc))
        ex.log_metric('{}_accuracy'.format(eval_type), eval_acc, step=epoch)
    else:
        print('togrep : results : epoch {0} ; mean accuracy {1} : {2}'
              .format(epoch, eval_type, eval_acc))
        ex.log_metric('{}_accuracy'.format(eval_type), eval_acc, step=epoch)

    if eval_type == 'valid' and epoch <= params.n_epochs:
        if eval_acc > val_acc_best:
            print('saving model at epoch {0}'.format(epoch))
            #if not os.path.exists(params.outputdir):
            #    os.makedirs(params.outputdir)
            torch.save(nli_net.state_dict(), params.outputmodelname)
            val_acc_best = eval_acc
        else:
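            # validation accuracy did not improve: decay the learning rate for
            # SGD, or fall back to the adam_stop flag for Adam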
            if 'sgd' in params.optimizer:
                optimizer.param_groups[0]['lr'] = optimizer.param_groups[0]['lr'] / params.lrshrink
                print('Shrinking lr by : {0}. New lr = {1}'
                      .format(params.lrshrink,
                              optimizer.param_groups[0]['lr']))
                if optimizer.param_groups[0]['lr'] < params.minlr:
                    stop_training = True
            if 'adam' in params.optimizer:
                # early stopping (at 2nd decrease in accuracy); with the
                # adam_stop update commented out below, training only stops
                # if adam_stop was already True
                stop_training = adam_stop
                # adam_stop = True

    if eval_type == 'test':
        # dump sentence pairs with gold and predicted labels to a CSV for inspection
        target = [inv_label[t] for t in target]
        s1 = [' '.join(s) for s in s1]
        s2 = [' '.join(s) for s in s2]
        outp = pd.DataFrame({'s_1': s1, 's_2': s2, 'true_target': target,
                             'predicted': test_prediction})
        res_file = '{}_{}_outp.csv'.format(last_path, params.encoder_type)
        outp.to_csv(os.path.join(save_folder_name, res_file))
        #ex.log_asset(file_name=res_file, file_like_object=open(res_file,'r'))

    return eval_acc
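
Usage sketch (not part of the listing above): a minimal, hypothetical driver loop showing how evaluate() is typically combined with a per-epoch training step in scripts of this kind; train_epoch() and the surrounding setup are assumptions, not code from this repository.

epoch = 1
while not stop_training and epoch <= params.n_epochs:
    train_epoch(epoch)                       # assumed: trains nli_net for one epoch
    evaluate(epoch, eval_type='valid')       # may shrink the lr or set stop_training
    epoch += 1

# reload the checkpoint with the best validation accuracy, then log the
# final numbers (a large epoch value keeps the model-saving branch inactive)
nli_net.load_state_dict(torch.load(params.outputmodelname))
evaluate(1e6, eval_type='valid', final_eval=True)
evaluate(0, eval_type='test', final_eval=True)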