baseline_model/data_utils/train_sim.py [80:130]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
            s_p, s_n = pk_sim(code_vecs, args.p, args.k)
            loss = criterion(s_p, s_n)
            total_loss += loss.item()

            if training:
                model.zero_grad()
                loss.backward()
                optimizer.step()

                args.summary.step()
                args.summary.add_scalar('train/loss', loss.item())

            progress.set_description(f'Epoch {epoch} loss: {loss.item():.8f}')

    avg_loss = total_loss / num_iters

    if training:
        print(f'- training avg loss: {avg_loss:.8f}')
    else:
        print(f'- validation avg loss: {avg_loss:.8f}')

    return avg_loss

def validate(args, model, dataset, test_split, criterion, epoch, best_val, best_epoch, device):
    """Embed the given split, score retrieval quality with MAP@R, and track the best epoch.

    Returns the (possibly updated) best MAP@R value and the epoch at which it occurred.
    """
    embeddings, labels = run_test(args, model, dataset, test_split, device)

    embeddings = embeddings.numpy()
    labels = labels.numpy()
    similarity = pairwise.cosine_similarity(embeddings)

    score = map_at_r(similarity, labels)
    # First evaluation (best_val is None) or a strictly better score updates the record.
    if best_val is None or score > best_val:
        best_val, best_epoch = score, epoch
    print(f'* validation MAP@R: {score}, best epoch: {best_epoch}, best MAP@R: {best_val}')

    # NOTE(review): tag reads 'train/' but this is a validation metric — confirm intended.
    args.summary.add_scalar('train/map_r', score)

    return best_val, best_epoch


def run_test(args, model, dataset, test_split, device):
    model.eval()

    test_gen_fun, num_iters = dataset.get_data_generator_function(
        test_split, args.batch_size, shuffle=False)

    code_vecs = []
    pids = []
    with tqdm.tqdm(test_gen_fun(), total=num_iters) as progress:
        for input, pids_batch in progress:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



baseline_model/data_utils/train_vul.py [168:256]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
            s_p, s_n = pk_sim(code_vecs, args.p, args.k)
            loss = criterion(s_p, s_n)

            total_loss += loss.item()

            if training:
                model.zero_grad()
                loss.backward()
                optimizer.step()

                args.summary.step()
                args.summary.add_scalar('train/loss', loss.item())

            progress.set_description(f'Epoch {epoch} loss: {loss.item():.8f}')

    avg_loss = total_loss / num_iters

    if training:
        print(f'- training avg loss: {avg_loss:.8f}')
    else:
        print(f'- validation avg loss: {avg_loss:.8f}')

    return avg_loss

# def train(args, model, dataset, train_split, valid_split, test_split):
#     criterion = CircleLoss(gamma=args.gamma, m=args.margin)
#     train_gen_fun = dataset.get_pk_sample_generator_function(
#         train_split, args.p, args.k)
#     valid_gen_fun = dataset.get_pk_sample_generator_function(
#         valid_split, args.p, args.k)
#     train_num_iters = args.train_epoch_size
#     valid_num_iters = args.valid_epoch_size

#     criterion.to(args.device)

#     optimizer = optim.AdamW(model.parameters(), lr=args.lr)
#     scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, args.epoch_num+1)

#     args.summary = TrainingSummaryWriter(args.log_dir)

#     best_val = None
#     best_epoch = 0

#     for epoch in range(1, args.epoch_num + 1):
#         iterations(args, epoch, model, criterion, optimizer,
#                    train_gen_fun(), train_num_iters, True)

#         best_val, best_epoch = validate(args, model, dataset, valid_split, criterion,
#                                         epoch, best_val, best_epoch)

#         print(f'Epoch {epoch}: lr: {scheduler.get_last_lr()[0]*1000}')
#         scheduler.step()
#         # output_path = os.path.join(args.save, f'model.ep{epoch}.pt')
#         # torch.save(model.state_dict(), output_path)

#         if epoch == best_epoch and (args.save is not None):
#             output_path = os.path.join(args.save, f'model.pt')
#             torch.save(model.state_dict(), output_path)
#     test(args, model, dataset, test_split)


def validate(args, model, dataset, test_split, criterion, epoch, best_val, best_epoch, device):
    """Evaluate the model on ``test_split`` and maintain the running best MAP@R.

    Embeds the split via ``run_test``, computes cosine-similarity MAP@R, and
    returns the updated (best_val, best_epoch) pair.
    """
    vecs, labels = run_test(args, model, dataset, test_split, device)

    sim_matrix = pairwise.cosine_similarity(vecs.numpy())
    labels = labels.numpy()

    score = map_at_r(sim_matrix, labels)
    improved = best_val is None or score > best_val
    if improved:
        best_val = score
        best_epoch = epoch
    print(f'* validation MAP@R: {score}, best epoch: {best_epoch}, best MAP@R: {best_val}')

    # NOTE(review): scalar tag uses the 'train/' prefix for a validation metric — confirm intended.
    args.summary.add_scalar('train/map_r', score)

    return best_val, best_epoch


def run_test(args, model, dataset, test_split, device):
    model.eval()

    test_gen_fun, num_iters = dataset.get_data_generator_function(
        test_split, args.batch_size, shuffle=False)

    code_vecs = []
    pids = []
    with tqdm.tqdm(test_gen_fun(), total=num_iters) as progress:
        for input, pids_batch in progress:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



