baseline_model/data_utils/train_sim.py [136:188]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
            code_vecs.append(v.detach().cpu())
            pids.append(pids_batch)
    code_vecs = torch.cat(code_vecs, dim=0)
    pids = torch.cat(pids)

    return code_vecs, pids

def get_pairwise_scores_and_labels(sim, pids):
    # Score every unordered pair once via the strict lower triangle;
    # k=-1 excludes the diagonal so trivial self-pairs (similarity 1.0,
    # label True) cannot inflate AP/AUPRG. A pair is positive when both
    # samples share a pid.
    inds = np.tril_indices(len(pids), k=-1)
    scores = sim[inds]
    labels = pids[inds[0]] == pids[inds[1]]
    return scores, labels


def area_under_prg(labels, scores):
    # Area under the Precision-Recall-Gain curve (Flach & Kull, NeurIPS 2015),
    # computed with the reference `prg` package.
    prg_curve = prg.create_prg_curve(labels, scores)
    auprg = prg.calc_auprg(prg_curve)
    return auprg


def map_at_r(sim, pids):
    # R for each query is the number of *other* samples sharing its pid;
    # assumes every pid occurs at least twice, otherwise r[pids] hits zero.
    r = np.bincount(pids) - 1
    max_r = r.max()

    # mask[i, k] keeps only the first R(i) ranks of row i.
    mask = np.arange(max_r)[None, :] < r[pids][:, None]

    # Exclude self-matches, then take each row's top max_r neighbours
    # in descending similarity order.
    sim = np.copy(sim)
    np.fill_diagonal(sim, -np.inf)
    result = np.argsort(sim, axis=1)[:, :-max_r-1:-1]
    tp = (pids[result] == pids[:, None])
    tp[~mask] = False

    # Precision at each rank, summed over the hits and normalised by R:
    # the average precision of each query, then averaged over all queries.
    p = np.cumsum(tp, axis=1) / np.arange(1, max_r+1)[None, :]
    ap = (p * tp).sum(axis=1) / r[pids]

    return ap.mean()

def test(args, model, dataset, test_split, device):
    code_vecs, pids = run_test(args, model, dataset, test_split, device)
    code_vecs = code_vecs.numpy()
    pids = pids.numpy()
    sim = pairwise.cosine_similarity(code_vecs)
    compute_metrics(sim, pids)


def compute_metrics(sim, pids):
    scores, labels = get_pairwise_scores_and_labels(sim, pids)

    ap = average_precision_score(labels, scores)
    auprg = area_under_prg(labels, scores)
    map_r = map_at_r(sim, pids)

    print(f'MAP@R: {map_r}, AP: {ap}, AUPRG: {auprg}')
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
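
For reference, a minimal sketch of how this evaluation path can be exercised end to end on toy data. The imports mirror what the excerpt implies (`prg` is the reference Precision-Recall-Gain package); the `train_sim` import path is an assumption for illustration.

    import numpy as np
    from sklearn.metrics import pairwise

    from train_sim import compute_metrics  # hypothetical import path

    rng = np.random.default_rng(0)
    pids = np.repeat(np.arange(5), 4)              # 5 problems, 4 samples each
    code_vecs = rng.normal(size=(len(pids), 32))   # stand-in for model embeddings
    sim = pairwise.cosine_similarity(code_vecs)
    compute_metrics(sim, pids)                     # prints MAP@R, AP, AUPRG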



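The pairwise path can likewise be checked in isolation. A hand-computable case, with `get_pairwise_scores_and_labels` as defined above (the matrix is illustrative, not from the repo):

    import numpy as np
    from sklearn.metrics import average_precision_score

    pids = np.array([0, 0, 1])
    sim = np.array([
        [1.0, 0.8, 0.1],
        [0.8, 1.0, 0.2],
        [0.1, 0.2, 1.0],
    ])
    scores, labels = get_pairwise_scores_and_labels(sim, pids)
    # The strict lower triangle yields the 3 unordered pairs (1,0), (2,0), (2,1):
    print(scores)   # [0.8 0.1 0.2]
    print(labels)   # [ True False False]
    # The single positive pair is ranked first, so AP is perfect.
    print(average_precision_score(labels, scores))  # 1.0
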
baseline_model/data_utils/train_vul.py [262:316]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
            code_vecs.append(v.detach().cpu())
            pids.append(pids_batch)
    code_vecs = torch.cat(code_vecs, dim=0)
    pids = torch.cat(pids)

    return code_vecs, pids


def get_pairwise_scores_and_labels(sim, pids):
    # Score every unordered pair once via the strict lower triangle;
    # k=-1 excludes the diagonal so trivial self-pairs (similarity 1.0,
    # label True) cannot inflate AP/AUPRG. A pair is positive when both
    # samples share a pid.
    inds = np.tril_indices(len(pids), k=-1)
    scores = sim[inds]
    labels = pids[inds[0]] == pids[inds[1]]
    return scores, labels


def area_under_prg(labels, scores):
    # Area under the Precision-Recall-Gain curve (Flach & Kull, NeurIPS 2015),
    # computed with the reference `prg` package.
    prg_curve = prg.create_prg_curve(labels, scores)
    auprg = prg.calc_auprg(prg_curve)
    return auprg


def map_at_r(sim, pids):
    # R for each query is the number of *other* samples sharing its pid;
    # assumes every pid occurs at least twice, otherwise r[pids] hits zero.
    r = np.bincount(pids) - 1
    max_r = r.max()

    # mask[i, k] keeps only the first R(i) ranks of row i.
    mask = np.arange(max_r)[None, :] < r[pids][:, None]

    # Exclude self-matches, then take each row's top max_r neighbours
    # in descending similarity order.
    sim = np.copy(sim)
    np.fill_diagonal(sim, -np.inf)
    result = np.argsort(sim, axis=1)[:, :-max_r-1:-1]
    tp = (pids[result] == pids[:, None])
    tp[~mask] = False

    # Precision at each rank, summed over the hits and normalised by R:
    # the average precision of each query, then averaged over all queries.
    p = np.cumsum(tp, axis=1) / np.arange(1, max_r+1)[None, :]
    ap = (p * tp).sum(axis=1) / r[pids]

    return ap.mean()


def test(args, model, dataset, test_split, device):
    code_vecs, pids = run_test(args, model, dataset, test_split, device)
    code_vecs = code_vecs.numpy()
    pids = pids.numpy()
    sim = pairwise.cosine_similarity(code_vecs)
    compute_metrics(sim, pids)


def compute_metrics(sim, pids):
    scores, labels = get_pairwise_scores_and_labels(sim, pids)

    ap = average_precision_score(labels, scores)
    auprg = area_under_prg(labels, scores)
    map_r = map_at_r(sim, pids)

    print(f'MAP@R: {map_r}, AP: {ap}, AUPRG: {auprg}')
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
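
To sanity-check `map_at_r` (as defined above), a hand-computable toy case: two pids with two samples each, where queries 0 and 2 rank a cross-pid sample first. The matrix below is illustrative, not from the repo.

    import numpy as np

    pids = np.array([0, 0, 1, 1])
    sim = np.array([
        [1.0,  0.9,  0.95, 0.2],
        [0.9,  1.0,  0.3,  0.1],
        [0.95, 0.3,  1.0,  0.8],
        [0.2,  0.1,  0.8,  1.0],
    ])
    # R = 1 for every query; queries 0 and 2 retrieve the wrong sample
    # first (AP = 0), while queries 1 and 3 retrieve correctly (AP = 1).
    print(map_at_r(sim, pids))  # 0.5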



