# videoalignment/eval.py — localization_errors

def localization_errors(model, dataset, args, phase):
    """Evaluate the temporal localization error of `model` on `dataset`.

    For every video, the best-scoring candidate pair is selected and the
    offset at the score argmax is compared against the ground-truth offset
    of the matching overlapping pair.  Prints the fraction of queries
    localized within 0.1 s / 1 s / 10 s and returns the per-query errors.

    Args:
        model: network exposing ``single_fv(ts, xs)`` and
            ``score_pair(fv_a, fv_b, offsets)``.
        dataset: dataset class, instantiated as
            ``dataset(phase, args, get_single_videos=True, pad=False)``.
        args: options namespace; at least ``b_s`` (batch size) is read here.
        phase: dataset split identifier passed through to `dataset`.

    Returns:
        ``np.ndarray`` of absolute localization errors in seconds, one per
        video; ``inf`` where no matching ground-truth pair was found.
    """
    device = get_device(model)
    print("Computing localization error...")
    model.eval()

    # Extract a feature vector for every single (unpadded) video.
    dataset_obj = dataset(phase, args, get_single_videos=True, pad=False)
    dataloader = DataLoader(
        dataset_obj, batch_size=1, num_workers=min(3 * args.b_s // 4, 12)
    )

    fvs = []
    with torch.no_grad():
        for ts, xs in dataloader:
            ts = ts.float().to(device)
            xs = xs.float().to(device)
            fvs.append(model.single_fv(ts, xs).data.cpu().numpy())
    fvs = np.concatenate(fvs, 0)

    # Score every unordered pair of videos over all candidate offsets.
    all_pairs = dataset_obj.all_pairs
    fv_pairs = list(itertools.combinations(fvs, 2))
    scores = []
    length = dataset_obj.length
    all_offsets = torch.arange(-length, length).unsqueeze(0).to(device)

    with torch.no_grad():
        for pair_batch in batch(fv_pairs, 1):
            fv_a = np.asarray([fv[0] for fv in pair_batch])
            fv_b = np.asarray([fv[1] for fv in pair_batch])

            fv_a = torch.from_numpy(fv_a).float().to(device)
            fv_b = torch.from_numpy(fv_b).float().to(device)
            scores.append(
                model.score_pair(fv_a, fv_b, all_offsets).data.cpu().numpy()
            )
    scores = np.concatenate(scores, 0)

    # Index candidate pairs by each of their two member videos, so every
    # pair containing a given query video can be retrieved in O(1).
    all_pairs_dict = dict()
    for i, p in enumerate(all_pairs):
        for k in (frozenset(p["videos"][0].items()), frozenset(p["videos"][1].items())):
            all_pairs_dict.setdefault(k, []).append(i)

    errors = []
    for v in dataset_obj.videos:
        # Rank all candidate pairs for this query by their best score and
        # keep the top one.
        rs_indexes = all_pairs_dict[frozenset(v.items())]
        rs = [all_pairs[i] for i in rs_indexes]
        rs_scores = np.asarray([scores[i] for i in rs_indexes])
        rs_index = np.argsort(np.max(rs_scores, -1))[::-1][0]
        rs = rs[rs_index]
        # Convert the argmax position over offsets to seconds; offsets run
        # from -length to length-1 (see `all_offsets` above).
        det_offset = (
            np.argmax(rs_scores[rs_index]) - dataset_obj.length
        ) / dataset_obj.fps

        try:
            op = next(
                op for op in dataset_obj.overlapping_pairs if is_the_same_pair(op, rs)
            )
            # The stored offset is relative to the pair's video order; flip
            # its sign when the detected pair lists the videos in the same
            # order as the ground-truth pair.
            if op["videos"] == rs["videos"]:
                gt_offset = -op["offset"]
            else:
                gt_offset = op["offset"]
            errors.append(abs(det_offset - gt_offset))
        except StopIteration:
            # No ground-truth overlapping pair matches this detection:
            # count the query as an unbounded miss.
            errors.append(float("inf"))

    errors = np.array(errors)

    def better_than_t(t):
        # Fraction of queries localized within t seconds.
        return np.sum(errors < t) / errors.size

    for t in [0.1, 1, 10]:
        print(f"Localization error <{t}s: {better_than_t(t)*100:.2f}%")
    return errors