def gt_grd_eval()

in scripts/eval_grd_anet_entities.py [0:0]
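
As read by the code below, self.ref and self.pred are nested dicts keyed by video and segment id; the field names come from the accesses in the method, while the value descriptions are inferred and illustrative only:

    # Inferred from the accesses in gt_grd_eval(); illustrative only.
    # ref[vid]['segments'][seg]:
    #     'frame_ind'       -> sampled frame index per annotated box
    #     'process_bnd_box' -> [x1, y1, x2, y2] per annotated box
    #     'process_idx'     -> word indices in the sentence, per box (list of lists)
    #     'process_clss'    -> object class names aligned with process_idx
    # pred[vid][seg]:
    #     'idx_in_sent'         -> word indices the submission grounded
    #     'bbox_for_all_frames' -> one box per sampled frame, per grounded word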


    def gt_grd_eval(self):
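        """Evaluate grounding on GT sentences: for each annotated object word, check
        whether the predicted box in the matching frame overlaps the reference box
        above self.iou_thresh, then report per-class and overall (macro-averaged)
        localization accuracy."""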

        ref = self.ref
        pred = self.pred
        print('Number of videos in the reference: {}, number of videos in the submission: {}'.format(len(ref), len(pred)))

        results = defaultdict(list)
        for vid, anns in ref.items():
            for seg, ann in anns['segments'].items():
                if len(ann['frame_ind']) == 0:
                    continue # annotation not available

                ref_bbox_all = torch.cat((torch.Tensor(ann['process_bnd_box']), \
                    torch.Tensor(ann['frame_ind']).unsqueeze(-1)), dim=1) # 5-D coordinates: (x1, y1, x2, y2, frame_ind)
                sent_idx = set(itertools.chain.from_iterable(ann['process_idx'])) # indices of the annotated words in the sentence to evaluate
                for idx in sent_idx:
                    sel_idx = [ind for ind, i in enumerate(ann['process_idx']) if idx in i]
                    ref_bbox = ref_bbox_all[sel_idx] # select matched boxes
                    # Note that, though discouraged, a single word can be annotated across multiple boxes/frames
                    assert(ref_bbox.size(0) > 0)

                    class_name = ann['process_clss'][sel_idx[0]][ann['process_idx'][sel_idx[0]].index(idx)]
                    if vid not in pred:
                        results[class_name].append(0) # video not grounded
                    elif seg not in pred[vid]:
                        results[class_name].append(0) # segment not grounded
                    elif idx not in pred[vid][seg]['idx_in_sent']:
                        results[class_name].append(0) # object not grounded
                    else:
                        pred_ind = pred[vid][seg]['idx_in_sent'].index(idx)
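                        # the submission supplies one box per sampled frame for this word
                        # ('bbox_for_all_frames'); append frame indices 0-9 as the 5th coordinate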
                        pred_bbox = torch.cat((torch.Tensor(pred[vid][seg]['bbox_for_all_frames'][pred_ind])[:,:4], \
                            torch.Tensor(range(10)).unsqueeze(-1)), dim=1)

                        # restrict IoU to pred/ref box pairs from the same frame
                        frm_mask = torch.from_numpy(get_frm_mask(pred_bbox[:, 4].numpy(), \
                            ref_bbox[:, 4].numpy()).astype('uint8'))
                        overlap = bbox_overlaps_batch(pred_bbox[:, :5].unsqueeze(0), \
                            ref_bbox[:, :5].unsqueeze(0), frm_mask.unsqueeze(0))
                        results[class_name].append(1 if torch.max(overlap) > self.iou_thresh else 0)

        print('Number of groundable object classes in this split: {}'.format(len(results)))
        grd_accu = np.mean([sum(hm)*1./len(hm) for i, hm in results.items()]) # macro-average over object classes

        print('-' * 80)
        print('The overall localization accuracy is {:.4f}'.format(grd_accu))
        print('-' * 80)
        if self.verbose:
            print('Object frequency and grounding accuracy per class (descending by object frequency):')
            # map (class name, accuracy) -> frequency, so classes can be sorted by frequency
            accu_per_clss = {(i, sum(hm)*1./len(hm)): len(hm) for i, hm in results.items()}
            accu_per_clss = sorted(accu_per_clss.items(), key=lambda x:x[1], reverse=True)
            for accu in accu_per_clss:
                print('{} ({}): {:.4f}'.format(accu[0][0], accu[1], accu[0][1]))

        return grd_accu
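
A minimal usage sketch, assuming this method lives on the ANetGrdEval evaluator defined in the same script; the constructor arguments and file names below are assumptions for illustration, not a verified API:

    # Hypothetical invocation; argument names and paths are assumptions.
    evaluator = ANetGrdEval(
        reference_file='anet_entities_annotations.json',  # assumed path; loaded into self.ref
        submission_file='grounding_submission.json',      # assumed path; loaded into self.pred
        iou_thresh=0.5,
        verbose=True)
    accuracy = evaluator.gt_grd_eval()  # overall localization accuracy on GT sentences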