def evaluate()

in evaluation/eval_coco_retrieval.py [0:0]


def _retrieval_metrics(ranks):
    """Return ``(r1, r5, r10, medr, meanr)`` for an array of 0-based ranks.

    r1/r5/r10 are the percentage of entries with rank below 1, 5 and 10.
    medr is ``floor(median + 1)`` and meanr is ``mean + 1``, i.e. both are
    reported 1-based.
    """
    total = len(ranks)
    r1 = 100.0 * np.sum(ranks < 1) / total
    r5 = 100.0 * np.sum(ranks < 5) / total
    r10 = 100.0 * np.sum(ranks < 10) / total
    medr = np.floor(np.median(ranks) + 1)
    meanr = np.mean(ranks) + 1
    return r1, r5, r10, medr, meanr


def evaluate(args, model, dataloader):
    """Run caption-to-image retrieval and report recall@K and rank statistics.

    Each batch is moved to the GPU and scored by ``model``; the scores and the
    targets are written into a 5000 x 1000 (caption x image) matrix, one
    500-column chunk at a time. Whenever the chunk with index 1 has been
    written, the rank of the target image for that caption is computed and
    running metrics are logged.

    Args:
        args: Options object; only ``args.zero_shot`` is read. When true the
            model is expected to return four values and the score is column 0
            of the softmax over the third one; otherwise the model output is
            used as the score directly.
        model: Callable taking ``(caption, features, spatials, segment_ids,
            input_mask, image_mask)``. It is switched to eval mode.
        dataloader: Iterable of 9-tuples of tensors ``(features, spatials,
            image_mask, caption, input_mask, segment_ids, target, caption_idx,
            image_idx)``. ``caption_idx`` and ``image_idx`` must be
            single-element integer tensors.
            NOTE(review): presumably each batch pairs one caption with 500
            images and chunk 1 arrives after chunk 0 -- confirm against the
            dataset class.

    Returns:
        Tuple ``(r1, r5, r10, medr, meanr)`` computed over all 5000 caption
        rows. Rows never visited keep their initial rank of 1000.
    """
    num_captions = 5000
    num_images = 1000
    chunk_size = 500  # image columns written per batch
    last_chunk = num_images // chunk_size - 1  # == 1; triggers ranking

    score_matrix = np.zeros((num_captions, num_images))
    target_matrix = np.zeros((num_captions, num_images))
    # Start every caption at the worst possible rank.
    rank_matrix = np.ones(num_captions) * num_images

    model.eval()
    for count, batch in enumerate(tqdm(iter(dataloader))):
        batch = tuple(t.cuda() for t in batch)
        (
            features,
            spatials,
            image_mask,
            caption,
            input_mask,
            segment_ids,
            target,
            caption_idx,
            image_idx,
        ) = batch

        features = features.squeeze(0)
        spatials = spatials.squeeze(0)
        image_mask = image_mask.squeeze(0)

        # Convert the index tensors to Python ints once instead of relying on
        # implicit tensor -> index conversion at every NumPy access.
        row = caption_idx.item()
        chunk = image_idx.item()
        cols = slice(chunk * chunk_size, (chunk + 1) * chunk_size)

        with torch.no_grad():
            if args.zero_shot:
                _, _, logit, _ = model(
                    caption, features, spatials, segment_ids, input_mask, image_mask
                )
                scores = torch.softmax(logit, dim=1)[:, 0]
            else:
                scores = model(
                    caption, features, spatials, segment_ids, input_mask, image_mask
                )

            score_matrix[row, cols] = scores.view(-1).cpu().numpy()
            target_matrix[row, cols] = target.float().cpu().numpy()

        if chunk == last_chunk:
            # Position of the ground-truth column in the descending-score order.
            target_col = np.where(target_matrix[row] == 1)[0][0]
            order = np.argsort(-score_matrix[row])
            rank_matrix[row] = np.where(order == target_col)[0][0]

            # Running metrics over rows 0..row.
            # NOTE(review): assumes captions are visited in increasing index
            # order -- confirm against the dataloader.
            # The "mder" label is kept verbatim so the log text is unchanged.
            logger.info(
                "%d Final r1:%.3f, r5:%.3f, r10:%.3f, mder:%.3f, meanr:%.3f",
                count,
                *_retrieval_metrics(rank_matrix[: row + 1]),
            )

    r1, r5, r10, medr, meanr = _retrieval_metrics(rank_matrix)
    logger.info(
        "Final r1:%.3f, r5:%.3f, r10:%.3f, mder:%.3f, meanr:%.3f",
        r1,
        r5,
        r10,
        medr,
        meanr,
    )
    return r1, r5, r10, medr, meanr