exploring_exploration/utils/reconstruction_eval.py [346:407]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                vis_retrieved_clusters = []
                vis_pred_logits = episode_preds[interval][:, 0]  # (N, nclasses)
                vis_pred_scores = F.softmax(
                    torch.Tensor(vis_pred_logits), dim=1
                ).numpy()  # (N, nclasses)
                vis_gt_feats = episode_gts[interval][:, 0]  # (N, feat_dim)
                # The GT features are used directly as per-cluster
                # similarity scores (one score per class)
                vis_gt_sim = vis_gt_feats  # (N, nclasses)
                # Unordered indices of the top-5 similarity scores per GT feature
                vis_gt_topk_idxes = np.argpartition(vis_gt_sim, -5, axis=1)[
                    :, -5:
                ]  # (N, 5)

                # Visualize up to 12 GT images alongside their top-k predicted clusters
                for j in range(min(vis_gt_sim.shape[0], 12)):
                    vis_gt_image_j = cv2.resize(vis_gt_images[j], (300, 300))
                    # Add zero padding on top for text
                    vis_gt_image_j = np.pad(
                        vis_gt_image_j, ((100, 0), (0, 0), (0, 0)), mode="constant"
                    )
                    gt_sim_text = ",".join(
                        "{:.2f}".format(vis_gt_sim[j, v.item()])
                        for v in vis_gt_topk_idxes[j]
                    )
                    vis_gt_image_j = cv2.putText(
                        vis_gt_image_j,
                        "Best GT sim",
                        (5, 45),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        1.0,
                        (255, 255, 255),
                        thickness=2,
                    )
                    vis_gt_image_j = cv2.putText(
                        vis_gt_image_j,
                        gt_sim_text,
                        (5, 95),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        0.7,
                        (255, 255, 255),
                        thickness=2,
                    )

                    proc_retrieved_clusters = [vis_gt_image_j]
                    for k in topk_matches[j][0]:
                        if clusters2images[k].shape[0] == 0:
                            continue
                        # Sample 9 cluster images at random (with replacement)
                        random_idxes = np.random.randint(
                            0, clusters2images[k].shape[0], (9,)
                        )
                        ret_images = clusters2images[k][random_idxes]  # (9, H, W, C)
                        H, W = ret_images.shape[1:3]
                        ret_images = ret_images.reshape(
                            3, 3, *ret_images.shape[1:]
                        )  # (3, 3, H, W, C)
                        # Swap the grid-column axis with the image-row axis so
                        # the final reshape yields a single 3x3 montage
                        ret_images = np.ascontiguousarray(
                            ret_images.transpose(0, 2, 1, 3, 4)
                        )  # (3, H, 3, W, C)
                        ret_images = ret_images.reshape(3 * H, 3 * W, -1)  # (3H, 3W, C)
                        ret_images = draw_border(ret_images[np.newaxis, ...])[0]
                        ret_images = cv2.resize(ret_images, (300, 300))
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
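
The reshape/transpose sequence at the end of this block tiles 9 sampled
cluster images into a single 3x3 montage. A minimal standalone sketch of the
same pattern (the tile_grid helper below is hypothetical, not part of the
repository):

import numpy as np

def tile_grid(images: np.ndarray, rows: int, cols: int) -> np.ndarray:
    """Tile (rows*cols, H, W, C) images into one (rows*H, cols*W, C) montage."""
    n, H, W, C = images.shape
    assert n == rows * cols, "expected exactly rows*cols images"
    grid = images.reshape(rows, cols, H, W, C)
    # (rows, cols, H, W, C) -> (rows, H, cols, W, C): each grid row becomes
    # a contiguous horizontal strip after the final reshape.
    grid = np.ascontiguousarray(grid.transpose(0, 2, 1, 3, 4))
    return grid.reshape(rows * H, cols * W, C)

# 9 random 32x32 RGB images -> one 96x96 montage.
montage = tile_grid(np.random.rand(9, 32, 32, 3).astype(np.float32), 3, 3)
assert montage.shape == (96, 96, 3)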



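The top-5 retrieval in both regions relies on np.argpartition, which runs in
O(N) per row but leaves the winning indices unordered. A small sketch of the
pattern, with an optional sort step in case a ranked order is needed (the
scores array is a stand-in, not data from the repository):

import numpy as np

scores = np.random.rand(4, 20)  # stand-in for (N, nclasses) similarities
# After partitioning, the last 5 positions of each row hold the 5 largest
# entries in no particular order; slicing recovers their column indices.
top5 = np.argpartition(scores, -5, axis=1)[:, -5:]  # (N, 5), unordered
# Optional: rank the top-5 indices best-first.
order = np.argsort(np.take_along_axis(scores, top5, axis=1), axis=1)[:, ::-1]
top5_sorted = np.take_along_axis(top5, order, axis=1)
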
exploring_exploration/utils/reconstruction_eval.py [1140:1202]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                vis_retrieved_clusters = []
                vis_pred_logits = episode_preds[interval][:, 0]  # (N, nclasses)
                vis_pred_scores = F.softmax(
                    torch.Tensor(vis_pred_logits), dim=1
                ).numpy()  # (N, nclasses)
                vis_gt_feats = episode_gts[interval][:, 0]  # (N, feat_dim)
                # Compute similarity between GT features and all clusters.
                # The explicit centroid matmul is disabled here; the GT
                # features are used directly as per-cluster similarity scores.
                # vis_gt_sim = np.matmul(vis_gt_feats, cluster_centroids_np_T) # (N, nclasses)
                vis_gt_sim = vis_gt_feats  # (N, nclasses)
                # Unordered indices of the top-5 similarity scores per GT feature
                vis_gt_topk_idxes = np.argpartition(vis_gt_sim, -5, axis=1)[
                    :, -5:
                ]  # (N, 5)

                # Visualize up to 12 GT images alongside their top-k predicted clusters
                for j in range(min(vis_gt_sim.shape[0], 12)):
                    vis_gt_image_j = cv2.resize(vis_gt_images[j], (300, 300))
                    # Add zero padding on top for text
                    vis_gt_image_j = np.pad(
                        vis_gt_image_j, ((100, 0), (0, 0), (0, 0)), mode="constant"
                    )
                    gt_sim_text = ",".join(
                        "{:.2f}".format(vis_gt_sim[j, v.item()])
                        for v in vis_gt_topk_idxes[j]
                    )
                    vis_gt_image_j = cv2.putText(
                        vis_gt_image_j,
                        "Best GT sim",
                        (5, 45),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        1.0,
                        (255, 255, 255),
                        thickness=2,
                    )
                    vis_gt_image_j = cv2.putText(
                        vis_gt_image_j,
                        gt_sim_text,
                        (5, 95),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        0.7,
                        (255, 255, 255),
                        thickness=2,
                    )

                    proc_retrieved_clusters = [vis_gt_image_j]
                    for k in topk_matches[j][0]:
                        if clusters2images[k].shape[0] == 0:
                            continue
                        # Sample 9 cluster images at random (with replacement)
                        random_idxes = np.random.randint(
                            0, clusters2images[k].shape[0], (9,)
                        )
                        ret_images = clusters2images[k][random_idxes]  # (9, H, W, C)
                        H, W = ret_images.shape[1:3]
                        ret_images = ret_images.reshape(
                            3, 3, *ret_images.shape[1:]
                        )  # (3, 3, H, W, C)
                        # Swap the grid-column axis with the image-row axis so
                        # the final reshape yields a single 3x3 montage
                        ret_images = np.ascontiguousarray(
                            ret_images.transpose(0, 2, 1, 3, 4)
                        )  # (3, H, 3, W, C)
                        ret_images = ret_images.reshape(3 * H, 3 * W, -1)  # (3H, 3W, C)
                        ret_images = draw_border(ret_images[np.newaxis, ...])[0]
                        ret_images = cv2.resize(ret_images, (300, 300))
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
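
The two regions above are identical apart from the commented-out centroid
matmul in [1140:1202], so the duplication could be removed by extracting the
shared per-image annotation into one helper that both sites call. A rough
sketch, assuming the same cv2/numpy imports are available; the name
annotate_gt_image and its parameter list are hypothetical:

import cv2
import numpy as np

def annotate_gt_image(gt_image, sim_row, topk_idxes, size=300, pad=100):
    """Resize a GT image, pad a text strip above it, and write the top-k
    similarity scores into the strip (mirrors the duplicated logic)."""
    img = cv2.resize(gt_image, (size, size))
    img = np.pad(img, ((pad, 0), (0, 0), (0, 0)), mode="constant")
    sim_text = ",".join("{:.2f}".format(sim_row[v]) for v in topk_idxes)
    img = cv2.putText(img, "Best GT sim", (5, 45),
                      cv2.FONT_HERSHEY_SIMPLEX, 1.0, (255, 255, 255), thickness=2)
    img = cv2.putText(img, sim_text, (5, 95),
                      cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), thickness=2)
    return img

Each call site would then begin a row with
proc_retrieved_clusters = [annotate_gt_image(vis_gt_images[j], vis_gt_sim[j], vis_gt_topk_idxes[j])].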



