in vissl/utils/knn_utils.py [0:0]
def run_knn_at_layer(cfg: AttrDict, layer_name: str = "heads"):
    """
    Run the Nearest Neighbour benchmark at the layer "layer_name"
    """
    temperature = cfg.NEAREST_NEIGHBOR.SIGMA
    num_neighbors = cfg.NEAREST_NEIGHBOR.TOPK
    feature_dir = cfg.NEAREST_NEIGHBOR.FEATURES.PATH
    output_dir = get_checkpoint_folder(cfg)
    logging.info(f"Testing with sigma: {temperature}, topk neighbors: {num_neighbors}")

    ############################################################################
    # Step 1: get train and test features
    train_out = ExtractedFeaturesLoader.load_features(
        feature_dir, "train", layer_name, flatten_features=True
    )
    train_features, train_labels = train_out["features"], train_out["targets"]
    test_out = ExtractedFeaturesLoader.load_features(
        feature_dir, "test", layer_name, flatten_features=True
    )
    test_features, test_labels = test_out["features"], test_out["targets"]
    train_features = torch.from_numpy(train_features).float()
    test_features = torch.from_numpy(test_features).float()
    train_labels = torch.LongTensor(train_labels)
    num_classes = train_labels.max() + 1

    ###########################################################################
    # Step 2: calculate the nearest neighbor and the metrics
    accuracies = Accuracies()
    if cfg.NEAREST_NEIGHBOR.L2_NORM_FEATS:
        train_features = nn.functional.normalize(train_features, dim=1, p=2)
        test_features = nn.functional.normalize(test_features, dim=1, p=2)

    # put train features and labels on gpu and transpose train features
    if cfg.NEAREST_NEIGHBOR.USE_CUDA:
        train_features = train_features.cuda().t()
        test_features = test_features.cuda()
        train_labels = train_labels.cuda()
    else:
        train_features = train_features.t()

    # process the test set in chunks to bound the size of the similarity matrix
    num_test_images, num_chunks = test_labels.shape[0], 100
    imgs_per_chunk = num_test_images // num_chunks
    output_targets, output_predicted_label, output_inds = [], [], []
    with torch.no_grad():
        for idx in range(0, num_test_images, imgs_per_chunk):
            # get the features and targets for the current chunk of test images
            features = test_features[
                idx : min((idx + imgs_per_chunk), num_test_images), :
            ]
            targets = test_labels[idx : min((idx + imgs_per_chunk), num_test_images), :]
            batch_size = targets.shape[0]
            targets = torch.LongTensor(targets)
            if cfg.NEAREST_NEIGHBOR.USE_CUDA:
                targets = targets.cuda()

            # calculate the dot product and compute top-k neighbors
            similarity = torch.mm(features, train_features)
            distances, indices = similarity.topk(
                num_neighbors, largest=True, sorted=True
            )
            candidates = train_labels.view(1, -1).expand(batch_size, -1)
            retrieved_neighbors = torch.gather(candidates, 1, indices)

            # one-hot encode the retrieved neighbor labels, then compute the
            # temperature-weighted class votes
            retrieval_one_hot = torch.zeros(batch_size * num_neighbors, num_classes)
            if cfg.NEAREST_NEIGHBOR.USE_CUDA:
                retrieval_one_hot = retrieval_one_hot.cuda()
            retrieval_one_hot.scatter_(1, retrieved_neighbors.view(-1, 1), 1)
            predictions = _get_sorted_predictions(
                batch_size, num_classes, distances, retrieval_one_hot, temperature
            )

            # find the predictions that match the target
            accuracies = accuracies + Accuracies.from_batch(predictions, targets)

            # keep the predictions, targets and image indices of this chunk for saving
            output_inds.extend(range(idx, min((idx + imgs_per_chunk), num_test_images)))
            output_predicted_label.append(predictions.data.cpu().numpy())
            output_targets.append(targets.data.cpu().numpy())

    _save_knn_results(
        output_dir, layer_name, output_inds, output_predicted_label, output_targets
    )
    accuracies.log(layer_name)
    return accuracies.top_1, accuracies.top_5, accuracies.total
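
The per-chunk predictions above come from _get_sorted_predictions, which is defined elsewhere in this file and not shown here. Below is a minimal sketch of the weighted k-NN voting it is expected to perform; the name, signature and weighting scheme are assumptions following the DINO-style k-NN classifier (each of the top-k neighbors votes for its class with weight exp(similarity / temperature), and classes come back sorted by accumulated vote).

# Sketch only: assumed behaviour of the helper, not the actual VISSL implementation.
import torch


def _sketch_get_sorted_predictions(
    batch_size: int,
    num_classes: int,
    distances: torch.Tensor,          # (batch_size, num_neighbors) similarities
    retrieval_one_hot: torch.Tensor,  # (batch_size * num_neighbors, num_classes) one-hot labels
    temperature: float,
) -> torch.Tensor:
    # turn similarities into soft vote weights
    weights = distances.clone().div_(temperature).exp_()
    # accumulate the weighted votes per class -> (batch_size, num_classes)
    probs = torch.sum(
        retrieval_one_hot.view(batch_size, -1, num_classes)
        * weights.view(batch_size, -1, 1),
        dim=1,
    )
    # sort class indices by descending vote so column 0 holds the top-1 prediction
    _, predictions = probs.sort(dim=1, descending=True)
    return predictions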