def featurize_dataset()

in src/run_paraphrase.py


import torch
from fairseq.data.data_utils import collate_tokens
from tqdm import tqdm

# make_batches, PAD_TOKEN, FINAL_LAYER, and compute_similarity are
# defined elsewhere in src/run_paraphrase.py.


def featurize_dataset(dataset, roberta, batch_size):
    """Compute per-layer BERTScore features for each sentence pair in `dataset`.

    Each item of `dataset` is ((x0, x1), y): a pair of token-ID tensors and a
    label. Returns one dict per pair mapping 'bertscore-<layer>' to a score.
    """
    feat_dicts = []
    batches = make_batches(dataset, batch_size)
    with torch.no_grad():
        for batch in tqdm(batches, desc='Computing BERTScore'):
            # Pad each side of the pairs into a (B, L) batch of token IDs.
            x0_batch = collate_tokens([x[0][0] for x in batch], pad_idx=PAD_TOKEN)
            x1_batch = collate_tokens([x[0][1] for x in batch], pad_idx=PAD_TOKEN)
            # One forward pass per side; 'inner_states' holds every layer's
            # hidden states, each of shape (L, B, d).
            r0_batch = roberta.model(x0_batch.to(roberta.device),
                                     features_only=True,
                                     return_all_hiddens=True)[1]['inner_states']
            r1_batch = roberta.model(x1_batch.to(roberta.device),
                                     features_only=True,
                                     return_all_hiddens=True)[1]['inner_states']
            for i, ((x0, x1), y) in enumerate(batch):
                cur_feats = {}
                for layer in range(FINAL_LAYER + 1):
                    # Slice out example i's unpadded token representations.
                    r0_i = r0_batch[layer][:len(x0), i, :].cpu()
                    r1_i = r1_batch[layer][:len(x1), i, :].cpu()
                    cur_feats[f'bertscore-{layer}'] = compute_similarity(
                        r0_i, r1_i, 'bertscore').item()
                feat_dicts.append(cur_feats)
    return feat_dicts
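
The per-layer score comes from compute_similarity, whose implementation lives elsewhere in the module. As a reference point only, a minimal BERTScore-style similarity between two (num_tokens, d) token matrices could look like the sketch below: greedy cosine matching in both directions, combined as an F1. This is an assumption about the 'bertscore' mode, not the repo's actual code, and it omits refinements such as IDF weighting.

import torch
import torch.nn.functional as F

def compute_similarity(r0, r1, metric):
    """Hypothetical BERTScore-style F1 between two (num_tokens, d) matrices."""
    assert metric == 'bertscore'
    # Cosine similarity between every token pair across the two sentences.
    sim = F.normalize(r0, dim=-1) @ F.normalize(r1, dim=-1).T  # (len0, len1)
    recall = sim.max(dim=1).values.mean()     # best match for each x0 token
    precision = sim.max(dim=0).values.mean()  # best match for each x1 token
    return 2 * precision * recall / (precision + recall)       # harmonic mean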
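
For context on the expected inputs: since the code uses fairseq's collate_tokens and roberta.model, a caller would typically load a fairseq RoBERTa hub model, whose encode() produces the token-ID tensors that featurize_dataset slices by length. The sentence pair and label below are illustrative, not from the repo.

import torch

roberta = torch.hub.load('pytorch/fairseq', 'roberta.large')
roberta.eval()

# Each item is ((tokens0, tokens1), label), the shape featurize_dataset expects.
pairs = [('The cat sat on the mat.', 'A cat was sitting on the mat.')]
dataset = [((roberta.encode(a), roberta.encode(b)), 1) for a, b in pairs]

feats = featurize_dataset(dataset, roberta, batch_size=32)
print(feats[0]['bertscore-0'])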