def _generate_baseline_single_dict_feature()

in captum/attr/_models/pytext.py [0:0]


    def _generate_baseline_single_dict_feature(self, device):
        r"""Generate dict features based on Assistant's case study by using
         sia_transformer:
         fbcode/assistant/sia/transformer/sia_transformer.py
         sia_transformer generates dict features in a special gazetteer format.
         See `fbsource/fbcode/pytext/models/embeddings/dict_embedding.py`

         It generates word dict feature embeddings for each word token.

         The output of SIATransformer after running it on the `<pad>` token
         looks as follows:
        OutputRecord(tokens=['<', 'pad', '>'],
                     token_ranges=[(0, 1), (1, 4), (4, 5)],
                     gazetteer_feats=['<pad>', '<pad>', '<pad>'],
                     gazetteer_feat_lengths=[1, 1, 1],
                     gazetteer_feat_weights=[0.0, 0.0, 0.0],
                     characters=[['<', '<pad>', '<pad>'],
                                ['p', 'a', 'd'], ['>', '<pad>', '<pad>']],
                     pretrained_token_embedding=[ ], dense_feats=None)
        """
        # Baseline dict features: the SIATransformer output for a single
        # `<pad>` token consists of three `<pad>` sub-tokens (see docstring).
        gazetteer_feats = [self.PAD, self.PAD, self.PAD]
        gazetteer_feat_lengths = [1, 1, 1]
        gazetteer_feat_weights = [0.0, 0.0, 0.0]
        # Map each gazetteer feature string to its vocab index; fall back to
        # index 0 when no vocab_dict is available on this instance.
        gazetteer_feat_id = (
            torch.tensor(
                [
                    self.vocab_dict.stoi[gazetteer_feat]
                    if hasattr(self, "vocab_dict")
                    else 0
                    for gazetteer_feat in gazetteer_feats
                ]
            )
            .unsqueeze(0)
            .to(device)
        )
        gazetteer_feat_weights = (
            torch.tensor(gazetteer_feat_weights).unsqueeze(0).to(device)
        )
        # Keep only the middle sub-token's length; the result is a
        # one-element tensor of shape (1,).
        gazetteer_feat_lengths = (
            torch.tensor(gazetteer_feat_lengths).to(device).view(1, -1)[:, 1]
        )

        return (gazetteer_feat_id, gazetteer_feat_weights, gazetteer_feat_lengths)
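
A minimal, self-contained sketch of the same construction outside the class can help when checking the baseline tensor shapes; `build_pad_dict_feature_baseline` and `vocab_stoi` are illustrative names rather than captum API, and the shapes shown are inferred from the code above.

    import torch

    PAD = "<pad>"

    def build_pad_dict_feature_baseline(vocab_stoi=None, device="cpu"):
        # Three <pad> sub-tokens, mirroring SIATransformer's output for "<pad>".
        gazetteer_feats = [PAD, PAD, PAD]
        gazetteer_feat_lengths = [1, 1, 1]
        gazetteer_feat_weights = [0.0, 0.0, 0.0]

        # Map each feature string to its vocab index, falling back to 0
        # when no vocabulary is supplied.
        feat_ids = (
            torch.tensor(
                [vocab_stoi.get(f, 0) if vocab_stoi else 0 for f in gazetteer_feats]
            )
            .unsqueeze(0)
            .to(device)
        )  # shape: (1, 3)

        feat_weights = (
            torch.tensor(gazetteer_feat_weights).unsqueeze(0).to(device)
        )  # shape: (1, 3)

        # As in the original, keep only the middle sub-token's length.
        feat_lengths = (
            torch.tensor(gazetteer_feat_lengths).to(device).view(1, -1)[:, 1]
        )  # shape: (1,)

        return feat_ids, feat_weights, feat_lengths

    ids, weights, lengths = build_pad_dict_feature_baseline()
    print(ids.shape, weights.shape, lengths.shape)
    # torch.Size([1, 3]) torch.Size([1, 3]) torch.Size([1])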