in captum/attr/_models/pytext.py [0:0]
def _generate_baseline_single_dict_feature(self, device):
    r"""Build the baseline dict (gazetteer) feature for one `<pad>` token.

    The values follow the Assistant case study, where dict features are
    produced by sia_transformer
    (fbcode/assistant/sia/transformer/sia_transformer.py) in a special
    gazetteer format; see
    `fbsource/fbcode/pytext/models/embeddings/dict_embedding.py`.
    Word dict feature embeddings are generated for each word token.

    Running SIATransformer on the `<pad>` token yields a record like:

        OutputRecord(tokens=['<', 'pad', '>'],
                     token_ranges=[(0, 1), (1, 4), (4, 5)],
                     gazetteer_feats=['<pad>', '<pad>', '<pad>'],
                     gazetteer_feat_lengths=[1, 1, 1],
                     gazetteer_feat_weights=[0.0, 0.0, 0.0],
                     characters=[['<', '<pad>', '<pad>'],
                                 ['p', 'a', 'd'], ['>', '<pad>', '<pad>']],
                     pretrained_token_embedding=[ ], dense_feats=None)

    Args:
        device: Target device passed to ``Tensor.to`` for every returned
            tensor.

    Returns:
        A 3-tuple ``(feat_ids, feat_weights, feat_lengths)``:
        ``feat_ids`` has shape ``(1, 3)`` and holds the vocabulary index of
        ``self.PAD`` (or ``0`` when the instance has no ``vocab_dict``);
        ``feat_weights`` has shape ``(1, 3)`` and is all zeros;
        ``feat_lengths`` has shape ``(1,)`` and holds the second entry of
        the per-piece length list, i.e. ``1``.
    """
    # `<pad>` is split into three pieces ('<', 'pad', '>'), each tagged PAD.
    piece_count = 3
    pad_feats = [self.PAD] * piece_count

    # Map the features to ids; without a dict vocabulary every id is 0.
    if hasattr(self, "vocab_dict"):
        stoi = self.vocab_dict.stoi
        feat_ids = [stoi[feat] for feat in pad_feats]
    else:
        feat_ids = [0] * piece_count

    # Ids and weights get a leading batch dimension of size one.
    id_tensor = torch.tensor(feat_ids).unsqueeze(0).to(device)
    weight_tensor = torch.tensor([0.0] * piece_count).unsqueeze(0).to(device)

    # Lengths are reshaped to a single row, then column 1 is selected.
    length_row = torch.tensor([1] * piece_count).to(device).view(1, -1)
    length_tensor = length_row[:, 1]

    return (id_tensor, weight_tensor, length_tensor)