in longform-qa/lfqa_utils.py [0:0]
def make_qa_retriever_model(model_name="google/bert_uncased_L-8_H-512_A-8", from_file=None, device="cuda:0"):
tokenizer = AutoTokenizer.from_pretrained(model_name)
bert_model = AutoModel.from_pretrained(model_name).to(device)
# run bert_model on a dummy batch to get output dimension
d_ids = torch.LongTensor(
[[bert_model.config.bos_token_id if bert_model.config.bos_token_id is not None else 1]]
).to(device)
d_mask = torch.LongTensor([[1]]).to(device)
sent_dim = bert_model(d_ids, attention_mask=d_mask)[1].shape[-1]
qa_embedder = RetrievalQAEmbedder(bert_model, sent_dim).to(device)
if from_file is not None:
param_dict = torch.load(from_file) # has model weights, optimizer, and scheduler states
qa_embedder.load_state_dict(param_dict["model"])
return tokenizer, qa_embedder