submission_code/best_util.py (60 lines of code) (raw):
import pandas as pd
import numpy as np
import joblib
import torch
import torch.nn as nn
import torch.utils.data as data
import sentencepiece as spm
from transformers import BertModel, BertConfig
class BertClassifier(nn.Module):
    """BERT encoder followed by mean pooling and a linear classification head.

    Args:
        config: Hyper-parameter object. The attributes read here are
            ``src_vocab_size``, ``h_size``, ``n_layers``, ``n_heads``,
            ``d_ff`` and ``dropout``.
        pad_id: Token id used for padding. It is forwarded to ``BertConfig``
            and used to build the attention mask in :meth:`forward`.
        num_classes: Size of the output layer (number of logits per input).
    """

    def __init__(self, config, pad_id, num_classes):
        super().__init__()
        # Keep the padding id so that forward() masks exactly the token the
        # caller pads with, instead of assuming it is always 0. ``None`` falls
        # back to 0, which is what the mask was hard-coded to before.
        self.pad_id = 0 if pad_id is None else pad_id
        bert_config = BertConfig(
            vocab_size=config.src_vocab_size,
            hidden_size=config.h_size,
            num_hidden_layers=config.n_layers,
            num_attention_heads=config.n_heads,
            intermediate_size=config.d_ff,
            hidden_dropout_prob=config.dropout,
            pad_token_id=pad_id,
        )
        self.tr = BertModel(config=bert_config)
        self.drop = nn.Dropout(config.dropout)
        self.out = nn.Linear(config.h_size, num_classes)

    def forward(self, x):
        """Return classification logits for a batch of token ids.

        Args:
            x: Integer tensor of token ids, padded with ``pad_id``
                (assumed to be laid out as (batch, seq_len) -- the pooling
                below averages over dimension 1).

        Returns:
            The output of the final linear layer applied to the pooled
            encoder states.
        """
        # 1.0 for real tokens, 0.0 for padding positions.
        attn = (x != self.pad_id).float()
        encoded = self.tr(
            input_ids=x,
            attention_mask=attn,
            return_dict=True,
        )
        # NOTE(review): the mean runs over every position of dim 1, padded
        # ones included. It is left untouched because changing the pooling
        # would change the outputs of already-trained checkpoints.
        pooled = encoded.last_hidden_state.mean(dim=1)
        pooled = self.drop(pooled)
        return self.out(pooled)
# Special token ids used when assembling model inputs: every sequence is
# wrapped as [bos_id] + tokens + [eos_id] and batches are padded with pad_id.
# NOTE(review): presumably these match the special-token ids of the
# SentencePiece model loaded in this file -- confirm against the tokenizer.
pad_id = 0
bos_id = 1
eos_id = 2
class BestUtilModel:
    """Inference wrapper that ranks candidate utilities for input texts.

    Bundles three artifacts: a label encoder loaded with joblib, a
    SentencePiece tokenizer and a ``BertClassifier`` restored from a
    checkpoint's ``'model_state_dict'`` entry.
    """

    def __init__(self, config, file_path, model_path, device):
        """Load all artifacts and put the classifier into eval mode.

        Args:
            config: Hyper-parameter object; passed to ``BertClassifier`` and
                read for ``max_src_len`` in :meth:`predict_many`.
            file_path: Directory containing ``cmd_encoder`` and
                ``txt_bpe_clf.model``.
            model_path: Checkpoint file holding a ``'model_state_dict'`` key.
            device: Device the model is moved to and inputs are sent to.
        """
        self.config = config
        self.device = device
        # NOTE(security): joblib.load and torch.load both unpickle data; only
        # point them at artifact files from a trusted source.
        self.cmd_le = joblib.load(f'{file_path}/cmd_encoder')
        self.text_tokenizer = spm.SentencePieceProcessor(f'{file_path}/txt_bpe_clf.model')
        self.model = BertClassifier(self.config, pad_id, len(self.cmd_le.classes_))
        checkpoint = torch.load(model_path, map_location=device)
        self.model.load_state_dict(checkpoint['model_state_dict'])
        self.model.eval()
        self.model.to(device)

    def predict_many(self, texts, beam_width=5):
        """Return the ``beam_width`` best-scoring utilities for each text.

        Args:
            texts: Sequence of input strings.
            beam_width: Number of candidates to return per text. Values
                larger than the number of classes are clamped to it; values
                below 1 yield an empty candidate list per text.

        Returns:
            A list with one entry per input text. Each entry is a list of
            ``(label, logit)`` pairs sorted by descending logit, where the
            label comes from the label encoder's ``inverse_transform``.
            An empty ``texts`` yields an empty list.
        """
        encoded = [self.text_tokenizer.encode(text) for text in texts]
        if not encoded:
            # pad_sequence cannot build a batch out of zero sequences.
            return []

        max_len = self.config.max_src_len
        # Truncate to max_len tokens, then wrap each sequence in BOS/EOS.
        tokens = nn.utils.rnn.pad_sequence(
            [torch.tensor([bos_id] + ids[:max_len] + [eos_id]).long() for ids in encoded],
            batch_first=True,
            padding_value=pad_id,
        )
        with torch.no_grad():
            logits = self.model(tokens.to(self.device)).cpu().numpy()

        # argpartition raises when kth is out of range, so clamp to the
        # number of available classes.
        k = max(0, min(beam_width, logits.shape[1]))
        if k == 0:
            return [[] for _ in encoded]

        # argpartition only guarantees *which* k entries are the largest, not
        # their order, so sort the selected candidates by descending score.
        candidates = np.argpartition(-logits, k - 1, axis=1)[:, :k]
        candidate_scores = np.take_along_axis(logits, candidates, axis=1)
        order = np.argsort(-candidate_scores, axis=1, kind="stable")
        topk = np.take_along_axis(candidates, order, axis=1)
        topk_scores = np.take_along_axis(candidate_scores, order, axis=1)

        return [
            list(zip(self.cmd_le.inverse_transform(row_ids), row_scores))
            for row_ids, row_scores in zip(topk, topk_scores)
        ]