in dialogue_personalization/model/transformer.py [0:0]
# NOTE: reconstructed context -- these imports (and the enclosing
# `class Transformer(nn.Module):`) live at module level in transformer.py;
# the exact import paths are assumed from this repo's layout.
import os

import torch
import torch.nn as nn

from model.common_layer import (Encoder, Decoder, Generator, NoamOpt,
                                LabelSmoothing, share_embedding)
from utils import config
def __init__(self, vocab, model_file_path=None, is_eval=False, load_optim=False):
    super(Transformer, self).__init__()
    self.vocab = vocab
    self.vocab_size = vocab.n_words
    # Input embedding, optionally initialized from pretrained vectors and
    # shared by the encoder and decoder.
    self.embedding = share_embedding(self.vocab, config.preptrained)
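    # Encoder/decoder hyperparameters all come from the global config; the
    # `universal` flag presumably selects the Universal Transformer variant
    # in common_layer (one set of layer weights reused across the `hop` passes).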
    self.encoder = Encoder(config.emb_dim, config.hidden_dim, num_layers=config.hop,
                           num_heads=config.heads, total_key_depth=config.depth,
                           total_value_depth=config.depth, filter_size=config.filter,
                           universal=config.universal)
    self.decoder = Decoder(config.emb_dim, config.hidden_dim, num_layers=config.hop,
                           num_heads=config.heads, total_key_depth=config.depth,
                           total_value_depth=config.depth, filter_size=config.filter,
                           universal=config.universal)
    self.generator = Generator(config.hidden_dim, self.vocab_size)
    if config.weight_sharing:
        # Share the weight matrix between the target word embedding and the
        # final logit projection (tied input/output embeddings).
        self.generator.proj.weight = self.embedding.weight

    self.criterion = nn.NLLLoss(ignore_index=config.PAD_idx)
    if config.label_smoothing:
        self.criterion = LabelSmoothing(size=self.vocab_size, padding_idx=config.PAD_idx, smoothing=0.1)
        # Keep an unsmoothed NLL loss around so perplexity can still be
        # reported against the true targets.
        self.criterion_ppl = nn.NLLLoss(ignore_index=config.PAD_idx)
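    # Evaluation-only construction: switch every submodule to eval mode so
    # dropout and other training-time behavior are disabled.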
    if is_eval:
        self.encoder = self.encoder.eval()
        self.decoder = self.decoder.eval()
        self.generator = self.generator.eval()
        self.embedding = self.embedding.eval()
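    # Optimizer selection: plain Adam by default, optionally Adam under the
    # Noam warmup schedule; note that use_sgd, checked last, overrides noam
    # when both flags are set.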
    self.optimizer = torch.optim.Adam(self.parameters(), lr=config.lr)
    if config.noam:
        self.optimizer = NoamOpt(config.hidden_dim, 1, 4000,
                                 torch.optim.Adam(self.parameters(), lr=0,
                                                  betas=(0.9, 0.98), eps=1e-9))
    if config.use_sgd:
        self.optimizer = torch.optim.SGD(self.parameters(), lr=config.lr)
    if model_file_path is not None:
        print("loading weights")
        state = torch.load(model_file_path, map_location=lambda storage, location: storage)
        print("LOSS", state['current_loss'])
        self.encoder.load_state_dict(state['encoder_state_dict'])
        self.decoder.load_state_dict(state['decoder_state_dict'])
        self.generator.load_state_dict(state['generator_dict'])
        self.embedding.load_state_dict(state['embedding_dict'])
        if load_optim:
            self.optimizer.load_state_dict(state['optimizer'])
    if config.USE_CUDA:
        self.encoder = self.encoder.cuda()
        self.decoder = self.decoder.cuda()
        self.generator = self.generator.cuda()
        self.criterion = self.criterion.cuda()
        self.embedding = self.embedding.cuda()
    # Directory where checkpoints are written; best_path is a placeholder for
    # the path of the best checkpoint saved so far.
    self.model_dir = config.save_path
    if not os.path.exists(self.model_dir):
        os.makedirs(self.model_dir)
    self.best_path = ""