dialogue_personalization/model/common_layer.py
import numpy as np
from tqdm import tqdm
# `Translator`, `config`, `bert`, `moses_multi_bleu`, and `print_all` are
# module-level dependencies defined/imported elsewhere in this file.

def evaluate(model, data, model_name='trs', ty='valid', writer=None, n_iter=0, ty_eval="before", verbose=False):
    dial, ref, hyp_b = [], [], []
    t = Translator(model, model.vocab)
    l = []      # per-batch losses
    p = []      # per-batch perplexities
    ent_b = []  # per-hypothesis persona-entailment scores
    pbar = tqdm(enumerate(data), total=len(data))
    for j, batch in pbar:
        loss, ppl, _ = model.train_one_batch(batch, train=False)  # forward pass only
        l.append(loss)
        p.append(ppl)
        # Decode only the first 3 batches during train/valid; decode everything at test time.
        if j < 3 or ty == "test":
            sent_b, _ = t.translate_batch(batch)
            for i in range(len(batch["target_txt"])):
                new_words = []
                for w in sent_b[i][0]:
                    if w == config.EOS_idx:
                        break
                    new_words.append(w)
                    # Drop a token that immediately repeats the previous one.
                    if len(new_words) > 2 and new_words[-2] == w:
                        new_words.pop()
                sent_beam_search = ' '.join([model.vocab.index2word[idx] for idx in new_words])
                hyp_b.append(sent_beam_search)
                ref.append(batch["target_txt"][i])
                dial.append(batch['input_txt'][i])
                # Entailment score of the hypothesis against each persona sentence.
                ent_b.append(bert.predict_label(
                    [sent_beam_search for _ in range(len(batch['persona_txt'][i]))],
                    batch['persona_txt'][i]))
        pbar.set_description("loss:{:.4f} ppl:{:.1f}".format(np.mean(l), np.mean(p)))
        if j > 4 and ty == "train":
            break  # quick evaluation on a handful of batches during training
    loss = np.mean(l)
    ppl = np.mean(p)
    ent_b = np.mean(ent_b)
    bleu_score_b = moses_multi_bleu(np.array(hyp_b), np.array(ref), lowercase=True)
    if verbose:
        print("----------------------------------------------------------------------")
        print("----------------------------------------------------------------------")
        print_all(dial, ref, hyp_b, max_print=3 if ty != "test" else 100000000)
        print("EVAL\tLoss\tPerplexity\tEntl_b\tBleu_b")
        print("{}\t{:.4f}\t{:.4f}\t{:.2f}\t{:.2f}".format(ty, loss, ppl, ent_b, bleu_score_b))
    return loss, ppl, ent_b, bleu_score_b
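
# --- Usage sketch (illustrative, not from the original file) ----------------
# A minimal example of driving evaluate() from a training loop. Everything
# below except evaluate() itself (run_training, train_one_epoch, train_iter,
# valid_iter, num_epochs) is a hypothetical stand-in, not a name defined in
# this repo.

def run_training(model, train_iter, valid_iter, num_epochs=10):
    best_ppl = float("inf")
    for epoch in range(num_epochs):
        train_one_epoch(model, train_iter)  # hypothetical gradient-update pass
        loss, ppl, ent_b, bleu_b = evaluate(model, valid_iter,
                                            ty="valid", verbose=True)
        if ppl < best_ppl:
            best_ppl = ppl  # track best validation perplexity, e.g. for early stopping
    return best_ppl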