# T5DST/T5.py
import os

import pytorch_lightning as pl
from pytorch_lightning import Trainer, seed_everything
from transformers import (
    BartForConditionalGeneration,
    BartTokenizer,
    T5ForConditionalGeneration,
    T5Tokenizer,
)

# DST_Seq2Seq (the LightningModule), prepare_data, and evaluate_model are
# assumed to be defined elsewhere in this file/repo.


def train(args, *more):
    args = vars(args)
    # Encode the experiment configuration into the run name so checkpoints
    # from different settings do not overwrite each other.
    args["model_name"] = (
        args["model_checkpoint"] + args["model_name"]
        + "_except_domain_" + args["except_domain"]
        + "_slotlang_" + str(args["slot_lang"])
        + "_lr_" + str(args["lr"])
        + "_epoch_" + str(args["n_epochs"])
        + "_seed_" + str(args["seed"])
    )
    # Seed Python, NumPy, and torch for a reproducible run.
    seed_everything(args["seed"])
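    # e.g. with model_checkpoint="t5-small", model_name="t5", except_domain="hotel",
    # slot_lang="none", lr=1e-4, n_epochs=5, seed=557 (illustrative values), this
    # yields "t5-smallt5_except_domain_hotel_slotlang_none_lr_0.0001_epoch_5_seed_557".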
if "t5" in args["model_name"]:
model = T5ForConditionalGeneration.from_pretrained(args["model_checkpoint"])
tokenizer = T5Tokenizer.from_pretrained(args["model_checkpoint"], bos_token="[bos]", eos_token="[eos]", sep_token="[sep]")
model.resize_token_embeddings(new_num_tokens=len(tokenizer))
elif "bart" in args["model_name"]:
model = BartForConditionalGeneration.from_pretrained(args["model_checkpoint"])
tokenizer = BartTokenizer.from_pretrained(args["model_checkpoint"], bos_token="[bos]", eos_token="[eos]", sep_token="[sep]")
model.resize_token_embeddings(new_num_tokens=len(tokenizer))
    # Wrap model and tokenizer in the seq2seq LightningModule.
    task = DST_Seq2Seq(args, tokenizer, model)

    train_loader, val_loader, test_loader, ALL_SLOTS, fewshot_loader_dev, fewshot_loader_test = prepare_data(args, task.tokenizer)

    # Save-model path.
    save_path = os.path.join(args["saving_dir"], args["model_name"])
    os.makedirs(save_path, exist_ok=True)
    trainer = Trainer(
        default_root_dir=save_path,
        accumulate_grad_batches=args["gradient_accumulation_steps"],
        gradient_clip_val=args["max_norm"],
        max_epochs=args["n_epochs"],
        # Stop once validation loss has failed to improve for 5 epochs.
        callbacks=[
            pl.callbacks.EarlyStopping(
                monitor="val_loss", min_delta=0.00, patience=5, verbose=False, mode="min"
            )
        ],
        gpus=args["GPU"],
        deterministic=True,
        num_nodes=1,
        # precision=16,
        accelerator="ddp",
    )
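    # Compatibility note (assumption about the environment): gpus= and
    # accelerator="ddp" are pytorch-lightning 1.x arguments; on 2.x the
    # closest equivalents are devices=, accelerator="gpu", and strategy="ddp".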
    trainer.fit(task, train_loader, val_loader)

    # Persist the fine-tuned weights and tokenizer next to the Lightning logs.
    task.model.save_pretrained(save_path)
    task.tokenizer.save_pretrained(save_path)

    print("test start...")
    # Evaluate the trained model on the held-out test set.
    _ = evaluate_model(args, task.tokenizer, task.model, test_loader, save_path, ALL_SLOTS)
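

# Minimal invocation sketch (an assumption, not part of the original file):
# the flags below mirror only the keys this function reads; the real repo's
# argument parser defines more options, and these defaults are illustrative.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--model_checkpoint", type=str, default="t5-small")
    parser.add_argument("--model_name", type=str, default="t5")
    parser.add_argument("--except_domain", type=str, default="hotel")
    parser.add_argument("--slot_lang", type=str, default="none")
    parser.add_argument("--lr", type=float, default=1e-4)
    parser.add_argument("--n_epochs", type=int, default=5)
    parser.add_argument("--seed", type=int, default=557)
    parser.add_argument("--saving_dir", type=str, default="save")
    parser.add_argument("--gradient_accumulation_steps", type=int, default=1)
    parser.add_argument("--max_norm", type=float, default=1.0)
    parser.add_argument("--GPU", type=int, default=1)
    train(parser.parse_args())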