in scripts/adapet/ADAPET/setfit_adapet.py [0:0]
import json
import os


def write_generic_json(task_name, lang_pattern, verbalizer, updated_args, write_config="config/Generic.json"):
    """Build the ADAPET "generic" task config from ``updated_args`` and write it to ``write_config``."""
    # Note: the ``verbalizer`` argument is superseded by the verbalizer carried on ``updated_args``.
    verbalizer = updated_args.dict_verbalizer
    max_tokens = updated_args.max_num_lbl_tok
    data_dir = updated_args.data_dir
    adapet_seed = updated_args.seed
    pattern = updated_args.pattern
    pretrained_weight = updated_args.pretrained_weight
    print('this is the pattern: {}'.format(pattern))
    print('this is the verbalizer: {}'.format(verbalizer))
    configs = {
        "pretrained_weight": pretrained_weight,
        "dataset": "generic",
        "generic_data_dir": data_dir,
        "pattern": pattern,
        "pattern_idx": 1,
        "dict_verbalizer": verbalizer,
        "idx_txt_trim": 1,
        "max_text_length": 256,
        "batch_size": 1,  # default is 1
        "eval_batch_size": 1,  # default is 1; will crash on larger test sets if > 1
        "num_batches": updated_args.num_batches,  # default is 1000; MUST equal eval_every or ADAPET will checkpoint on the test set
        "max_num_lbl_tok": int(max_tokens),  # default is 1; gets automatically updated based on the tokenizer and dataset
        "eval_every": updated_args.eval_every,  # default is 250; MUST equal num_batches or ADAPET will checkpoint on the test set
        "eval_train": True,
        "warmup_ratio": 0.06,
        "mask_alpha": 0.105,
        "grad_accumulation_factor": 16,
        "seed": adapet_seed,
        "lr": 1e-5,
        "weight_decay": 1e-2,
    }
    if configs['num_batches'] != configs['eval_every']:
        raise ValueError("The number of batches and eval_every must be the same value to avoid checkpointing on test set")
    generic_json = json.dumps(configs, ensure_ascii=False)
    if not os.path.exists("config"):
        os.makedirs("config")
    try:
        os.remove(write_config)
        print("old config file deleted")
    except OSError:
        print("no config file found... writing new file")
    with open(write_config, "w") as f:
        f.write(generic_json)
    print("Generic json file written")
    return updated_args
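
# Minimal usage sketch (illustrative, not from the repo): build an argparse-style
# namespace carrying the attributes this function reads, then write the ADAPET
# config. The attribute names mirror the accesses above; the concrete values and
# the ``SimpleNamespace`` stand-in for the real CLI parser are assumptions.
if __name__ == "__main__":
    from types import SimpleNamespace

    args = SimpleNamespace(
        dict_verbalizer={"0": "negative", "1": "positive"},
        max_num_lbl_tok=1,
        data_dir="data/generic",
        seed=42,
        pattern="[TEXT1] It was [LBL] .",
        pretrained_weight="albert-xxlarge-v2",
        num_batches=1000,
        eval_every=1000,  # must equal num_batches (see the check above)
    )
    write_generic_json("sst2", args.pattern, args.dict_verbalizer, args)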