in empchat/datasets/loader.py [0:0]
def __init__(self, opt, dictionary=None):
    """Set up the vocabulary for the dataset named by ``opt.dataset_name``.

    Args:
        opt: Options object. ``opt.dataset_name`` selects the branch taken
            below, and the whole object is forwarded to
            ``build_dictionary`` whenever a vocabulary has to be built.
        dictionary: Optional pre-built vocabulary. As used here it is a
            mapping with an ``"iwords"`` list of tokens and a ``"words"``
            mapping keyed by token.

    Sets ``self.opt``, ``self.dataset_name``, ``self.dict`` and
    ``self.pad_idx``; the "dailydialog" / "empchat" branch additionally
    sets ``self.temp_dict``.

    Raises:
        ValueError: If ``opt.dataset_name`` is not "dailydialog",
            "empchat" or "reddit".
    """

    def ensure_empty_persona_token(vocab):
        # Register EMPTYPERSONA_TOKEN in place when it is missing, then
        # rebuild the token -> index map so it matches the extended
        # "iwords" list. Shared by both dataset branches below.
        if EMPTYPERSONA_TOKEN not in vocab["words"]:
            vocab["iwords"].append(EMPTYPERSONA_TOKEN)
            vocab["words"] = {w: i for i, w in enumerate(vocab["iwords"])}

    self.opt = opt
    self.dataset_name = opt.dataset_name

    if self.dataset_name in ["dailydialog", "empchat"]:
        if dictionary is not None:
            # A caller-supplied vocabulary is wrapped as-is: the
            # empty-persona token is NOT added on this path.
            self.temp_dict = ParlAIDictionary.create_from_reddit_style(dictionary)
        else:
            self.dict = build_dictionary(opt)
            ensure_empty_persona_token(self.dict)
            self.temp_dict = ParlAIDictionary.create_from_reddit_style(self.dict)
        # `or` falls back to the ParlAI round-trip when `dictionary` is None
        # (or otherwise falsy, e.g. an empty mapping).
        self.dict = dictionary or self.temp_dict.as_reddit_style_dict()
    elif self.dataset_name == "reddit":
        self.dict = dictionary or build_dictionary(opt)
        # NOTE: when the caller passed `dictionary`, it is extended in place.
        ensure_empty_persona_token(self.dict)
    else:
        raise ValueError("Dataset name unrecognized!")

    # Look the padding index up once so later code can read self.pad_idx.
    self.pad_idx = self.dict["words"][PAD_TOKEN]