# split_data.py
import os
import random


def main(args):
    # Load the parsed Wiktionary data.
    q_path = os.path.join(args.raw_dir, 'quotations.txt')
    quotes = load_quotations(q_path)
    s_path = os.path.join(args.raw_dir, 'senses.txt')
    senses = load_senses(s_path)
    ex_path = os.path.join(args.raw_dir, 'examples.txt')
    examples = load_examples(ex_path)
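    # The three loaders above are defined elsewhere in this file; they are
    # assumed to return lists of records pairing a usage text with its word
    # and sense, but the exact schema lives in those definitions.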
    # Drop monosemous words, then split the remaining quotations into
    # train plus few-shot and zero-shot dev/test sets.
    data, monosemous_data = filter_monosemous_data(quotes, senses)
    train, fs_dev, zs_dev, fs_test, zs_test = split_data(data, senses)
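    # Presumably, the "few-shot" splits hold senses that also occur in
    # train, while the "zero-shot" splits hold senses never seen during
    # training; filtering zs_dev/zs_test senses out of the extended train
    # data below is consistent with that reading.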
    # Save the train data.
    train_path = os.path.join(args.save_dir, 'train.txt')
    save_examples(train_path, train)
    # Create and save the extended train set, which adds Wiktionary usage
    # examples as extra training data.
    random.shuffle(examples)
    # Filter monosemous senses from the examples.
    ext, monosemous_ext = filter_monosemous_data(examples, senses)
    # Filter out senses that appear in the zero-shot splits, so those
    # senses stay unseen at training time.
    zero_shot_data = zs_dev + zs_test
    ext = filter_senses(ext, zero_shot_data)
    ext = train + ext
    ext_path = os.path.join(args.save_dir, 'train.ext.txt')
    save_examples(ext_path, ext)
    # Save the dev data.
    fs_dev_path = os.path.join(args.save_dir, 'dev.few-shot.txt')
    save_examples(fs_dev_path, fs_dev)
    zs_dev_path = os.path.join(args.save_dir, 'dev.zero-shot.txt')
    save_examples(zs_dev_path, zs_dev)
    # Save the test data.
    fs_test_path = os.path.join(args.save_dir, 'test.few-shot.txt')
    save_examples(fs_test_path, fs_test)
    zs_test_path = os.path.join(args.save_dir, 'test.zero-shot.txt')
    save_examples(zs_test_path, zs_test)
    # Save the monosemous quotations and examples as extra data.
    mono_path = os.path.join(args.save_dir, 'monosemous.txt')
    mono_examples = monosemous_data + monosemous_ext
    save_examples(mono_path, mono_examples)
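
# A minimal command-line entry point, sketched here as an assumption: the
# original file's argument parsing is not shown, and the flag names below
# are inferred from the args.raw_dir / args.save_dir accesses in main().
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(
        description='Split parsed Wiktionary data into train/dev/test sets.')
    parser.add_argument('--raw_dir', required=True,
                        help='Directory containing quotations.txt, senses.txt, and examples.txt.')
    parser.add_argument('--save_dir', required=True,
                        help='Directory where the split files are written.')
    main(parser.parse_args())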