in atis.py [0:0]
def _with_extra_paragraphs(dataset, extra_paragraphs):
    """Return a copy of *dataset* with *extra_paragraphs* appended to its first entry.

    Only the containers on the path to ``dataset['data'][0]['paragraphs']``
    are copied; *dataset* itself is left unmodified so it can be reused
    for several outputs.
    """
    first_entry = dict(dataset['data'][0])
    first_entry['paragraphs'] = list(first_entry['paragraphs']) + list(extra_paragraphs)
    merged = dict(dataset)
    merged['data'] = [first_entry] + list(dataset['data'][1:])
    return merged


def sample_few_shot_data(args):
    """Write few-shot training files for every ordered (source, target) language pair.

    For each pair of distinct languages, the source language's training set
    (``matis_<lang>_train_squad.json`` in ``args.output_dir``) is extended
    with ``args['ex_uniform']`` paragraphs sampled uniformly without
    replacement from the target language's training set.  The result is
    written to
    ``<output_dir>/matis_few_shot/matis_<src>_<k>ex_uniform_<tgt>_squad.json``.
    When ``args['ex_uniform']`` is not positive, the source data is written
    unchanged.

    Args:
        args: Configuration object that must support item access for
            ``'languages'`` (comma-separated language codes) and
            ``'ex_uniform'`` (number of examples to sample), and attribute
            access for ``output_dir``.

    Raises:
        FileNotFoundError: If a language's training file is missing.
        ValueError: If ``args['ex_uniform']`` exceeds the number of
            paragraphs available in a target language (from
            ``random.sample``).
    """
    languages = args['languages'].split(',')
    num_examples = args['ex_uniform']

    # Create the output directory up front; otherwise the first write fails
    # when it does not exist yet.
    few_shot_dir = os.path.join(args.output_dir, 'matis_few_shot')
    os.makedirs(few_shot_dir, exist_ok=True)

    # Load each language's training set once instead of once per pair.
    train_sets = {}
    for lang in languages:
        if lang in train_sets:
            continue
        train_path = os.path.join(args.output_dir, f'matis_{lang}_train_squad.json')
        with open(train_path, 'r', encoding='utf-8') as data_f:
            train_sets[lang] = json.load(data_f)

    for source_lang in languages:
        source_data = train_sets[source_lang]
        for target_lang in languages:
            if source_lang == target_lang:
                continue
            print(f'Sampling {target_lang} few shot examples, for {source_lang} training data.')

            if num_examples > 0:
                sampled = random.sample(
                    train_sets[target_lang]['data'][0]['paragraphs'], num_examples
                )
                # Build a fresh dataset so examples sampled for one target
                # language never leak into the outputs of later targets.
                out_data = _with_extra_paragraphs(source_data, sampled)
            else:
                out_data = source_data

            out_file_name = os.path.join(
                few_shot_dir,
                f'matis_{source_lang}_{num_examples}ex_uniform_{target_lang}_squad.json',
            )
            with open(out_file_name, 'w', encoding='utf-8') as f:
                json.dump(out_data, f, indent=4)

    # Best-effort cleanup of a temporary file this function does not create
    # itself (presumably left behind by an earlier step -- TODO confirm).
    tmp_path = os.path.join(args.output_dir, 'matis_tmp.json')
    try:
        os.remove(tmp_path)
    except FileNotFoundError:
        pass  # nothing to clean up
    except OSError as err:
        # Keep the original best-effort semantics: report, do not crash.
        print(f'Could not remove {tmp_path}: {err}')