in atis.py [0:0]
def main(args):
    """Convert every requested language/split and optionally merge train sets.

    For each language code in the comma-separated ``args.languages`` and each
    of the ``train``/``dev``/``test`` splits:

    * ``parse`` is called with ``<data_path>/<split>_<LANGUAGE>.tsv`` as input
      and ``<output_dir>/matis_<language>_<split>_qas.json`` as output, along
      with the language code and ``args.single_q``.
    * ``agg_data`` is called on that single ``*_qas.json`` file to write
      ``<output_dir>/matis_<language>_<split>_squad.json``.

    When ``args.aggregate_data`` is truthy, ``agg_data`` is called once more
    to combine the train ``*_qas.json`` files into
    ``<output_dir>/matis_mt/matis_mt_multi_lingual_train_squad.json``.

    Args:
        args: Namespace-like object providing ``languages``, ``data_path``,
            ``output_dir``, ``single_q`` and ``aggregate_data``.
    """
    # Tolerate spaces around the commas ("en, de") and stray empty entries,
    # which would otherwise end up inside the generated file names.
    languages = [
        code.strip() for code in args.languages.split(',') if code.strip()
    ]

    # The English train file is always appended last to the multilingual
    # aggregate below, so it is kept out of `filenames` to avoid duplicates.
    english_train_qas = os.path.join(args.output_dir, 'matis_en_train_qas.json')

    filenames = []
    for language in languages:
        print(f'Parsing {language}...')
        for dataset in ['train', 'dev', 'test']:
            qas_path = os.path.join(
                args.output_dir, f'matis_{language}_{dataset}_qas.json'
            )
            # Parse and aggregate the data
            parse(
                os.path.join(args.data_path, f'{dataset}_{language.upper()}.tsv'),
                qas_path,
                language,
                args.single_q
            )
            agg_data(
                [qas_path],
                os.path.join(args.output_dir, f'matis_{language}_{dataset}_squad.json')
            )
            # NOTE(review): `filenames` used to stay empty, so the
            # "multi_lingual" aggregate only ever contained English. The
            # non-English train files are collected here on the assumption
            # that they are what the aggregate is meant to hold - confirm.
            if dataset == 'train' and qas_path != english_train_qas:
                filenames.append(qas_path)

    # Aggregate data
    if args.aggregate_data:
        print('Aggregating data...')
        agg_data(
            filenames + [english_train_qas],
            os.path.join(args.output_dir, 'matis_mt/matis_mt_multi_lingual_train_squad.json')
        )