def parse()

in atis.py [0:0]


def _split_intents(intents, language):
    """Split a raw intent label string into a list of intent names.

    Intents are separated by '#'. For 'hi' and 'tr', additional intents that
    appear space-separated and without the 'atis_' prefix are first rewritten
    into the '#atis_<name>' form so that a plain split on '#' works.
    """
    if language in ('hi', 'tr'):
        # Longest name first: ' flight' is a prefix of ' flight_no', so the
        # more specific pattern is rewritten before the general one.
        for name in ('airfare', 'airline', 'flight_no', 'flight'):
            intents = intents.replace(' ' + name, '#atis_' + name)

    return intents.split('#')


def _collect_slot_spans(words, tags, prefix=''):
    """Build the context string and the character spans of every slot.

    Words are joined with single spaces after ``prefix``. Consecutive tokens
    whose tags name the same slot (after dropping the two-character 'B-'/'I-'
    prefix) are merged into one span.

    Returns:
        A ``(utterance, spans)`` tuple. ``utterance`` is the context with
        trailing whitespace removed. ``spans`` maps a slot name to a list of
        dicts with 'slot', 'answer_start' and 'answer_stop' keys, in order of
        appearance; 'answer_stop' includes the separator space that follows
        the last word of the span.
    """
    spans = {}
    current = None
    prev_slot = ''
    offset = len(prefix)

    for word, tag in zip(words, tags):
        # Remove B- or I-; 'O' (and a malformed tag with no slot name) means
        # the token lies outside any slot.
        slot = '' if tag == 'O' else tag[2:]
        stop = offset + len(word) + 1

        if slot:
            if slot != prev_slot:
                # A new span starts here. Registering it immediately keeps
                # the dict ordered by first appearance of each slot.
                current = {
                    'slot': slot,
                    'answer_start': offset,
                    'answer_stop': stop
                }
                spans.setdefault(slot, []).append(current)

            else:
                # Same slot as the previous token: extend the open span.
                current['answer_stop'] = stop

        offset = stop
        prev_slot = slot

    utterance = (prefix + ''.join(word + ' ' for word in words)).rstrip()
    return utterance, spans


def parse(data_file_name, out_file_name, language, single_q):
    """Convert a slot/intent tagged TSV file into a QA-style JSON file.

    For every utterance a paragraph is built whose context is the utterance
    text. Each slot present in the utterance yields a positive question with
    its answer spans, and each slot question defined for ``language`` that is
    absent from the utterance yields a negative question with no answers.
    When the module-level ``USE_INTENTS`` flag is set, the context is prefixed
    with 'yes. no. ' and every intent question is answered with the 'yes'
    span (intent present) or the 'no' span (intent absent).

    Rows whose number of tags differs from their number of words are skipped
    and counted as misalignments.

    Reads the question definitions from the module-level ``args.qas_file``.

    Args:
        data_file_name: Path handed to ``load_tsv``.
        out_file_name: Path of the JSON file to write.
        language: Key into the question definitions; 'hi' and 'tr' receive
            special intent-label handling.
        single_q: Forwarded unchanged to ``append_question``.
    """
    with open(args.qas_file, 'r', encoding='utf-8') as f:
        questions = json.load(f)

    intent_questions = questions['intents']
    slot_questions = questions['slots']

    dataset = {'title': "MultiATIS++", 'paragraphs': []}
    misalignments = 0

    # Load data
    _, utterances, tag_rows, intent_rows = load_tsv(data_file_name)

    # The two leading pseudo-sentences give intent questions an extractable
    # 'yes' (offset 0) and 'no' (offset 5) answer.
    prefix = 'yes. no. ' if USE_INTENTS else ''

    for row in tqdm(range(len(utterances))):
        # Ignore data id, as we want an id per question
        words, tags = utterances[row], tag_rows[row]

        if len(tags) != len(words):
            misalignments += 1
            continue

        intents = _split_intents(intent_rows[row], language)
        utterance, slot_annotations = _collect_slot_spans(words, tags, prefix)

        paragraph = {
            'context': utterance,
            'qas': [],
            'slots': []
        }

        # Positive slot questions
        for slot, spans in slot_annotations.items():
            answers = [
                {
                    'text': utterance[span['answer_start']:span['answer_stop']].rstrip(),
                    'answer_start': span['answer_start']
                }
                for span in spans
            ]
            append_question(paragraph, slot_questions, language, False, slot, answers, single_q)

        # Negative slot questions
        for slot in slot_questions[language]:
            if slot in slot_annotations:
                continue

            append_question(paragraph, slot_questions, language, True, slot, [], single_q)

        if USE_INTENTS:
            # Positive intent questions
            for intent in intents:
                append_question(
                    paragraph, intent_questions, language, False, intent, [{'text': 'yes', 'answer_start': 0}],
                    single_q, is_intent=True
                )

            # Negative intent questions: every known intent the utterance
            # does not carry. Checking against the full list (not just the
            # last intent) keeps multi-intent utterances from receiving
            # contradictory yes/no questions.
            for candidate in intent_questions[language]:
                if candidate in intents:
                    continue

                append_question(
                    paragraph, intent_questions, language, True, candidate, [{'text': 'no', 'answer_start': 5}],
                    single_q, is_intent=True
                )

        dataset['paragraphs'].append(paragraph)

    print(f'Saving {language}...')
    with open(out_file_name, 'w', encoding='utf-8') as f:
        json.dump(dataset, f, indent=4)

    print(f'{language} misalignments: {misalignments} out of {len(utterances)}')