in atis.py [0:0]
def parse(data_file_name, out_file_name, language, single_q):
    """Convert a slot/intent-tagged TSV file into a QA-style JSON file.

    The question templates are read from ``args.qas_file`` (a module-level
    ``args`` object is expected to be in scope). Every utterance returned by
    ``load_tsv`` becomes one paragraph whose ``context`` is the
    space-joined utterance; questions are attached to the paragraph through
    ``append_question``.

    Args:
        data_file_name: Path handed to ``load_tsv``.
        out_file_name: Path of the JSON file to write.
        language: Key used to select the per-language question lists; the
            values ``'hi'`` and ``'tr'`` additionally trigger intent-label
            normalisation.
        single_q: Forwarded unchanged to ``append_question``
            (NOTE(review): presumably selects one question per label --
            confirm against ``append_question``).

    Rows whose token and tag counts differ are skipped and counted; the
    count is printed at the end.
    """
    # JSON is UTF-8 by specification; do not rely on the platform default,
    # since the question file is keyed by language and may hold non-ASCII text.
    with open(args.qas_file, 'r', encoding='utf-8') as f:
        questions = json.load(f)
    intent_questions = questions['intents']
    slot_questions = questions['slots']
    qas = {'title': "MultiATIS++", 'paragraphs': []}
    misalignments = 0

    # Load data. The first returned column (data ids) is ignored, as we want
    # an id per question rather than per utterance.
    _, utterances, tag_rows, intent_rows = load_tsv(data_file_name)
    for row in tqdm(range(len(utterances))):
        words, tags, intents = utterances[row], tag_rows[row], intent_rows[row]

        # Offsets below pair tokens and tags positionally, so a length
        # mismatch makes the row unusable.
        if len(tags) != len(words):
            misalignments += 1
            continue

        # Special cases for hi and tr: extra intents are separated by a
        # space and lack the 'atis_' prefix. 'flight_no' is handled before
        # 'flight' so the longer label is matched explicitly.
        if language in ['hi', 'tr']:
            for label in ('airfare', 'airline', 'flight_no', 'flight'):
                intents = intents.replace(' ' + label, '#atis_' + label)
        intents = intents.split('#')

        paragraph = {
            'context': '',
            'qas': [],
            'slots': []
        }

        # With intents enabled the context starts with the literal answers
        # 'yes' (offset 0) and 'no' (offset 5) for the intent questions.
        utterance = 'yes. no. ' if USE_INTENTS else ''
        prev_slot = ''
        slot_annotations = {}
        annotation = {}

        # Retrieve annotations: adjacent tokens carrying the same slot name
        # are merged into a single span.
        for word, tag in zip(words, tags):
            slot = ''
            if tag != 'O':
                # Remove B- or I-
                slot = tag[2:]
                # The stop offset includes the separator space; it is
                # stripped again when the answer text is extracted.
                stop = len(utterance) + len(word) + 1
                if slot != prev_slot:
                    # Save previous annotation, if any
                    if annotation:
                        slot_annotations.setdefault(annotation['slot'], []).append(annotation)
                    annotation = {
                        'slot': slot,
                        'answer_start': len(utterance),
                        'answer_stop': stop
                    }
                else:
                    annotation['answer_stop'] = stop
            utterance += word + ' '
            prev_slot = slot

        # Save last annotation
        if annotation:
            slot_annotations.setdefault(annotation['slot'], []).append(annotation)

        # Trim trailing space
        utterance = utterance.rstrip()
        paragraph['context'] = utterance

        # Positive slot questions
        for slot, spans in slot_annotations.items():
            answers = [
                {
                    'text': utterance[span['answer_start']:span['answer_stop']].rstrip(),
                    'answer_start': span['answer_start']
                }
                for span in spans
            ]
            append_question(paragraph, slot_questions, language, False, slot, answers, single_q)

        # Negative slot questions
        for slot in slot_questions[language]:
            if slot in slot_annotations:
                continue
            append_question(paragraph, slot_questions, language, True, slot, [], single_q)

        if USE_INTENTS:
            # Positive intent questions
            for intent in intents:
                append_question(
                    paragraph, intent_questions, language, False, intent, [{'text': 'yes', 'answer_start': 0}],
                    single_q, is_intent=True
                )
            # Negative intent questions: skip every intent of this utterance,
            # not only the last one, so multi-intent rows never receive
            # contradictory yes/no questions for the same intent.
            present_intents = set(intents)
            for candidate in intent_questions[language]:
                if candidate in present_intents:
                    continue
                append_question(
                    paragraph, intent_questions, language, True, candidate, [{'text': 'no', 'answer_start': 5}],
                    single_q, is_intent=True
                )
        qas['paragraphs'].append(paragraph)

    print(f'Saving {language}...')
    with open(out_file_name, 'w', encoding='utf-8') as f:
        json.dump(qas, f, indent=4)
    print(f'{language} misalignments: {misalignments} out of {len(utterances)}')