def convert_json_to_flattened()

in mm_dst/gpt2_dst/utils/convert.py [0:0]


def _ensure_parent_dir(path):
    """Create the parent directory of `path` when it names a missing one."""
    directory = os.path.dirname(path)
    # dirname() is '' for a bare filename (e.g. 'out.txt'); os.makedirs('')
    # raises FileNotFoundError, so only create a directory when one is named.
    if directory:
        os.makedirs(directory, exist_ok=True)


def convert_json_to_flattened(
        input_path_json,
        output_path_predict,
        output_path_target,
        len_context=2,
        use_multimodal_contexts=True,
        input_path_special_tokens='',
        output_path_special_tokens=''):
    """
        Input: JSON representation of the dialogs
        Output: line-by-line stringified representation of each turn

        Args:
            input_path_json: path to a JSON file whose top-level
                'dialogue_data' entry is the list of dialogs.
            output_path_predict: text file that receives one
                TEMPLATE_PREDICT-formatted line per turn.
            output_path_target: text file that receives one
                TEMPLATE_TARGET-formatted line per turn.
            len_context: number of most recent per-turn contexts joined
                into the context of each line. NOTE: 0 keeps the whole
                history, because `lst[-0:]` is the full list.
            use_multimodal_contexts: if True, append the output of
                represent_visual_objects() to each turn's context and
                include the multimodal markers in the default special
                tokens.
            input_path_special_tokens: optional JSON file holding an
                existing special-tokens dict; when empty, a default dict
                is built from the module constants.
            output_path_special_tokens: optional path; when given, the
                special tokens, extended with every act and slot name
                seen in the belief states, are written there as JSON.

        Returns:
            None. Results are written to the output paths. Missing parent
            directories of the output files are created.
    """

    with open(input_path_json, 'r') as f_in:
        data = json.load(f_in)['dialogue_data']

    if input_path_special_tokens:
        with open(input_path_special_tokens, 'r') as f_in:
            special_tokens = json.load(f_in)
    else:
        additional_special_tokens = [END_OF_BELIEF]
        if use_multimodal_contexts:
            additional_special_tokens.extend([
                START_OF_MULTIMODAL_CONTEXTS,
                END_OF_MULTIMODAL_CONTEXTS
            ])
        special_tokens = {
            "eos_token": END_OF_SENTENCE,
            "additional_special_tokens": additional_special_tokens
        }

    # If an output path for special tokens is given, we track new OOVs
    # (acts and slot names) while walking the belief states.
    track_oov = bool(output_path_special_tokens)
    oov = set()

    predicts = []
    targets = []

    for dialog in data:

        prev_asst_uttr = None
        lst_context = []

        for turn in dialog[FIELDNAME_DIALOG]:
            user_uttr = turn[FIELDNAME_USER_UTTR].replace('\n', ' ').strip()
            user_belief = turn[FIELDNAME_BELIEF_STATE]
            asst_uttr = turn[FIELDNAME_ASST_UTTR].replace('\n', ' ').strip()

            # Format main input context: the previous system response (if
            # any) followed by the current user utterance.
            context = ''
            if prev_asst_uttr:
                context += f'System : {prev_asst_uttr} '
            context += f'User : {user_uttr}'
            prev_asst_uttr = asst_uttr

            # Add multimodal contexts
            if use_multimodal_contexts:
                visual_objects = turn[FIELDNAME_VISUAL_OBJECTS]
                context += ' ' + represent_visual_objects(visual_objects)

            # Concat with previous contexts, keeping the last `len_context`
            lst_context.append(context)
            context = ' '.join(lst_context[-len_context:])

            # Format belief state: one "act [ slot = value, ... ]" per frame
            belief_state = []
            for bs_per_frame in user_belief:
                slots = bs_per_frame['slots']
                slot_values = ', '.join(
                    f'{kv[0].strip()} = {kv[1].strip()}' for kv in slots)
                belief_state.append("{act} [ {slot_values} ]".format(
                    act=bs_per_frame['act'].strip(),
                    slot_values=slot_values,
                ))

                # Track OOVs (slot values are intentionally not tracked)
                if track_oov:
                    oov.add(bs_per_frame['act'])
                    oov.update(kv[0] for kv in slots)

            str_belief_state = ' '.join(belief_state)

            # Format the main input
            predicts.append(TEMPLATE_PREDICT.format(
                context=context,
                START_BELIEF_STATE=START_BELIEF_STATE,
            ))

            # Format the main output
            targets.append(TEMPLATE_TARGET.format(
                context=context,
                START_BELIEF_STATE=START_BELIEF_STATE,
                belief_state=str_belief_state,
                END_OF_BELIEF=END_OF_BELIEF,
                response=asst_uttr,
                END_OF_SENTENCE=END_OF_SENTENCE
            ))

    # Output into text files, creating parent directories when needed
    _ensure_parent_dir(output_path_predict)
    with open(output_path_predict, 'w') as f_predict:
        f_predict.write('\n'.join(predicts))

    _ensure_parent_dir(output_path_target)
    with open(output_path_target, 'w') as f_target:
        f_target.write('\n'.join(targets))

    if track_oov:
        # Add oov's (acts and slot names, etc.) to special tokens as well.
        # Tokens already listed are skipped, and the new ones are sorted so
        # the written file does not depend on set iteration order.
        known_tokens = special_tokens.setdefault(
            'additional_special_tokens', [])
        already_listed = set(known_tokens)
        known_tokens.extend(
            sorted(token for token in oov if token not in already_listed))

        _ensure_parent_dir(output_path_special_tokens)
        with open(output_path_special_tokens, 'w') as f_special_tokens:
            json.dump(special_tokens, f_special_tokens)