def read_dailydialog_data()

in empchat/datasets/dailydialog.py [0:0]


    def read_dailydialog_data(floc, traindevtest):
        """Load one DailyDialog split into a per-utterance DataFrame.

        Two parallel files are read from ``<floc>/<split>/``:
        ``dialogues_<split>.txt`` (one conversation per line, utterances
        terminated by ``__eou__``) and ``dialogues_emotion_<split>.txt`` (one
        line of whitespace-separated integer emotion codes per conversation).

        Args:
            floc: Root directory containing the per-split sub-directories.
            traindevtest: One of ``"train"``, ``"valid"`` or ``"test"``;
                ``"valid"`` maps to the on-disk ``validation`` directory.

        Returns:
            A ``pandas.DataFrame`` with one row per utterance and the columns
            ``convid`` (index of the conversation within the file),
            ``context`` (list of the preceding utterances of that
            conversation), ``emo`` (result of ``multifeel_to_one`` applied to
            the emotion labels of the preceding utterances), ``line`` (the
            utterance text) and ``nextemo`` (the emotion label of the
            utterance itself).

        Raises:
            KeyError: If ``traindevtest`` is not a known split name, or an
                emotion code is outside 0-6.
            IndexError: If the emotion file has fewer lines than the dialogue
                file.
        """
        emolookup = {
            0: "none",
            1: "anger",
            2: "disgust",
            3: "fear",
            4: "happiness",
            5: "sadness",
            6: "surprise",
        }
        split_name = {"train": "train", "valid": "validation", "test": "test"}[
            traindevtest
        ]
        split_dir = os.path.join(floc, split_name)
        # Context managers close the handles deterministically; the explicit
        # encoding keeps decoding independent of the platform locale
        # (NOTE(review): assumes the corpus files are UTF-8 -- confirm).
        with open(
            os.path.join(split_dir, "dialogues_" + split_name + ".txt"),
            encoding="utf-8",
        ) as dialogue_file:
            conversations = dialogue_file.readlines()
        with open(
            os.path.join(split_dir, "dialogues_emotion_" + split_name + ".txt"),
            encoding="utf-8",
        ) as emotion_file:
            totemot = emotion_file.readlines()
        datarows = []
        for i, conversation in enumerate(conversations):
            # Every utterance ends with "__eou__", so the last split element
            # is the remainder after the final marker and is dropped.
            utterances = conversation.strip().split("__eou__")[:-1]
            # Whitespace-agnostic split: a blank line gives no codes instead
            # of one empty token, and repeated spaces do not produce "".
            emotions = totemot[i].split()
            prev_context = []
            prev_emot_context = []
            if len(utterances) != len(emotions):
                print("error")
            # zip() pairs only the aligned utterance/emotion entries, so a
            # mismatched conversation (reported above) is processed on a
            # best-effort basis rather than failing on a missing index.
            for utterance, emotion_code in zip(utterances, emotions):
                emotion = emolookup[int(emotion_code)]
                datarows.append(
                    [
                        i,
                        prev_context.copy(),
                        # A copy is passed so the helper cannot alter the
                        # running history.
                        multifeel_to_one(prev_emot_context.copy()),
                        utterance,
                        emotion,
                    ]
                )
                prev_context.append(utterance)
                prev_emot_context.append(emotion)
        return pd.DataFrame(
            datarows, columns=["convid", "context", "emo", "line", "nextemo"]
        )