in empchat/datasets/dailydialog.py [0:0]
def read_dailydialog_data(floc: str, traindevtest: str):
    """Load one split of DailyDialog into a per-utterance DataFrame.

    Two files are read from ``<floc>/<split>/``:
    ``dialogues_<split>.txt`` (one conversation per line, utterances
    terminated by ``__eou__``) and ``dialogues_emotion_<split>.txt`` (one
    line per conversation holding space-separated integer emotion ids).

    Args:
        floc: Root directory that contains the per-split sub-directories.
        traindevtest: One of ``"train"``, ``"valid"`` or ``"test"``;
            ``"valid"`` is mapped to the on-disk name ``"validation"``.

    Returns:
        A ``pandas.DataFrame`` with one row per utterance and the columns
        ``convid`` (index of the conversation within the file), ``context``
        (list of the earlier utterances of that conversation), ``emo``
        (``multifeel_to_one`` applied to the emotion labels of those earlier
        utterances), ``line`` (the utterance text) and ``nextemo`` (the
        emotion label of ``line`` itself).

    Raises:
        KeyError: If ``traindevtest`` is not a known split name, or an
            emotion id is outside ``0..6``.
    """
    emolookup = {
        0: "none",
        1: "anger",
        2: "disgust",
        3: "fear",
        4: "happiness",
        5: "sadness",
        6: "surprise",
    }
    split_name = {"train": "train", "valid": "validation", "test": "test"}[
        traindevtest
    ]

    # Context managers guarantee both handles are closed, even if reading
    # fails part-way through.
    with open(
        os.path.join(floc, split_name, "dialogues_" + split_name + ".txt")
    ) as dialogue_file:
        conversations = dialogue_file.readlines()
    with open(
        os.path.join(floc, split_name, "dialogues_emotion_" + split_name + ".txt")
    ) as emotion_file:
        totemot = emotion_file.readlines()

    datarows = []
    for i, conversation in enumerate(conversations):
        # Every utterance ends with "__eou__", so the final split element is
        # the leftover after the last marker and is not an utterance.
        lines = conversation.strip().split("__eou__")
        emotions = totemot[i].strip().split(" ")
        prev_context = []
        prev_emot_context = []
        if len(lines) - 1 != len(emotions):
            # Best-effort: report the misalignment and keep going.
            print("error")
        for j, line in enumerate(lines[:-1]):
            # Parsed lazily so surplus emotion tokens are never touched.
            emotion = emolookup[int(emotions[j])]
            datarows.append(
                [
                    i,
                    # Snapshot: the running lists keep growing below.
                    prev_context.copy(),
                    multifeel_to_one(prev_emot_context.copy()),
                    line,
                    emotion,
                ]
            )
            prev_context.append(line)
            prev_emot_context.append(emotion)
    return pd.DataFrame(
        datarows, columns=["convid", "context", "emo", "line", "nextemo"]
    )