in dialogue_personalization/utils/data_reader.py [0:0]
def preprocess(data, vocab):
    """Convert raw persona dialogues into per-turn candidate-ranking samples.

    Every sentence encountered (persona sentences, user utterances, gold
    responses and all candidates) is passed to ``vocab.index_words``.

    Args:
        data: Mapping whose keys are strings that evaluate (via ``eval``) to
            an iterable of persona sentences, and whose values are sequences
            of dialogues. Each dialogue is a sequence of turn dicts with the
            keys ``"u"`` (user utterance), ``"r"`` (gold response) and
            ``"cand"`` (sequence of candidate responses).
        vocab: Object exposing ``index_words(sentence)``.

    Returns:
        A dict mapping each original key to ``{dialogue_index: samples}``,
        where every sample is ``[context, candidates, answer, persona]``:
        ``context`` is a copy of the history up to and including the current
        user utterance (prefixed with the persona sentences when
        ``config.persona`` is truthy), ``candidates`` is ``turn["cand"]``,
        ``answer`` is the index of the gold response inside the candidates
        (the last matching index if it occurs more than once) and ``persona``
        is a freshly evaluated copy of the key.

    Raises:
        ValueError: If a turn's gold response does not appear among its
            candidates.

    Side effects:
        Prints the fraction of space-separated gold-response tokens that
        also occur in the context of their turn (0.0 when there are no
        turns at all).
    """
    newdata = {}
    cnt_ptr = 0  # response tokens that also occur in the turn's context
    cnt_voc = 0  # response tokens that do not occur in the context
    for k, v in data.items():
        # SECURITY NOTE: eval() executes arbitrary code. This is only
        # acceptable if the keys come from a trusted, locally generated
        # file; consider ast.literal_eval if the keys are plain literals.
        p = eval(k)
        for e in p:
            vocab.index_words(e)
        new_v = {}
        for d_index, dial in enumerate(v):
            # The running context optionally starts with the persona.
            context = list(p) if config.persona else []
            samples = []
            for turn in dial:
                user = turn["u"]
                response = turn["r"]
                candidates = turn["cand"]

                context.append(user)
                vocab.index_words(user)
                vocab.index_words(response)

                # Reset per turn so a missing gold response can never
                # silently inherit the index found in a previous turn.
                answer = None
                for i, c in enumerate(candidates):
                    vocab.index_words(c)
                    if response == c:
                        answer = i
                if answer is None:
                    raise ValueError(
                        "gold response {!r} not found among the candidates "
                        "(dialogue {} of persona {})".format(response, d_index, k)
                    )

                # Snapshot the context: it keeps growing on later turns.
                # The persona is re-evaluated so that every sample owns an
                # independent object, exactly as before.
                samples.append([list(context), candidates, answer, eval(k)])

                # Statistics: tokenise the context once per turn instead of
                # once per response token; only membership matters.
                context_tokens = set(" ".join(context).split(" "))
                for key in response.split(" "):
                    if key in context_tokens:
                        cnt_ptr += 1
                    else:
                        cnt_voc += 1

                # The gold response becomes history for the next turn.
                context.append(response)
            new_v[d_index] = samples
        newdata[k] = new_v

    total = cnt_ptr + cnt_voc
    # Guard against ZeroDivisionError when the data contains no turns.
    ratio = cnt_ptr / total if total else 0.0
    print("Pointer percentace= {} ".format(ratio))
    return newdata