def preprocess()

in dialogue_personalization/utils/data_reader.py [0:0]


def preprocess(data,vocab):
    """Index the vocabulary and turn persona dialogues into per-turn samples.

    Args:
        data: Mapping from a persona key to a sequence of dialogues. Each key
            is a string that is evaluated into an iterable of persona
            sentences. Each dialogue is a sequence of turn dicts holding the
            keys ``"u"`` and ``"r"`` (presumably the user utterance and the
            response) and ``"cand"`` (the candidate responses).
        vocab: Object exposing ``index_words(sentence)``. It is called on
            every persona sentence, ``"u"`` text, ``"r"`` text and candidate.

    Returns:
        A dict with the same keys as ``data``. Each value maps a dialogue
        index to a list with one entry per turn of the form
        ``[context, candidates, answer, persona]``. ``context`` is a copy of
        the history up to and including the current ``"u"`` text (seeded with
        the persona sentences when ``config.persona`` is truthy), and
        ``answer`` is the index of the ``"r"`` text inside ``candidates``.

    Raises:
        ValueError: If a turn's ``"r"`` text is not among its candidates.

    Side effects:
        Prints the fraction of response tokens that also occur in the
        context; ``0.0`` is reported when no response token was seen.
    """
    newdata = {}
    cnt_ptr = 0
    cnt_voc = 0
    for k, v in data.items():
        # NOTE(review): eval() executes arbitrary code. This is only safe if
        # the keys come from a trusted source; if they are always plain
        # literals, ast.literal_eval would be the safer choice.
        p = eval(k)

        for e in p:
            vocab.index_words(e)
        new_v = {i: [] for i in range(len(v))}
        for d_index, dial in enumerate(v):
            context = list(p) if config.persona else []
            for turn in dial:
                response = turn["r"]
                candidates = turn["cand"]

                context.append(turn["u"])
                vocab.index_words(turn["u"])
                vocab.index_words(response)

                # Reset per turn so a missing match can never inherit the
                # label computed for a previous turn.
                answer = None
                for i, c in enumerate(candidates):
                    vocab.index_words(c)
                    if c == response:
                        # No early exit: every candidate must be indexed, and
                        # the last matching position wins.
                        answer = i
                if answer is None:
                    raise ValueError(
                        "response {!r} is not among the candidates "
                        "(persona {!r}, dialogue {})".format(response, k, d_index)
                    )

                # The persona is re-evaluated so that every sample owns an
                # independent persona object.
                new_v[d_index].append([list(context), candidates, answer, eval(k)])

                ## compute stats
                # Tokenise the context once per turn; membership in the set
                # is equivalent to searching the token list for each word.
                context_tokens = set(" ".join(context).split(" "))
                for key in response.split(" "):
                    if key in context_tokens:
                        cnt_ptr += 1
                    else:
                        cnt_voc += 1
                context.append(response)
        newdata[k] = new_v

    # Guard against empty input, where no response token was counted.
    total = cnt_ptr + cnt_voc
    ratio = cnt_ptr / total if total else 0.0
    print("Pointer percentace= {} ".format(ratio))
    return newdata