# in data_parsing.py [0:0]
def post_processing(senses):
    """Assign sense ids and split quotations/examples out of each sense.

    Every sense is labelled in place with a ``'sense_id'`` of the form
    ``'<word_key>.<n>'``, where ``<word_key>`` is the result of
    ``generate_word_key(sense)`` and ``<n>`` counts (from 0) the senses
    already seen with that same key. Its quotations and examples are run
    through ``clean_text``, gathered into two flat lists tagged with the
    sense id, and then removed from the sense itself.

    Args:
        senses: Sense dicts, each with a ``'quotations'`` entry (iterable
            of ``(text, attribution)`` pairs) and an ``'examples'`` entry
            (iterable of texts). The dicts are modified in place.

    Returns:
        A ``(senses, quotes, examples)`` tuple: the same ``senses`` object
        that was passed in, a list of ``(cleaned_text, sense_id,
        attribution)`` tuples, and a list of ``(cleaned_text, sense_id)``
        tuples. Texts that ``clean_text`` reduces to nothing are skipped.

    Raises:
        KeyError: If a sense has no ``'quotations'`` or ``'examples'`` key.
    """
    quotes = []
    examples = []
    # Next unused index per word key. Counters are created on first use so
    # the senses only have to be walked (and keyed) once.
    next_idx = {}

    for sense in senses:
        # Generate the sense id and label the sense with it.
        word_k = generate_word_key(sense)
        idx = next_idx.get(word_k, 0)
        next_idx[word_k] = idx + 1
        s_id = '{}.{}'.format(word_k, idx)
        sense['sense_id'] = s_id

        # Pull out any quotes; an empty list simply yields no iterations.
        for text, attrib in sense['quotations']:
            sent = clean_text(text, match_sense=s_id)
            if sent:
                quotes.append((sent, s_id, attrib))

        # Pull out any examples.
        for text in sense['examples']:
            sent = clean_text(text, match_sense=s_id)
            if sent:
                examples.append((sent, s_id))

        # The texts now live in the flat lists, so drop them from the sense.
        sense.pop('quotations')
        sense.pop('examples')

    return senses, quotes, examples