in cs_CZ/thesaurus/dictionary-to-thesaurus.py [0:0]
def buildThesaurus(synonyms, meanings, classification):
    """Print the thesaurus entries for every word in ``meanings`` to stdout.

    For each word (in sorted order) the indexes listed in ``meanings[word]``
    are looked up in ``synonyms``; the words found there are the synonyms.
    Each index is treated as a separate meaning of the word, which is not
    generally true but is the working assumption here.

    Output per word that has at least one synonym line:

    * a header line ``word|<number of synonym lines>``;
    * one line per (meaning, type) group of the form ``type|syn1|syn2...``,
      or just ``|syn1|syn2...`` when the word itself has an unambiguous
      type (the type is then used for filtering only, not printed).

    Lines carrying a non-empty type are printed before lines whose type is
    the empty string.  Words without any synonym line print nothing.

    Parameters:
        synonyms: mapping index -> sequence of ``(word, type)`` pairs.
        meanings: mapping word -> sequence of indexes into ``synonyms``.
        classification: mapping word -> sequence of types; a word counts
            as unambiguous only when exactly one type is listed.

    Raises:
        KeyError: if an index from ``meanings`` is missing in ``synonyms``.
    """
    for word in sorted(meanings):
        # only limit the synonyms by type if the word's type is unambiguous
        typ = ''
        if word in classification and len(classification[word]) == 1:
            typ = classification[word][0]

        # we want to output each word just once per entry (and never the
        # headword itself); a set keeps the membership test O(1)
        seen = set([word])

        output_lines = []
        for index in meanings[word]:
            # types in order of first appearance among *all* synonyms of
            # this meaning (even filtered ones) - this fixes the line order
            type_order = []
            # type -> synonyms accepted for that type, in encounter order
            words_by_type = {}
            for (w, t) in synonyms[index]:
                if t not in type_order:
                    type_order.append(t)

                # skip synonyms whose known type contradicts the word's type
                if typ != '' and t != '' and typ != t:
                    continue
                if w in seen:
                    continue
                seen.add(w)
                words_by_type.setdefault(t, []).append(w)

            for t in type_order:
                if t in words_by_type:
                    output_lines.append(
                        (t, '|' + '|'.join(words_by_type[t])))

        if not output_lines:
            continue

        # NOTE: single-argument print(...) behaves identically as a
        # Python 2 print statement and as the Python 3 print function.
        print(word + '|' + str(len(output_lines)))

        # those with existing classification are probably a better fit,
        # put them to the front (even if we don't output the
        # classification in the end); relative order is otherwise kept
        typed = [entry for entry in output_lines if entry[0] != '']
        untyped = [entry for entry in output_lines if entry[0] == '']
        for (t, line) in typed + untyped:
            if typ == '':
                print(t + line)
            else:
                print(line)