def buildThesaurus()

in cs_CZ/thesaurus/dictionary-to-thesaurus.py [0:0]


def buildThesaurus(synonyms, meanings, classification):
    """Print a thesaurus entry for every word in *meanings* to stdout.

    For each word (in sorted order) the indexes listed in ``meanings[word]``
    are looked up in *synonyms*; the words found there are the synonyms.
    Each index is treated as a separate meaning (not generally true, but a
    workable approximation).

    Output per word that has at least one synonym::

        word|<number of lines that follow>
        <type>|syn1|syn2...      (when the word's own type is ambiguous)
        |syn1|syn2...            (when the word's own type is unambiguous)

    Args:
        synonyms: mapping index -> sequence of ``(word, type)`` pairs.
        meanings: mapping word -> iterable of indexes into *synonyms*.
        classification: mapping word -> sequence of types known for that
            word.  A word is considered unambiguous only when exactly one
            type is listed.

    The type ``''`` means "unclassified".

    Raises:
        KeyError: if an index from *meanings* is missing in *synonyms*.
    """
    for word in sorted(meanings):
        indexes = meanings[word]

        # Only restrict the synonyms by type if the word's type is unambiguous.
        typ = ''
        if word in classification and len(classification[word]) == 1:
            typ = classification[word][0]

        # Each synonym is emitted at most once per word, and the word is
        # never listed as its own synonym.  A set keeps the check O(1).
        used_this_round = {word}

        output_lines = []
        for index in indexes:
            # types: every type seen for this index, in order of first
            #        appearance (drives the output order below).
            # line:  type -> synonyms of that type accepted for this index.
            types = []
            line = {}
            for (w, t) in synonyms[index]:
                if t not in types:
                    types.append(t)

                # Skip synonyms whose known type contradicts the word's type;
                # unclassified synonyms ('') are always allowed through.
                if typ != '' and t != '' and typ != t:
                    continue

                if w in used_this_round:
                    continue
                used_this_round.add(w)
                line.setdefault(t, []).append(w)

            for t in types:
                if t in line:
                    output_lines.append((t, '|' + '|'.join(line[t])))

        if not output_lines:
            continue

        # Single-argument print(...) produces the same output under both
        # Python 2 (statement) and Python 3 (function).
        print(word + '|' + str(len(output_lines)))

        # Lines with an existing classification are probably a better fit,
        # so they go first (even when the classification itself is not
        # printed); unclassified lines follow.
        for want_classified in (True, False):
            for (t, text) in output_lines:
                if (t != '') != want_classified:
                    continue
                if typ == '':
                    print(t + text)
                else:
                    print(text)