def parse()

in cs_CZ/thesaurus/dictionary-to-thesaurus.py [0:0]


def parse(filename, blacklistname):
    """Parse a tab-separated dictionary file into thesaurus lookup tables.

    Each usable line of *filename* has the form
    ``index<TAB>word[<TAB>type...]``.  Lines starting with ``#`` and lines
    with fewer than two columns, an empty index or an empty word are
    skipped.  A word wrapped in double quotes has the quotes removed.

    *blacklistname* names a file with one entry per line (``#`` starts a
    comment line).  A dictionary line is dropped when the blacklist contains
    any of ``index<TAB>word``, ``index``, ``index<TAB>`` or ``<TAB>word``.

    The optional third column is the raw type.  Lines whose type contains
    ``[neprav.]`` or ``[vulg.]`` are dropped; otherwise bracketed text,
    asterisks and everything from the first colon on are removed before the
    value is handed to ``classify()``.  A line without a third column is
    classified from an empty string.

    Returns a tuple ``(synonyms, meanings, classification)`` of plain dicts:

    * ``synonyms``: index -> list of ``(word, typ)`` tuples, in file order
    * ``meanings``: word -> list of indexes, in file order
    * ``classification``: word -> list of distinct non-empty ``typ`` values
    """
    # NOTE(review): both files are opened with the platform default
    # encoding; confirm that matches the encoding of the data files.
    blacklist = set()
    with open(blacklistname, "r") as fp:
        for line in fp:
            if not line or line.startswith('#'):
                continue
            # Strip only spaces and newlines: a leading or trailing tab is
            # part of the "index<TAB>" / "<TAB>word" entry forms.
            blacklist.add(line.strip(' \n'))

    synonyms = {}
    meanings = {}
    classification = {}

    match_ignore = re.compile(r'(\[neprav\.\]|\[vulg\.\])')
    match_cleanup = re.compile(r'(\[.*\]|\*|:.*)')

    with open(filename, "r") as fp:
        for line in fp:
            if not line or line.startswith('#'):
                continue

            terms = line.split('\t')
            if len(terms) < 2:
                continue

            index = terms[0].strip()
            if not index:
                continue

            word = terms[1].strip()
            if word.startswith('"') and word.endswith('"'):
                word = word.strip('" ')
            if not word:
                continue

            if (index + '\t' + word in blacklist
                    or index in blacklist
                    or index + '\t' in blacklist
                    or '\t' + word in blacklist):
                continue

            typ = ''
            # The type column is optional; only read it when it exists
            # (a two-column line used to raise IndexError here).
            if len(terms) >= 3:
                typ = terms[2]

                # ignore non-translations
                if match_ignore.search(typ) is not None:
                    continue

                typ = match_cleanup.sub('', typ).strip()

            typ = classify(typ)

            synonyms.setdefault(index, []).append((word, typ))
            meanings.setdefault(word, []).append(index)

            if typ != '':
                known_types = classification.setdefault(word, [])
                if typ not in known_types:
                    known_types.append(typ)

    return (synonyms, meanings, classification)