def industry_classification_result_process1()

in utils/compute_score.py [0:0]


def industry_classification_result_process1(sub_df):
    """Parse each row's ``predict`` text into a ``{label: sentiment}`` dict.

    Every prediction is trimmed line by line, full-width CJK punctuation is
    folded to its ASCII counterpart, and a comma at the end of a line is
    dropped. The cleaned text is then tokenised into ``(key, value)`` pairs
    in two ways -- split on colon/pipe/whitespace, and split on whitespace
    only -- and whichever tokenisation yields strictly more pairs is used
    (ties go to the whitespace-only split). Only pairs whose value contains
    one of the characters 正, 负, 中 or 无 are kept.

    Args:
        sub_df: DataFrame-like object with a ``predict`` column holding the
            raw prediction strings.

    Returns:
        list[dict[str, str]]: one dict per row, in row order. Rows whose
        prediction is empty, not a string (e.g. None/NaN), or contains no
        recognisable pair produce an empty dict.
    """
    # Written as escapes so the full-width code points cannot be silently
    # collapsed to ASCII (which would turn the mapping into a no-op).
    punctuation_map = str.maketrans({
        "\uff1a": ":",  # full-width colon
        "\uff0c": ",",  # full-width comma
        "\uff1b": ";",  # full-width semicolon
        "\u3002": ".",  # ideographic full stop
    })

    # The key group excludes the separator characters; a greedy ``\S+`` key
    # would swallow "A:x" whole and pair it with the next line instead.
    # NOTE: inside the character class "|" is a literal pipe, not alternation.
    colon_pair = re.compile(r'([^\s:|]+)[:|\s]+(\S+)')
    space_pair = re.compile(r'(\S+)\s+(\S+)')
    sentiment_marker = re.compile('正|负|中|无')

    contents_adj = []
    for raw in sub_df['predict']:
        if not isinstance(raw, str):
            # Missing predictions (None/NaN) carry no labels.
            contents_adj.append({})
            continue

        stripped_lines = (line.strip() for line in raw.split('\n'))
        content = '\n'.join(line for line in stripped_lines if line)
        content = content.translate(punctuation_map).replace(',\n', '\n')

        colon_hits = colon_pair.findall(content)
        space_hits = space_pair.findall(content)
        # Ties favour the whitespace split so keys that legitimately contain
        # ':' or '|' (e.g. "a:b value") are not broken apart.
        pairs = colon_hits if len(colon_hits) > len(space_hits) else space_hits

        contents_adj.append(
            {key: value for key, value in pairs if sentiment_marker.search(value)}
        )
    return contents_adj