def industry_classification_result_process_single()

in utils/compute_score.py [0:0]


# Translation table applied to the raw answer before any pattern matching:
# full-width CJK punctuation becomes its ASCII counterpart and ASCII spaces
# are removed.  Escapes are used on purpose so the mapping cannot be silently
# flattened into a no-op by a Unicode-normalising editor or pipeline.
_PUNCT_NORMALIZATION = str.maketrans({
    '\uff1a': ':',  # full-width colon
    '\uff0c': ',',  # full-width comma
    '\uff1b': ';',  # full-width semicolon
    '\u3002': '.',  # ideographic full stop
    ' ': None,      # drop ASCII spaces
})


def _extract_sentiment_pairs(text, require_sep=True):
    """Best-effort regex scan for ``<name><sep><label>`` pairs in *text*.

    A pair is a run of word characters, then colons/whitespace, then one of
    the characters 正 / 负 / 中 / 无 followed by at least one more word
    character.  Only that first label character is kept as the value.

    Args:
        text: Text to scan.
        require_sep: When true at least one colon/whitespace character must
            separate name and label; when false the separator is optional.

    Returns:
        Dict mapping each captured name to its one-character label; later
        duplicates overwrite earlier ones.
    """
    gap = '+' if require_sep else '*'
    # The label class deliberately has no '|': inside [...] a pipe is a
    # literal, so the previous '[正|负|中|无]' also accepted '|' as a label.
    pattern = r'(\w+)[:\s]' + gap + r'([正负中无])\w+'
    return {name: label for name, label in re.findall(pattern, text)}


def _parse_industry_sentiment_sections(ind_info, class_info):
    """Build the result dict from the two captured sections of an
    ``行业:<industries>情感分类:<sentiments>`` answer.

    NOTE(review): ``get_num_infos`` is defined elsewhere; this code assumes it
    returns a list of dicts with a ``'sep'`` key, most relevant separator
    first, and an empty list when no separator is found -- confirm.
    """
    ind_info = ind_info.strip('\n.;')
    class_info = class_info.strip('\n.;')

    class_seps = get_num_infos(class_info)
    if not class_seps:
        # The sentiment section is a single item: apply it to every industry.
        ind_seps = get_num_infos(ind_info)
        if not ind_seps:
            return {ind_info: class_info}
        return dict.fromkeys(ind_info.split(ind_seps[0]['sep']), class_info)

    # The sentiment section lists several items.
    class_items = class_info.split(class_seps[0]['sep'])
    parsed = None
    try:
        if len(class_seps) > 1:
            # Two separator kinds: every item is itself a "name:label" pair.
            # An item that does not split into exactly two parts raises
            # ValueError and drops to the regex fallback below.
            parsed = dict(tuple(item.split(':')) for item in class_items)
        else:
            # One separator kind: pair the items positionally with the
            # industries listed in the industry section.
            ind_seps = get_num_infos(ind_info)
            if ind_seps:
                industries = ind_info.split(ind_seps[0]['sep'])
                # NOTE(review): the second pattern has nested groups and
                # matches any string as written; it may have been meant to
                # target full-width parentheses -- confirm against
                # extract_re_exprs.
                labels = extract_re_exprs(class_items, ['.*[为是](.*)', '.*((.*))'])
                if None not in labels:
                    parsed = dict(zip(industries, labels))
                else:
                    max_len = max(len(industries), len(class_items))
                    padded = pad_list(class_items, max_len, '无')
                    parsed = dict(zip(industries, padded))
            # When the industry section has no separator there is nothing to
            # pair with; `parsed` stays None and the regex fallback runs.
            # (Previously this case reached the fallback only through a
            # NameError swallowed by a bare `except`.)
    except Exception:
        # Deliberate best-effort: any parsing failure falls back to the regex
        # scan.  `Exception` rather than a bare except so KeyboardInterrupt
        # and SystemExit are no longer swallowed.
        parsed = None

    if parsed is None:
        parsed = _extract_sentiment_pairs(class_info)
    return parsed


def _parse_extraction_result(content):
    """Build the result dict from an answer that mentions ``抽取结果``.

    Returns an empty dict when the expected structure is present but cannot
    be paired up, so that the caller's final fallback takes over.
    """
    body_match = re.match('抽取结果.*?:(.*)', content, re.DOTALL)
    if body_match is None:
        # The marker is present but not as a leading "抽取结果...:" header.
        return _extract_sentiment_pairs(content)

    body = strip(body_match.groups()[0])
    class_match = re.match('.*情感分类结果:(.*)', body, re.DOTALL)

    if class_match is None:
        # No separate sentiment section: the body is either a list of
        # "name<sep>label" pairs or a plain list of names.
        try:
            seps = get_num_infos(body)
            if not seps:
                return {body: '无'}
            items = body.split(seps[0]['sep'])
            if len(seps) > 1:
                pair_sep = seps[1]['sep']
                return dict(tuple(item.split(pair_sep)) for item in items)
            return dict.fromkeys(items, '无')
        except Exception:
            # Best-effort fallback, e.g. an item that is not a 2-part pair.
            return _extract_sentiment_pairs(body)

    class_info = class_match.groups()[0]
    class_seps = get_num_infos(class_info)
    if not class_seps:
        return {}
    class_items = class_info.split(class_seps[0]['sep'])

    ind_match = re.match('(.*)情感分类结果', body, re.DOTALL)
    if ind_match is None:
        return {}
    ind_info = ind_match.groups()[0].strip()
    # Split the industry text with its OWN separator.  The previous code
    # re-ran get_num_infos on class_info here, so industries were split with
    # the sentiment list's separator.
    ind_seps = get_num_infos(ind_info)
    # With no separator the industry text is kept as a single item
    # (presumably what splitting on an absent separator produced before).
    industries = ind_info.split(ind_seps[0]['sep']) if ind_seps else [ind_info]

    if len(class_items) != len(industries):
        # Pad the shorter list with '无' so no item is dropped by zip.
        max_len = max(len(industries), len(class_items))
        industries = pad_list(industries, max_len, '无')
        class_items = pad_list(class_items, max_len, '无')
    return dict(zip(industries, class_items))


def industry_classification_result_process_single(content):
    """Parse one free-text industry/sentiment answer into a dict.

    The text is first normalised (full-width ``:`` ``,`` ``;`` and the
    ideographic full stop become ASCII, ASCII spaces are removed, a comma
    directly before a newline is dropped) and then tried against three
    layouts in order:

    1. ``行业:<industries>情感分类:<sentiments>`` at the start of the text;
    2. any text containing ``抽取结果``;
    3. anything else, which is handed to ``decode_re_content`` (after cutting
       a leading ``...结果...:`` header when one is present).

    If the chosen layout yields nothing, a lenient regex scan over the whole
    text is used as a last resort.

    Args:
        content: Raw answer text.

    Returns:
        Dict mapping industry name to sentiment label; empty when nothing
        could be extracted.
    """
    content = content.translate(_PUNCT_NORMALIZATION).replace(',\n', '\n')

    header_match = re.match('行业:(.*)情感分类:(.*)', content, re.DOTALL)
    if header_match is not None:
        class_info_dict = _parse_industry_sentiment_sections(*header_match.groups())
    elif '抽取结果' in content:
        class_info_dict = _parse_extraction_result(content)
    else:
        # A former sub-branch re-tested '^行业:(.*)情感分类:(.*)' here.  That is
        # the pattern that has just failed at the start of the string, so the
        # sub-branch could never run (and compared a list with an int); it
        # has been removed.
        # Expected shape: "<...>结果<...>:name:label,name:label"
        result_match = re.match('.*结果.*?:(.*)', content, re.DOTALL)
        if result_match is not None:
            class_info_dict = decode_re_content(strip(result_match.group(1)))
        else:
            class_info_dict = decode_re_content(content)

    if not class_info_dict:
        # Last resort: scan the whole text, separator optional.
        class_info_dict = _extract_sentiment_pairs(content, require_sep=False)
    return class_info_dict