src/predict.py [35:156]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
try:
    from common.metrics import *
    from common.multi_label_metrics import *
    from utils import set_seed, plot_bins, csv_reader
    from SSFN.model import *
    from data_loader import load_and_cache_examples, convert_examples_to_features, InputExample, InputFeatures
except ImportError:
    from src.common.metrics import *
    from src.common.multi_label_metrics import *
    from src.utils import set_seed, plot_bins, csv_reader
    from src.SSFN.model import *
    from src.data_loader import load_and_cache_examples, convert_examples_to_features, InputExample, InputFeatures
import logging
logger = logging.getLogger(__name__)


def llprint(message):
    '''
    write the message followed by a newline to stdout and flush right away
    :param message: text to emit (it is concatenated with "\\n")
    :return:
    '''
    stream = sys.stdout
    stream.write(message + "\n")
    stream.flush()


def load_label_code_2_name(args, filename):
    '''
    load the mapping between the label code and the label name
    Each useful line of the file has the form "<code>###<name>". Blank lines and
    lines without the "###" separator are skipped.
    :param args: object providing dataset_name, dataset_type and task_type (used to build the path)
    :param filename: name of the mapping file inside the dataset directory
    :return: dict of label code -> label name (empty when the file does not exist)
    '''
    label_code_2_name = {}
    # NOTE: the path is relative to the current working directory
    label_filepath = "../dataset/%s/%s/%s/%s" % (args.dataset_name, args.dataset_type, args.task_type, filename)
    if not os.path.exists(label_filepath):
        return label_code_2_name
    with open(label_filepath, "r") as rfp:
        for line in rfp:
            strs = line.strip().split("###")
            if len(strs) < 2:
                # blank or separator-less line (e.g. a trailing newline): nothing to map
                continue
            label_code_2_name[strs[0]] = strs[1]
    return label_code_2_name


def load_args(log_dir):
    '''
    read the run arguments recorded in <log_dir>/logs.txt
    :param log_dir: directory that contains logs.txt
    :return: the object decoded from the first line starting with "{", or {} when no such line exists
    '''
    dashes = "-" * 25
    print(dashes + "log dir:" + dashes)
    print(log_dir)
    print("-" * 60)

    log_filepath = os.path.join(log_dir, "logs.txt")
    if not os.path.exists(log_filepath):
        raise Exception("%s not exists" % log_filepath)

    # only the first line that begins with "{" is considered
    with open(log_filepath, "r") as rfp:
        first_json_line = next((row for row in rfp if row.startswith("{")), None)
    if first_json_line is None:
        return {}
    return json.loads(first_json_line.strip())


def load_model(args, model_dir):
    '''
    load the model, its tokenizers and the label vocabulary
    :param args: run arguments; reads device, has_seq_encoder, has_struct_encoder, do_lower_case,
                 subword, codes_file and label_filepath
    :param model_dir: directory holding config.json and the "sequence" / "struct" tokenizer
                      sub-directories; also passed to model_class.from_pretrained
    :return: (config, subword, seq_tokenizer, struct_tokenizer, model, label_id_2_name, label_name_2_id)
    '''
    # load tokenizer and model
    device = torch.device(args.device)
    config_class, model_class, tokenizer_class = BertConfig, SequenceAndStructureFusionNetwork, BertTokenizer

    # read config.json inside a context manager so the file handle is closed
    with open(os.path.join(model_dir, "config.json"), "r") as cfp:
        config = config_class(**json.load(cfp))

    # for sequence
    subword = None
    if args.has_seq_encoder:
        seq_tokenizer = tokenizer_class.from_pretrained(
            os.path.join(model_dir, "sequence"),
            do_lower_case=args.do_lower_case
        )
        if args.subword:
            # NOTE(review): the codes file handle is left open on purpose; it is not visible
            # from here whether BPE still reads from it after construction - confirm before closing
            bpe_codes_prot = codecs.open(args.codes_file)
            subword = BPE(bpe_codes_prot, merges=-1, separator='')
    else:
        seq_tokenizer = None

    # for structure
    if args.has_struct_encoder:
        struct_tokenizer = tokenizer_class.from_pretrained(
            os.path.join(model_dir, "struct"),
            do_lower_case=args.do_lower_case
        )
    else:
        struct_tokenizer = None

    model = model_class.from_pretrained(model_dir, args=args)

    model.to(device)
    model.eval()

    # load labels: one label name per line, ids are assigned in file order;
    # lines equal to "label" are skipped
    label_filepath = args.label_filepath
    label_id_2_name = {}
    label_name_2_id = {}
    with open(label_filepath, "r") as fp:
        for line in fp:
            label_name = line.strip()
            if label_name == "label":
                continue
            label_id_2_name[len(label_id_2_name)] = label_name
            label_name_2_id[label_name] = len(label_name_2_id)

    print("-" * 25 + "label_id_2_name:" + "-" * 25)
    # the full mapping is only printed when it is small
    if len(label_id_2_name) < 20:
        print(label_id_2_name)
    print("label size: ", len(label_id_2_name))
    print("-" * 60)

    return config, subword, seq_tokenizer, struct_tokenizer, model, label_id_2_name, label_name_2_id


def transform_sample_2_feature(
        args,
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -


src/predict_many_samples.py [35:155]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# try the imports without the "src." prefix first, then fall back to the "src."-prefixed ones
try:
    from common.multi_label_metrics import *
    from protein_structure.predict_structure import predict_embedding, predict_pdb, calc_distance_maps
    # "csv_reader" was misspelled "csv_reade" here, which sent this branch to the ImportError fallback
    from utils import set_seed, plot_bins, csv_reader, fasta_reader, clean_seq
    from SSFN.model import *
    from data_loader import load_and_cache_examples, convert_examples_to_features, InputExample, InputFeatures
except ImportError:
    from src.common.multi_label_metrics import *
    from src.protein_structure.predict_structure import predict_embedding, predict_pdb, calc_distance_maps
    from src.utils import set_seed, plot_bins, csv_reader, fasta_reader, clean_seq
    from src.SSFN.model import *
    from src.data_loader import load_and_cache_examples, convert_examples_to_features, InputExample, InputFeatures

import logging
logger = logging.getLogger(__name__)


def llprint(message):
    '''
    emit one line on stdout and flush it immediately
    :param message: text to emit (it is concatenated with "\\n")
    :return:
    '''
    out = sys.stdout
    line = message + "\n"
    out.write(line)
    out.flush()


def load_label_code_2_name(args, filename):
    '''
    load the mapping between the label code and the label name
    Each useful line of the file has the form "<code>###<name>". Blank lines and
    lines without the "###" separator are skipped.
    :param args: object providing dataset_name, dataset_type and task_type (used to build the path)
    :param filename: name of the mapping file inside the dataset directory
    :return: dict of label code -> label name (empty when the file does not exist)
    '''
    label_code_2_name = {}
    # NOTE: the path is relative to the current working directory
    label_filepath = "../dataset/%s/%s/%s/%s" % (args.dataset_name, args.dataset_type, args.task_type, filename)
    if not os.path.exists(label_filepath):
        return label_code_2_name
    with open(label_filepath, "r") as rfp:
        for line in rfp:
            strs = line.strip().split("###")
            if len(strs) < 2:
                # blank or separator-less line (e.g. a trailing newline): nothing to map
                continue
            label_code_2_name[strs[0]] = strs[1]
    return label_code_2_name


def load_args(log_dir):
    '''
    read the run arguments recorded in <log_dir>/logs.txt
    :param log_dir: directory that contains logs.txt
    :return: the object decoded from the first line starting with "{", or {} when no such line exists
    '''
    side = "-" * 25
    print(side + "log dir:" + side)
    print(log_dir)
    print("-" * 60)

    log_filepath = os.path.join(log_dir, "logs.txt")
    if not os.path.exists(log_filepath):
        raise Exception("%s not exists" % log_filepath)

    parsed = {}
    with open(log_filepath, "r") as rfp:
        for record in rfp:
            if not record.startswith("{"):
                continue
            # the first matching line wins; later lines are never read
            parsed = json.loads(record.strip())
            break
    return parsed


def load_model(args, model_dir):
    '''
    load the model, its tokenizers and the label vocabulary
    :param args: run arguments; reads device, has_seq_encoder, has_struct_encoder, do_lower_case,
                 subword, codes_file and label_filepath
    :param model_dir: directory holding config.json and the "sequence" / "struct" tokenizer
                      sub-directories; also passed to model_class.from_pretrained
    :return: (config, subword, seq_tokenizer, struct_tokenizer, model, label_id_2_name, label_name_2_id)
    '''
    # load tokenizer and model
    device = torch.device(args.device)
    config_class, model_class, tokenizer_class = BertConfig, SequenceAndStructureFusionNetwork, BertTokenizer

    # read config.json inside a context manager so the file handle is closed
    with open(os.path.join(model_dir, "config.json"), "r") as cfp:
        config = config_class(**json.load(cfp))

    # for sequence
    subword = None
    if args.has_seq_encoder:
        seq_tokenizer = tokenizer_class.from_pretrained(
            os.path.join(model_dir, "sequence"),
            do_lower_case=args.do_lower_case
        )
        if args.subword:
            # NOTE(review): the codes file handle is left open on purpose; it is not visible
            # from here whether BPE still reads from it after construction - confirm before closing
            bpe_codes_prot = codecs.open(args.codes_file)
            subword = BPE(bpe_codes_prot, merges=-1, separator='')
    else:
        seq_tokenizer = None

    # for structure
    if args.has_struct_encoder:
        struct_tokenizer = tokenizer_class.from_pretrained(
            os.path.join(model_dir, "struct"),
            do_lower_case=args.do_lower_case
        )
    else:
        struct_tokenizer = None

    model = model_class.from_pretrained(model_dir, args=args)

    model.to(device)
    model.eval()

    # load labels: one label name per line, ids are assigned in file order;
    # lines equal to "label" are skipped
    label_filepath = args.label_filepath
    label_id_2_name = {}
    label_name_2_id = {}
    with open(label_filepath, "r") as fp:
        for line in fp:
            label_name = line.strip()
            if label_name == "label":
                continue
            label_id_2_name[len(label_id_2_name)] = label_name
            label_name_2_id[label_name] = len(label_name_2_id)

    print("-" * 25 + "label_id_2_name:" + "-" * 25)
    # the full mapping is only printed when it is small
    if len(label_id_2_name) < 20:
        print(label_id_2_name)
    print("label size: ", len(label_id_2_name))
    print("-" * 60)

    return config, subword, seq_tokenizer, struct_tokenizer, model, label_id_2_name, label_name_2_id


def transform_sample_2_feature(
        args,
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -