def _create_examples_bak()

in src/data_loader.py [0:0]


    def _create_examples_bak(lines, dataset_type, data_dir, cmap_type, cmap_thresh, embedding_type, separate_file):
        examples = []
        if separate_file:
            for row in lines:
                # prot_id, seq, pdb_index, embed_idx, label = row[0], row[1], row[2], row[3], row[4]
                prot_id, seq, seq_len, pdb_filename, ptm, mean_plddt, emb_filename, label, source = row
                # pdb_filepath = os.path.join(data_dir, "npz", "%s.npz" % pdb_index)

                cmap = None
                if cmap_type:
                    pdb_filepath = os.path.join(data_dir, "pdbs", pdb_filename)
                    if pdb_filename and os.path.isfile(pdb_filepath):
                        loaded = np.load(pdb_filepath, allow_pickle=True)
                        prot_id = loaded["prot_id"].item()
                        cmap = loaded["C_alpha"] if cmap_type == "C_alpha" else loaded["C_beta"]
                        # convect the real distance matrix into 0-1 contact map
                        cmap = np.less_equal(cmap, cmap_thresh).astype(np.int32)
                        # seqres = loaded["seqres"].item()
                embedding_info = None
                if embedding_type:
                    # embedding_filepath = os.path.join(data_dir, "embeds", "%s.pt" % embed_idx)
                    embedding_filepath = os.path.join(data_dir, "embs", emb_filename)
                    if emb_filename and os.path.isfile(embedding_filepath):
                        emb = torch.load(embedding_filepath)
                        embedding_len = emb["seq_len"]
                        if embedding_type == "contacts":
                            embedding_info = emb["contacts"].numpy()
                            embedding_len = embedding_info.shape[0]
                            embedding_d = embedding_info.shape[1]
                        elif embedding_type == "matrix":
                            embedding_info = emb["representations"][36].numpy()
                            embedding_len = embedding_info.shape[0]
                            embedding_d = embedding_info.shape[1]
                        elif embedding_type == "bos":
                            embedding_info = emb["bos_representations"][36].numpy()
                            embedding_d = embedding_info.shape[0]
                    else:
                        raise Exception("%s not exists." % embedding_filepath)
                examples.append(InputExample(
                    guid=dataset_type + "#" + prot_id,
                    seq=seq,
                    contact_map=cmap,
                    embedding_info=embedding_info,
                    embedding_len=embedding_len,
                    embedding_dim=embedding_d,
                    label=label
                ))
        else:
            for row in lines:
                prot_id = row[0]
                seqres = row[1]
                label = row[2]
                examples.append(InputExample(
                    guid=dataset_type + "#" + prot_id,
                    seq=seqres,
                    contact_map=None,
                    embedding_info=None,
                    label=label
                ))
        return examples