in src/data_loader.py [0:0]
def _create_examples_bak(lines, dataset_type, data_dir, cmap_type, cmap_thresh, embedding_type, separate_file):
    """Build a list of ``InputExample`` objects from parsed dataset rows.

    Args:
        lines: iterable of rows. When ``separate_file`` is truthy every row
            must unpack into exactly nine fields ``(prot_id, seq, seq_len,
            pdb_filename, ptm, mean_plddt, emb_filename, label, source)``;
            otherwise only ``row[0]`` (id), ``row[1]`` (sequence) and
            ``row[2]`` (label) are read.
        dataset_type: string used as the guid prefix, giving
            ``"<dataset_type>#<prot_id>"``.
        data_dir: directory holding the ``pdbs`` and ``embs`` sub-directories.
        cmap_type: falsy to skip contact maps. ``"C_alpha"`` selects the
            ``C_alpha`` array of the structure file; any other truthy value
            selects ``C_beta``.
        cmap_thresh: entries of the selected array that are ``<= cmap_thresh``
            become 1 in the contact map, all others 0.
        embedding_type: falsy to skip embeddings, otherwise one of
            ``"contacts"``, ``"matrix"`` or ``"bos"``.
        separate_file: truthy when structures/embeddings live in per-protein
            files referenced by the row; falsy for plain (id, seq, label) rows.

    Returns:
        list of ``InputExample``, one per row, in input order.

    Raises:
        Exception: if ``embedding_type`` is set and the embedding file named
            by the row is empty or does not exist.
    """
    examples = []
    if not separate_file:
        # Plain rows: only id, sequence and label are available.
        for row in lines:
            prot_id = row[0]
            seqres = row[1]
            label = row[2]
            examples.append(InputExample(
                guid=dataset_type + "#" + prot_id,
                seq=seqres,
                contact_map=None,
                embedding_info=None,
                label=label
            ))
        return examples

    for row in lines:
        # The full unpack is kept so a row with the wrong number of columns
        # still fails loudly; underscore-prefixed columns are not used here.
        prot_id, seq, _seq_len, pdb_filename, _ptm, _mean_plddt, emb_filename, label, _source = row

        cmap = None
        if cmap_type:
            pdb_filepath = os.path.join(data_dir, "pdbs", pdb_filename)
            # A missing structure file is tolerated: the example simply gets
            # no contact map.
            if pdb_filename and os.path.isfile(pdb_filepath):
                # NOTE(security): allow_pickle=True can execute arbitrary code
                # when the file is untrusted; only load files you produced.
                loaded = np.load(pdb_filepath, allow_pickle=True)
                # The id stored in the structure file overrides the row's id.
                prot_id = loaded["prot_id"].item()
                cmap = loaded["C_alpha"] if cmap_type == "C_alpha" else loaded["C_beta"]
                # convert the real distance matrix into a 0-1 contact map
                cmap = np.less_equal(cmap, cmap_thresh).astype(np.int32)

        # Reset for every row: without this, a row processed with embeddings
        # disabled would hit UnboundLocalError (first row) or silently reuse
        # the previous row's length/dimension.
        # NOTE(review): None presumably matches InputExample's defaults for
        # embedding_len / embedding_dim (the plain-row branch omits them) -
        # confirm.
        embedding_info = None
        embedding_len = None
        embedding_d = None
        if embedding_type:
            embedding_filepath = os.path.join(data_dir, "embs", emb_filename)
            if not (emb_filename and os.path.isfile(embedding_filepath)):
                # Unlike the contact map, a requested embedding is mandatory.
                raise Exception("%s not exists." % embedding_filepath)
            # NOTE(security): torch.load unpickles the file; only load
            # embedding files from a trusted source.
            emb = torch.load(embedding_filepath)
            embedding_len = emb["seq_len"]
            if embedding_type == "contacts":
                embedding_info = emb["contacts"].numpy()
                embedding_len = embedding_info.shape[0]
                embedding_d = embedding_info.shape[1]
            elif embedding_type == "matrix":
                # 36 is the key of the stored representation; presumably the
                # layer index of the embedding model - TODO confirm.
                embedding_info = emb["representations"][36].numpy()
                embedding_len = embedding_info.shape[0]
                embedding_d = embedding_info.shape[1]
            elif embedding_type == "bos":
                # Here the first axis is used as the dimension, and
                # embedding_len stays at the stored "seq_len".
                embedding_info = emb["bos_representations"][36].numpy()
                embedding_d = embedding_info.shape[0]

        examples.append(InputExample(
            guid=dataset_type + "#" + prot_id,
            seq=seq,
            contact_map=cmap,
            embedding_info=embedding_info,
            embedding_len=embedding_len,
            embedding_dim=embedding_d,
            label=label
        ))
    return examples