in src/protein_structure/embedding_from_esmfold.py [0:0]
def _collect_indexed_ids(csv_path, done_set):
    """Add the ids of an ``index,id`` CSV file to done_set and return the largest index seen (0 if none)."""
    largest_index = 0
    if not (csv_path and os.path.exists(csv_path)):
        return largest_index
    # newline="" leaves line-ending handling to the csv module, as its documentation recommends.
    with open(csv_path, "r", newline="") as rfp:
        for row_number, row in enumerate(csv.reader(rfp), start=1):
            # The first row is always treated as a header. Blank lines (which
            # csv.reader yields as []) and repeated "index" header rows are
            # skipped as well, instead of crashing on row[0].
            if row_number == 1 or not row or row[0] == "index":
                continue
            largest_index = max(largest_index, int(row[0]))
            done_set.add(row[1].strip())
    return largest_index


def _collect_uncompleted_ids(txt_path, done_set):
    """Add the id part (everything before the last comma) of each line of a text file to done_set."""
    if not (txt_path and os.path.exists(txt_path)):
        return
    with open(txt_path, "r") as rfp:
        for line in rfp:
            record = line.strip()
            last_comma = record.rfind(",")
            # Lines without a comma carry no separable id and are ignored.
            if last_comma > -1:
                done_set.add(record[:last_comma])


def load_done_set(result_info_path, uncompleted_path, other_fasta_id_2_idx_file, other_uncompleted_file):
    """
    Collect the ids that were already handled, so they do not need to be predicted again.

    Every path is optional: a falsy value or a path that does not exist is silently skipped.

    :param result_info_path: CSV file whose first column is an integer index and whose
        second column is the id. The first row is always skipped as a header, as are
        blank rows and any row whose first column is the literal "index".
    :param uncompleted_path: text file with one record per line; the text before the
        last comma of a line is taken as the id, and lines without a comma are ignored.
    :param other_fasta_id_2_idx_file: additional CSV file, read the same way as result_info_path.
    :param other_uncompleted_file: additional text file, read the same way as uncompleted_path.
    :return: tuple (done_set, max_uuid_index): the set of all collected ids, and the
        largest index found in the two CSV files (0 when none was read).
    :raises ValueError: if a data row of a CSV file has a non-integer first column.
    :raises IndexError: if a non-blank data row of a CSV file has no second column.
    """
    done_set = set()
    max_uuid_index = 0
    # Only the CSV files carry indices, so only they contribute to max_uuid_index.
    for csv_path in (result_info_path, other_fasta_id_2_idx_file):
        max_uuid_index = max(max_uuid_index, _collect_indexed_ids(csv_path, done_set))
    for txt_path in (uncompleted_path, other_uncompleted_file):
        _collect_uncompleted_ids(txt_path, done_set)
    return done_set, max_uuid_index