def load_done_set()

in src/protein_structure/embedding_from_esmfold.py [0:0]


def load_done_set(result_info_path, uncompleted_path, other_fasta_id_2_idx_file, other_uncompleted_file):
    """
    Collect the ids already recorded on disk so that they do not need to be
    predicted again, together with the largest index seen so far.

    Ids are gathered from up to four optional files, processed in this order:
    the two index CSV files, then the two "uncompleted" files. A path that is
    empty/None or does not exist is silently ignored. Note that ids listed in
    the uncompleted files are added to the returned set as well.

    :param result_info_path: CSV file whose rows are ``index,id,...``. The
        first row is always treated as a header and skipped.
    :param uncompleted_path: text file with one ``<id>,<value>`` entry per
        line; the id is everything before the LAST comma.
    :param other_fasta_id_2_idx_file: second CSV file, same layout as
        ``result_info_path``.
    :param other_uncompleted_file: second text file, same layout as
        ``uncompleted_path``.
    :return: tuple ``(done_set, max_uuid_index)`` where ``done_set`` is the set
        of collected id strings and ``max_uuid_index`` is the largest index
        found in the CSV files (0 when none is found).
    :raises ValueError: if the first column of a CSV data row is not an integer.
    :raises IndexError: if a non-empty CSV data row has fewer than two columns.
    """
    done_set = set()
    max_uuid_index = 0
    for index_csv_path in (result_info_path, other_fasta_id_2_idx_file):
        max_uuid_index = _collect_indexed_ids(index_csv_path, done_set, max_uuid_index)
    for uncompleted_file_path in (uncompleted_path, other_uncompleted_file):
        _collect_uncompleted_ids(uncompleted_file_path, done_set)
    return done_set, max_uuid_index


def _collect_indexed_ids(csv_path, done_set, max_index):
    """Add the ids of an ``index,id,...`` CSV file to done_set and return the updated maximum index."""
    if not csv_path or not os.path.exists(csv_path):
        return max_index
    # newline="" lets the csv module do its own line-ending handling, as its
    # documentation recommends.
    with open(csv_path, "r", newline="") as rfp:
        for row_number, row in enumerate(csv.reader(rfp), start=1):
            # The first row is always a header; blank lines are parsed by
            # csv.reader as empty lists and carry no data.
            if row_number == 1 or not row:
                continue
            # Tolerate a header row repeated further down the file.
            if row[0] == "index":
                continue
            max_index = max(max_index, int(row[0]))
            done_set.add(row[1].strip())
    return max_index


def _collect_uncompleted_ids(txt_path, done_set):
    """Add the id part (text before the last comma) of every line of an uncompleted file to done_set."""
    if not txt_path or not os.path.exists(txt_path):
        return
    with open(txt_path, "r") as rfp:
        for line in rfp:
            line = line.strip()
            # Split on the LAST comma so that ids containing commas stay intact;
            # lines without any comma are ignored.
            comma_pos = line.rfind(",")
            if comma_pos > -1:
                done_set.add(line[:comma_pos])