def cif_to_embed()

in mmcif_utils.py [0:0]
33 lines of code
13 McCabe index (conditional complexity)

def cif_to_embed(cif_file, ix=None, parse_skip=False):
    """
    Parse a CIF file into per-chain arrays of node embeddings.

    The structure is read with ``gemmi.read_structure`` and every chain of
    every model is visited. Residues whose name is not in ``residue_names``
    are ignored, except that a DNA residue (DA, DC, DG or DT) aborts the
    whole parse. Each remaining residue is converted by
    ``parse_residue_embed`` and the rows it returns are concatenated per
    chain.

    # Embedding format for nodes (produced by parse_residue_embed;
    # NOTE(review): the exact layout is not visible here -- confirm against
    # that helper):
    # 'one hot amino acid' amino type of molecule
    # 'x, y, z' positional encoding
    # 'one hot representation of atom type', e.g. C, CA, N, O, ...

    Args:
        cif_file: CIF file to load; passed unchanged to
            ``gemmi.read_structure``.
        ix: If not None, only the chain at this position within each model
            is parsed and every other chain is skipped.
        parse_skip: If True, the list of skipped residue indices is returned
            as a third element.

    Returns:
        ``(st, results)``, or ``(st, results, skips)`` when ``parse_skip``
        is True, where

        * ``st`` is the structure object returned by gemmi,
        * ``results`` holds one 1-tuple ``(node_embeddings,)`` per parsed
          chain, ``node_embeddings`` being a numpy array,
        * ``skips`` holds the within-chain index of every residue for which
          ``parse_residue_embed`` returned nothing. Indices from all chains
          and models share this one list.

        If a DNA residue is encountered, every element of the returned tuple
        is None (``(None, None)`` or ``(None, None, None)``), so the result
        can always be unpacked with the arity selected by ``parse_skip``.
    """
    st = gemmi.read_structure(cif_file)

    results = []
    skips = []
    for model in st:
        for i, chain in enumerate(model):
            # Restrict parsing to a single chain position when requested.
            if ix is not None and ix != i:
                continue

            node_embeddings = []
            for j, residue in enumerate(chain):
                if residue.name not in residue_names:
                    # Skip over any structure that contains nucleotides.
                    # The arity of the sentinel matches the normal return so
                    # that callers unpacking three values do not crash.
                    if residue.name in ("DA", "DC", "DG", "DT"):
                        if parse_skip:
                            return None, None, None
                        return None, None
                    # Any other unknown residue is silently ignored.
                    continue

                # The residue object contains information about the residue,
                # including identity and spatial coordinates for atoms in the
                # residue. We parse this into a dense encoding, for feeding
                # into a neural network.
                node_embed = parse_residue_embed(residue)

                # len() rather than truthiness: the helper's return type is
                # not visible here and may be a numpy array, whose truth
                # value is ambiguous.
                if len(node_embed) == 0:
                    skips.append(j)

                node_embeddings.extend(node_embed)

            # Each chain contributes a 1-tuple holding its embedding array.
            results.append((np.array(node_embeddings),))

    if parse_skip:
        return st, results, skips
    return st, results