def _preprocess_db()

in mmcif_utils.py [0:0]


def _preprocess_db(db, name):
    """Build a 36x36 grid of ``QuadrantData`` from the rotamer table ``db[name]``.

    The function works in two passes:

    1. Every row of the table is placed into a 36x36 grid of dicts indexed by
       ``(phi_bucket, psi_bucket)``; each dict maps a rotamer index (from
       ``get_rotind``) to ``(chi means, chi sigmas, probability)``.
    2. For every grid cell, the cell and its three neighbours at
       ``+1`` in Phi and/or Psi (wrapping modulo 36) are merged into one
       ``QuadrantData`` holding, per rotamer index, the values at each of the
       four corners, sorted by decreasing mean probability.

    Args:
        db: Container indexed by ``name``. The value must provide
            ``to_dict("records")`` (presumably a pandas DataFrame -- confirm
            against the caller) whose records carry the keys ``Phi``, ``Psi``,
            ``r1``..``r4``, ``chi1Val``..``chi4Val``, ``chi1Sig``..``chi4Sig``
            and ``Probabil``.
        name: Key selecting the table inside ``db``.

    Returns:
        A 36x36 list of lists; element ``[lower_phi_bucket][lower_psi_bucket]``
        is the ``QuadrantData`` for the cell whose lower corner is that bucket.

    Raises:
        AssertionError: If a row at Phi/Psi == 180 disagrees with the data
            already stored for the equivalent -180 bucket, if a cell has no
            rotamers at all, or if the mean probabilities of a cell do not sum
            to 1 within 1e-5.
        KeyError: If a row at Phi/Psi == 180 is seen before any row has been
            stored for the same rotamer in the equivalent -180 bucket.
    """
    n_buckets = 36  # grid resolution along each of Phi and Psi
    n_corners = 4  # corners of one grid cell
    n_chi = 4  # chi1..chi4 columns per row

    df = db[name]

    # bucketed_data[phi_bucket][psi_bucket]: rotind -> (chi means, chi sigmas, prob)
    bucketed_data = [[{} for _ in range(n_buckets)] for _ in range(n_buckets)]

    for row in df.to_dict("records"):
        phi, psi = row["Phi"], row["Psi"]

        # A row at +180 is folded onto -180; it is then only cross-checked
        # against the stored -180 entry instead of being stored again.
        wraparound = False
        if phi == 180:
            wraparound = True
            phi = -180
        if psi == 180:
            wraparound = True
            psi = -180

        phi_bucket = discrete_angle_to_bucket(phi)
        psi_bucket = discrete_angle_to_bucket(psi)

        rotind = get_rotind(row["r1"], row["r2"], row["r3"], row["r4"])
        row_chimeans = np.array([row[f"chi{i}Val"] for i in range(1, n_chi + 1)])
        row_chisigmas = np.array([row[f"chi{i}Sig"] for i in range(1, n_chi + 1)])
        prob = row["Probabil"]

        bucket = bucketed_data[phi_bucket][psi_bucket]

        if wraparound:
            # Direct indexing on purpose: a missing entry surfaces as KeyError.
            stored_chimeans, stored_chisigmas, stored_prob = bucket[rotind]
            assert (
                (stored_chimeans == row_chimeans).all()
                and (stored_chisigmas == row_chisigmas).all()
                and (stored_prob == prob)
            ), (
                f"wrap-around row for rotind {rotind} in bucket "
                f"({phi_bucket}, {psi_bucket}) differs from the stored entry"
            )
        else:
            bucket[rotind] = (row_chimeans, row_chisigmas, prob)

    quadrant_data = [[None for _ in range(n_buckets)] for _ in range(n_buckets)]

    for lower_phi_bucket in range(n_buckets):
        for lower_psi_bucket in range(n_buckets):
            # Neighbouring buckets wrap around at the end of the grid.
            upper_phi_bucket = (lower_phi_bucket + 1) % n_buckets
            upper_psi_bucket = (lower_psi_bucket + 1) % n_buckets

            # Corner order fixes the meaning of the "quadrant" axis below.
            quadrants = [
                bucketed_data[lower_phi_bucket][lower_psi_bucket],
                bucketed_data[upper_phi_bucket][lower_psi_bucket],
                bucketed_data[lower_phi_bucket][upper_psi_bucket],
                bucketed_data[upper_phi_bucket][upper_psi_bucket],
            ]

            # Every rotamer index present in at least one corner, ascending.
            # Builtin int/bool are used as dtypes: the np.int / np.bool aliases
            # they replace were deprecated in NumPy 1.20 and removed in 1.24.
            rotinds = np.array(sorted(set().union(*quadrants)), dtype=int)

            assert len(rotinds) > 0, (
                f"no rotamers around bucket ({lower_phi_bucket}, {lower_psi_bucket})"
            )

            n_rot = len(rotinds)
            exists = np.zeros((n_rot, n_corners), dtype=bool)
            probs = np.zeros((n_rot, n_corners), dtype=np.float64)
            chimeans = np.zeros((n_rot, n_corners, n_chi), dtype=np.float64)
            chisigmas = np.zeros((n_rot, n_corners, n_chi), dtype=np.float64)

            for i, rotind in enumerate(rotinds):
                for qid, quadrant in enumerate(quadrants):
                    if rotind not in quadrant:
                        # Missing corner: exists stays False, values stay 0.
                        continue

                    quadrant_chimeans, quadrant_chisigmas, quadrant_prob = quadrant[rotind]

                    exists[i, qid] = True
                    probs[i, qid] = quadrant_prob
                    chimeans[i, qid] = quadrant_chimeans
                    chisigmas[i, qid] = quadrant_chisigmas

            # Mean over the four corners (absent corners count as 0), then
            # order rotamers from most to least probable; the stable sort keeps
            # ascending rotind order among ties.
            meanprobs = probs.mean(1)
            order = np.argsort(-meanprobs, kind="stable")
            meanprobs = meanprobs[order]
            cumprobs = np.cumsum(meanprobs)

            assert np.abs(cumprobs[-1] - 1) < 1e-5, (
                f"mean probabilities around bucket ({lower_phi_bucket}, "
                f"{lower_psi_bucket}) sum to {cumprobs[-1]}, expected 1"
            )

            quadrant_data[lower_phi_bucket][lower_psi_bucket] = QuadrantData(
                chimeans=chimeans[order],
                chisigmas=chisigmas[order],
                probs=probs[order],
                exists=exists[order],
                rotinds=rotinds[order],
                meanprobs=meanprobs,
                cumprobs=cumprobs,
            )

    return quadrant_data