# def main()
#
# in covid19_spread/data/usa/testing/process_testing.py [0:0]


def main():
    """Download US testing data and write testing feature CSVs.

    Reads the per-state test result CSV from healthdata.gov, pivots it to one
    column per ``overall_outcome``, and replaces every value with the running
    maximum within its state. The result is joined to the region index
    returned by ``get_index()`` and turned into two feature families, each
    written at "state" and "county" resolution under ``SCRIPT_DIR``:

    * ``total_features_<res>.csv`` -- 7-column rolling mean of the
      date-to-date change in ``Total`` (Positive + Negative + Inconclusive),
      with every row divided by its own maximum.
    * ``ratio_features_<res>.csv`` -- smoothed change in ``Positive``
      divided by smoothed change in ``Total`` (not normalized further).

    The "county" frames are produced by joining the per-state rows onto
    ``get_index()`` by ``subregion1_name``, so each joined row carries its
    state's values.
    # NOTE(review): presumably get_index() has one row per county -- confirm.
    """
    raw = pd.read_csv(
        "https://beta.healthdata.gov/api/views/j8mb-icvb/rows.csv?accessType=DOWNLOAD",
        parse_dates=["date"],
    )
    outcomes = raw.pivot(
        columns=["overall_outcome"],
        values="total_results_reported",
        index=["state", "date"],
    )
    # Missing outcomes count as 0; the running max within each state (index
    # level 0) keeps every series non-decreasing.
    outcomes = outcomes.fillna(0).groupby(level=0).cummax()

    index = get_index()
    # One row per state name, used to map state codes to names.
    states = index.drop_duplicates("subregion1_name")

    with_index = outcomes.reset_index().merge(
        states, left_on="state", right_on="subregion1_code"
    )

    by_date = with_index[
        ["subregion1_name", "Negative", "Positive", "Inconclusive", "date"]
    ].set_index("date")
    by_date = by_date.rename(columns={"subregion1_name": "state_name"})
    by_date["Total"] = (
        by_date["Positive"] + by_date["Negative"] + by_date["Inconclusive"]
    )

    def zero_one(frame):
        """Divide each row by its own maximum; NaN results become 0."""
        frame = frame.fillna(0)
        frame = frame.div(frame.max(axis=1), axis=0)
        # An all-zero row divides 0 by 0; map the resulting NaN back to 0.
        return frame.fillna(0)

    def fmt_features(pivot, key, func_smooth, func_normalize):
        """Turn a date-indexed pivot into (state, county) feature frames.

        ``pivot`` is transposed so regions become rows, passed through
        ``func_smooth`` and, when given, ``func_normalize``. A ``type``
        column holding ``testing_<key>`` is added. The second returned frame
        is the first joined to ``index`` on ``subregion1_name`` and
        relabelled ``"<name>, <subregion1_name>"``.
        """
        features = pivot.transpose()
        features = func_smooth(features)
        if func_normalize is not None:
            features = func_normalize(features)
        features = features.fillna(0)
        features.index.set_names("region", inplace=True)
        features["type"] = f"testing_{key}"
        merged = features.merge(
            index, left_index=True, right_on="subregion1_name"
        )
        merged.index = merged["name"] + ", " + merged["subregion1_name"]
        return features, merged[features.columns]

    def _diff(frame):
        """Column-to-column change, smoothed by a 7-column rolling mean."""
        change = frame.diff(axis=1)
        # DataFrame.rolling(axis=1) is deprecated in recent pandas; rolling
        # over the transposed frame yields the same values on all versions.
        return change.T.rolling(7, min_periods=1).mean().T

    state_r, county_r = fmt_features(
        by_date.pivot(columns="state_name", values=["Positive", "Total"]),
        "ratio",
        # 0/0 yields NaN here, which fmt_features then fills with 0.
        lambda _df: (_diff(_df.loc["Positive"]) / _diff(_df.loc["Total"])),
        None,
    )

    state_t, county_t = fmt_features(
        by_date.pivot(columns="state_name", values="Total"),
        "Total",
        _diff,
        zero_one,
    )

    def write_features(df, res, fout):
        """Write ``type`` plus the date columns to ``<fout>_features_<res>.csv``.

        Date column labels are written as ``YYYY-MM-DD`` strings and values
        are rounded to three decimals.
        """
        date_cols = [c for c in df.columns if isinstance(c, datetime)]
        out = df[["type"] + date_cols]
        out.columns = ["type"] + [str(c.date()) for c in date_cols]
        out.round(3).to_csv(
            f"{SCRIPT_DIR}/{fout}_features_{res}.csv", index_label="region"
        )

    write_features(state_t, "state", "total")
    write_features(state_r, "state", "ratio")
    write_features(county_t, "county", "total")
    write_features(county_r, "county", "ratio")