def main()

in covid19_spread/data/usa/convert.py [0:0]


def main(metric, with_features, source, resolution):
    """Convert raw per-region counts into the CSV/graph files used downstream.

    Loads the data frame produced by ``SOURCES[source](metric)``, reshapes it
    to one row per region and one column per date, writes it to
    ``data_{metric}.csv`` (plus New York and Florida subsets), optionally
    writes a same-state adjacency matrix, and optionally processes the time
    feature files in parallel.

    Args:
        metric: Passed to the loader selected by ``source`` and embedded in
            the output file names.
        with_features: When truthy, call ``create_time_features()`` and run
            ``process_time_features`` over every feature file.
        source: Key into ``SOURCES`` selecting the loader.
        resolution: ``"state"`` aggregates rows per state; ``"county"``
            keeps the rows as loaded and additionally writes
            ``state_graph.pt``.
    """
    df = SOURCES[source](metric)
    df.index = pd.to_datetime(df.index)

    # Column names are "<first>_<second>"; relabel them "<second>, <first>".
    # The code below treats the part after the last ", " as the state.
    df.columns = [c.split("_")[1] + ", " + c.split("_")[0] for c in df.columns]

    # drop all zero columns
    df = df[df.columns[(df.sum(axis=0) != 0).values]]

    df = df.transpose()  # row for each region, columns correspond to dates...

    # make sure counts never decrease over time (running maximum per row)
    df = df.cummax(axis=1)

    # throw away all-zero columns, i.e., days with no cases
    counts = df.sum(axis=0)
    df = df.iloc[:, np.where(counts > 0)[0]]

    if resolution == "state":
        df = df.groupby(lambda x: x.split(", ")[-1]).sum()
        df = df.drop(
            index=["Virgin Islands", "Northern Mariana Islands", "Puerto Rico", "Guam"],
            errors="ignore",
        )

    df.to_csv(f"{SCRIPT_DIR}/data_{metric}.csv", index_label="region")
    df[df.index.str.endswith("New York")].to_csv(
        f"{SCRIPT_DIR}/data_{metric}_ny.csv", index_label="region"
    )
    df[df.index.str.endswith("Florida")].to_csv(
        f"{SCRIPT_DIR}/data_{metric}_fl.csv", index_label="region"
    )

    if resolution == "county":
        # Build state graph: adj[i, j] == 1 iff rows i and j share a state
        # (this includes the diagonal).
        county_id = {c: i for i, c in enumerate(df.index)}
        adj = np.zeros((len(df), len(df)))
        for _, g in df.groupby(lambda x: x.split(", ")[-1]):
            idxs = np.array([county_id[c] for c in g.index])
            adj[np.ix_(idxs, idxs)] = 1

        print(adj)
        th.save(th.from_numpy(adj), f"{SCRIPT_DIR}/state_graph.pt")

    if with_features:
        create_time_features()
        res = resolution
        merge_nyc = metric == "deaths" and res == "county"

        # Each entry is (csv path, lag, resolution of the input file).
        features = [
            (f"{SCRIPT_DIR}/testing/ratio_features_{res}.csv", 0, res),
            (f"{SCRIPT_DIR}/testing/total_features_{res}.csv", 0, res),
            (f"{SCRIPT_DIR}/fb/mobility_features_{res}_fb.csv", 5, res),
            (f"{SCRIPT_DIR}/google/mobility_features_{res}_google.csv", 5, res),
            (f"{SCRIPT_DIR}/google/weather_features_{res}.csv", 5, res),
            (f"{SCRIPT_DIR}/google/epi_features_{res}.csv", 7, res),
        ]
        if res == "state":
            features.append((f"{SCRIPT_DIR}/google/hosp_features_{res}.csv", 0, res))
            features.append((f"{SCRIPT_DIR}/shifted_features_{res}.csv", 0, res))
            features.append((f"{SCRIPT_DIR}/google/vaccination_state.csv", 0, "state"))
        else:
            features.append(
                (f"{SCRIPT_DIR}/google/vaccination_state.csv", 0, "county_state")
            )

        # Signals that exist at both resolutions; "{}" is filled in with the
        # resolution of the file to read.
        for signal, lag in [
            (f"{SCRIPT_DIR}/symptom_survey/doctor-visits_smoothed_adj_cli-{{}}.csv", 2),
            (f"{SCRIPT_DIR}/symptom_survey/fb-survey_smoothed_wcli-{{}}.csv", 0),
            (
                f"{SCRIPT_DIR}/symptom_survey/fb-survey_smoothed_hh_cmnty_cli-{{}}.csv",
                0,
            ),
            (
                f"{SCRIPT_DIR}/symptom_survey/fb-survey_smoothed_wearing_mask_all-{{}}.csv",
                5,
            ),
            (
                f"{SCRIPT_DIR}/symptom_survey/fb-survey_smoothed_wothers_masked-{{}}.csv",
                5,
            ),
            (
                f"{SCRIPT_DIR}/symptom_survey/fb-survey_smoothed_wcovid_vaccinated_or_accept-{{}}.csv",
                5,
            ),
            (f"{SCRIPT_DIR}/fb/mobility_features_{{}}_fb.csv", 5),
            (f"{SCRIPT_DIR}/google/mobility_features_{{}}_google.csv", 5),
        ]:
            if res == "county":
                features.append((signal.format("county"), lag, "county"))
                features.append((signal.format("state"), lag, "county_state"))
            else:
                features.append((signal.format("state"), lag, "state"))

        # The mobility signals above repeat entries already in the base list.
        # Drop exact duplicates (keeping first-seen order) so the same
        # (path, lag, resolution) task is not handed to run_par more than once.
        # NOTE(review): assumes process_time_features is a deterministic
        # function of its arguments -- confirm.
        features = list(dict.fromkeys(features))

        tasks = [(df, pth, lag, merge_nyc, r) for pth, lag, r in features]
        run_par([process_time_features] * len(tasks), tasks, [{}] * len(tasks))