def get_nyt()

in covid19_spread/data/usa/process_cases.py [0:0]


def get_nyt(metric="cases"):
    """Build a date-by-location table of a NYT county-level metric.

    Reads ``us-counties.csv`` from the checkout returned by ``update_repo``
    for the NYT ``covid-19-data`` repository, joins it to ``get_index()`` on
    ``fips`` and pivots it so that rows are dates and columns are
    ``"<subregion1_name>_<name>"`` labels.

    For ``metric == "deaths"`` the NYT pivot is returned as-is. Otherwise the
    columns whose label starts with ``"New York"`` are dropped and replaced by
    per-county columns downloaded from the NY DOH CSV, shifted forward by one
    day so they line up with the NYT reporting dates.

    Args:
        metric: Name of the value column to pivot. The NY DOH table only
            provides a ``"cases"`` column (renamed from
            ``"Cumulative Number of Positives"``), so that is the only
            non-``"deaths"`` value the second half of the function supports.

    Returns:
        A DataFrame indexed by datetime with one column per location and
        missing values filled with 0.

    Raises:
        AssertionError: If, after trimming both sources to their common last
            date, they still do not end on the same day.
    """
    print("NYT")
    data_repo = update_repo("https://github.com/nytimes/covid-19-data.git")
    df = pandas.read_csv(
        os.path.join(data_repo, "us-counties.csv"), dtype={"fips": str}
    )
    index = get_index()
    # Inner join: rows whose fips is absent from the index are dropped.
    df = df.merge(index[["fips", "subregion1_name", "name"]], on="fips")
    df["loc"] = df["subregion1_name"] + "_" + df["name"]
    pivot = df.pivot_table(values=metric, columns=["loc"], index="date")
    pivot = pivot.fillna(0)
    pivot.index = pandas.to_datetime(pivot.index)
    if metric == "deaths":
        return pivot

    # Swap out NYTimes NY state data with the NY DOH data.
    NYSTATE_URL = (
        "https://health.data.ny.gov/api/views/xdss-u53e/rows.csv?accessType=DOWNLOAD"
    )
    df = pandas.read_csv(NYSTATE_URL).rename(
        columns={"Test Date": "date", "Cumulative Number of Positives": "cases"}
    )
    df["loc"] = "New York_" + df["County"]
    df = df.pivot_table(values=metric, columns=["loc"], index="date")
    # Append the " County" suffix to every DOH column label.
    df.columns = [x + " County" for x in df.columns]
    # The NYT labels each date as the date the report comes out, not the date
    # the data corresponds to. Add 1 day to the NYS DOH data to get it to align.
    df.index = pandas.to_datetime(df.index) + datetime.timedelta(days=1)
    without_nystate = pivot[[c for c in pivot.columns if not c.startswith("New York")]]

    # Trim both sources to the most recent date that both could cover.
    last_date = min(without_nystate.index.max(), df.index.max())
    df = df[df.index <= last_date]
    without_nystate = without_nystate[without_nystate.index <= last_date]
    assert (
        df.index.max() == without_nystate.index.max()
    ), "NYT and DOH data don't matchup yet!"
    # Only take NYT data up to the date for which we have nystate data.
    # The filtered frame must be assigned back: the bare expression used
    # previously discarded its result, so nothing was trimmed when the assert
    # above is disabled (python -O).
    without_nystate = without_nystate[without_nystate.index <= df.index.max()]
    return without_nystate.merge(
        df, left_index=True, right_index=True, how="outer"
    ).fillna(0)