def generate_no_spatial_for_counties()

in data_preprocessing/postprocess/combine_multi_vars.py [0:0]


def generate_no_spatial_for_counties(yield_data_dir, ppt_file, county_location_file, out_dir, img_dir, croptype, start_month, end_month, start_index):
    """Write a CSV of spatially averaged climate features for each county-year yield record.

    For every row of the yield table whose (state, county) pair has a window in
    the county location file, each climate variable is averaged over that
    window and one row is appended to
    ``{out_dir}/{croptype}_{start_month}_{end_month}.csv``.

    Output columns, in order:
      * ``year``, ``state``, ``county``, ``yield``
      * ``<var>_<month>`` for every variable in ``DYNAMIC_CLIMATE_VARS`` and
        every month label from ``start_month`` to ``end_month`` inclusive
      * ``<var>`` for every variable in ``STATIC_CLIMATE_VARS``
      * ``<var>_mean`` for every variable in ``CLIMATE_VARS`` (mean of the
        window averages collected for that variable in this row)

    A yield row is skipped silently when:
      * its (state, county) pair is absent from the county location file,
      * any cell of the precipitation reference layer is masked inside the
        county window, or
      * ``{img_dir}/{year}.nc`` does not exist.

    Args:
        yield_data_dir: Directory containing ``{croptype}_2000_2018.csv`` with
            the columns ``Year``, ``State ANSI``, ``County ANSI`` and ``Value``.
        ppt_file: Path of a dataset with a ``ppt`` variable; only its first
            slice along the leading axis is read, and only its mask is used.
        county_location_file: CSV with the columns ``state``, ``county``,
            ``lat0``, ``lat1``, ``lon0`` and ``lon1`` (inclusive index bounds
            of each county window).
        out_dir: Directory the output CSV is written to.
        img_dir: Directory holding one ``{year}.nc`` dataset per year.
        croptype: Crop name used in the input and output file names.
        start_month: First month label used in the dynamic column names.
        end_month: Last month label (inclusive) used in the dynamic column names.
        start_index: Index along the leading axis of the yearly dynamic
            variables at which reading starts; ``end_month - start_month + 1``
            consecutive slices are read.
            NOTE(review): presumably the time index of ``start_month`` -- confirm.

    Raises:
        AssertionError: If a county window is not exactly 50 x 50 cells.
    """
    yield_data = pd.read_csv('{}/{}_2000_2018.csv'.format(yield_data_dir, croptype))[[
        'Year', 'State ANSI', 'County ANSI', 'Value']]
    yield_data.columns = ['year', 'state', 'county', 'value']
    # Thousands separators only occur when the column was parsed as text.
    # A numeric column (float *or* integer) needs no cleaning, and an integer
    # column has no `.str` accessor, so it must not take this branch.
    if not pd.api.types.is_numeric_dtype(yield_data['value']):
        yield_data['value'] = yield_data['value'].str.replace(',', '')
    yield_data = yield_data.astype({'year': int, 'state': int, 'county': int, 'value': float})

    # Only the mask of the first `ppt` slice is needed; indexing yields an
    # in-memory array, so the handle can be released immediately.
    ppt_fh = Dataset(ppt_file, 'r')
    try:
        v_ppt = ppt_fh.variables['ppt'][0, :, :]
    finally:
        ppt_fh.close()

    counties = pd.read_csv(county_location_file)
    county_dic = {
        (c.state, c.county): [c.lat0, c.lat1, c.lon0, c.lon1]
        for c in counties.itertuples()
    }

    csv_header = ['year', 'state', 'county', 'yield']
    for climate_var in DYNAMIC_CLIMATE_VARS:
        for month in map(str, range(start_month, end_month + 1)):
            csv_header.append(climate_var + "_" + month)
    for climate_var in STATIC_CLIMATE_VARS:
        csv_header.append(climate_var)
    for climate_var in CLIMATE_VARS:
        csv_header.append(climate_var + "_mean")

    # List the yearly files once instead of once per yield row.
    available_files = set(os.listdir(img_dir))

    output_file = '{}/{}_{}_{}.csv'.format(out_dir, croptype, start_month, end_month)
    n_t = end_month - start_month + 1
    # newline='' lets the csv module control line endings itself.
    with open(output_file, 'w', newline='') as f:
        writer = csv.writer(f, delimiter=',')
        writer.writerow(csv_header)
        for yd in yield_data.itertuples():
            year, state, county, value = yd.year, yd.state, yd.county, yd.value

            # no location info
            if (state, county) not in county_dic:
                continue
            lat0, lat1, lon0, lon1 = county_dic[(state, county)]
            # Every county window is expected to span exactly 50 x 50 cells.
            assert lat1 - lat0 == 49
            assert lon1 - lon0 == 49

            # Bounds are inclusive, hence the +1 on the slice stops.
            lat_window = slice(lat0, lat1 + 1)
            lon_window = slice(lon0, lon1 + 1)

            # Skip counties whose window touches masked precipitation cells.
            selected_ppt = v_ppt[lat_window, lon_window]
            if ma.count_masked(selected_ppt) != 0:
                continue

            if '{}.nc'.format(year) not in available_files:
                continue

            values = [year, state, county, value]
            value_dic = defaultdict(list)

            fh = Dataset('{}/{}.nc'.format(img_dir, year))
            try:
                for climate_var in DYNAMIC_CLIMATE_VARS:
                    for i_month in range(n_t):
                        selected_values = fh.variables[climate_var][i_month + start_index, lat_window, lon_window]
                        averaged = ma.mean(selected_values)
                        values.append(averaged)
                        value_dic[climate_var].append(averaged)
                for climate_var in STATIC_CLIMATE_VARS:
                    selected_values = fh.variables[climate_var][0, lat_window, lon_window]
                    averaged = ma.mean(selected_values)
                    values.append(averaged)
                    value_dic[climate_var].append(averaged)
            finally:
                # Release the yearly dataset even if a variable read fails.
                fh.close()

            for climate_var in CLIMATE_VARS:
                values.append(np.mean(value_dic[climate_var]))

            writer.writerow(values)