in data_preprocessing/postprocess/combine_multi_vars.py [0:0]
def generate_no_spatial_for_counties(yield_data_dir, ppt_file, county_location_file, out_dir, img_dir, croptype, start_month, end_month, start_index):
    """Write a CSV of county-window-averaged climate variables per yield record.

    For every row of the yield table that has a known county window, no masked
    precipitation cell inside that window, and a matching ``<year>.nc`` file in
    ``img_dir``, one output row is written containing:

    * ``year, state, county, yield``
    * for each variable in ``DYNAMIC_CLIMATE_VARS``, the window mean for each
      month in ``start_month..end_month`` (inclusive)
    * for each variable in ``STATIC_CLIMATE_VARS``, the window mean at time
      index 0
    * for each variable in ``CLIMATE_VARS``, the mean of the window means
      collected above for that variable

    Args:
        yield_data_dir: Directory holding ``<croptype>_2000_2018.csv`` with the
            columns ``Year``, ``State ANSI``, ``County ANSI`` and ``Value``.
        ppt_file: Dataset file with a ``ppt`` variable; only its first slice
            along the leading axis is read, and only to detect masked cells.
        county_location_file: CSV with columns ``state``, ``county``, ``lat0``,
            ``lat1``, ``lon0``, ``lon1`` (inclusive grid index bounds).
        out_dir: Directory that receives
            ``<croptype>_<start_month>_<end_month>.csv``.
        img_dir: Directory holding one ``<year>.nc`` dataset per year.
        croptype: Crop name used in the input and output file names.
        start_month: First month label used in the CSV header.
        end_month: Last month label (inclusive) used in the CSV header.
        start_index: Offset added to the 0-based month counter to obtain the
            index along the leading axis of each dynamic variable.

    Raises:
        AssertionError: If a county window is not exactly 50 x 50 cells
            (only when assertions are enabled).
    """
    yield_path = '{}/{}_2000_2018.csv'.format(yield_data_dir, croptype)
    yield_data = pd.read_csv(yield_path)[['Year', 'State ANSI', 'County ANSI', 'Value']]
    yield_data.columns = ['year', 'state', 'county', 'value']
    # Only text columns carry thousands separators. An all-integer column has
    # no ``.str`` accessor, so the comma stripping must be skipped for it.
    if not pd.api.types.is_numeric_dtype(yield_data['value']):
        yield_data['value'] = yield_data['value'].str.replace(',', '')
    yield_data = yield_data.astype({'year': int, 'state': int, 'county': int, 'value': float})

    # Read the precipitation slice and release the file handle right away.
    # NOTE(review): assumes indexing returns an in-memory array that stays
    # valid after close() (as netCDF4 does) - confirm for the Dataset in use.
    ppt_fh = Dataset(ppt_file, 'r')
    try:
        v_ppt = ppt_fh.variables['ppt'][0, :, :]
    finally:
        ppt_fh.close()

    counties = pd.read_csv(county_location_file)
    county_dic = {
        (c.state, c.county): [c.lat0, c.lat1, c.lon0, c.lon1]
        for c in counties.itertuples()
    }

    csv_header = ['year', 'state', 'county', 'yield']
    for climate_var in DYNAMIC_CLIMATE_VARS:
        for month in range(start_month, end_month + 1):
            csv_header.append('{}_{}'.format(climate_var, month))
    csv_header.extend(STATIC_CLIMATE_VARS)
    csv_header.extend('{}_mean'.format(climate_var) for climate_var in CLIMATE_VARS)

    output_file = '{}/{}_{}_{}.csv'.format(out_dir, croptype, start_month, end_month)
    n_t = end_month - start_month + 1
    # List the image directory once instead of once per yield row.
    available_files = set(os.listdir(img_dir))

    with open(output_file, 'w') as f:
        writer = csv.writer(f, delimiter=',')
        writer.writerow(csv_header)
        for yd in yield_data.itertuples():
            year, state, county, value = yd.year, yd.state, yd.county, yd.value

            # No location info for this county.
            bounds = county_dic.get((state, county))
            if bounds is None:
                continue
            lat0, lat1, lon0, lon1 = bounds
            assert lat1 - lat0 == 49, 'county window must span 50 rows'
            assert lon1 - lon0 == 49, 'county window must span 50 columns'

            # Skip windows that contain any masked precipitation cell.
            selected_ppt = v_ppt[lat0:lat1 + 1, lon0:lon1 + 1]
            if ma.count_masked(selected_ppt) != 0:
                continue

            # Skip years for which no dataset file exists.
            if '{}.nc'.format(year) not in available_files:
                continue

            values = [year, state, county, value]
            value_dic = defaultdict(list)
            fh = Dataset('{}/{}.nc'.format(img_dir, year))
            try:
                for climate_var in DYNAMIC_CLIMATE_VARS:
                    for i_month in range(n_t):
                        selected_values = fh.variables[climate_var][
                            i_month + start_index, lat0:lat1 + 1, lon0:lon1 + 1]
                        averaged = ma.mean(selected_values)
                        values.append(averaged)
                        value_dic[climate_var].append(averaged)
                for climate_var in STATIC_CLIMATE_VARS:
                    selected_values = fh.variables[climate_var][0, lat0:lat1 + 1, lon0:lon1 + 1]
                    averaged = ma.mean(selected_values)
                    values.append(averaged)
                    value_dic[climate_var].append(averaged)
            finally:
                # Always release the per-year handle, even if a read fails.
                fh.close()

            for climate_var in CLIMATE_VARS:
                values.append(np.mean(value_dic[climate_var]))
            writer.writerow(values)