in data_preprocessing/postprocess/prism.py [0:0]
def generate_no_spatial(croptype, start_month, end_month, selected_states=None):
    """Write a CSV joining county crop yields with window-averaged climate data.

    Reads ``../../processed_data/crop_yield/<croptype>_1999_2018.csv`` and the
    county location table, then, for every yield row whose (state, county)
    pair has a location entry, averages each climate variable over the
    county's ``[lat0:lat1+1, lon0:lon1+1]`` window in the monthly NetCDF files
    ``../../processed_data/prism/combined_monthly/<year><MM>.nc``.

    Each output row holds ``year, state, county, yield``, followed by one
    column per (month, climate variable) in month-major order, followed by
    one ``<var>_mean`` column per climate variable (mean of that variable's
    monthly averages).

    Args:
        croptype: Crop name used to build the input and output file names.
        start_month: First month (inclusive) to read.
        end_month: Last month (inclusive) to read.
        selected_states: Optional collection of state codes to keep. ``None``
            or an empty collection means no state filter; in that case the
            output is ``<croptype>_<start>_<end>.csv``, otherwise
            ``<croptype>_<start>_<end>_major_states.csv``.

    Raises:
        AssertionError: If a county window is not exactly 10 x 10 index
            positions (``lat1 - lat0 == 9`` and ``lon1 - lon0 == 9``).
    """
    yield_data = pd.read_csv(
        '../../processed_data/crop_yield/{}_1999_2018.csv'.format(croptype)
    )[['Year', 'State ANSI', 'County ANSI', 'Value']]
    yield_data.columns = ['year', 'state', 'county', 'value']
    # Strip thousands separators only when the column was parsed as text.
    # A purely numeric column (int or float) has no ``.str`` accessor.
    if not pd.api.types.is_numeric_dtype(yield_data['value']):
        yield_data['value'] = yield_data['value'].str.replace(',', '')
    yield_data = yield_data.astype(
        {'year': int, 'state': int, 'county': int, 'value': float}
    )

    counties = pd.read_csv(
        '../../processed_data/counties/prism/us_counties_cro_cvm_locations.csv'
    )
    # (state, county) -> [lat0, lat1, lon0, lon1]; later rows win on duplicates.
    county_dic = {
        (c.state, c.county): [c.lat0, c.lat1, c.lon0, c.lon1]
        for c in counties.itertuples()
    }

    climate_vars = ["ppt", "tdmean", "tmax", "tmean", "tmin", "vpdmax", "vpdmin"]
    months = range(start_month, end_month + 1)

    # Header order must mirror the order in which values are appended below:
    # month-major per-variable columns first, then the per-variable means.
    csv_header = ['year', 'state', 'county', 'yield']
    for month in months:
        for climate_var in climate_vars:
            csv_header.append('{}_{}'.format(climate_var, month))
    for climate_var in climate_vars:
        csv_header.append(climate_var + "_mean")

    # Normalise the filter once so the file name and the row filter agree:
    # an empty selection is treated the same as None (no filtering) instead of
    # writing a header-only file over the unfiltered output.
    state_filter = set(selected_states) if selected_states else None

    if state_filter is None:
        output_file = '../../experiment_data/no_spatial/{}_{}_{}.csv'.format(
            croptype, start_month, end_month)
    else:
        output_file = '../../experiment_data/no_spatial/{}_{}_{}_major_states.csv'.format(
            croptype, start_month, end_month)

    # newline='' lets the csv module control line endings (avoids '\r\r\n'
    # rows on platforms that translate '\n').
    with open(output_file, 'w', newline='') as f:
        writer = csv.writer(f, delimiter=',')
        writer.writerow(csv_header)
        for yd in yield_data.itertuples():
            year, state, county, value = yd.year, yd.state, yd.county, yd.value
            if state_filter is not None and state not in state_filter:
                continue
            # No location info for this county: nothing to average over.
            if (state, county) not in county_dic:
                continue

            lat0, lat1, lon0, lon1 = county_dic[(state, county)]
            assert lat1 - lat0 == 9, \
                'unexpected lat window for {}: {}..{}'.format((state, county), lat0, lat1)
            assert lon1 - lon0 == 9, \
                'unexpected lon window for {}: {}..{}'.format((state, county), lon0, lon1)

            values = [year, state, county, value]
            value_dic = defaultdict(list)
            for month in months:
                fh = Dataset(
                    '../../processed_data/prism/combined_monthly/{}{:02}.nc'.format(year, month)
                )
                # Close the dataset even if reading or averaging fails.
                try:
                    for climate_var in climate_vars:
                        window = fh.variables[climate_var][lat0:lat1 + 1, lon0:lon1 + 1]
                        averaged = ma.mean(window)
                        values.append(averaged)
                        value_dic[climate_var].append(averaged)
                finally:
                    fh.close()

            for climate_var in climate_vars:
                values.append(np.mean(value_dic[climate_var]))
            writer.writerow(values)