in data_preprocessing/preprocess/lst.py [0:0]
def generate_monthly_average(start_year, end_year, start_month, end_month):
    """Average daily 1 km LST NetCDF files into one NetCDF file per month.

    For every ``year`` in ``range(start_year, end_year)`` and every ``month``
    in ``range(start_month, end_month)`` the daily files
    ``../../raw_data/lst/1km/YYYYMMDD.nc`` are read, the ``LST_Day_1km`` and
    ``LST_Night_1km`` variables are stacked day by day, and their mean along
    the day axis is written to
    ``../../processed_data/lst/monthly_1km/YYYYMM.nc`` as ``lst_day`` and
    ``lst_night``. ``lat`` and ``lon`` are copied from the first daily file
    found in the month. Days without a file are reported on stdout and
    skipped.

    Args:
        start_year: First year to process (inclusive).
        end_year: Year at which to stop (exclusive, as in ``range``).
        start_month: First month to process in each year (inclusive).
        end_month: Month at which to stop (exclusive, as in ``range``); the
            same month range is applied to every year.

    Raises:
        AssertionError: If a daily file's ``lat`` length is neither 3578 nor
            3579, or its ``lon`` length is not 7797.

    Note:
        Paths are relative to the current working directory. If a month has
        no daily files at all, the output file is still created but is left
        without dimensions or variables.
    """
    in_dir = '../../raw_data/lst/1km'
    out_dir = '../../processed_data/lst/monthly_1km'
    # Name of each averaged variable in the daily files -> name in the output.
    lst_names = {'LST_Day_1km': 'lst_day', 'LST_Night_1km': 'lst_night'}
    # Grid size of the output; daily files may carry one extra latitude row.
    n_lat, n_lon = 3578, 7797

    os.makedirs(out_dir, exist_ok=True)
    for year in range(start_year, end_year):
        for month in range(start_month, end_month):
            out_path = '{}/{}{:02}.nc'.format(out_dir, year, month)
            # Context managers guarantee both datasets are closed even when an
            # assertion or a read/write error interrupts the month.
            with Dataset(out_path, 'w') as fh_out:
                print(year, month)
                var_lis = defaultdict(list)
                grid_written = False
                num_days = calendar.monthrange(year, month)[1]
                # Scan the input directory once per month, not once per day.
                available = set(os.listdir(in_dir))

                for day_of_month in range(1, num_days + 1):
                    day = datetime.date(year, month, day_of_month).strftime('%Y%m%d')
                    if '{}.nc'.format(day) not in available:
                        print('Missing {}'.format(day))
                        continue

                    with Dataset('{}/{}.nc'.format(in_dir, day), 'r') as fh_in:
                        len_lat = len(fh_in.variables['lat'][:])
                        len_lon = len(fh_in.variables['lon'][:])
                        assert len_lat == n_lat or len_lat == n_lat + 1, \
                            'unexpected lat length {} in {}'.format(len_lat, day)
                        assert len_lon == n_lon, \
                            'unexpected lon length {} in {}'.format(len_lon, day)

                        for src_name, dst_name in lst_names.items():
                            if src_name not in fh_in.variables:
                                continue
                            if len_lat == n_lat:
                                daily = fh_in.variables[src_name][:]
                            else:
                                # Drop the extra trailing latitude row so every
                                # day has the same shape.
                                daily = fh_in.variables[src_name][:-1, :]
                            var_lis[dst_name].append(daily)

                        if not grid_written:
                            _copy_grid_definition(fh_in, fh_out, lst_names, n_lat)
                            grid_written = True

                for var in fh_out.variables:
                    if var == 'lat' or var == 'lon':
                        continue
                    # Build the (days, lat, lon) stack once and reuse it for
                    # both the diagnostic print and the mean.
                    stacked = ma.array(var_lis[var])
                    print(stacked.shape)
                    fh_out.variables[var][:] = stacked.mean(axis=0)


def _copy_grid_definition(fh_in, fh_out, lst_names, n_lat):
    """Create dimensions, lat/lon and empty LST variables in fh_out from fh_in.

    Every dimension of ``fh_in`` is recreated in ``fh_out`` (``lat`` is forced
    to ``n_lat``). ``lat``, ``lon`` and the variables named in ``lst_names``
    are created with the source datatype, dimensions and attributes; the LST
    variables are renamed via ``lst_names`` and left unfilled, while ``lat``
    (first ``n_lat`` values) and ``lon`` are copied.
    """
    for name, dim in fh_in.dimensions.items():
        fh_out.createDimension(name, n_lat if name == 'lat' else len(dim))

    for v_name, varin in fh_in.variables.items():
        if v_name not in lst_names and v_name not in ('lat', 'lon'):
            continue
        out_var = fh_out.createVariable(
            lst_names.get(v_name, v_name), varin.datatype, varin.dimensions)
        out_var.setncatts({k: varin.getncattr(k) for k in varin.ncattrs()})
        if v_name == 'lat':
            out_var[:] = varin[:n_lat]
        elif v_name == 'lon':
            out_var[:] = varin[:]