def generate_monthly_average()

in data_preprocessing/preprocess/lst.py [0:0]


# Daily input variable name -> name of the averaged variable in the output file.
_LST_OUTPUT_NAMES = {'LST_Day_1km': 'lst_day', 'LST_Night_1km': 'lst_night'}
# Latitude rows written to every output file; daily files that carry one
# extra row have their last row dropped.
_LST_LAT_ROWS = 3578
# Longitude columns every daily file must have.
_LST_LON_COLS = 7797


def _create_monthly_lst_layout(fh_in, fh_out):
    """Create the output dimensions and variables from a daily file's layout."""
    for name, dim in fh_in.dimensions.items():
        size = _LST_LAT_ROWS if name == 'lat' else len(dim)
        fh_out.createDimension(name, size)

    # Iterate the input's own variable order so the output keeps that order.
    for v_name, varin in fh_in.variables.items():
        if v_name not in _LST_OUTPUT_NAMES and v_name not in ('lat', 'lon'):
            continue
        out_var = fh_out.createVariable(
            _LST_OUTPUT_NAMES.get(v_name, v_name), varin.datatype, varin.dimensions)
        out_var.setncatts({k: varin.getncattr(k) for k in varin.ncattrs()})
        # Coordinates are written immediately; the LST variables are filled
        # by the caller once all days of the month have been read.
        if v_name == 'lat':
            out_var[:] = varin[:_LST_LAT_ROWS]
        elif v_name == 'lon':
            out_var[:] = varin[:]


def generate_monthly_average(start_year, end_year, start_month, end_month):
    """Average daily LST files into one netCDF file per month.

    For every selected (year, month) the daily files ``<YYYYMMDD>.nc`` found
    in ``../../raw_data/lst/1km`` are read, their ``LST_Day_1km`` and
    ``LST_Night_1km`` variables are stacked, and the mean over the day axis
    is written as ``lst_day`` / ``lst_night`` to
    ``../../processed_data/lst/monthly_1km/<YYYYMM>.nc``. Dimensions, ``lat``
    and ``lon`` are copied from the first daily file found for the month.
    Days without an input file are reported on stdout and skipped.

    Both ranges are half-open: years ``start_year .. end_year - 1`` and, in
    each of those years, months ``start_month .. end_month - 1`` are
    processed. Paths are relative to the current working directory.

    Args:
        start_year: First year to process (inclusive).
        end_year: Year at which to stop (exclusive).
        start_month: First month to process in each year (inclusive).
        end_month: Month at which to stop (exclusive).

    Raises:
        ValueError: If a daily file's ``lat`` length is neither 3578 nor 3579,
            or its ``lon`` length is not 7797.
    """
    in_dir = '../../raw_data/lst/1km'
    out_dir = '../../processed_data/lst/monthly_1km'
    os.makedirs(out_dir, exist_ok=True)
    for year in range(start_year, end_year):
        for month in range(start_month, end_month):
            out_path = '{}/{}{:02}.nc'.format(out_dir, year, month)
            # Context managers close both datasets even when a daily file
            # turns out to be malformed or unreadable.
            with Dataset(out_path, 'w') as fh_out:
                print(year, month)

                # List the input directory once per month rather than once
                # per day, and use a set for O(1) membership tests.
                available = set(os.listdir(in_dir))
                var_lis = defaultdict(list)
                layout_created = False
                num_days = calendar.monthrange(year, month)[1]
                for day_of_month in range(1, num_days + 1):
                    day = datetime.date(year, month, day_of_month).strftime('%Y%m%d')
                    if '{}.nc'.format(day) not in available:
                        print('Missing {}'.format(day))
                        continue

                    with Dataset('{}/{}.nc'.format(in_dir, day), 'r') as fh_in:
                        len_lat = len(fh_in.variables['lat'][:])
                        len_lon = len(fh_in.variables['lon'][:])
                        # Explicit checks instead of ``assert`` so they still
                        # run under ``python -O``.
                        if len_lat not in (_LST_LAT_ROWS, _LST_LAT_ROWS + 1):
                            raise ValueError(
                                'Unexpected lat size {} in {}.nc'.format(len_lat, day))
                        if len_lon != _LST_LON_COLS:
                            raise ValueError(
                                'Unexpected lon size {} in {}.nc'.format(len_lon, day))

                        for v_name, out_name in _LST_OUTPUT_NAMES.items():
                            if v_name not in fh_in.variables:
                                continue
                            values = fh_in.variables[v_name]
                            if len_lat == _LST_LAT_ROWS:
                                var_lis[out_name].append(values[:])
                            else:
                                # Drop the extra last latitude row so every
                                # day has the same number of rows.
                                var_lis[out_name].append(values[:-1, :])

                        if not layout_created:
                            _create_monthly_lst_layout(fh_in, fh_out)
                            layout_created = True

                for name, out_var in fh_out.variables.items():
                    if name in ('lat', 'lon'):
                        continue
                    # Build the (days, lat, lon) stack once; popping the list
                    # lets the per-day arrays be freed after stacking.
                    stacked = ma.array(var_lis.pop(name, []))
                    print(stacked.shape)
                    out_var[:] = stacked.mean(axis=0)