def cdl_upscale()

in data_preprocessing/rescaling/cdl_upscale.py [0:0]


def _aggregate_cdl_cells(cdl_value, lat_edges, lon_edges, n_lat, n_lon,
                         crop_names, kept_values, ignore):
    """Compute per-cell crop fractions and the three most frequent CDL classes.

    Args:
        cdl_value: 2-D (lat, lon) array of CDL class values; masked entries
            are excluded from every count.
        lat_edges: Row indices into ``cdl_value``; target row ``i`` covers
            source rows ``lat_edges[i]:lat_edges[i + 1]``.
        lon_edges: Column indices, used the same way as ``lat_edges``.
        n_lat: Number of target-grid rows.
        n_lon: Number of target-grid columns.
        crop_names: Crop names that get their own fraction array.
        kept_values: Set of CDL values that survive filtering when ``ignore``
            is true.
        ignore: When true, classes outside ``kept_values`` are dropped after
            the cell's pixel count has been taken.

    Returns:
        dict mapping each crop name, plus ``cdl_1``..``cdl_3`` and
        ``cdl_fraction_1``..``cdl_fraction_3``, to an ``(n_lat, n_lon)`` float
        array. Entries that were never assigned hold the sentinel ``-1.0``.
    """
    resampled = {name: np.full((n_lat, n_lon), -1.0) for name in crop_names}
    for rank in ("1", "2", "3"):
        resampled["cdl_" + rank] = np.full((n_lat, n_lon), -1.0)
        resampled["cdl_fraction_" + rank] = np.full((n_lat, n_lon), -1.0)

    for id_lat in range(n_lat):
        lat_lo, lat_hi = lat_edges[id_lat], lat_edges[id_lat + 1]
        if lat_lo >= lat_hi:
            # No source rows fall into this target row.
            continue
        for id_lon in range(n_lon):
            lon_lo, lon_hi = lon_edges[id_lon], lon_edges[id_lon + 1]
            if lon_lo >= lon_hi:
                continue

            # A plain slice selects the same window as fancy indexing with
            # explicit index ranges, without building the index arrays.
            selected = cdl_value[lat_lo:lat_hi, lon_lo:lon_hi]
            valid = ma.compressed(selected)
            # The denominator counts every unmasked pixel, including pixels
            # whose label is filtered out below.
            selected_size = float(len(valid))
            if not selected_size:
                continue

            cdl_id, cdl_count = np.unique(valid, return_counts=True)
            if ignore:
                keep = np.array([i in kept_values for i in cdl_id], dtype=bool)
                cdl_id, cdl_count = cdl_id[keep], cdl_count[keep]

            for i, c in zip(cdl_id, cdl_count):
                resampled[cdl_values_to_crops[i]][id_lat, id_lon] = c / selected_size

            # Ranks with no class left keep the -1 sentinel from np.full.
            top = np.argsort(-cdl_count)[:3]
            for rank, idx in enumerate(top, start=1):
                resampled["cdl_" + str(rank)][id_lat, id_lon] = cdl_id[idx]
                resampled["cdl_fraction_" + str(rank)][id_lat, id_lon] = \
                    cdl_count[idx] / selected_size

    return resampled


def _write_masked_variable(fh_out, name, values):
    """Create the 'f4' (lat, lon) variable ``name`` and store ``values`` with -1 masked."""
    out_var = fh_out.createVariable(name, 'f4', ('lat', 'lon',))
    # Cast to float32 first so the sentinel comparison sees what is stored.
    out_var[:] = ma.masked_equal(np.asarray(values, dtype='f4'), -1.0)


def cdl_upscale(in_dir, in_file, out_dir, out_file, reso='40km', ignore=False,
                latlon_dir='../../processed_data/prism/latlon'):
    """Aggregate a CDL NetCDF raster onto a coarser lat/lon grid.

    Reads the ``lat``, ``lon`` and ``Band1`` variables of the input file and,
    for every cell of the target grid, records the fraction of unmasked source
    pixels belonging to each crop class together with the three most frequent
    classes and their fractions. The result is written to a new NetCDF file
    with variables ``lat``, ``lon``, one ``cdl_<crop name>`` per crop,
    ``cdl_1``..``cdl_3`` and ``cdl_fraction_1``..``cdl_fraction_3``. Cells or
    classes without data are stored as masked values.

    Args:
        in_dir: Directory containing the input NetCDF file.
        in_file: Input file name.
        out_dir: Output directory; created if it does not exist.
        out_file: Output file name.
        reso: Resolution tag selecting ``lat_<reso>.npy`` / ``lon_<reso>.npy``.
        ignore: When true, classes listed in ``ignored_labels`` are excluded
            from the output. Fractions are still relative to all unmasked
            pixels of the cell.
        latlon_dir: Directory holding the target-grid ``.npy`` files. The
            default is a relative path, resolved against the current working
            directory.
    """
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    # Sets give O(1) membership tests inside the per-cell loop.
    ignored_values = {x for lis in ignored_labels.values() for x in lis}
    kept_values = {x for x in cdl_values_to_crops if x not in ignored_values}
    crop_names = [v for v in cdl_values_to_crops.values()
                  if not ignore or crops_to_cdl_values[v] in kept_values]

    # Target-grid coordinates; expected to be increasing.
    lats = np.load(os.path.join(latlon_dir, 'lat_{}.npy'.format(reso)))
    lons = np.load(os.path.join(latlon_dir, 'lon_{}.npy'.format(reso)))
    # NOTE(review): lat_bins / lon_bins are used as cell edges and must hold
    # at least len(lats) + 1 / len(lons) + 1 entries - confirm against
    # get_lat_lon_bins.
    _, _, lat_bins, lon_bins = get_lat_lon_bins(lats, lons)
    n_lat, n_lon = len(lats), len(lons)

    # The context manager closes both files even if processing fails midway.
    with Dataset(os.path.join(in_dir, in_file), 'r') as fh_in, \
            Dataset(os.path.join(out_dir, out_file), 'w') as fh_out:
        fh_out.createDimension('lat', n_lat)
        fh_out.createDimension('lon', n_lon)

        # Coordinate variables keep the source attributes but take the
        # target-grid values.
        for var, coords in (('lat', lats), ('lon', lons)):
            src_var = fh_in.variables[var]
            out_var = fh_out.createVariable(var, 'f4', (var,))
            out_var.setncatts({k: src_var.getncattr(k) for k in src_var.ncattrs()})
            out_var[:] = coords

        # Source coordinates are expected to be increasing; locate every
        # target-cell edge once instead of once per cell.
        lat_edges = np.searchsorted(fh_in.variables['lat'][:], lat_bins)
        lon_edges = np.searchsorted(fh_in.variables['lon'][:], lon_bins)

        cdl_value = fh_in.variables['Band1'][:]
        resampled = _aggregate_cdl_cells(cdl_value, lat_edges, lon_edges,
                                         n_lat, n_lon, crop_names,
                                         kept_values, ignore)

        for name in crop_names:
            # NOTE: spaces are replaced before anything else, so ' & ' ends
            # up as '_&_' in the variable name; kept as-is so existing
            # readers of the output files keep working.
            var_name = "cdl_" + name.lower().replace(' ', '_').replace('/', '_')
            _write_masked_variable(fh_out, var_name, resampled[name])
        for rank in ("1", "2", "3"):
            for prefix in ("cdl_", "cdl_fraction_"):
                _write_masked_variable(fh_out, prefix + rank, resampled[prefix + rank])