def combine_by_year()

in data_preprocessing/postprocess/combine_multi_vars.py [0:0]


def _open_month_source(f_dir, year, month):
    """Open the input dataset for one source and one (year, month).

    If ``f_dir`` is an existing file it is opened as-is (the same file is
    then reused for every month); otherwise it is treated as a directory
    holding files named ``<year><MM>.nc``.
    """
    if os.path.isfile(f_dir):
        return Dataset(f_dir, 'r')
    return Dataset('{}/{}{:02}.nc'.format(f_dir, year, month))


def _init_output_grid(fh_in, fh_out, months):
    """Copy dimensions and lat/lon from ``fh_in`` and add the time axis.

    Returns a dict mapping each copied dimension name to its length.
    """
    n_dim = {}
    for name, dim in fh_in.dimensions.items():
        n_dim[name] = len(dim)
        fh_out.createDimension(name, len(dim))

    fh_out.createDimension('time', len(months))
    time_var = fh_out.createVariable('time', 'int', ("time",))
    time_var[:] = months

    for v_name, varin in fh_in.variables.items():
        if v_name in ('lat', 'lon'):
            coord_var = fh_out.createVariable(v_name, varin.datatype, varin.dimensions)
            coord_var.setncatts({k: varin.getncattr(k) for k in varin.ncattrs()})
            coord_var[:] = varin[:]

    return n_dim


def combine_by_year(start_month, end_month, dir_var_tuple_list):
    """Merge variables from several netCDF sources into one file per year.

    For every year in 2000..2017 a file
    ``../../experiment_data/spatial_temporal/nc_files_unmasked/<year>.nc``
    is created with a ``time`` axis covering ``start_month..end_month``
    (inclusive) and one ``(time, lat, lon)`` float variable per selected
    input variable.

    Dimensions and the ``lat``/``lon`` variables are copied from the first
    source opened. Every variable other than ``ppt`` has its missing cells
    replaced by a per-variable fill value and is then masked with the mask
    of ``ppt``; ``ppt`` itself is copied unchanged.

    NOTE(review): the ``ppt`` mask is read only while processing the first
    month of each year and is reused for all later months -- confirm that
    the mask is meant to be static within a year.

    Args:
        start_month: first month to include (inclusive).
        end_month: last month to include (inclusive).
        dir_var_tuple_list: sequence of ``(f_dir, selected_vars)`` pairs.
            ``f_dir`` is either a path to a single netCDF file (used for
            every month) or a directory containing ``<year><MM>.nc`` files.
            ``selected_vars`` holds the variable names to copy from that
            source. The first pair must refer to a dataset containing a
            ``ppt`` variable, because its mask is needed before any other
            variable can be written.

    Raises:
        ValueError: if ``start_month > end_month``, or if no ``ppt`` mask
            is available when a source is processed.
        KeyError: if a selected non-``ppt`` variable has no configured
            fill value.
    """
    if start_month > end_month:
        raise ValueError(
            'start_month ({}) must not be greater than end_month ({})'.format(
                start_month, end_month))

    fill_value_dic = {'ndvi': -0.2, 'evi': -0.2, 'elevation': 0,
                      'lst_day': 280, 'lst_night': 280, 'sand': 101, 'clay': 101, 'silt': 101}

    months = list(range(start_month, end_month + 1))
    n_t = len(months)

    for year in range(2000, 2018):
        fh_out = Dataset('../../experiment_data/spatial_temporal/nc_files_unmasked/{}.nc'.format(year), 'w')
        # try/finally guarantees the output file is closed (and flushed)
        # even when one of the inputs is missing or malformed.
        try:
            var_list = []
            n_dim = None          # filled from the very first source opened
            first_month = True    # output variables are declared during month 0
            ppt_mask = None

            for i_month, month in enumerate(months):
                for (f_dir, selected_vars) in dir_var_tuple_list:
                    fh_in = _open_month_source(f_dir, year, month)
                    try:
                        if n_dim is None:
                            n_dim = _init_output_grid(fh_in, fh_out, months)

                        if first_month:
                            for v_name in fh_in.variables:
                                if v_name in selected_vars:
                                    var_list.append(v_name)
                                    out_var = fh_out.createVariable(
                                        v_name, 'f4', ("time", "lat", "lon",))
                                    # Attributes of the source variable are
                                    # not copied to the output variable.
                                    out_var[:] = ma.empty((n_t, n_dim['lat'], n_dim['lon']))
                                if v_name == 'ppt':
                                    ppt_mask = ma.getmaskarray(fh_in.variables['ppt'][:])

                        # Explicit check instead of `assert`, which is
                        # stripped when Python runs with -O.
                        if ppt_mask is None:
                            raise ValueError(
                                "no 'ppt' mask available: the first entry of "
                                "dir_var_tuple_list must contain a 'ppt' variable")

                        for vname in selected_vars:
                            if vname != 'ppt':
                                var_value = ma.filled(fh_in.variables[vname][:],
                                                      fill_value=fill_value_dic[vname])
                                var_value = ma.array(var_value, mask=ppt_mask)
                            else:
                                var_value = fh_in.variables[vname][:]
                            fh_out.variables[vname][i_month, :, :] = var_value
                    finally:
                        fh_in.close()

                first_month = False

            print(var_list)
        finally:
            fh_out.close()