def merge_various_days()

in data_preprocessing/merge/merge_various_days.py [0:0]


def merge_various_days(in_path, out_path, fout_name, doy_start=None, doy_end=None, select_vars=None):
    """Merge per-day NetCDF files into one file with a leading ``time`` axis.

    The layout of the output (dimensions, ``lat``/``lon`` coordinate
    variables, and the set of data variables) is taken from the first input
    file only. Each data variable is created with dimensions
    ``("time", "lat", "lon")`` and one slice per input file is written along
    ``time``. The ``time`` variable stores, for each file, the number of days
    between the date parsed from the file name and ``FIRST_DATE``.

    Args:
        in_path: Directory containing the input ``.nc`` files.
        out_path: Directory in which the merged file is written.
        fout_name: Output file name without the ``.nc`` extension.
        doy_start: Start of the day range passed to ``generate_doy``. If this
            or ``doy_end`` is ``None``, every ``.nc`` file in ``in_path`` is
            merged instead, ordered by the date in its name.
        doy_end: End of the day range passed to ``generate_doy``.
        select_vars: Names of the variables to merge. ``None`` selects every
            variable present in the first input file.

    Raises:
        ValueError: If the digits of a file name do not form a ``%Y%m%d`` date.

    NOTE(review): ``generate_doy`` and ``FIRST_DATE`` are defined outside this
    block; ``generate_doy(start, end, "")`` is assumed to yield file base
    names (without ``.nc``) and ``FIRST_DATE`` a ``datetime.date`` -- confirm.
    """
    out_file = os.path.join(out_path, fout_name + '.nc')

    # Build the input list BEFORE creating the output file: when out_path and
    # in_path are the same directory the (new or stale) merged file would
    # otherwise be picked up as an input.
    if doy_start is None or doy_end is None:
        out_abs = os.path.abspath(out_file)
        fnames = [
            fname[:-3]
            for fname in os.listdir(in_path)
            if fname.endswith(".nc")
            and os.path.abspath(os.path.join(in_path, fname)) != out_abs
        ]
        # Order chronologically using the date encoded in the file name digits.
        fnames.sort(key=lambda x: datetime.strptime("".join(c for c in x if c.isdigit()), '%Y%m%d'))
    else:
        fnames = list(generate_doy(doy_start, doy_end, ""))
    num_files = len(fnames)
    print("Number of files", num_files)

    num = 0
    var_list = []

    fh_out = Dataset(out_file, 'w')
    try:
        for nc_file in fnames:
            nc_doy = "".join(c for c in nc_file if c.isdigit())
            fh_in = Dataset(os.path.join(in_path, nc_file + ".nc"), 'r')
            try:
                if num == 0:
                    # The first file defines the output layout.
                    for name, dim in fh_in.dimensions.items():
                        fh_out.createDimension(name, len(dim) if not dim.isunlimited() else None)

                    fh_out.createDimension('time', num_files)
                    # Values are filled per file below, so no pre-fill is needed.
                    fh_out.createVariable('time', 'int', ("time",))

                    select_vars = list(fh_in.variables.keys()) if select_vars is None else select_vars
                    for v_name, varin in fh_in.variables.items():
                        if v_name == 'lat' or v_name == 'lon':
                            # Coordinate variables are copied verbatim.
                            outVar = fh_out.createVariable(v_name, varin.datatype, varin.dimensions)
                            outVar.setncatts({k: varin.getncattr(k) for k in varin.ncattrs()})
                            outVar[:] = varin[:]
                        elif v_name in select_vars:
                            # Data variables gain a leading time axis; every
                            # time slice is written in the per-file step below.
                            var_list.append(v_name)
                            outVar = fh_out.createVariable(v_name, varin.datatype, ("time", "lat", "lon",))
                            outVar.setncatts({k: varin.getncattr(k) for k in varin.ncattrs()})

                current_date = datetime.strptime(nc_doy, "%Y%m%d").date()
                fh_out.variables['time'][num] = (current_date - FIRST_DATE).days
                for vname in var_list:
                    var_value = fh_in.variables[vname][:]
                    fh_out.variables[vname][num, :, :] = var_value[:]
            finally:
                # Always release the input handle, even if a write failed.
                fh_in.close()

            num += 1
    finally:
        # Close the output so partial results are flushed and the handle is
        # not leaked when an input file is missing or malformed.
        fh_out.close()

    print(num, num_files)
    assert (num == num_files)