def assemble_matches_by_primary()

in tools/cdms/cdms_reader.py [0:0]


def assemble_matches_by_primary(filename):
    """
    Read a CDMS netCDF file and return a list of matches, in which secondary data
    points are grouped together by their primary data point match.

    This function returns matches in a different order than the 'assemble_matches' function.
    In this function, all secondary data is associated with its primary match without the need
    to access multiple matches.

    The first group in the file is treated as the primary group and the second
    group as the secondary group; any further groups are ignored.

    Parameters
    ----------
    filename : str
        The CDMS netCDF file name.

    Returns
    -------
    matches : list
        One element per record of the PrimaryData 'dim' dimension, indexed by
        the primary record ID. Each element is an ordered dictionary that maps
        a primary record to all of its associated secondary records. A primary
        record that takes part in no match is left as an empty dictionary.

        For match m, netCDF group GROUP (e.g. PrimaryData or SecondaryData), and
        group variable VARIABLE:

        matches[m][GROUP]['matchID']: list of MatchedRecords dimension IDs for the match
        matches[m][GROUP]['GROUPID']: GROUP dim dimension ID for the record
            (a single ID for the primary group, a list of IDs for the secondary group)
        matches[m][GROUP][VARIABLE]: variable value as read from the netCDF variable
            (a single value for the primary group, a list with one value per
            matched record for the secondary group)
        matches[m][GROUP]['datetime']: the 'time' variable converted with num2date
            (a single value for the primary group, a list for the secondary group)

        ex. To access the first secondary time value available for a given match:
            matches[m]['SecondaryData']['time'][0]

    Raises
    ------
    AssertionError
        If the number of groups differs from the MatchedGroups dimension size.
    OSError
        If the netCDF file cannot be read (logged, then re-raised).
    """

    try:
        # Open the netCDF file
        with Dataset(filename, 'r') as cdms_nc:
            # Check that the number of groups is consistent w/ the MatchedGroups
            # dimension. Raised explicitly instead of using `assert` so that the
            # check is not stripped under `python -O`; the exception type is the same.
            if len(cdms_nc.groups) != cdms_nc.dimensions['MatchedGroups'].size:
                raise AssertionError(
                    "Number of groups isn't the same as MatchedGroups dimension.")

            matched_records = cdms_nc.dimensions['MatchedRecords'].size
            primary_matches = cdms_nc.groups['PrimaryData'].dimensions['dim'].size

            # One independent dictionary per primary record. List multiplication
            # would make every slot refer to the same dictionary object.
            matches = [OrderedDict() for _ in range(primary_matches)]

            # Loop-invariant lookup of the match ID table.
            match_ids = cdms_nc.variables['matchIDs']

            for match in range(matched_records):
                PID = int(match_ids[match][0])
                entry = matches[PID]

                for group_num, group in enumerate(cdms_nc.groups):

                    if group_num == 0:  # primary
                        if group not in entry:
                            # The primary record is the same for every match that
                            # shares this PID, so its values are read only once.
                            nc_group = cdms_nc.groups[group]
                            ID = match_ids[match][group_num]

                            primary = OrderedDict()
                            primary['matchID'] = []
                            primary[group + 'ID'] = ID
                            for var in nc_group.variables:
                                primary[var] = nc_group[var][ID]
                            primary['datetime'] = num2date(
                                primary['time'], nc_group['time'].units)
                            entry[group] = primary

                        entry[group]['matchID'].append(match)

                    elif group_num == 1:  # secondary
                        nc_group = cdms_nc.groups[group]

                        if group not in entry:  # initialization
                            entry[group] = OrderedDict()
                            entry[group]['matchID'] = []
                            entry[group][group + 'ID'] = []
                            entry[group]['datetime'] = []
                        secondary = entry[group]

                        ID = match_ids[match][group_num]
                        secondary['matchID'].append(match)
                        secondary[group + 'ID'].append(ID)

                        for var in nc_group.variables:
                            secondary.setdefault(var, []).append(nc_group[var][ID])

                        # Convert only the time value that was just appended.
                        # Converting the whole list and taking element 0 would
                        # repeat the first secondary record's datetime for
                        # every later match of this primary.
                        dt = num2date(secondary['time'][-1:], nc_group['time'].units)
                        secondary['datetime'].append(dt[0])

            return matches
    except (OSError, IOError):
        LOGGER.exception("Error reading netCDF file " + filename)
        # Bare raise keeps the original exception and traceback intact.
        raise