in tools/cdms/cdms_reader.py [0:0]
def assemble_matches_by_primary(filename):
"""
Read a CDMS netCDF file and return a list of matches, in which secondary data
points are grouped together by their primary data point match.
This function returns matches in a different order than the 'assemble_matches' function.
In this function, all secondary data is associated with its primary match without the need
to access multiple matches.
Parameters
----------
filename : str
The CDMS netCDF file name.
Returns
-------
matches : list
List of matches. Each list element is a dictionary that maps a primary record to all of its associated secondary records.
For match m, netCDF group GROUP (PrimaryData or SecondaryData), and
group variable VARIABLE:
matches[m][GROUP]['matchID']: MatchedRecords dimension ID for the match
matches[m][GROUP]['GROUPID']: GROUP dim dimension ID for the record
matches[m][GROUP][VARIABLE]: variable value. Each VARIABLE is returned as a masked array.
ex. To access the first secondary time value available for a given match:
matches[m]['SecondaryData']['time'][0]
"""
try:
# Open the netCDF file
with Dataset(filename, 'r') as cdms_nc:
# Check that the number of groups is consistent w/ the MatchedGroups
# dimension
assert len(cdms_nc.groups) == cdms_nc.dimensions['MatchedGroups'].size,\
("Number of groups isn't the same as MatchedGroups dimension.")
matched_records = cdms_nc.dimensions['MatchedRecords'].size
primary_matches = cdms_nc.groups['PrimaryData'].dimensions['dim'].size
matches = [OrderedDict()] * primary_matches
for match in range(matched_records):
PID = int(cdms_nc.variables['matchIDs'][match][0])
if len(matches[PID]) == 0: #establishes ordered dictionary for first match[PID]
matches[PID] = OrderedDict()
for group_num, group in enumerate(cdms_nc.groups):
if group_num == 0: #primary
if group not in matches[PID].keys(): #initialization
matches[PID][group] = OrderedDict()
matches[PID][group]['matchID'] = []
matches[PID][group]['matchID'].append(match)
ID = cdms_nc.variables['matchIDs'][match][group_num]
matches[PID][group][group + 'ID'] = ID
for var in cdms_nc.groups[group].variables.keys():
matches[PID][group][var] = cdms_nc.groups[group][var][ID]
dt = num2date(matches[PID][group]['time'], cdms_nc.groups[group]['time'].units)
matches[PID][group]['datetime'] = dt
elif group_num == 1: #secondary
if group not in matches[PID].keys(): #initialization
matches[PID][group] = OrderedDict()
matches[PID][group]['matchID'] = []
matches[PID][group][group + 'ID'] = []
matches[PID][group]['datetime'] = []
matches[PID][group]['matchID'].append(match)
ID = cdms_nc.variables['matchIDs'][match][group_num]
matches[PID][group][group + 'ID'].append(ID)
for var in cdms_nc.groups[group].variables.keys():
if var not in matches[PID][group].keys():
matches[PID][group][var] = []
matches[PID][group][var].append(cdms_nc.groups[group][var][ID])
dt = num2date(matches[PID][group]['time'], cdms_nc.groups[group]['time'].units)
matches[PID][group]['datetime'].append(dt[0])
return matches
except (OSError, IOError) as err:
LOGGER.exception("Error reading netCDF file " + filename)
raise err