in src/filter_and_group.py [0:0]
def arrange_into_groups(datasets, group_by, column, column_timing, labels):
    """Split each dataset into per-group (timestamps, datapoints, label) series.

    Parameters
    ----------
    datasets : list of pandas.DataFrame
        Log datasets to partition; empty frames are skipped silently.
    group_by : hashable
        Column whose values partition each dataset into groups.
    column : hashable
        Column holding the Y datapoints.
    column_timing : hashable
        Column holding the X timestamps. The special name "DateTime"
        triggers conversion via matplotlib.dates.date2num.
    labels : sequence
        One display label per dataset, used to prefix each group's label.

    Returns
    -------
    tuple of (list[pandas.Series], list[pandas.Series], list[str])
        Three parallel lists with one entry per (dataset, group) pair.
        Datasets missing a required column are skipped with a printed warning.
    """
    timestamp_groups = []
    datapoint_groups = []
    group_labels = []
    for idx, df in enumerate(datasets):  # Loop through all provided log datasets
        if df.empty:
            continue
        # Guard clauses: warn (rather than raise) on a missing column so one
        # bad dataset does not abort processing of the rest.
        if group_by not in df:
            print(f"Warning: group_by group {group_by} column not in dataset with columns {df.columns}")
            continue
        if column not in df:
            print(f'Warning: column "{column}" not in dataset with columns {df.columns}')
            continue
        if column_timing not in df:
            print(f'Warning: column_timing "{column_timing}" not in dataset with columns {df.columns}')
            continue
        # A non-empty df contains both X and Y columns.
        if column_timing == "DateTime":
            # Convert datetimes to matplotlib's float date numbers for plotting.
            timing = pd.Series(matplotlib.dates.date2num(df[column_timing]))
        else:
            timing = df[column_timing]
        groups = {}  # Maps each unique group value -> [times, datapoints, label]
        for group, time, datapoint in zip(df[group_by], timing, df[column]):
            # NOTE(review): any falsy group value (None, 0, "", ...) is
            # collapsed into the shared "None" bucket — confirm lumping
            # 0/"" with None is intended, and note NaN is NOT caught here.
            if not group:
                group = f"( {group_by} = None )"  # None groups should all be put together
            if group not in groups:
                # Create a new group for each unique item
                groups[group] = [[], [], f"{labels[idx]}: {group}"]
            # Add the datapoints and time, based on the grouping
            groups[group][0].append(time)
            groups[group][1].append(datapoint)
        # Sort keys so groups print in the same order between files
        for key in sorted(groups):
            timestamp_groups.append(pd.Series(groups[key][0]))
            datapoint_groups.append(pd.Series(groups[key][1]))
            group_labels.append(groups[key][2])
    return timestamp_groups, datapoint_groups, group_labels