in mozetl/graphics/graphics_telemetry_dashboard.py [0:0]
def get_tdr_statistics():
    num_tdr_reasons = 8

    def ping_has_tdr_for(p, reason):
        return p[DeviceResetReasonKey][reason] > 0

    # Specialized versions of map_x_to_y, for TDRs. We cast to int because for
    # some reason the values Spark returns do not serialize with JSON.
    def map_reason_to_vendor(p, reason, dest_key):
        return (int(reason), {p[dest_key]: int(p[DeviceResetReasonKey][reason])})

    def map_vendor_to_reason(p, reason, dest_key):
        return (p[dest_key], {int(reason): int(p[DeviceResetReasonKey][reason])})
    # Filter out pings that do not have any TDR data. We expect this to remove
    # the vast majority of the sample set, which leaves the partition count way
    # off for the remaining data, so we repartition immediately for performance.
    tdr_subset = windows_pings.filter(
        lambda p: p.get(DeviceResetReasonKey, None) is not None
    )
    tdr_subset = tdr_subset.repartition(MaxPartitions)
    tdr_subset = tdr_subset.cache()
    # Aggregate the device reset data.
    tdr_results = tdr_subset.map(lambda p: p[DeviceResetReasonKey]).reduce(
        lambda x, y: x + y
    )
    # For each TDR reason, build an RDD of (reason, {vendor: resetCount})
    # tuples, plus the inverse (vendor, {reason: resetCount}) mapping, then
    # union these into a single series.
    reason_to_vendor_tuples = None
    vendor_to_reason_tuples = None
    for reason in range(1, num_tdr_reasons):
        # Bind the current reason via a default argument: lambdas close over
        # the loop variable by reference, and Spark serializes the closure
        # lazily, so a bare reference could see the final loop value instead.
        subset = tdr_subset.filter(
            lambda p, reason=reason: ping_has_tdr_for(p, reason)
        )
        subset = subset.cache()

        tuples = subset.map(
            lambda p, reason=reason: map_reason_to_vendor(p, reason, "vendorID")
        )
        reason_to_vendor_tuples = union_pipelines(reason_to_vendor_tuples, tuples)

        tuples = subset.map(
            lambda p, reason=reason: map_vendor_to_reason(p, reason, "vendorID")
        )
        vendor_to_reason_tuples = union_pipelines(vendor_to_reason_tuples, tuples)

    tdr_reason_to_vendor = reason_to_vendor_tuples.reduceByKey(combiner, MaxPartitions)
    tdr_vendor_to_reason = vendor_to_reason_tuples.reduceByKey(combiner, MaxPartitions)
    return {
        "tdrPings": tdr_subset.count(),
        "results": [int(value) for value in tdr_results],
        "reasonToVendor": tdr_reason_to_vendor.collect(),
        "vendorToReason": tdr_vendor_to_reason.collect(),
    }
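
# Note: union_pipelines and combiner are helpers defined elsewhere in this
# module and are not shown in this excerpt. Below is a minimal sketch of what
# they would need to do, inferred purely from how they are used above (fold
# RDDs together starting from None; merge {key: count} dicts by summing).
# This is an illustrative assumption, not the module's actual implementation.
def union_pipelines(a, b):
    # The first loop iteration passes a=None, so adopt b as the initial RDD.
    if a is None:
        return b
    return a.union(b)

def combiner(x, y):
    # Merge two {key: count} dicts, summing counts for keys present in both.
    merged = dict(x)
    for key, count in y.items():
        merged[key] = merged.get(key, 0) + count
    return merged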