in mozetl/graphics/graphics_telemetry_dashboard.py [0:0]
def get_tdr_statistics():
    num_tdr_reasons = 8

    def ping_has_tdr_for(p, reason):
        return p[DeviceResetReasonKey][reason] > 0

    # Specialized versions of map_x_to_y, for TDRs. We cast to int because for
    # some reason the values Spark returns do not serialize with JSON.
    def map_reason_to_vendor(p, reason, dest_key):
        return (int(reason), {p[dest_key]: int(p[DeviceResetReasonKey][reason])})

    def map_vendor_to_reason(p, reason, dest_key):
        return (p[dest_key], {int(reason): int(p[DeviceResetReasonKey][reason])})
    # Filter out pings that do not have any TDR data. We expect this to remove
    # the vast majority of the sample set, which leaves the partition count way
    # off for the remaining data, so we repartition immediately for performance.
    tdr_subset = windows_pings.filter(
        lambda p: p.get(DeviceResetReasonKey, None) is not None
    )
    tdr_subset = tdr_subset.repartition(MaxPartitions)
    tdr_subset = tdr_subset.cache()
    # Aggregate the device reset data.
    tdr_results = tdr_subset.map(lambda p: p[DeviceResetReasonKey]).reduce(
        lambda x, y: x + y
    )
    # For each TDR reason, build an RDD of (reason, {vendor: resetCount})
    # tuples, plus the inverse (vendor, {reason: resetCount}) mapping, then
    # union these into a single series.
    reason_to_vendor_tuples = None
    vendor_to_reason_tuples = None
    for reason in range(1, num_tdr_reasons):
        # Bind the current reason via a default argument: lambdas close over
        # the loop variable by reference, and Spark serializes the closure
        # lazily, so a bare reference could see the final loop value instead.
        subset = tdr_subset.filter(
            lambda p, reason=reason: ping_has_tdr_for(p, reason)
        )
        subset = subset.cache()

        tuples = subset.map(
            lambda p, reason=reason: map_reason_to_vendor(p, reason, "vendorID")
        )
        reason_to_vendor_tuples = union_pipelines(reason_to_vendor_tuples, tuples)

        tuples = subset.map(
            lambda p, reason=reason: map_vendor_to_reason(p, reason, "vendorID")
        )
        vendor_to_reason_tuples = union_pipelines(vendor_to_reason_tuples, tuples)

    tdr_reason_to_vendor = reason_to_vendor_tuples.reduceByKey(combiner, MaxPartitions)
    tdr_vendor_to_reason = vendor_to_reason_tuples.reduceByKey(combiner, MaxPartitions)
    return {
        "tdrPings": tdr_subset.count(),
        "results": [int(value) for value in tdr_results],
        "reasonToVendor": tdr_reason_to_vendor.collect(),
        "vendorToReason": tdr_vendor_to_reason.collect(),
    }
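
# Note: union_pipelines and combiner are helpers defined elsewhere in this
# module and are not shown in this excerpt. Below is a minimal sketch of what
# they would need to do, inferred purely from how they are used above (fold
# RDDs together starting from None; merge {key: count} dicts by summing).
# This is an illustrative assumption, not the module's actual implementation.
def union_pipelines(a, b):
    # The first loop iteration passes a=None, so adopt b as the initial RDD.
    if a is None:
        return b
    return a.union(b)

def combiner(x, y):
    # Merge two {key: count} dicts, summing counts for keys present in both.
    merged = dict(x)
    for key, count in y.items():
        merged[key] = merged.get(key, 0) + count
    return merged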