def collectResultsFromQuery_OS

def collectResultsFromQuery_OS_segments()

in lib/telemetry.py [0:0]
70 lines of code
27 McCabe index (conditional complexity)

  def collectResultsFromQuery_OS_segments(self, results, branch, segment, event_metrics, histograms):
    """Store per-metric bins and counts for one branch/segment into ``results``.

    For every histogram named in ``self.config['histograms']`` and every
    metric named in ``self.config['pageload_event_metrics']``, the rows for
    ``branch`` (and ``segment``) are selected from the matching DataFrame and
    written to ``results[branch][segment][<group>][<name>]`` as a dict with
    the keys ``'bins'`` and ``'counts'``.

    Histograms are post-processed according to their config entry:
      * ``kind == 'numerical'``: interior buckets that are tiny compared to
        a large neighbour are dropped.
      * ``kind == 'categorical'``: a trailing empty overflow bucket is
        dropped, missing buckets are filled with 0, and the bins are replaced
        by a copy of the configured ``labels``.
      * ``'max'`` present: all buckets >= max are folded into one max bucket.

    Args:
      results: Nested dict; ``results[branch][segment]['histograms']`` and
        ``results[branch][segment]['pageload_event_metrics']`` must already
        exist. Mutated in place.
      branch: Value matched against the ``branch`` column.
      segment: Value matched against the ``segment`` column, or ``"All"`` to
        sum the counts of every segment per bucket.
      event_metrics: Mapping of metric name to a DataFrame with the columns
        ``branch``, ``segment``, ``bucket`` and ``counts``.
      histograms: Mapping of histogram name to a DataFrame with the same
        columns.

    Returns:
      None. The output is written into ``results``.
    """

    def bins_and_counts(df):
      # "All" aggregates every segment of the branch by summing per bucket.
      if segment == "All":
        subset = df[df["branch"] == branch][['bucket', 'counts']].groupby(['bucket']).sum()
        return list(subset.index), list(subset['counts'])
      subset = df[(df["segment"] == segment) & (df["branch"] == branch)]
      return list(subset['bucket']), list(subset['counts'])

    for histogram in self.config['histograms']:
      spec = self.config['histograms'][histogram]
      buckets, counts = bins_and_counts(histograms[histogram])

      # Some clients report bucket sizes that are not real, and these buckets
      # end up having 1-5 samples in them.  Filter these out entirely.
      if spec['kind'] == 'numerical':
        # Decide on the unmodified counts first, then rebuild both lists, so
        # removing one bucket never shifts the neighbours of another.
        bogus = set()
        for i in range(1, len(counts) - 1):
          if (counts[i-1] > 1000 and counts[i] < counts[i-1]/100) or \
             (counts[i+1] > 1000 and counts[i] < counts[i+1]/100):
            bogus.add(i)
        if bogus:
          buckets = [b for i, b in enumerate(buckets) if i not in bogus]
          counts = [c for i, c in enumerate(counts) if i not in bogus]

      # Add labels to the buckets for categorical histograms.
      if spec['kind'] == 'categorical':
        labels = spec['labels']

        # Remove overflow bucket if it exists
        if len(labels) == (len(buckets) - 1) and counts[-1] == 0:
          del buckets[-1]
          del counts[-1]

        # Add missing buckets so they line up in each branch.
        if len(labels) > len(buckets):
          for bucket, count in zip(buckets, counts):
            print(bucket, count)
          # Map each bucket to the count of its first occurrence.  A plain
          # truthiness test on the list index would wrongly treat index 0
          # (the first bucket) as "missing" and zero out its count.
          count_by_bucket = {}
          for bucket, count in zip(buckets, counts):
            count_by_bucket.setdefault(bucket, count)
          counts = [count_by_bucket.get(label, 0) for label in labels]

        # Remap bucket values to the appropriate label names.  Copy the list
        # so the stored bins never alias (and can never mutate) the config.
        buckets = list(labels)

      # If there is a max, then overflow larger buckets into the max.
      if 'max' in spec:
        maxBucket = spec['max']
        kept_buckets = []
        kept_counts = []
        maxBucketCount = 0
        for bucket, count in zip(buckets, counts):
          if bucket >= maxBucket:
            maxBucketCount = maxBucketCount + count
          else:
            kept_buckets.append(bucket)
            kept_counts.append(count)
        buckets = kept_buckets + [maxBucket]
        counts = kept_counts + [maxBucketCount]

      assert len(buckets) == len(counts)
      results[branch][segment]['histograms'][histogram] = {
        'bins': buckets,
        'counts': counts,
      }
      print(f"    segment={segment} len(histogram: {histogram}) = ", len(buckets))

    for metric in self.config['pageload_event_metrics']:
      buckets, counts = bins_and_counts(event_metrics[metric])

      assert len(buckets) == len(counts)
      results[branch][segment]['pageload_event_metrics'][metric] = {
        'bins': buckets,
        'counts': counts,
      }
      print(f"    segment={segment} len(pageload event: {metric}) = ", len(buckets))