in lib/telemetry.py [0:0]
def collectResultsFromQuery_OS_segments(self, results, branch, segment, event_metrics, histograms):
    """Fill ``results[branch][segment]`` with bins and counts per metric.

    For every histogram named in ``self.config['histograms']`` and every
    metric named in ``self.config['pageload_event_metrics']`` the matching
    data frame is reduced to two parallel lists (``'bins'`` and ``'counts'``)
    which are stored under ``results[branch][segment]['histograms']`` and
    ``results[branch][segment]['pageload_event_metrics']`` respectively.

    Args:
        results: Nested mapping that is updated in place. The entries
            ``results[branch][segment]['histograms']`` and
            ``results[branch][segment]['pageload_event_metrics']`` must
            already exist.
        branch: Value matched against the ``"branch"`` column.
        segment: Value matched against the ``"segment"`` column, or the
            literal ``"All"`` to sum the counts of all rows of the branch
            per bucket.
        event_metrics: Mapping of metric name to a data frame with the
            columns ``branch``, ``segment``, ``bucket`` and ``counts``.
        histograms: Mapping of histogram name to a data frame with the same
            columns.

    Raises:
        AssertionError: If the bins and counts of an entry end up with
            different lengths.
    """

    def _bins_and_counts(df):
        """Return the (buckets, counts) lists for the current branch/segment."""
        if segment == "All":
            # Sum the counts of every row of the branch, bucket by bucket.
            subset = df[df["branch"] == branch][['bucket', 'counts']].groupby(['bucket']).sum()
            return list(subset.index), list(subset['counts'])
        subset = df[(df["segment"] == segment) & (df["branch"] == branch)]
        return list(subset['bucket']), list(subset['counts'])

    for histogram in self.config['histograms']:
        buckets, counts = _bins_and_counts(histograms[histogram])
        hist_config = self.config['histograms'][histogram]

        # Some clients report bucket sizes that are not real, and these buckets
        # end up having 1-5 samples in them. Filter these out entirely.
        if hist_config['kind'] == 'numerical':
            remove = []
            # The first and last bucket are never candidates: each candidate
            # is compared against both of its neighbours.
            for i in range(1, len(counts) - 1):
                if (counts[i - 1] > 1000 and counts[i] < counts[i - 1] / 100) or \
                        (counts[i + 1] > 1000 and counts[i] < counts[i + 1] / 100):
                    remove.append(i)
            # Delete from the back so the remaining indices stay valid.
            for i in sorted(remove, reverse=True):
                del buckets[i]
                del counts[i]

        # Add labels to the buckets for categorical histograms.
        if hist_config['kind'] == 'categorical':
            labels = hist_config['labels']

            # Remove overflow bucket if it exists
            if len(labels) == (len(buckets) - 1) and counts[-1] == 0:
                del buckets[-1]
                del counts[-1]

            # Add missing buckets so they line up in each branch.
            if len(labels) > len(buckets):
                for bucket, count in zip(buckets, counts):
                    print(bucket, count)
                # An explicit membership test is required here: a truthiness
                # check on the index would treat position 0 as "missing" and
                # zero out the first bucket.
                counts = [
                    counts[buckets.index(label)] if label in buckets else 0
                    for label in labels
                ]

            # Remap bucket values to the appropriate label names. Copy the
            # list so that later in-place edits (max overflow below) and the
            # stored result never alias the configuration's own label list.
            buckets = list(labels)

        # If there is a max, then overflow larger buckets into the max.
        if 'max' in hist_config:
            maxBucket = hist_config['max']
            remove = []
            maxBucketCount = 0
            for i, x in enumerate(buckets):
                if x >= maxBucket:
                    remove.append(i)
                    maxBucketCount = maxBucketCount + counts[i]
            for i in sorted(remove, reverse=True):
                del buckets[i]
                del counts[i]
            # A single bucket at the max carries everything that overflowed.
            buckets.append(maxBucket)
            counts.append(maxBucketCount)

        assert len(buckets) == len(counts)
        results[branch][segment]['histograms'][histogram] = {
            'bins': buckets,
            'counts': counts,
        }
        print(f" segment={segment} len(histogram: {histogram}) = ", len(buckets))

    for metric in self.config['pageload_event_metrics']:
        buckets, counts = _bins_and_counts(event_metrics[metric])

        assert len(buckets) == len(counts)
        results[branch][segment]['pageload_event_metrics'][metric] = {
            'bins': buckets,
            'counts': counts,
        }
        print(f" segment={segment} len(pageload event: {metric}) = ", len(buckets))