in eventdata/parameter_sources/weightedarray.py [0:0]
def __init__(self, json_file):
    with gzip.open(json_file, 'rt') as data_file:
        item_list = json.load(data_file)
    # 1. Calculate a histogram of all weights.
    h = self.histogram(item_list)
    # 2. Determine from the histogram the weight that delimits the bottom
    #    CUTOFF_PERCENT of items ...
    bottom_percent_weight = self.weight_of_bottom_percent(h, percent=WeightedArray.CUTOFF_PERCENT)
    # 3. ... so we can partition the items into a bottom and a top part.
    #
    # This implementation results in a peak memory usage between 200 and 300 MB per client.
    self._top_choices = self.create_items(item_list, min_weight=bottom_percent_weight)
    self._bottom_choices = self.create_items(item_list, max_weight=bottom_percent_weight)
    self._counter = 0
    # The indices start at -1 because they are incremented before each access.
    self._bottom_idx = -1
    self._top_idx = -1
    # Cache both lengths: not recalculating them on the hot code path yields
    # slightly higher peak throughput.
    self._bottom_len = len(self._bottom_choices)
    self._top_len = len(self._top_choices)
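
The helpers called in steps 1 and 2 are not part of this excerpt. Below is a minimal sketch of how they could look, written as free functions for brevity (in the class they are methods). It assumes each entry of item_list is a (weight, value) pair and that the cutoff is measured as a percentage of the total item count; both the pair layout and the percentile semantics are assumptions, not taken from the source.

from collections import Counter


def histogram(item_list):
    # Tally how many items carry each weight; item[0] is assumed to be the weight.
    return Counter(item[0] for item in item_list)


def weight_of_bottom_percent(hist, percent):
    # Walk the weights from smallest to largest and return the first weight at
    # which the cumulative item count covers `percent` percent of all items.
    total = sum(hist.values())
    cutoff = total * percent / 100.0
    seen = 0
    for weight in sorted(hist):
        seen += hist[weight]
        if seen >= cutoff:
            return weight
    return max(hist)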
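
The counter and index fields hint at how elements are served afterwards. The following is a hypothetical accessor on the same class, sketching one way the two partitions could be combined so that roughly CUTOFF_PERCENT out of every 100 accesses come from the bottom partition; the method name get_random_element and the exact ratio are assumptions, not confirmed by this excerpt.

def get_random_element(self):
    # Hypothetical accessor: serve mostly top-partition items and fall through
    # to the bottom partition for roughly CUTOFF_PERCENT out of 100 calls.
    self._counter += 1
    if self._counter % 100 < WeightedArray.CUTOFF_PERCENT:
        # Increment before access; this is why the index starts at -1.
        self._bottom_idx = (self._bottom_idx + 1) % self._bottom_len
        return self._bottom_choices[self._bottom_idx]
    # The cached length keeps the modulo cheap on this hot path.
    self._top_idx = (self._top_idx + 1) % self._top_len
    return self._top_choices[self._top_idx]

Cycling deterministically through each partition, rather than sampling with random.choice on every call, keeps the hot path cheap and makes repeated runs reproducible, which matches the throughput-oriented comments in the constructor.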