in demo-python/code/data-chunking/lib/common.py [0:0]
def plot_chunk_histogram(chunks, length_fn, title, xlabel, ylabel="Chunk Count", bins=25):
    """Plot a histogram of chunk lengths and display it with ``plt.show()``.

    X-axis ticks are placed on multiples of a step that is itself a multiple
    of 100 (never less than 100), and the axis limits are snapped to the
    outermost ticks so the bars fill the plot without gaps.

    Args:
        chunks: Iterable of chunks to measure.
        length_fn: Callable applied to each chunk (e.g. ``len``); its return
            value is the quantity that gets binned.
        title: Plot title.
        xlabel: Label for the x-axis.
        ylabel: Label for the y-axis.
        bins: Bin specification forwarded to ``plt.hist``.

    Raises:
        ValueError: If ``chunks`` yields no items.
    """

    def round_to_lowest_multiple(number, multiple):
        # Largest multiple of `multiple` that is <= number.
        return (number // multiple) * multiple

    def round_to_highest_multiple(number, multiple):
        # Smallest multiple of `multiple` that is >= number.
        return math.ceil(number / multiple) * multiple

    ys = [length_fn(chunk) for chunk in chunks]
    if not ys:
        # Fail before touching the figure, with a clearer message than the
        # "min() arg is an empty sequence" ValueError raised otherwise.
        raise ValueError("plot_chunk_histogram requires at least one chunk")
    min_y = min(ys)
    max_y = max(ys)

    counts, _, _ = plt.hist(ys, edgecolor="black", bins=bins)

    # Set y-axis limits to remove the gap at the top
    plt.ylim(0, max(counts))

    # Spacing for ticks on x-axis: roughly a fifth of the data range, rounded
    # down to a multiple of 100 and clamped to a minimum of 100.
    tick_step = max(int(round_to_lowest_multiple((max_y - min_y) / 5, 100)), 100)
    min_xtick = round_to_lowest_multiple(min_y, tick_step)
    max_xtick = round_to_highest_multiple(max_y, tick_step)

    # np.arange excludes `stop`, so the final tick is appended explicitly.
    # When all lengths are equal and already a multiple of tick_step the
    # range is empty; check for emptiness before indexing to avoid IndexError.
    xticks = list(np.arange(start=min_xtick, stop=max_xtick, step=tick_step))
    if not xticks or xticks[-1] != max_xtick:
        xticks.append(max_xtick)
    plt.xticks(xticks)

    # Snap the x-axis limits to the outer ticks to remove gaps. Skip this when
    # both limits coincide: identical limits would be a degenerate axis range,
    # so the default autoscaled limits are kept instead.
    if min_xtick < max_xtick:
        plt.xlim(min_xtick, max_xtick)

    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.title(title)
    plt.show()