in src/beanmachine/tutorials/utils/radon.py [0:0]
def _log_plot_comparison_panel(
    values: pd.Series,
    nbins: int,
    title: str,
    x_axis_label,
    density_tooltip_label: str,
    histogram_tooltip_label: str,
    *,
    show_legend: bool,
):
    """Build one histogram figure overlaid with its scaled density estimate.

    Args:
        values: Observations to bin and to estimate the density from.
        nbins: Number of histogram bins handed to ``np.histogram``.
        title: Title of the figure.
        x_axis_label: Label of the x axis; forwarded to ``figure`` unchanged.
        density_tooltip_label: Name shown next to the x value in the density
            hover tooltip.
        histogram_tooltip_label: Name shown next to the bin range in the
            histogram hover tooltip.
        show_legend: When true, both glyphs get a legend entry; when false no
            ``legend_label`` is passed at all, so no legend is created.

    Returns:
        The styled figure holding the density line and the histogram quads.
    """
    counts, edges = np.histogram(values, bins=nbins)

    kde = sm.nonparametric.KDEUnivariate(values)
    kde.fit()
    # Rescale the density so its peak has the same height as the tallest
    # histogram bar; both curves then share the "Counts" axis.
    scaled_density = (kde.density / kde.density.max()) * counts.max()

    panel = figure(
        plot_width=400,
        plot_height=400,
        title=title,
        y_axis_label="Counts",
        x_axis_label=x_axis_label,
    )

    # The legend keyword is only supplied on request; omitting it entirely is
    # what keeps a panel legend-free.
    density_legend = {"legend_label": "Kernel density estimation"} if show_legend else {}
    histogram_legend = {"legend_label": "Histogram"} if show_legend else {}

    # Density line and its hover tool (added before the histogram glyph).
    density_glyph = panel.line(
        x="x",
        y="y",
        source=ColumnDataSource({"x": kde.support, "y": scaled_density}),
        line_color="black",
        line_width=2.0,
        line_alpha=0.7,
        hover_line_color="brown",
        hover_line_width=3.0,
        hover_line_alpha=1.0,
        **density_legend,
    )
    panel.add_tools(
        HoverTool(
            renderers=[density_glyph],
            tooltips=[
                ("Density", ""),
                ("Count", "@y"),
                (density_tooltip_label, "@x"),
            ],
        )
    )

    # Histogram bars and their hover tool.
    lefts, rights = edges[:-1], edges[1:]
    histogram_glyph = panel.quad(
        left="left",
        right="right",
        top="top",
        bottom="bottom",
        source=ColumnDataSource(
            {
                "left": lefts,
                "right": rights,
                "top": counts,
                "bottom": np.zeros(counts.shape[0]),
                # Human readable bin range shown in the tooltip.
                "activity": [f"{lo:.3f} - {hi:.3f}" for lo, hi in zip(lefts, rights)],
            }
        ),
        fill_color="steelblue",
        fill_alpha=0.7,
        line_color="white",
        line_width=1.0,
        hover_color="orange",
        hover_alpha=1.0,
        hover_line_color="black",
        hover_line_width=2.0,
        **histogram_legend,
    )
    panel.add_tools(
        HoverTool(
            renderers=[histogram_glyph],
            tooltips=[
                ("Histogram", ""),
                ("Counts", "@top"),
                (histogram_tooltip_label, "@activity"),
            ],
        )
    )

    # Style the panel.
    panel.outline_line_color = "black"
    panel.grid.grid_line_color = "grey"
    panel.grid.grid_line_alpha = 0.2
    panel.grid.grid_line_width = 0.3
    return panel


def log_plot_comparison(data: pd.Series, nbins: int = 40):
    """Compare data plot with the log(data) plot.

    Two figures are produced: a histogram of ``data`` and a histogram of
    ``log(data + 0.01)``. Each one is overlaid with a kernel density estimate
    that is rescaled to the height of the tallest histogram bar, and each
    glyph has its own hover tooltip. Only the first figure carries a legend.

    Args:
        data: Series to plot. Its ``name`` is used in the titles, axis labels
            and tooltips; an unnamed or non-string name is rendered through
            ``str`` instead of raising.
        nbins: Number of bins for both histograms.

    Returns:
        The ``gridplot`` layout containing both figures in a single row.
    """
    name = data.name
    # ``str`` guards against an unnamed Series (``name is None``) or a
    # non-string name, for which calling ``.title()`` directly would fail.
    titled_name = str(name).title()

    # The 0.01 offset keeps zero-valued entries finite under the logarithm.
    log_data = np.log(data + 0.01)

    plot = _log_plot_comparison_panel(
        data,
        nbins,
        title=f"Histogram of {name}",
        x_axis_label=name,
        density_tooltip_label=f"{titled_name}",
        histogram_tooltip_label=f"{titled_name}",
        show_legend=True,
    )
    # The log-density tooltip shows the name as given, while the log-histogram
    # tooltip shows it title-cased.
    log_plot = _log_plot_comparison_panel(
        log_data,
        nbins,
        title=f"Histogram of log({name})",
        x_axis_label=f"log({name})",
        density_tooltip_label=f"log({name})",
        histogram_tooltip_label=f"log({titled_name})",
        show_legend=False,
    )
    return gridplot([[plot, log_plot]])