def log_plot_comparison()

in src/beanmachine/tutorials/utils/radon.py [0:0]


def _histogram_with_density(
    values, nbins, title, x_axis_label, hover_label, add_legend
):
    """Build one figure with a histogram of ``values`` and a KDE line on top.

    Args:
        values: One-dimensional numeric data to bin and to fit the KDE on.
        nbins: Number of histogram bins, forwarded to ``np.histogram``.
        title: Figure title.
        x_axis_label: Label for the x axis (passed to ``figure`` unchanged).
        hover_label: Name shown in the hover tooltips for the x value / bin.
        add_legend: When true, the density line and the histogram bars get
            legend entries; otherwise no legend labels are set.

    Returns:
        The styled Bokeh figure.
    """
    counts, edges = np.histogram(values, bins=nbins)

    # Estimate the density and rescale it so that its peak lines up with the
    # tallest histogram bar; both can then share the "Counts" axis.
    kde = sm.nonparametric.KDEUnivariate(values)
    kde.fit()
    scaled_density = (kde.density / kde.density.max()) * counts.max()

    # NOTE(review): newer Bokeh releases renamed plot_width/plot_height to
    # width/height -- confirm against the Bokeh version this project pins
    # before changing these keywords.
    fig = figure(
        plot_width=400,
        plot_height=400,
        title=title,
        y_axis_label="Counts",
        x_axis_label=x_axis_label,
    )

    density_legend = (
        {"legend_label": "Kernel density estimation"} if add_legend else {}
    )
    histogram_legend = {"legend_label": "Histogram"} if add_legend else {}

    # Density line and its hover tool. The line is added before the bars so
    # the renderer order matches the original implementation.
    density_source = ColumnDataSource({"x": kde.support, "y": scaled_density})
    density_glyph = fig.line(
        x="x",
        y="y",
        source=density_source,
        line_color="black",
        line_width=2.0,
        line_alpha=0.7,
        hover_line_color="brown",
        hover_line_width=3.0,
        hover_line_alpha=1.0,
        **density_legend,
    )
    fig.add_tools(
        HoverTool(
            renderers=[density_glyph],
            tooltips=[
                ("Density", ""),
                ("Count", "@y"),
                (hover_label, "@x"),
            ],
        )
    )

    # Histogram bars and their hover tool. "activity" holds a preformatted
    # "left - right" string per bin for display in the tooltip.
    lefts, rights = edges[:-1], edges[1:]
    histogram_source = ColumnDataSource(
        {
            "left": lefts,
            "right": rights,
            "top": counts,
            "bottom": np.zeros(counts.shape[0]),
            "activity": [f"{lo:.3f} - {hi:.3f}" for lo, hi in zip(lefts, rights)],
        }
    )
    histogram_glyph = fig.quad(
        left="left",
        right="right",
        top="top",
        bottom="bottom",
        source=histogram_source,
        fill_color="steelblue",
        fill_alpha=0.7,
        line_color="white",
        line_width=1.0,
        hover_color="orange",
        hover_alpha=1.0,
        hover_line_color="black",
        hover_line_width=2.0,
        **histogram_legend,
    )
    fig.add_tools(
        HoverTool(
            renderers=[histogram_glyph],
            tooltips=[
                ("Histogram", ""),
                ("Counts", "@top"),
                (hover_label, "@activity"),
            ],
        )
    )

    # Style the figure.
    fig.outline_line_color = "black"
    fig.grid.grid_line_color = "grey"
    fig.grid.grid_line_alpha = 0.2
    fig.grid.grid_line_width = 0.3

    return fig


def log_plot_comparison(data: pd.Series, nbins: int = 40):
    """Compare a histogram of the data with a histogram of log(data).

    Two 400x400 figures are placed side by side. Each shows a histogram with
    ``nbins`` bins and a kernel density estimate rescaled to the height of the
    tallest bar. The left figure uses ``data`` as given and carries the
    legend; the right figure uses ``log(data + 0.01)``.

    Args:
        data: Series to plot. ``data.name`` is used in the titles, axis labels
            and hover tooltips; it is converted with ``str`` for the tooltips,
            so an unnamed or non-string-named Series does not raise.
        nbins: Number of histogram bins used for both figures.

    Returns:
        The layout returned by ``gridplot`` with both figures in one row.
    """
    # Take the log of the given data. The 0.01 shift is applied before the
    # log -- presumably so that zero-valued entries stay finite.
    log_data = np.log(data + 0.01)

    # ``data.name`` may be None or a non-string label; ``str`` keeps
    # ``.title()`` from raising while leaving string names unchanged.
    hover_name = str(data.name).title()

    plot = _histogram_with_density(
        data,
        nbins,
        title=f"Histogram of {data.name}",
        x_axis_label=data.name,
        hover_label=hover_name,
        add_legend=True,
    )
    log_plot = _histogram_with_density(
        log_data,
        nbins,
        title=f"Histogram of log({data.name})",
        x_axis_label=f"log({data.name})",
        # Same title-cased label for the density and histogram tooltips, in
        # line with the tooltips of the untransformed plot.
        hover_label=f"log({hover_name})",
        add_legend=False,
    )

    return gridplot([[plot, log_plot]])