def expander_text_lengths()

in utils/gradio_utils.py [0:0]


def expander_text_lengths(dstats, column_id=""):
    """Render the collapsed "Text Lengths" expander for ``dstats``.

    The expander shows, in order: a caption, three explanatory markdown
    paragraphs, the length-distribution figure, a sentence with the
    average and standard deviation of the text lengths and, when a
    per-length dataframe is available, a select box plus a table of the
    examples having the selected length.

    Args:
        dstats: Object exposing ``length_obj`` with the attributes
            ``fig_lengths``, ``avg_length``, ``std_length`` and
            ``lengths_df`` (a dataframe with a ``"length"`` column, or
            ``None``).
        column_id: Suffix appended to the expander title and to the
            select box widget key.
    """
    caption = (
        "Use this widget to identify outliers, "
        "particularly suspiciously long outliers."
    )
    intro_paragraphs = (
        "Below, you can see how the lengths of the text instances "
        "in your dataset are distributed.",
        "Any unexpected peaks or valleys in the distribution may "
        "help to identify instances you want to remove or augment.",
        "### Here is the count of different text lengths in your dataset:",
    )

    with st.expander(f"Text Lengths{column_id}", expanded=False):
        st.caption(caption)
        for paragraph in intro_paragraphs:
            st.markdown(paragraph)

        length_stats = dstats.length_obj
        figure = length_stats.fig_lengths
        # A freshly created matplotlib plot is a Figure; after being saved
        # and loaded back it is an ndarray, which has to go through
        # st.image instead of st.pyplot.
        if not isinstance(figure, Figure):
            try:
                st.image(figure)
            except Exception as err:
                # Best effort: a broken image is logged, the rest of the
                # widget is still rendered.
                logs.exception("Hit exception for lengths figure:")
                logs.exception(err)
        else:
            st.pyplot(figure, use_container_width=True)

        mean_text = str(round(length_stats.avg_length, 2))
        std_text = str(round(length_stats.std_length, 2))
        st.markdown(
            f"The average length of text instances is **{mean_text}"
            f" words**, with a standard deviation of **{std_text}**."
        )

        lengths_table = length_stats.lengths_df
        if lengths_table is None:
            # No per-example lengths to browse.
            return

        # Offer the distinct lengths from longest to shortest.
        distinct_lengths = lengths_table["length"].unique()
        length_choices = np.sort(distinct_lengths)[::-1].tolist()
        start_id_show_lengths = st.selectbox(
            "Show examples of length:",
            length_choices,
            key=f"select_show_length_{column_id}",
        )
        has_chosen_length = lengths_table["length"] == start_id_show_lengths
        st.table(lengths_table[has_chosen_length].set_index("length"))