in utils/gradio_utils.py [0:0]
def expander_text_lengths(dstats, column_id=""):
    """Render the "Text Lengths" expander for a dataset's length statistics.

    The expander contains explanatory text, the text-length distribution
    figure, a sentence reporting the average length and its standard
    deviation, and -- when ``lengths_df`` is available -- a selectbox that
    filters a table down to the rows of one chosen length.

    Args:
        dstats: Object whose ``length_obj`` attribute exposes
            ``fig_lengths``, ``avg_length``, ``std_length`` and
            ``lengths_df`` (the latter may be ``None``).
        column_id: Suffix appended to the expander title and embedded in the
            selectbox widget key.
    """
    _TEXT_LENGTH_CAPTION = (
        "Use this widget to identify outliers, particularly suspiciously long "
        "outliers."
    )
    with st.expander(f"Text Lengths{column_id}", expanded=False):
        st.caption(_TEXT_LENGTH_CAPTION)
        st.markdown(
            "Below, you can see how the lengths of the text instances in your "
            "dataset are distributed."
        )
        st.markdown(
            "Any unexpected peaks or valleys in the distribution may help to "
            "identify instances you want to remove or augment."
        )
        st.markdown(
            "### Here is the count of different text lengths in "
            "your dataset:"
        )

        length_obj = dstats.length_obj
        fig_lengths = length_obj.fig_lengths
        # A freshly created plot is a matplotlib Figure; once it has been
        # saved and read back in it is an ndarray, which has to be shown
        # with st.image instead of st.pyplot.
        if isinstance(fig_lengths, Figure):
            st.pyplot(fig_lengths, use_container_width=True)
        else:
            try:
                st.image(fig_lengths)
            except Exception:
                # Best effort: a figure that cannot be displayed must not
                # take down the rest of the widget. logs.exception already
                # records the active exception and its traceback, so one
                # call is sufficient.
                logs.exception("Hit exception for lengths figure:")

        avg_text = str(round(length_obj.avg_length, 2))
        std_text = str(round(length_obj.std_length, 2))
        st.markdown(
            f"The average length of text instances is **{avg_text} words**, "
            f"with a standard deviation of **{std_text}**."
        )

        lengths_df = length_obj.lengths_df
        if lengths_df is not None:
            # Offer the distinct lengths from longest to shortest.
            length_options = np.sort(lengths_df["length"].unique())[::-1].tolist()
            start_id_show_lengths = st.selectbox(
                "Show examples of length:",
                length_options,
                key=f"select_show_length_{column_id}",
            )
            matching_rows = lengths_df[lengths_df["length"] == start_id_show_lengths]
            st.table(matching_rows.set_index("length"))