in utils/gradio_utils.py [0:0]
def expander_text_perplexities(dstats, column_id=""):
    """Render the collapsed "Text Perplexities" expander for a dataset.

    The expander holds a caption, an explanatory note on how to read GPT-2
    perplexities, a horizontal rule, a header, and then either the
    perplexity table or a notice that no table is available.

    Args:
        dstats: Object exposing a ``perplexities_df`` attribute, which is
            either ``None`` or a DataFrame-like object providing ``.empty``
            and ``.reset_index(drop=True)``.
        column_id: Text appended verbatim to the expander title.

    Returns:
        None. All output goes through calls on the module-level ``st``
        object (presumably Streamlit -- confirm against the file's imports).
    """
    caption_text = "Use this widget to identify text perplexities from GPT-2."
    explanation_md = (
        "\n"
        "Outlier perplexities, especially very high values, could highlight\n"
        "an issue with an example. Smaller variations should be interpreted\n"
        "with more care, as they indicate how similar to the GPT-2 training\n"
        "corpus the examples are rather than being reflective of general\n"
        "linguistic properties.\n"
        "For more information on GPT-2,\n"
        "see its [model card](https://hf.co/gpt2).\n"
    )
    list_header = (
        "### Here is the list of the examples in the dataset, sorted by "
        "GPT-2 perplexity:"
    )
    unavailable_notice = (
        "Perplexities have not been computed yet for this dataset, or "
        "this dataset is too large for the UI (> 1,000,000 examples)."
    )

    with st.expander(f"Text Perplexities{column_id}", expanded=False):
        st.caption(caption_text)
        st.markdown(explanation_md)
        st.markdown("------")
        st.write(list_header)

        perplexities = dstats.perplexities_df
        if perplexities is not None and not perplexities.empty:
            # No sorting happens here: the frame is shown in the order it
            # arrives (presumably pre-sorted upstream), with a fresh 0..n-1
            # index replacing the existing one.
            st.dataframe(perplexities.reset_index(drop=True))
        else:
            st.write(unavailable_notice)