def expander_text_duplicates()

in utils/gradio_utils.py [0:0]


def expander_text_duplicates(dstats, column_id=""):
    """Render the collapsible "Text Duplicates" widget.

    The expander always shows a short explanation and a heading. Below that
    it shows either a "no duplicates" message or the duplicate fraction
    followed by a table built from the duplicate counts.

    Args:
        dstats: Statistics object. This function reads its
            ``duplicates_results``, ``dups_frac`` and ``dups_dict``
            attributes.
        column_id: Text appended verbatim to the expander title.
    """
    title = f"Text Duplicates{column_id}"
    with st.expander(title, expanded=False):
        # Static explanatory header, shown regardless of the results.
        st.caption(
            "Use this widget to identify text strings "
            "that appear more than once."
        )
        st.markdown(
            "A model's training and testing may be negatively "
            "affected by unwarranted duplicates "
            "([Lee et al., 2021](https://arxiv.org/abs/2107.06499))."
        )
        st.markdown("------")
        st.write(
            "### Here is the list of all the duplicated items "
            "and their counts in the dataset."
        )

        # Nothing to tabulate: report that and leave the expander.
        if not dstats.duplicates_results:
            st.write("There are no duplicates in this dataset! 🥳")
            return

        st.write("The fraction of the data that is a duplicate is:")
        rounded_fraction = round(dstats.dups_frac, 4)
        st.write(str(rounded_fraction))

        # TODO: Check whether this conversion is slow for large inputs --
        # should the counts be stored as dataframes instead?
        # Dataframes allow this to be interactive.
        duplicates_table = ds_utils.counter_dict_to_df(dstats.dups_dict)
        st.dataframe(duplicates_table)