in utils/gradio_utils.py [0:0]
def expander_general_stats(dstats, column_id=""):
    """Render the "General Text Statistics" section for a dataset.

    Inside an accordion titled ``General Text Statistics{column_id}`` this
    shows, in order: an explanatory caption, the total word count, the word
    count after removing closed class words, the table of most common open
    class words, the number of missing values, and a summary of duplicates.

    Args:
        dstats: Object exposing the attributes read here: ``total_words``,
            ``total_open_words``, ``sorted_top_vocab_df``,
            ``text_nan_count`` and ``dups_frac``.
        column_id: Text appended verbatim to the accordion title.

    Returns:
        None. The function only emits UI elements.

    NOTE(review): the container is a ``gr.Accordion`` but every element in it
    is rendered through ``st.*`` calls -- confirm that mixing the two UI
    toolkits is intended here.
    """
    accordion_title = f"General Text Statistics{column_id}"
    with gr.Accordion(accordion_title):
        st.caption(
            "Use this widget to check whether the terms you see most "
            "represented in the dataset make sense for the goals of the dataset."
        )

        # Word counts: overall first, then with closed class words removed.
        st.markdown(f"There are {dstats.total_words!s} total words")
        st.markdown(
            f"There are {dstats.total_open_words!s} words after removing closed "
            "class words"
        )

        # Most frequent open class words, shown as a table under its heading.
        vocab_heading = (
            "The most common "
            "[open class words](https://dictionary.apa.org/open-class-words) "
            "and their counts are: "
        )
        st.markdown(vocab_heading)
        st.dataframe(dstats.sorted_top_vocab_df)

        st.markdown(
            f"There are {dstats.text_nan_count!s} missing values in the dataset."
        )

        # dups_frac is a fraction; it is shown as a percentage rounded to two
        # decimals. Anything not strictly positive gets the "0 duplicates" text.
        duplicate_fraction = dstats.dups_frac
        if duplicate_fraction > 0:
            duplicate_percent = round(duplicate_fraction * 100, 2)
            duplicates_message = (
                f"The dataset is {duplicate_percent!s}% duplicates. "
                "For more information about the duplicates, "
                "click the 'Duplicates' tab below."
            )
        else:
            duplicates_message = "There are 0 duplicate items in the dataset. "
        st.markdown(duplicates_message)