in utils/gradio_utils.py [0:0]
def npmi_show(paired_results):
    """Render the nPMI bias results table in the Streamlit app.

    If ``paired_results`` is empty, a short explanatory message is written
    instead of a table. Otherwise the rows are sorted ascending by the first
    column, trimmed if the table is very large, styled with a diverging
    colour gradient on every column whose name contains "bias", and handed
    to ``st.dataframe``.

    Args:
        paired_results: pandas DataFrame of results. Its index is labelled
            "word" for display (presumably one row per co-occurring word;
            verify against the caller). The first column is used as the
            sort/threshold key.

    Returns:
        None. The output is written to the Streamlit page.
    """
    if paired_results.empty:
        st.markdown(
            "No words that co-occur enough times for results! Or there's a 🐛."
            " Or we're still computing this one. 🤷")
        return

    logs.debug("Results to be shown in streamlit are")
    logs.debug(paired_results)

    # Rows above this count trigger trimming; the threshold is read this many
    # rows in from each end of the sorted table.
    max_rows = 10000
    tail_offset = 5000

    sort_col = paired_results.columns[0]
    s = pd.DataFrame(paired_results.sort_values(sort_col, ascending=True))
    s.index.name = "word"
    bias_col = s.filter(like="bias").columns

    # Keep the dataframe from being crazy big.
    if s.shape[0] > max_rows:
        sorted_scores = s[sort_col]
        # Positional lookups: the index holds words, so the cut-off rows must
        # be addressed with .iloc rather than by label.
        bias_thres = max(abs(sorted_scores.iloc[tail_offset]),
                         abs(sorted_scores.iloc[-tail_offset]))
        logs.info(f"filtering with bias threshold: {bias_thres}")
        s_filtered = s[sorted_scores.abs() > bias_thres]
    else:
        s_filtered = s

    # as_cmap=True returns a matplotlib colormap usable by background_gradient
    # (sns.palplot only draws a figure and returns None).
    cm = sns.diverging_palette(270, 36, s=99, l=48, n=16, as_cmap=True)

    # NOTE: every column uses the same 3-decimal format; count columns are
    # not given separate formatting.
    styler = s_filtered.style.background_gradient(subset=bias_col, cmap=cm)
    styler = styler.format(formatter="{:,.3f}")
    styler = styler.set_properties(**{"align": "center", "width": "100em"})
    out_df = styler.set_caption(
        "nPMI scores between the selected identity terms and the words they both co-occur with")

    st.write("### Here is your dataset's bias results:")
    st.dataframe(out_df)