in utils/gradio_utils.py [0:0]
def sidebar_selection(ds_name_to_dict, column_id=""):
    """Build the dataset-selection widgets and return them keyed by role.

    Creates, inside a ``gr.Accordion``, dropdowns for the dataset name, its
    configuration, the text feature and the split, plus a "Calculate" button.
    The dropdowns are populated once, at build time, from the initially
    selected dataset and its first configuration.

    Args:
        ds_name_to_dict: Mapping of dataset name -> configuration name ->
            configuration dict. Each configuration dict is read at
            ``[HF_FEATURE_FIELD]["string"]`` (text features, each indexable
            so that element 0 can be compared to ``"id"``) and at
            ``["splits"]`` (a mapping whose keys are split names).
        column_id: Text interpolated into the widget labels, e.g. to tell
            two side-by-side selection panels apart.

    Returns:
        A dict with the keys ``"dset_name"``, ``"dset_config"``,
        ``"split_name"`` and ``"text_field"`` (``gr.Dropdown`` components),
        ``"label_field"`` and ``"label_names"`` (whatever ``get_label_names``
        returns for the initial dataset/config), and ``"calculate_btn"``
        (a ``gr.Button``).

    Raises:
        KeyError: If ``ds_name_to_dict`` is empty, or a configuration dict
            lacks the expected fields.
    """
    ds_names = list(ds_name_to_dict.keys())
    # Preferred initial dataset. Fall back to the first available name so the
    # config lookup below does not fail when the preferred one is missing.
    default_ds_name = "hate_speech18"
    if default_ds_name not in ds_name_to_dict and ds_names:
        default_ds_name = ds_names[0]

    with gr.Accordion(f"Choose dataset and field {column_id}", open=True):
        subheader()
        # choose a dataset to analyze
        ds_name = gr.Dropdown(
            label=f"Choose dataset to explore{column_id}:",
            choices=ds_names,
            value=default_ds_name,
        )
        # Compare against the selected *value*: the Dropdown component itself
        # never equals a string, which made the C4 branches unreachable.
        is_c4 = ds_name.value == "c4"

        # choose a config to analyze
        ds_configs = ds_name_to_dict[ds_name.value]
        # special handling for the largest-by-far dataset, C4
        if is_c4:
            config_names = ['en', 'en.noblocklist', 'realnewslike']
        else:
            config_names = list(ds_configs.keys())
        config_name = gr.Dropdown(
            label=f"Choose configuration{column_id}:",
            choices=config_names,
            value=config_names[0],
        )

        # choose a subset of num_examples
        ds_config = ds_configs[config_name.value]
        text_features = ds_config[HF_FEATURE_FIELD]["string"]
        # TODO @yacine: Explain what this is doing and why eg tp[0] could = "id"
        text = f"Which text feature from the {column_id} dataset would you like to analyze?"
        if is_c4:
            choices = [('text',)]
        else:
            choices = [tp for tp in text_features if tp[0] != "id"]
        # Dropdown entries are the string form of each feature path.
        text_choices = [str(f) for f in choices]
        text_field = gr.Dropdown(
            label=text,
            choices=text_choices,
            # No usable text feature: leave the dropdown unselected instead
            # of raising IndexError.
            value=text_choices[0] if text_choices else None,
        )

        # Choose a split and dataset size
        avail_splits = list(ds_config["splits"].keys())
        # 12.Nov note: Removing "test" because those should not be examined
        # without discussion of pros and cons, which we haven't done yet.
        if "test" in avail_splits:
            avail_splits.remove("test")
        split = gr.Dropdown(
            label=f"Which split from the{column_id} dataset would you like to analyze?",
            choices=avail_splits,
            # "test" may have been the only split; leave unselected then.
            value=avail_splits[0] if avail_splits else None,
        )

        label_field, label_names = get_label_names(
            ds_name.value, config_name.value, ds_name_to_dict
        )
        calculate_btn = gr.Button(value="Calculate", variant="primary")
        return {
            "dset_name": ds_name,
            "dset_config": config_name,
            "split_name": split,
            "text_field": text_field,
            "label_field": label_field,
            "label_names": label_names,
            "calculate_btn": calculate_btn,
        }