in front/admin_ui/app.py [0:0]
def _format_percentage(part, total):
    """Format ``part`` as a percentage of ``total``, rounded to two decimals.

    Returns ``"N/A"`` when ``total`` is zero, so an empty denominator (for
    example when no public dataset is listed) does not raise
    ``ZeroDivisionError``.
    """
    if not total:
        return "N/A"
    return f"{round(100 * part / total, 2)}%"


def _build_trending_coverage(datasets, responses):
    """Build the column-oriented coverage table for the trending datasets.

    Args:
        datasets: dataset ids, in the same order as ``responses``.
        responses: the ``/is-valid`` HTTP responses, one per dataset.

    Returns:
        A dict mapping column name to a list of cell values. The first
        column, "All trending datasets", lists the datasets whose response
        was 200 first, then those that returned 500, then all the others.
        Every other column is a prettified field name of the 200 responses
        and holds "✅" (field is exactly ``True``), "❌" (any other value, a
        missing field, or a dataset that returned 500) or "🚫" (any other
        non-200 status, treated as unauthorized).
    """
    valid_rows = []  # (dataset, {pretty field name: mark}) for 200 responses
    column_names = []  # pretty field names, in first-seen order
    error_datasets = []
    unauthorized_datasets = []
    for dataset, response in zip(datasets, responses):
        if response.status_code == 200:
            marks = {}
            for field, value in response.json().items():
                pretty_field = field.replace("_", " ").capitalize()
                if pretty_field not in column_names:
                    column_names.append(pretty_field)
                marks[pretty_field] = "✅" if value is True else "❌"
            valid_rows.append((dataset, marks))
        elif response.status_code == 500:
            error_datasets.append(dataset)
        else:
            unauthorized_datasets.append(dataset)

    coverage = {
        "All trending datasets": [dataset for dataset, _ in valid_rows]
        + error_datasets
        + unauthorized_datasets
    }
    for pretty_field in column_names:
        # Cells are looked up per row, so a field that is absent from one
        # response cannot shift the marks of the following datasets.
        coverage[pretty_field] = (
            [marks.get(pretty_field, "❌") for _, marks in valid_rows]
            + ["❌"] * len(error_datasets)
            + ["🚫"] * len(unauthorized_datasets)
        )
    return coverage


def fetch_home_dashboard(token):
    """Fetch the data shown in the home dashboard tables.

    Queries ``DV_ENDPOINT`` for the number of dataset infos per builder name,
    and ``HF_ENDPOINT`` for the trending datasets, whose validity is then
    checked one by one against ``DV_ENDPOINT``.

    Args:
        token: sent as a ``Bearer`` token in the ``Authorization`` header of
            the requests made to ``DV_ENDPOINT``.

    Returns:
        A dict mapping the three home dashboard table components to the
        ``gr.DataFrame`` that should replace them. A table whose source
        request did not return 200 shows a single "Error" cell instead; the
        coverage stats table keeps ``value=None`` in that case.

    Note:
        Exceptions raised by the HTTP calls themselves (timeouts, connection
        errors) are not caught here and propagate to the caller.
    """
    infos_table = home_dashboard_trending_datasets_infos_by_builder_name_table
    coverage_stats_table = home_dashboard_trending_datasets_coverage_stats_table
    coverage_table = home_dashboard_trending_datasets_coverage_table
    out = {
        infos_table: gr.DataFrame(value=None),
        coverage_stats_table: gr.DataFrame(value=None),
        coverage_table: gr.DataFrame(value=None),
    }
    headers = {"Authorization": f"Bearer {token}"}

    # --- Dataset infos by builder name ---
    response = requests.get(
        f"{DV_ENDPOINT}/admin/num-dataset-infos-by-builder-name",
        headers=headers,
        timeout=60,
    )
    if response.status_code == 200:
        num_infos_by_builder_name = response.json()
        counts = list(num_infos_by_builder_name.values())
        total_num_infos = sum(counts)
        # The public datasets are counted by exhausting the listing.
        num_public_datasets = sum(
            1 for _ in hfh.HfApi(endpoint=HF_ENDPOINT).list_datasets()
        )
        out[infos_table] = gr.DataFrame(
            visible=True,
            value=pd.DataFrame(
                {
                    "Builder name": list(num_infos_by_builder_name.keys()),
                    "Count": counts,
                    "% of all datasets with infos": [
                        _format_percentage(num_infos, total_num_infos)
                        for num_infos in counts
                    ],
                    "% of all public datasets": [
                        _format_percentage(num_infos, num_public_datasets)
                        for num_infos in counts
                    ],
                }
            ),
        )
    else:
        out[infos_table] = gr.DataFrame(
            visible=True,
            value=pd.DataFrame(
                {
                    "Error": [
                        f"❌ Failed to fetch dataset infos from {DV_ENDPOINT} (error {response.status_code})"
                    ]
                }
            ),
        )

    # --- Trending datasets coverage ---
    response = requests.get(
        f"{HF_ENDPOINT}/api/trending?type=dataset&limit=20", timeout=60
    )
    if response.status_code == 200:
        trending_datasets = [
            repo_info["repoData"]["id"]
            for repo_info in response.json()["recentlyTrending"]
        ]

        def get_is_valid_response(dataset: str):
            return requests.get(
                f"{DV_ENDPOINT}/is-valid?dataset={dataset}",
                headers=headers,
                timeout=60,
            )

        is_valid_responses = thread_map(
            get_is_valid_response,
            trending_datasets,
            desc="get_is_valid_response",
        )
        trending_datasets_coverage = _build_trending_coverage(
            trending_datasets, is_valid_responses
        )
        out[coverage_table] = gr.DataFrame(
            visible=True, value=pd.DataFrame(trending_datasets_coverage)
        )

        num_trending_datasets = len(trending_datasets)
        trending_datasets_coverage_stats = {
            "Num trending datasets": [num_trending_datasets]
        }
        for pretty_field, marks in trending_datasets_coverage.items():
            if pretty_field == "All trending datasets":
                continue
            # Share of all trending datasets for which the field is valid.
            trending_datasets_coverage_stats[pretty_field] = [
                _format_percentage(marks.count("✅"), num_trending_datasets)
            ]
        out[coverage_stats_table] = gr.DataFrame(
            visible=True,
            value=pd.DataFrame(trending_datasets_coverage_stats),
        )
    else:
        out[coverage_table] = gr.DataFrame(
            visible=True,
            value=pd.DataFrame(
                {
                    "Error": [
                        f"❌ Failed to fetch trending datasets from {HF_ENDPOINT} (error {response.status_code})"
                    ]
                }
            ),
        )
    return out