in libs/libcommon/src/libcommon/simple_cache.py [0:0]
def _get_df(entries: list[CacheEntryFullMetadata]) -> pd.DataFrame:
    """Build a DataFrame with one row per cache entry and one typed column per field.

    Args:
        entries: cache entry metadata mappings; every entry is indexed with
            each of the column names listed in the table below.

    Returns:
        A DataFrame whose columns appear in the order of the table below,
        each one built as a Series with the dtype given there.
    """
    # Column name -> dtype requested for its Series.
    # The insertion order of this mapping is the column order of the result.
    column_dtypes = {
        "kind": "category",
        "dataset": "str",
        "config": "str",
        "split": "str",
        "http_status": "category",  # check if it's working as expected
        "error_code": "category",
        "dataset_git_revision": "str",
        "job_runner_version": pd.Int16Dtype(),
        "progress": "float",
        "updated_at": "datetime64[ns]",  # check if it's working as expected
        "failed_runs": pd.Int16Dtype(),
    }
    columns = {}
    for name, dtype in column_dtypes.items():
        values = [entry[name] for entry in entries]
        columns[name] = pd.Series(values, dtype=dtype)
    return pd.DataFrame(columns)