in evalbench/dataset/dataset.py [0:0]
def load_dataset(dataset: Sequence[dict], config):
    """Convert raw dataset items into eval requests grouped by query type.

    Each item that passes ``_item_meets_config_filters(item, config)`` is
    turned into an ``EvalInputRequest`` and appended to the bucket named by
    the request's ``query_type``. Items that do not pass are skipped.

    Args:
        dataset: Raw items, each indexed by the keys ``id``, ``nl_prompt``,
            ``query_type``, ``database``, ``dialects``, ``golden_sql``,
            ``eval_query``, ``setup_sql``, ``cleanup_sql``, ``tags`` and
            ``other``.
        config: Mapping-like run configuration. It is handed to the item
            filter, and its optional ``"dialects"`` entry (default ``[]``)
            is combined with each item's own dialects via
            ``_union_dialects``.

    Returns:
        A dict with exactly the keys ``"dql"``, ``"dml"`` and ``"ddl"``,
        each mapping to the list of requests of that type in dataset order.

    Raises:
        KeyError: If the built request's ``query_type`` is not one of the
            three bucket names. Plain-dict items that lack one of the keys
            listed above also raise ``KeyError`` on lookup.
    """
    buckets: dict[str, list[EvalInputRequest]] = {
        kind: [] for kind in ("dql", "dml", "ddl")
    }

    for record in dataset:
        if _item_meets_config_filters(record, config):
            request = EvalInputRequest(
                id=record["id"],
                nl_prompt=record["nl_prompt"],
                # Lower-cased so it lines up with the bucket names above.
                query_type=record["query_type"].lower(),
                database=record["database"],
                dialects=_union_dialects(
                    record["dialects"],
                    config.get("dialects", []),
                ),
                golden_sql=record["golden_sql"],
                eval_query=record["eval_query"],
                setup_sql=record["setup_sql"],
                cleanup_sql=record["cleanup_sql"],
                tags=record["tags"],
                other=build_normalized_other(record["other"]),
            )
            # Bucket by the value stored on the request itself, not the
            # raw item field.
            bucket = buckets[request.query_type]
            bucket.append(request)

    return buckets