in evalbench/dataset/dataset.py [0:0]
def load_dataset_from_bird_format(dataset: Sequence[dict]):
    """Convert BIRD-style records into ``EvalInputRequest`` objects.

    Each record is read through the keys ``question_id``, ``question``,
    ``evidence``, ``db_id``, ``SQL`` and ``difficulty``; a missing key
    raises ``KeyError``.

    Args:
        dataset: Records to convert, one mapping per example.

    Returns:
        A dict with the keys ``"dql"``, ``"dml"`` and ``"ddl"``, each mapping
        to a list of requests. Every request is built with query type
        ``"dql"`` and dialect ``"sqlite"``, and is stored under whatever
        ``query_type`` the constructed request reports.
    """
    buckets: dict[str, list[EvalInputRequest]] = {"dql": [], "dml": [], "ddl": []}

    for record in dataset:
        record_id = record["question_id"]

        # NOTE(review): question and evidence are concatenated with no
        # separator between them -- confirm this is the intended prompt shape.
        raw_prompt = "".join((record["question"], record["evidence"]))
        # Backticks in the source text are rewritten as double quotes.
        prompt = raw_prompt.replace("`", '"')

        request = EvalInputRequest(
            id=record_id,
            nl_prompt=prompt,
            query_type="dql",
            database=record["db_id"],
            dialects=["sqlite"],
            golden_sql=record["SQL"],
            eval_query="",
            setup_sql="",
            cleanup_sql="",
            tags=[record["difficulty"]],
            other={},
        )
        # File the request under the type it reports rather than the literal.
        buckets[request.query_type].append(request)

    return buckets