in datafusion/core/benches/sql_planner.rs [223:514]
/// Registers every SQL planning benchmark in this file with Criterion.
///
/// The benchmarks fall into these groups:
/// - logical and physical planning of hand-written queries against the
///   context returned by `create_context`
/// - physical planning of a sorted `UNION ALL` query
/// - physical planning of the TPC-H, TPC-DS and ClickBench query sets
/// - `with_param_values_many_columns`
///
/// # Panics
///
/// Panics if the ClickBench data directory is missing, if the `Runtime`
/// cannot be created, or if any query file cannot be opened or read.
fn criterion_benchmark(c: &mut Criterion) {
    // verify that we can load the clickbench data prior to running the benchmark
    if !PathBuf::from(format!("{BENCHMARKS_PATH_1}{CLICKBENCH_DATA_PATH}")).exists()
        && !PathBuf::from(format!("{BENCHMARKS_PATH_2}{CLICKBENCH_DATA_PATH}")).exists()
    {
        panic!("benchmarks/data/hits_partitioned/ could not be loaded. Please run \
         'benchmarks/bench.sh data clickbench_partitioned' prior to running this benchmark")
    }

    let ctx = create_context();
    let rt = Runtime::new().unwrap();

    // Reads a whole query file, naming the file in the panic message so a
    // missing file is easy to diagnose.
    let read_sql = |path: String| -> String {
        std::fs::read_to_string(&path)
            .unwrap_or_else(|e| panic!("could not read query file {}: {}", path, e))
    };
    // Same as `read_sql`, but only opens the file for line-by-line reading.
    let open_sql = |path: String| -> File {
        File::open(&path)
            .unwrap_or_else(|e| panic!("could not open query file {}: {}", path, e))
    };

    // Test simplest
    // https://github.com/apache/datafusion/issues/5157
    c.bench_function("logical_select_one_from_700", |b| {
        b.iter(|| logical_plan(&ctx, &rt, "SELECT c1 FROM t700"))
    });

    // Test simplest
    // https://github.com/apache/datafusion/issues/5157
    c.bench_function("physical_select_one_from_700", |b| {
        b.iter(|| physical_plan(&ctx, &rt, "SELECT c1 FROM t700"))
    });

    // Test simplest
    c.bench_function("logical_select_all_from_1000", |b| {
        b.iter(|| logical_plan(&ctx, &rt, "SELECT * FROM t1000"))
    });

    // Test simplest
    c.bench_function("physical_select_all_from_1000", |b| {
        b.iter(|| physical_plan(&ctx, &rt, "SELECT * FROM t1000"))
    });

    c.bench_function("logical_trivial_join_low_numbered_columns", |b| {
        b.iter(|| {
            logical_plan(
                &ctx,
                &rt,
                "SELECT t1.a2, t2.b2  \
                 FROM t1, t2 WHERE a1 = b1",
            )
        })
    });

    c.bench_function("logical_trivial_join_high_numbered_columns", |b| {
        b.iter(|| {
            logical_plan(
                &ctx,
                &rt,
                "SELECT t1.a99, t2.b99  \
                 FROM t1, t2 WHERE a199 = b199",
            )
        })
    });

    c.bench_function("logical_aggregate_with_join", |b| {
        b.iter(|| {
            logical_plan(
                &ctx,
                &rt,
                "SELECT t1.a99, MIN(t2.b1), MAX(t2.b199), AVG(t2.b123), COUNT(t2.b73)  \
                 FROM t1 JOIN t2 ON t1.a199 = t2.b199 GROUP BY t1.a99",
            )
        })
    });

    // `SELECT MAX(a0), MAX(a1), ..., MAX(a199) FROM t1`, built once up front
    // so that only planning happens inside the benchmark closure.
    let aggregates = (0..200)
        .map(|i| format!("MAX(a{i})"))
        .collect::<Vec<_>>()
        .join(", ");
    let aggregates_query = format!("SELECT {aggregates} FROM t1");
    c.bench_function("physical_select_aggregates_from_200", |b| {
        b.iter(|| {
            physical_plan(&ctx, &rt, &aggregates_query);
        });
    });

    // Benchmark for Physical Planning Joins
    c.bench_function("physical_join_consider_sort", |b| {
        b.iter(|| {
            physical_plan(
                &ctx,
                &rt,
                "SELECT t1.a7, t2.b8  \
                 FROM t1, t2 WHERE a7 = b7  \
                 ORDER BY a7",
            );
        });
    });

    c.bench_function("physical_theta_join_consider_sort", |b| {
        b.iter(|| {
            physical_plan(
                &ctx,
                &rt,
                "SELECT t1.a7, t2.b8  \
                 FROM t1, t2 WHERE a7 < b7  \
                 ORDER BY a7",
            );
        });
    });

    c.bench_function("physical_many_self_joins", |b| {
        b.iter(|| {
            physical_plan(
                &ctx,
                &rt,
                "SELECT ta.a9, tb.a10, tc.a11, td.a12, te.a13, tf.a14 \
                 FROM t1 AS ta, t1 AS tb, t1 AS tc, t1 AS td, t1 AS te, t1 AS tf \
                 WHERE ta.a9 = tb.a10 AND tb.a10 = tc.a11 AND tc.a11 = td.a12 AND \
                 td.a12 = te.a13 AND te.a13 = tf.a14",
            );
        });
    });

    c.bench_function("physical_unnest_to_join", |b| {
        b.iter(|| {
            physical_plan(
                &ctx,
                &rt,
                "SELECT t1.a7  \
                 FROM t1 WHERE a7 = (SELECT b8 FROM t2)",
            );
        });
    });

    // The next two queries (INTERSECT and DISTINCT over a join) should be
    // equivalent, so both go through `physical_plan` to keep their timings
    // comparable.
    c.bench_function("physical_intersection", |b| {
        b.iter(|| {
            physical_plan(
                &ctx,
                &rt,
                "SELECT t1.a7 FROM t1  \
                 INTERSECT SELECT t2.b8 FROM t2",
            );
        });
    });

    c.bench_function("physical_join_distinct", |b| {
        b.iter(|| {
            // This benchmark is named "physical_*", so it must measure
            // physical planning rather than logical planning only.
            physical_plan(
                &ctx,
                &rt,
                "SELECT DISTINCT t1.a7  \
                 FROM t1, t2 WHERE t1.a7 = t2.b8",
            );
        });
    });

    // -- Sorted Queries --
    register_union_order_table(&ctx, 100, 1000);

    // this query has many expressions in its sort order so stresses
    // order equivalence validation
    c.bench_function("physical_sorted_union_orderby", |b| {
        // SELECT ... UNION ALL ...
        let query = union_orderby_query(20);
        b.iter(|| physical_plan(&ctx, &rt, &query))
    });

    // --- TPC-H ---

    let tpch_ctx = register_defs(SessionContext::new(), tpch_schemas());

    let tpch_queries = [
        "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13",
        "q14", // "q15", q15 has multiple SQL statements which is not supported
        "q16", "q17", "q18", "q19", "q20", "q21", "q22",
    ];

    let benchmarks_path = if PathBuf::from(BENCHMARKS_PATH_1).exists() {
        BENCHMARKS_PATH_1
    } else {
        BENCHMARKS_PATH_2
    };

    // Read each TPC-H query file once; the same text backs both the
    // per-query benchmarks and the combined benchmark below.
    let all_tpch_sql_queries = tpch_queries
        .iter()
        .map(|q| read_sql(format!("{benchmarks_path}queries/{q}.sql")))
        .collect::<Vec<_>>();

    for (q, sql) in tpch_queries.iter().zip(&all_tpch_sql_queries) {
        c.bench_function(&format!("physical_plan_tpch_{q}"), |b| {
            b.iter(|| physical_plan(&tpch_ctx, &rt, sql))
        });
    }

    c.bench_function("physical_plan_tpch_all", |b| {
        b.iter(|| {
            for sql in &all_tpch_sql_queries {
                physical_plan(&tpch_ctx, &rt, sql)
            }
        })
    });

    // Disabled: logical planning of all TPC-H queries.
    // c.bench_function("logical_plan_tpch_all", |b| {
    //     b.iter(|| {
    //         for sql in &all_tpch_sql_queries {
    //             logical_plan(&tpch_ctx, &rt, sql)
    //         }
    //     })
    // });

    // --- TPC-DS ---

    let tpcds_ctx = register_defs(SessionContext::new(), tpcds_schemas());
    let tests_path = if PathBuf::from("./tests/").exists() {
        "./tests/"
    } else {
        "datafusion/core/tests/"
    };

    // Query files are numbered 1.sql through 99.sql.
    let raw_tpcds_sql_queries = (1..100)
        .map(|q| read_sql(format!("{tests_path}tpc-ds/{q}.sql")))
        .collect::<Vec<_>>();

    // some queries have multiple statements
    let all_tpcds_sql_queries = raw_tpcds_sql_queries
        .iter()
        .flat_map(|sql| sql.split(';').filter(|s| !s.trim().is_empty()))
        .collect::<Vec<_>>();

    c.bench_function("physical_plan_tpcds_all", |b| {
        b.iter(|| {
            for sql in &all_tpcds_sql_queries {
                physical_plan(&tpcds_ctx, &rt, sql)
            }
        })
    });

    // Disabled: logical planning of all TPC-DS queries.
    // c.bench_function("logical_plan_tpcds_all", |b| {
    //     b.iter(|| {
    //         for sql in &all_tpcds_sql_queries {
    //             logical_plan(&tpcds_ctx, &rt, sql)
    //         }
    //     })
    // });

    // -- clickbench --

    let queries_file =
        open_sql(format!("{benchmarks_path}queries/clickbench/queries.sql"));
    let extended_file =
        open_sql(format!("{benchmarks_path}queries/clickbench/extended.sql"));

    // Each line of the two files is treated as one query.
    let clickbench_queries: Vec<String> = BufReader::new(queries_file)
        .lines()
        .chain(BufReader::new(extended_file).lines())
        .map(|l| l.expect("Could not parse line"))
        .collect_vec();

    let clickbench_ctx = register_clickbench_hits_table(&rt);

    // Disabled: logical planning of each ClickBench query.
    // for (i, sql) in clickbench_queries.iter().enumerate() {
    //     c.bench_function(&format!("logical_plan_clickbench_q{}", i + 1), |b| {
    //         b.iter(|| logical_plan(&clickbench_ctx, &rt, sql))
    //     });
    // }

    for (i, sql) in clickbench_queries.iter().enumerate() {
        c.bench_function(&format!("physical_plan_clickbench_q{}", i + 1), |b| {
            b.iter(|| physical_plan(&clickbench_ctx, &rt, sql))
        });
    }

    // Disabled: logical planning of all ClickBench queries.
    // c.bench_function("logical_plan_clickbench_all", |b| {
    //     b.iter(|| {
    //         for sql in &clickbench_queries {
    //             logical_plan(&clickbench_ctx, &rt, sql)
    //         }
    //     })
    // });

    c.bench_function("physical_plan_clickbench_all", |b| {
        b.iter(|| {
            for sql in &clickbench_queries {
                physical_plan(&clickbench_ctx, &rt, sql)
            }
        })
    });

    c.bench_function("with_param_values_many_columns", |b| {
        benchmark_with_param_values_many_columns(&ctx, &rt, b);
    });
}