fn criterion_benchmark()

in datafusion/core/benches/sql_planner.rs [223:514]


fn criterion_benchmark(c: &mut Criterion) {
    // verify that we can load the clickbench data prior to running the benchmark
    if !PathBuf::from(format!("{BENCHMARKS_PATH_1}{CLICKBENCH_DATA_PATH}")).exists()
        && !PathBuf::from(format!("{BENCHMARKS_PATH_2}{CLICKBENCH_DATA_PATH}")).exists()
    {
        panic!("benchmarks/data/hits_partitioned/ could not be loaded. Please run \
         'benchmarks/bench.sh data clickbench_partitioned' prior to running this benchmark")
    }

    let ctx = create_context();
    let rt = Runtime::new().unwrap();

    // Test simplest
    // https://github.com/apache/datafusion/issues/5157
    c.bench_function("logical_select_one_from_700", |b| {
        b.iter(|| logical_plan(&ctx, &rt, "SELECT c1 FROM t700"))
    });

    // Test simplest
    // https://github.com/apache/datafusion/issues/5157
    c.bench_function("physical_select_one_from_700", |b| {
        b.iter(|| physical_plan(&ctx, &rt, "SELECT c1 FROM t700"))
    });

    // Test simplest
    c.bench_function("logical_select_all_from_1000", |b| {
        b.iter(|| logical_plan(&ctx, &rt, "SELECT * FROM t1000"))
    });

    // Test simplest
    c.bench_function("physical_select_all_from_1000", |b| {
        b.iter(|| physical_plan(&ctx, &rt, "SELECT * FROM t1000"))
    });

    c.bench_function("logical_trivial_join_low_numbered_columns", |b| {
        b.iter(|| {
            logical_plan(
                &ctx,
                &rt,
                "SELECT t1.a2, t2.b2  \
                 FROM t1, t2 WHERE a1 = b1",
            )
        })
    });

    c.bench_function("logical_trivial_join_high_numbered_columns", |b| {
        b.iter(|| {
            logical_plan(
                &ctx,
                &rt,
                "SELECT t1.a99, t2.b99  \
                 FROM t1, t2 WHERE a199 = b199",
            )
        })
    });

    c.bench_function("logical_aggregate_with_join", |b| {
        b.iter(|| {
            logical_plan(
                &ctx,
                &rt,
                "SELECT t1.a99, MIN(t2.b1), MAX(t2.b199), AVG(t2.b123), COUNT(t2.b73)  \
                 FROM t1 JOIN t2 ON t1.a199 = t2.b199 GROUP BY t1.a99",
            )
        })
    });

    c.bench_function("physical_select_aggregates_from_200", |b| {
        let mut aggregates = String::new();
        for i in 0..200 {
            if i > 0 {
                aggregates.push_str(", ");
            }
            aggregates.push_str(format!("MAX(a{})", i).as_str());
        }
        let query = format!("SELECT {} FROM t1", aggregates);
        b.iter(|| {
            physical_plan(&ctx, &rt, &query);
        });
    });

    // Benchmark for Physical Planning Joins
    c.bench_function("physical_join_consider_sort", |b| {
        b.iter(|| {
            physical_plan(
                &ctx,
                &rt,
                "SELECT t1.a7, t2.b8  \
                 FROM t1, t2 WHERE a7 = b7 \
                 ORDER BY a7",
            );
        });
    });

    c.bench_function("physical_theta_join_consider_sort", |b| {
        b.iter(|| {
            physical_plan(
                &ctx,
                &rt,
                "SELECT t1.a7, t2.b8  \
                 FROM t1, t2 WHERE a7 < b7 \
                 ORDER BY a7",
            );
        });
    });

    c.bench_function("physical_many_self_joins", |b| {
        b.iter(|| {
            physical_plan(
                &ctx,
                &rt,
                "SELECT ta.a9, tb.a10, tc.a11, td.a12, te.a13, tf.a14 \
                 FROM t1 AS ta, t1 AS tb, t1 AS tc, t1 AS td, t1 AS te, t1 AS tf \
                 WHERE ta.a9 = tb.a10 AND tb.a10 = tc.a11 AND tc.a11 = td.a12 AND \
                 td.a12 = te.a13 AND te.a13 = tf.a14",
            );
        });
    });

    c.bench_function("physical_unnest_to_join", |b| {
        b.iter(|| {
            physical_plan(
                &ctx,
                &rt,
                "SELECT t1.a7  \
                 FROM t1 WHERE a7 = (SELECT b8 FROM t2)",
            );
        });
    });

    c.bench_function("physical_intersection", |b| {
        b.iter(|| {
            physical_plan(
                &ctx,
                &rt,
                "SELECT t1.a7 FROM t1  \
                 INTERSECT SELECT t2.b8 FROM t2",
            );
        });
    });
    // these two queries should be equivalent
    c.bench_function("physical_join_distinct", |b| {
        b.iter(|| {
            logical_plan(
                &ctx,
                &rt,
                "SELECT DISTINCT t1.a7  \
                 FROM t1, t2 WHERE t1.a7 = t2.b8",
            );
        });
    });

    // -- Sorted Queries --
    register_union_order_table(&ctx, 100, 1000);

    // this query has many expressions in its sort order so stresses
    // order equivalence validation
    c.bench_function("physical_sorted_union_orderby", |b| {
        // SELECT ... UNION ALL ...
        let query = union_orderby_query(20);
        b.iter(|| physical_plan(&ctx, &rt, &query))
    });

    // --- TPC-H ---

    let tpch_ctx = register_defs(SessionContext::new(), tpch_schemas());

    let tpch_queries = [
        "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13",
        "q14", // "q15", q15 has multiple SQL statements which is not supported
        "q16", "q17", "q18", "q19", "q20", "q21", "q22",
    ];

    let benchmarks_path = if PathBuf::from(BENCHMARKS_PATH_1).exists() {
        BENCHMARKS_PATH_1
    } else {
        BENCHMARKS_PATH_2
    };

    for q in tpch_queries {
        let sql =
            std::fs::read_to_string(format!("{benchmarks_path}queries/{q}.sql")).unwrap();
        c.bench_function(&format!("physical_plan_tpch_{}", q), |b| {
            b.iter(|| physical_plan(&tpch_ctx, &rt, &sql))
        });
    }

    let all_tpch_sql_queries = tpch_queries
        .iter()
        .map(|q| {
            std::fs::read_to_string(format!("{benchmarks_path}queries/{q}.sql")).unwrap()
        })
        .collect::<Vec<_>>();

    c.bench_function("physical_plan_tpch_all", |b| {
        b.iter(|| {
            for sql in &all_tpch_sql_queries {
                physical_plan(&tpch_ctx, &rt, sql)
            }
        })
    });

    // c.bench_function("logical_plan_tpch_all", |b| {
    //     b.iter(|| {
    //         for sql in &all_tpch_sql_queries {
    //             logical_plan(&tpch_ctx, sql)
    //         }
    //     })
    // });

    // --- TPC-DS ---

    let tpcds_ctx = register_defs(SessionContext::new(), tpcds_schemas());
    let tests_path = if PathBuf::from("./tests/").exists() {
        "./tests/"
    } else {
        "datafusion/core/tests/"
    };

    let raw_tpcds_sql_queries = (1..100)
        .map(|q| std::fs::read_to_string(format!("{tests_path}tpc-ds/{q}.sql")).unwrap())
        .collect::<Vec<_>>();

    // some queries have multiple statements
    let all_tpcds_sql_queries = raw_tpcds_sql_queries
        .iter()
        .flat_map(|sql| sql.split(';').filter(|s| !s.trim().is_empty()))
        .collect::<Vec<_>>();

    c.bench_function("physical_plan_tpcds_all", |b| {
        b.iter(|| {
            for sql in &all_tpcds_sql_queries {
                physical_plan(&tpcds_ctx, &rt, sql)
            }
        })
    });

    // c.bench_function("logical_plan_tpcds_all", |b| {
    //     b.iter(|| {
    //         for sql in &all_tpcds_sql_queries {
    //             logical_plan(&tpcds_ctx, sql)
    //         }
    //     })
    // });

    // -- clickbench --

    let queries_file =
        File::open(format!("{benchmarks_path}queries/clickbench/queries.sql")).unwrap();
    let extended_file =
        File::open(format!("{benchmarks_path}queries/clickbench/extended.sql")).unwrap();

    let clickbench_queries: Vec<String> = BufReader::new(queries_file)
        .lines()
        .chain(BufReader::new(extended_file).lines())
        .map(|l| l.expect("Could not parse line"))
        .collect_vec();

    let clickbench_ctx = register_clickbench_hits_table(&rt);

    // for (i, sql) in clickbench_queries.iter().enumerate() {
    //     c.bench_function(&format!("logical_plan_clickbench_q{}", i + 1), |b| {
    //         b.iter(|| logical_plan(&clickbench_ctx, sql))
    //     });
    // }

    for (i, sql) in clickbench_queries.iter().enumerate() {
        c.bench_function(&format!("physical_plan_clickbench_q{}", i + 1), |b| {
            b.iter(|| physical_plan(&clickbench_ctx, &rt, sql))
        });
    }

    // c.bench_function("logical_plan_clickbench_all", |b| {
    //     b.iter(|| {
    //         for sql in &clickbench_queries {
    //             logical_plan(&clickbench_ctx, sql)
    //         }
    //     })
    // });

    c.bench_function("physical_plan_clickbench_all", |b| {
        b.iter(|| {
            for sql in &clickbench_queries {
                physical_plan(&clickbench_ctx, &rt, sql)
            }
        })
    });

    c.bench_function("with_param_values_many_columns", |b| {
        benchmark_with_param_values_many_columns(&ctx, &rt, b);
    });
}