def param_impact()

in de/cli.py [0:0]


def param_impact(file, directory, row_group_size, data_page_size):
    """Sweep one size parameter and report how it affects deduplication.

    Exactly one sweep is run per call: ``row_group_size`` takes precedence
    when both flags are truthy. The selected parameter is varied over a
    fixed list of powers of two, the sweep is delegated to
    ``calculate_parameter_impact``, and the outcome is printed per value,
    printed as an overall summary, and plotted.

    Args:
        file: Forwarded unchanged to ``calculate_parameter_impact``.
        directory: Forwarded unchanged to ``calculate_parameter_impact``.
        row_group_size: If truthy, sweep ``row_group_size`` over
            ``2**10 .. 2**21``.
        data_page_size: If truthy (and ``row_group_size`` is falsy), sweep
            ``data_page_size`` over ``2**15 .. 2**22``.

    Raises:
        SystemExit: With exit code 1 when neither flag is truthy.
    """
    if row_group_size:
        param_name = "row_group_size"
        param_default = 2**20
        param_values = [2**i for i in range(10, 22)]
    elif data_page_size:
        param_name = "data_page_size"
        param_default = 2**20
        param_values = [2**i for i in range(15, 23)]
    else:
        # NOTE(review): the message names --max-page-size while the parameter
        # is data_page_size; confirm the real option name before changing it.
        print("Please specify either --row-group-size or --max-page-size")
        sys.exit(1)

    results, overall_result = calculate_parameter_impact(
        file, directory, param_name, param_values, param_default
    )

    for param_value, result in results.items():
        total_size = naturalsize(result["total_len"])
        print(
            f"{param_name}: {param_value}\n"
            f"Deduplication ratio: {result['dedup_ratio']:.2%} ({naturalsize(result['chunk_bytes'])} / {total_size})\n"
            f"XTool deduplication ratio: {result['xtool_dedup_ratio']:.2%} ({naturalsize(result['transmitted_xtool_bytes'])} / {total_size})\n"
        )

    # NOTE(review): len(results) counts swept parameter values (results is
    # keyed by param_value above), yet the label says "files" - confirm intent.
    print(f"Overall deduplication ratio over {len(results)} files:")
    overall_size = naturalsize(overall_result["total_len"])
    print(
        f"Overall deduplication ratio: {overall_result['dedup_ratio']:.2%} ({naturalsize(overall_result['chunk_bytes'])} / {overall_size})\n"
        f"XTool overall deduplication ratio: {overall_result['xtool_dedup_ratio']:.2%} ({naturalsize(overall_result['transmitted_xtool_bytes'])} / {overall_size})\n"
    )

    # Take x from the same mapping that supplies y so each point is paired
    # with the parameter value it was measured at, even if `results` comes
    # back in a different order (or with fewer entries) than `param_values`.
    swept_values = list(results)
    measured = list(results.values())

    fig = go.Figure()
    fig.add_trace(
        go.Scatter(
            x=swept_values,
            y=[result["dedup_ratio"] for result in measured],
            mode="lines+markers",
            name="DE Dedup Ratio",
            marker=dict(symbol="circle"),
        )
    )
    fig.add_trace(
        go.Scatter(
            x=swept_values,
            y=[result["xtool_dedup_ratio"] for result in measured],
            mode="lines+markers",
            name="XTool Dedup Ratio",
            marker=dict(symbol="square"),
        )
    )
    fig.update_layout(
        title="Deduplication Ratios vs " + param_name,
        # Swept values are powers of two, so a log axis spaces them evenly.
        xaxis=dict(title=param_name, type="log", dtick=1, tickformat=".2s"),
        yaxis=dict(title="Deduplication Ratio", tickformat=".2%"),
        legend=dict(title="Metric"),
        template="plotly_white",
    )
    fig.show()