in de/cli.py [0:0]
def param_impact(file, directory, row_group_size, data_page_size):
    """Sweep one writer parameter and report/plot the deduplication ratios.

    Exactly one sweep is run per call: ``row_group_size`` takes precedence
    when both flags are truthy. The sweep uses powers of two:

    * ``row_group_size``: 2**10 .. 2**21
    * ``data_page_size``: 2**15 .. 2**22

    with 2**20 passed as the default value in both cases. The actual
    measurement is delegated to ``calculate_parameter_impact``; this function
    only prints the per-value and overall results and shows a plot.

    Args:
        file: Forwarded unchanged to ``calculate_parameter_impact``.
        directory: Forwarded unchanged to ``calculate_parameter_impact``.
        row_group_size: Truthy to sweep ``row_group_size``.
        data_page_size: Truthy to sweep ``data_page_size`` (only consulted
            when ``row_group_size`` is falsy).

    Raises:
        SystemExit: With status 1 (after printing a hint) when neither flag
            is set.
    """
    if row_group_size:
        param_name = "row_group_size"
        param_default = 2**20
        param_values = [2**i for i in range(10, 22)]
    elif data_page_size:
        param_name = "data_page_size"
        param_default = 2**20
        param_values = [2**i for i in range(15, 23)]
    else:
        # NOTE(review): flag name derived from the `data_page_size` parameter;
        # the previous text said --max-page-size. Confirm against the CLI
        # option declaration.
        print("Please specify either --row-group-size or --data-page-size")
        sys.exit(1)

    results, overall_result = calculate_parameter_impact(
        file, directory, param_name, param_values, param_default
    )

    for param_value, result in results.items():
        print(
            f"{param_name}: {param_value}\n"
            + _format_ratios(
                result, "Deduplication ratio", "XTool deduplication ratio"
            )
        )

    print(f"Overall deduplication ratio over {len(results)} files:")
    print(
        _format_ratios(
            overall_result,
            "Overall deduplication ratio",
            "XTool overall deduplication ratio",
        )
    )

    # Take x from the same mapping that supplies y so every point stays
    # paired with its own parameter value, even if the helper returns fewer
    # entries (or a different order) than `param_values`.
    x_values = list(results)
    traces = (
        ("dedup_ratio", "DE Dedup Ratio", "circle"),
        ("xtool_dedup_ratio", "XTool Dedup Ratio", "square"),
    )

    fig = go.Figure()
    for ratio_key, trace_name, symbol in traces:
        fig.add_trace(
            go.Scatter(
                x=x_values,
                y=[result[ratio_key] for result in results.values()],
                mode="lines+markers",
                name=trace_name,
                marker=dict(symbol=symbol),
            )
        )
    fig.update_layout(
        title="Deduplication Ratios vs " + param_name,
        xaxis=dict(title=param_name, type="log", dtick=1, tickformat=".2s"),
        yaxis=dict(title="Deduplication Ratio", tickformat=".2%"),
        legend=dict(title="Metric"),
        template="plotly_white",
    )
    fig.show()


def _format_ratios(result, de_label, xtool_label):
    """Return the two newline-terminated ratio lines for one result dict.

    Each line reads ``<label>: <ratio as %> (<bytes> / <total>)`` with the
    byte counts rendered through ``naturalsize``.
    """
    total = naturalsize(result["total_len"])
    return (
        f"{de_label}: {result['dedup_ratio']:.2%} "
        f"({naturalsize(result['chunk_bytes'])} / {total})\n"
        f"{xtool_label}: {result['xtool_dedup_ratio']:.2%} "
        f"({naturalsize(result['transmitted_xtool_bytes'])} / {total})\n"
    )