in benchmark/python/sparse/dot.py [0:0]
def test_dot_synthetic(data_dict):
    """Benchmark the sparse MXNet dot and SciPy dot operators on synthetic matrices.

    One result row is printed per benchmarked configuration. `t_sparse` is the
    runtime of the invoked sparse dot operator in ms, while `t_dense` is the
    runtime of dot(dns, dns), with the same matrices except that they are in
    default storage type.

    Parameters
    ----------
    data_dict : dict
        Benchmark configuration. The keys read here are 'batch_size', 'm',
        'feature_dim' and 'density' (indexable sequences of values to sweep),
        'num_repeat' (forwarded to the timing helper) and 'default_index', a
        dict with keys 'batch_size', 'feature_dim', 'output_dim' and 'density'
        giving the index of the value used while another dimension is swept.
    """
    # Benchmark MXNet and Scipys dot operator
    def bench_dot(lhs_shape, rhs_shape, lhs_stype, rhs_stype,
                  lhs_den, rhs_den, trans_lhs, ctx, num_repeat=10, fw="mxnet",
                  distribution="uniform"):
        """Time one sparse dot against its dense counterpart and print a result row.

        Raises ValueError when `fw` is neither "mxnet" nor "scipy".
        """
        set_default_device(ctx)
        if fw not in ("mxnet", "scipy"):
            raise ValueError("fw has to be either 'mxnet' or 'scipy'")

        # Create matrix instances
        lhs_nd = rand_ndarray(lhs_shape, lhs_stype, density=lhs_den, distribution=distribution)
        # The requested distribution is only applied to a csr rhs; every other
        # rhs storage type is always generated with a uniform distribution.
        rhs_distribution = distribution if rhs_stype == 'csr' else "uniform"
        rhs_nd = rand_ndarray(rhs_shape, rhs_stype, density=rhs_den,
                              distribution=rhs_distribution)

        if fw == "mxnet":
            dot_func_sparse = mx.nd.sparse.dot
            dot_func_dense = mx.nd.dot
            lhs_dns = lhs_nd if lhs_stype == 'default' else lhs_nd.tostype('default')
            rhs_dns = rhs_nd if rhs_stype == 'default' else rhs_nd.tostype('default')
            # One warm up run on the very operands that are timed below, checked
            # against the all-dense result.
            out = dot_func_sparse(lhs_nd, rhs_nd, trans_lhs)
            out_expected = dot_func_dense(lhs_dns, rhs_dns, trans_lhs)
            assert_almost_equal(out.asnumpy(), out_expected.asnumpy(), rtol=1e-1, atol=1e-1)
            sparse_cost = measure_cost(num_repeat, False, False, dot_func_sparse,
                                       lhs_nd, rhs_nd, trans_lhs)
            dense_cost = measure_cost(num_repeat, False, False, dot_func_dense,
                                      lhs_dns, rhs_dns, trans_lhs)
        else:
            dot_func_sparse = sp.spmatrix.dot
            dot_func_dense = np.dot
            # Convert each operand to numpy once and reuse the result.
            lhs_dns = lhs_nd.asnumpy()
            rhs_dns = rhs_nd.asnumpy()
            lhs_nd = sp.csr_matrix(lhs_dns)
            rhs_nd = rhs_dns
            # One warm up run (the result is not checked for scipy)
            lhs_warmup = sp.spmatrix.transpose(lhs_nd) if trans_lhs else lhs_nd
            dot_func_sparse(lhs_warmup, rhs_dns)
            # NOTE(review): the 2nd/3rd arguments presumably tell measure_cost to
            # transpose the lhs and whether that lhs is dense -- confirm there.
            sparse_cost = measure_cost(num_repeat, trans_lhs, False, dot_func_sparse,
                                       lhs_nd, rhs_nd)
            dense_cost = measure_cost(num_repeat, trans_lhs, True, dot_func_dense,
                                      lhs_dns, rhs_dns)

        # A zero sparse timing would otherwise abort the whole benchmark run.
        speedup = dense_cost / sparse_cost if sparse_cost else float("inf")

        # Print results
        m = lhs_shape[0]
        k = lhs_shape[1]
        n = rhs_shape[1]
        result_pattern = '{:15.1f} {:15.1f} {:>10} {:8d} {:8d} {:8d} {:13.2f} {:13.2f} {:8.2f}'
        results = result_pattern.format(lhs_den*100,
                                        rhs_den*100,
                                        str(ctx),
                                        m,
                                        k,
                                        n,
                                        sparse_cost*1000,
                                        dense_cost*1000,
                                        speedup)
        print(results)

    def print_benchmark_info(lhs, rhs, lhs_trans, fw):
        """Print the banner and the column headline preceding a group of result rows."""
        trans_str = "^T" if lhs_trans else ""
        print("========================================================")
        print(f" {fw} sparse dot benchmark: dot({lhs}, {rhs}) = {rhs} ")
        print(
            f" (matrix multiplication: (m x k){trans_str} * (k x n) = m x n) ")
        print("========================================================")
        headline_pattern = '{:>15} {:>15} {:>10} {:>8} {:>8} {:>8} {:>13} {:>13} {:>8}'
        headline = headline_pattern.format('lhs_density(%)',
                                           'rhs_density(%)',
                                           'context',
                                           'm', 'k', 'n',
                                           't_sparse(ms)',
                                           't_dense(ms)',
                                           'speedup')
        print(headline)

    def run_benchmark(ctx=None, lhs="csr", lhs_trans=False, rhs="dns", fw="mxnet", rhs_density=1,
                      distribution="uniform"):
        """Sweep output dim, feature dim, batch size and density for one operand layout.

        Raises ValueError when `rhs_density` lies outside [0, 1].
        """
        if not 0 <= rhs_density <= 1:
            raise ValueError("rhs_density has to be between 0 and 1")

        print_benchmark_info(lhs, rhs, lhs_trans, fw)

        default_index = data_dict['default_index']
        density_list = data_dict['density']
        default_density_index = default_index['density']
        num_repeat = data_dict['num_repeat']

        if rhs == "csr":
            # Only dot(default, csr) is supported, so the lhs is always dense here.
            lhs_stype = "default"
            rhs_stype = "csr"
            # Arrange dimensions according to use case. For below csr will have num_rows << num_cols
            feature_dim_list = data_dict['batch_size']
            batch_size_list = data_dict['m']
            output_dim_list = data_dict['feature_dim']
            default_output_index = default_index['feature_dim']
            default_feature_index = default_index['batch_size']
            default_batch_size_index = default_index['output_dim']
        else:
            lhs_stype = "csr"
            rhs_stype = "row_sparse" if rhs == "rsp" else "default"
            feature_dim_list = data_dict['feature_dim']
            output_dim_list = data_dict['m']
            batch_size_list = data_dict['batch_size']
            default_output_index = default_index['output_dim']
            default_batch_size_index = default_index['batch_size']
            default_feature_index = default_index['feature_dim']

        # Values held fixed while one of the other dimensions is swept.
        default_batch_size = batch_size_list[default_batch_size_index]
        default_feature_dim = feature_dim_list[default_feature_index]
        default_output_dim = output_dim_list[default_output_index]
        default_density = density_list[default_density_index]

        def bench_config(batch_size, feature_dim, output_dim, lhs_den, rhs_den):
            # The rhs row count follows the lhs: its row count when the lhs is
            # transposed, its column count otherwise.
            rhs_rows = batch_size if lhs_trans else feature_dim
            bench_dot((batch_size, feature_dim), (rhs_rows, output_dim),
                      lhs_stype, rhs_stype, lhs_den, rhs_den,
                      lhs_trans, ctx, num_repeat=num_repeat,
                      fw=fw, distribution=distribution)

        for output_dim in output_dim_list:
            bench_config(default_batch_size, default_feature_dim, output_dim,
                         default_density, rhs_density)

        for feature_dim in feature_dim_list:
            bench_config(default_batch_size, feature_dim, default_output_dim,
                         default_density, rhs_density)

        for batch_size in batch_size_list:
            bench_config(batch_size, default_feature_dim, default_output_dim,
                         default_density, rhs_density)

        # The density sweep applies the same density to both operands.
        for density in density_list:
            bench_config(default_batch_size, default_feature_dim, default_output_dim,
                         density, density)

    check_call(_LIB.MXSetNumOMPThreads(ctypes.c_int(ARGS.num_omp_threads)))
    context = mx.gpu() if ARGS.gpu else mx.cpu()
    # TODO(anirudh): make the data dicts to config which can be passed at runtime
    distributions = ["uniform", "powerlaw"]
    for distribution in distributions:
        run_benchmark(context, lhs="csr",
                      rhs="default", lhs_trans=False,
                      fw="mxnet", rhs_density=1,
                      distribution=distribution)
        run_benchmark(context, lhs="csr",
                      rhs="default", lhs_trans=True,
                      fw="mxnet", rhs_density=1,
                      distribution=distribution)
        run_benchmark(context, lhs="csr",
                      rhs="rsp", lhs_trans=False,
                      fw="mxnet", rhs_density=0.05,
                      distribution=distribution)
        run_benchmark(context, lhs="default",
                      rhs="csr", lhs_trans=False,
                      fw="mxnet", rhs_density=0.001,
                      distribution=distribution)
        # The scipy comparison is only run when benchmarking on CPU.
        if not ARGS.gpu:
            run_benchmark(context, lhs="csr",
                          rhs="default", lhs_trans=False,
                          fw="scipy", rhs_density=1,
                          distribution=distribution)
            run_benchmark(context, lhs="csr",
                          rhs="default", lhs_trans=True,
                          fw="scipy", rhs_density=1,
                          distribution=distribution)