# examples/sparse_regression.py
def main(args):
    # setup hyperparameters for the model
    hypers = {'expected_sparsity': max(1.0, args.num_dimensions / 10),
              'alpha1': 3.0, 'beta1': 1.0, 'alpha2': 3.0, 'beta2': 1.0, 'alpha3': 1.0,
              'c': 1.0}
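    # (these hyperparameters are consumed by `model`, which is defined elsewhere in this
    # script; note that `expected_sparsity` scales with the number of input dimensions so
    # that, roughly, one in ten dimensions is expected to be active a priori)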
    P = args.num_dimensions
    S = args.active_dimensions
    Q = args.quadratic_dimensions
    # generate artificial dataset
    X, Y, expected_thetas, expected_quad_dims = get_data(N=args.num_data, P=P, S=S,
                                                         Q=Q, sigma_obs=args.sigma)
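    # get_data (defined elsewhere in this script) builds a synthetic regression problem in
    # which only S of the P input dimensions carry singleton effects and Q dimensions take
    # part in quadratic interactions; expected_thetas and expected_quad_dims record the
    # ground truth used for the summary statistics at the end of this function.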
    loss_fn = Trace_ELBO().differentiable_loss
    # We initialize the AutoDelta guide (for MAP estimation) with args.num_restarts
    # different initial parameter values sampled from the vicinity of the median of the
    # prior distribution and then continue optimizing with the best-performing initialization.
    init_losses = []
    for restart in range(args.num_restarts):
        pyro.clear_param_store()
        pyro.set_rng_seed(restart)
        guide = AutoDelta(model, init_loc_fn=init_loc_fn)
        with torch.no_grad():
            init_losses.append(loss_fn(model, guide, X, Y, hypers).item())
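    # Restart i above was run under seed i, so re-seeding with np.argmin(init_losses)
    # below reproduces the initialization of the best-performing restart.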
    pyro.set_rng_seed(np.argmin(init_losses))
    pyro.clear_param_store()
    guide = AutoDelta(model, init_loc_fn=init_loc_fn)
    # Instead of using pyro.infer.SVI and pyro.optim, we construct our own PyTorch
    # optimizer and take charge of gradient-based optimization ourselves.
    with poutine.block(), poutine.trace(param_only=True) as param_capture:
        guide(X, Y, hypers)
    params = [pyro.param(name).unconstrained() for name in param_capture.trace]
    adam = Adam(params, lr=args.lr)
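    # For reference only (not used here): assuming SVI is imported from pyro.infer
    # alongside Trace_ELBO, roughly the same optimization -- minus the manual
    # learning-rate schedule below -- could be driven by Pyro's SVI wrapper, e.g.
    #     svi = SVI(model, guide, pyro.optim.Adam({'lr': args.lr}), loss=Trace_ELBO())
    #     for step in range(args.num_steps):
    #         loss = svi.step(X, Y, hypers)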
    report_frequency = 50
    print("Beginning MAP optimization...")
    # the optimization loop
    for step in range(args.num_steps):
        loss = loss_fn(model, guide, X, Y, hypers) / args.num_data
        loss.backward()
        adam.step()
        adam.zero_grad()
        # we manually reduce the learning rate according to this schedule
        if step in [100, 300, 700, 900]:
            adam.param_groups[0]['lr'] *= 0.2
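            # (if args.num_steps exceeds 900, this leaves the final learning rate at
            # args.lr * 0.2 ** 4, i.e. 625x smaller than the initial value)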
        if step % report_frequency == 0 or step == args.num_steps - 1:
            print("[step %04d] loss: %.5f" % (step, loss))
print("Expected singleton thetas:\n", expected_thetas.data.numpy())
    # we do the final computation using double precision
    median = guide.median()  # == mode for MAP inference
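    # (since AutoDelta places a Delta distribution at each latent site, guide.median()
    # simply returns the optimized point estimates)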
    active_dims, active_quad_dims = \
        compute_posterior_stats(X.double(), Y.double(), median['msq'].double(),
                                median['lambda'].double(), median['eta1'].double(),
                                median['xisq'].double(), torch.tensor(hypers['c']).double(),
                                median['sigma'].double())
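    # compute_posterior_stats (defined elsewhere in this script) is expected to return the
    # single dimensions and the quadratic (pairwise) dimensions that the fitted model deems
    # active; we compare these against the ground truth below.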
    expected_active_dims = np.arange(S).tolist()
    tp_singletons = len(set(active_dims) & set(expected_active_dims))
    fp_singletons = len(set(active_dims) - set(expected_active_dims))
    fn_singletons = len(set(expected_active_dims) - set(active_dims))
    singleton_stats = (tp_singletons, fp_singletons, fn_singletons)
    tp_quads = len(set(active_quad_dims) & set(expected_quad_dims))
    fp_quads = len(set(active_quad_dims) - set(expected_quad_dims))
    fn_quads = len(set(expected_quad_dims) - set(active_quad_dims))
    quad_stats = (tp_quads, fp_quads, fn_quads)
    # We report how well we did, i.e. whether we recovered the sparse set of coefficients
    # that we expected for our artificial dataset.
    print("[SUMMARY STATS]")
    print("Singletons (true positive, false positive, false negative): " +
          "(%d, %d, %d)" % singleton_stats)
    print("Quadratic (true positive, false positive, false negative): " +
          "(%d, %d, %d)" % quad_stats)