in nni/algorithms/feature_engineering/gradient_selector/learnability.py [0:0]
def __init__(self,
             PreparedData,
             order,
             Nminibatch=None,
             groups=None,
             soft_groups=None,
             x0=None,
             C=1,
             ftransform=torch.sigmoid,
             get_train_opt=def_train_opt,
             accum_steps=1,
             rng=np.random.RandomState(0),
             max_norm_clip=1.,
             shuffle=True,
             device=constants.Device.CPU,
             verbose=1):
"""
Parameters
----------
PreparedData : Dataset of PrepareData class
order : int
What order of interactions to include. Higher orders
may be more accurate but increase the run time. 12 is the maximum allowed order.
Nminibatch : int
Number of rows in a mini batch
groups : array-like
Optional, shape = [n_features]
Groups of columns that must be selected as a unit
e.g. [0, 0, 1, 2] specifies the first two columns are part of a group.
soft_groups : array-like
optional, shape = [n_features]
Groups of columns come from the same source
Used to encourage sparsity of number of sources selected
e.g. [0, 0, 1, 2] specifies the first two columns are part of a group.
x0 : torch.tensor
Optional, initialization of x.
C : float
Penalty parameter.
get_train_opt : function
Function that returns a pytorch optimizer, Adam is the default
accum_steps : int
Number of steps
rng : random state
max_norm_clip : float
Maximum allowable size of the gradient
shuffle : bool
Whether or not to shuffle data within the dataloader
order : int
What order of interactions to include. Higher orders
may be more accurate but increase the run time. 12 is the maximum allowed order.
penalty : int
Constant that multiplies the regularization term.
ftransform : function
Function to transform the x. sigmoid is the default.
device : str
'cpu' to run on CPU and 'cuda' to run on GPU. Runs much faster on GPU
verbose : int
Controls the verbosity when fitting. Set to 0 for no printing
1 or higher for printing every verbose number of gradient steps.
"""
    super(Solver, self).__init__()
    self.Ntrain, self.D = PreparedData.N, PreparedData.n_features
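    # hard groups force whole feature groups to be selected as a unit;
    # soft groups mark features from the same source and are used to
    # encourage sparsity in the number of sources selected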
    if groups is not None:
        # pylint: disable=E1102
        groups = torch.tensor(groups, dtype=torch.long)
        self.groups = groups
    else:
        self.groups = None
    if soft_groups is not None:
        # pylint: disable=E1102
        soft_groups = torch.tensor(soft_groups, dtype=torch.long)
        self.soft_D = torch.unique(soft_groups).size()[0]
    else:
        self.soft_D = None
    self.soft_groups = soft_groups
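    # cap the mini-batch size at both the number of training rows and the
    # maximum number of rows allowed in memory (PreparedData.max_rows)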
    if Nminibatch is None:
        Nminibatch = self.Ntrain
    else:
        if Nminibatch > self.Ntrain:
            print('Minibatch larger than sample size.'
                  + (' Reducing from %d to %d.'
                     % (Nminibatch, self.Ntrain)))
            Nminibatch = self.Ntrain
    if Nminibatch > PreparedData.max_rows:
        print('Minibatch larger than mem-allowed.'
              + (' Reducing from %d to %d.' % (Nminibatch,
                                               PreparedData.max_rows)))
        Nminibatch = int(np.min([Nminibatch, PreparedData.max_rows]))
    self.Nminibatch = Nminibatch
    self.accum_steps = accum_steps
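    # x holds the per-feature selection parameters (one entry per feature);
    # it is initialized to zeros unless a warm start x0 is provided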
    if x0 is None:
        x0 = torch.zeros(self.D, 1, dtype=torch.get_default_dtype())
    self.ftransform = ftransform
    self.x = nn.Parameter(x0)
    self.max_norm = max_norm_clip
    self.device = device
    self.verbose = verbose
    self.multiclass = (PreparedData.classification
                       and PreparedData.n_classes
                       and PreparedData.n_classes > 2)
    if self.multiclass:
        self.n_classes = PreparedData.n_classes
    else:
        self.n_classes = None
    # whether to treat all classes equally
    self.balanced = PreparedData.balanced
    self.ordinal = PreparedData.ordinal
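    # dataloader settings: use worker processes only when the data is backed by
    # disk storage or has feature mappings; otherwise load in the main process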
    if (hasattr(PreparedData, 'mappings')
            or PreparedData.storage_level == 'disk'):
        num_workers = PreparedData.num_workers
    else:
        # in-memory (e.g. dense) storage is read directly in the main process
        num_workers = 0
    # pinned memory is currently disabled for both CPU and CUDA devices
    pin_memory = False
    # only apply a dataloader timeout when worker processes are involved
    if num_workers == 0:
        timeout = 0
    else:
        timeout = 60
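    # ChunkDataLoader iterates over PreparedData in mini-batches of Nminibatch rows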
    self.ds_train = ChunkDataLoader(
        PreparedData,
        batch_size=self.Nminibatch,
        shuffle=shuffle,
        drop_last=True,
        num_workers=num_workers,
        pin_memory=pin_memory,
        timeout=timeout)
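    # mini-batch learnability objective, built with the SLE coefficients
    # for the requested interaction order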
    self.f_train = LearnabilityMB(self.Nminibatch, self.D,
                                  constants.Coefficients.SLE[order],
                                  self.groups,
                                  binary=PreparedData.classification,
                                  device=self.device)
    self.opt_train = get_train_opt(torch.nn.ParameterList([self.x]))
    self.it = 0
    self.iters_per_epoch = int(np.ceil(len(self.ds_train.dataset)
                                       / self.ds_train.batch_size))
    self.f_train = self.f_train.to(device)
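    # fixed (non-trainable) weight derived from the penalty parameter: w = C / (C + 1)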
    # pylint: disable=E1102
    self.w = torch.tensor(
        C / (C + 1),
        dtype=torch.get_default_dtype(), requires_grad=False)
    self.w = self.w.to(device)
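
# Usage sketch (illustrative; not part of the original module). It shows how a
# Solver might be constructed from an already-prepared dataset. `prepared` is a
# stand-in for a PrepareData instance and `order=2` is an arbitrary choice; the
# remaining arguments keep the defaults from the signature above.
#
#   prepared = PrepareData(X, y)   # hypothetical construction of the dataset
#   solver = Solver(prepared, order=2, Nminibatch=256,
#                   device=constants.Device.CPU, verbose=1)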