in tools/utilities/pythonlibs/audio/training/train_classifier.py [0:0]
def fit(self, training_data, validation_data, options, model, device=None, detail=False, run=None):
"""
Perform the training. This is not called "train" because the base class already defines
that method with a different meaning. The base class "train" method puts the Module into
"training mode".
"""
print("Training {} using {} rows of featurized training input...".format(self.name(), training_data.num_rows))
if training_data.mean is not None:
self.mean = torch.from_numpy(np.array([[training_data.mean]])).to(device)
self.std = torch.from_numpy(np.array([[training_data.std]])).to(device)
else:
self.mean = None
self.std = None
start = time.time()
loss_function = nn.NLLLoss()
initial_rate = options.learning_rate
lr_scheduler = options.lr_scheduler
oo = options.optimizer_options
self.training = True
if options.optimizer == "Adadelta":
optimizer = optim.Adadelta(self.parameters(), lr=initial_rate, weight_decay=oo.weight_decay,
rho=oo.rho, eps=oo.eps)
elif options.optimizer == "Adagrad":
optimizer = optim.Adagrad(self.parameters(), lr=initial_rate, weight_decay=oo.weight_decay,
lr_decay=oo.lr_decay)
elif options.optimizer == "Adam":
optimizer = optim.Adam(self.parameters(), lr=initial_rate, weight_decay=oo.weight_decay,
betas=oo.betas, eps=oo.eps)
elif options.optimizer == "Adamax":
optimizer = optim.Adamax(self.parameters(), lr=initial_rate, weight_decay=oo.weight_decay,
betas=oo.betas, eps=oo.eps)
elif options.optimizer == "ASGD":
optimizer = optim.ASGD(self.parameters(), lr=initial_rate, weight_decay=oo.weight_decay,
lambd=oo.lambd, alpha=oo.alpha, t0=oo.t0)
elif options.optimizer == "RMSprop":
optimizer = optim.RMSprop(self.parameters(), lr=initial_rate, weight_decay=oo.weight_decay,
eps=oo.eps, alpha=oo.alpha, momentum=oo.momentum, centered=oo.centered)
elif options.optimizer == "Rprop":
optimizer = optim.Rprop(self.parameters(), lr=initial_rate, etas=oo.etas,
step_sizes=oo.step_sizes)
elif options.optimizer == "SGD":
    optimizer = optim.SGD(self.parameters(), lr=initial_rate, weight_decay=oo.weight_decay,
                          momentum=oo.momentum, dampening=oo.dampening)
else:
    # fail fast on an unrecognized optimizer name rather than hitting a NameError below
    raise ValueError("Unsupported optimizer: {}".format(options.optimizer))
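# For reference, the fields read from options.optimizer_options above are:
#   Adadelta: rho, eps, weight_decay        Adagrad: lr_decay, weight_decay
#   Adam/Adamax: betas, eps, weight_decay   ASGD: lambd, alpha, t0, weight_decay
#   RMSprop: eps, alpha, momentum, centered, weight_decay
#   Rprop: etas, step_sizes                 SGD: momentum, dampening, weight_decay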
print(optimizer)
num_epochs = options.max_epochs
batch_size = options.batch_size
learning_rate = options.learning_rate
lr_min = options.lr_min
lr_peaks = options.lr_peaks
ticks = training_data.num_rows / batch_size # iterations per epoch
# Calculation of total iterations in non-rolling vs rolling training
# ticks = num_rows/batch_size (total number of iterations per epoch)
# Non-Rolling Training:
# Total Iterations = num_epochs * ticks
# Rolling Training:
# irl = Initial_rolling_length (We are using 2)
# If num_epochs <= max_rolling_length:
# Total Iterations = sum(range(irl, irl + num_epochs))
# If num_epochs > max_rolling_length:
# Total Iterations = sum(range(irl, irl + max_rolling_length)) + (num_epochs - max_rolling_length)*ticks
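# Worked example with hypothetical numbers (not taken from any dataset):
#   num_rows = 2000, batch_size = 50  ->  ticks = 40 iterations per epoch
#   Non-rolling, num_epochs = 30:  total = 30 * 40 = 1200 iterations
#   Rolling, irl = 2, max_rolling_length = 40:
#     num_epochs = 30 (<= 40):  total = sum(range(2, 32))           = 495
#     num_epochs = 50 (>  40):  total = sum(range(2, 42)) + 10 * 40 = 860 + 400 = 1260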
if options.rolling:
rolling_length = 2
max_rolling_length = int(ticks)
if max_rolling_length > options.max_rolling_length + rolling_length:
max_rolling_length = options.max_rolling_length + rolling_length
bag_count = 100
hidden_bag_size = batch_size * bag_count
if num_epochs + rolling_length < max_rolling_length:
max_rolling_length = num_epochs + rolling_length
total_iterations = sum(range(rolling_length, max_rolling_length))
if num_epochs + rolling_length > max_rolling_length:
epochs_remaining = num_epochs + rolling_length - max_rolling_length
total_iterations += epochs_remaining * training_data.num_rows / batch_size
ticks = total_iterations / num_epochs
else:
total_iterations = ticks * num_epochs
gamma = options.lr_gamma
if not lr_min:
lr_min = learning_rate
scheduler = None
if lr_scheduler == "TriangleLR":
steps = lr_peaks * 2 + 1
stepsize = num_epochs / steps
scheduler = TriangularLR(optimizer, stepsize * ticks, lr_min, learning_rate, gamma)
elif lr_scheduler == "CosineAnnealingLR":
# divide by odd number to finish on the minimum learning rate
cycles = lr_peaks * 2 + 1
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=total_iterations / cycles,
eta_min=lr_min)
elif lr_scheduler == "ExponentialLR":
scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma)
elif lr_scheduler == "StepLR":
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=options.lr_step_size, gamma=gamma)
elif lr_scheduler == "ExponentialResettingLR":
reset = (num_epochs * ticks) / 3 # reset at the 1/3 mark.
scheduler = ExponentialResettingLR(optimizer, gamma, reset)
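# Scheduler note (illustrative numbers): with lr_peaks = 1, cycles = 3, so for
# CosineAnnealingLR and total_iterations = 1200 we get T_max = 400; the rate decays
# from learning_rate to lr_min over 400 steps, rises back over the next 400, then
# decays again, ending the run at lr_min (hence "divide by odd number" above).
# For rolling training, ticks was recomputed as total_iterations / num_epochs, so
# num_epochs * ticks equals the actual number of scheduler steps taken.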
# optimizer = optim.Adam(model.parameters(), lr=0.0001)
log = []
iteration = 0  # running minibatch count across epochs, used by the per-iteration log below
for epoch in range(num_epochs):
self.train()
if options.rolling:
rolling_length += 1
if rolling_length <= max_rolling_length:
hidden1_bag = torch.from_numpy(np.zeros([1, hidden_bag_size, model.hidden_units],
dtype=np.float32)).to(device)
if model.architecture == 'LSTM':
cell1_bag = torch.from_numpy(np.zeros([1, hidden_bag_size, model.hidden_units],
dtype=np.float32)).to(device)
if model.num_layers >= 2:
hidden2_bag = torch.from_numpy(np.zeros([1, hidden_bag_size, model.hidden_units],
dtype=np.float32)).to(device)
if model.architecture == 'LSTM':
cell2_bag = torch.from_numpy(np.zeros([1, hidden_bag_size, model.hidden_units],
dtype=np.float32)).to(device)
if model.num_layers == 3:
hidden3_bag = torch.from_numpy(np.zeros([1, hidden_bag_size, training_data.num_keywords],
dtype=np.float32)).to(device)
if model.architecture == 'LSTM':
cell3_bag = torch.from_numpy(np.zeros([1, hidden_bag_size, training_data.num_keywords],
dtype=np.float32)).to(device)
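# Sketch of the bag shapes (hypothetical sizes): with batch_size = 50 and
# bag_count = 100, hidden_bag_size = 5000 and each bag is a zero tensor of shape
# (1, 5000, model.hidden_units). The layer-3 bags are sized with
# training_data.num_keywords instead, which suggests the last recurrent layer's
# hidden size equals the number of keyword classes.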
for i_batch, (audio, labels) in enumerate(training_data.get_data_loader(batch_size)):
if not self.batch_first:
audio = audio.transpose(1, 0) # GRU wants seq,batch,feature
if device:
audio = audio.to(device)
labels = labels.to(device)
# Also, we need to clear out the hidden state,
# detaching it from its history on the last instance.
if options.rolling:
if rolling_length <= max_rolling_length:
if (i_batch + 1) % rolling_length == 0:
self.init_hidden()
break
shuffled_indices = list(range(hidden_bag_size))
np.random.shuffle(shuffled_indices)
temp_indices = shuffled_indices[:batch_size]
if model.architecture == 'LSTM':
if self.hidden1 is not None:
hidden1_bag[:, temp_indices, :], cell1_bag[:, temp_indices, :] = self.hidden1
self.hidden1 = (hidden1_bag[:, 0:batch_size, :], cell1_bag[:, 0:batch_size, :])
if model.num_layers >= 2:
hidden2_bag[:, temp_indices, :], cell2_bag[:, temp_indices, :] = self.hidden2
self.hidden2 = (hidden2_bag[:, 0:batch_size, :], cell2_bag[:, 0:batch_size, :])
if model.num_layers == 3:
hidden3_bag[:, temp_indices, :], cell3_bag[:, temp_indices, :] = self.hidden3
self.hidden3 = (hidden3_bag[:, 0:batch_size, :], cell3_bag[:, 0:batch_size, :])
else:
if self.hidden1 is not None:
hidden1_bag[:, temp_indices, :] = self.hidden1
self.hidden1 = hidden1_bag[:, 0:batch_size, :]
if model.num_layers >= 2:
hidden2_bag[:, temp_indices, :] = self.hidden2
self.hidden2 = hidden2_bag[:, 0:batch_size, :]
if model.num_layers == 3:
hidden3_bag[:, temp_indices, :] = self.hidden3
self.hidden3 = hidden3_bag[:, 0:batch_size, :]
else:
self.init_hidden()
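# In rolling mode the hidden state is recycled rather than cleared: the previous
# state of each layer is scattered into batch_size randomly chosen slots of its
# bag, and the state for the next minibatch is seeded from the bag's first
# batch_size slots, mixing activations from earlier minibatches into this one.
# When rolling is disabled, init_hidden() simply resets the state before every
# minibatch.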
# Before the backward pass, use the optimizer object to zero all of the
# gradients for the variables it will update (which are the learnable
# weights of the model). This is because, by default, gradients are
# accumulated in buffers (i.e., not overwritten) whenever .backward()
# is called. See the docs of torch.autograd.backward for more details.
optimizer.zero_grad()
# optionally normalize the audio
if self.mean is not None:
audio = (audio - self.mean) / self.std
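# mean/std were wrapped in [[...]] above so they pick up leading singleton
# dimensions and broadcast across the batch/time axes of `audio`. Tiny numeric
# example: a feature value of 1.5 with mean 0.5 and std 2.0 maps to
# (1.5 - 0.5) / 2.0 = 0.5.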
# Run our forward pass.
keyword_scores = self(audio)
# Compute the loss, gradients
loss = loss_function(keyword_scores, labels)
# Backward pass: compute gradient of the loss with respect to all the learnable
# parameters of the model. Internally, the parameters of each Module are stored
# in Tensors with requires_grad=True, so this call will compute gradients for
# all learnable parameters in the model.
loss.backward()
# move to next learning rate
if scheduler:
scheduler.step()
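# Note: PyTorch 1.1+ expects scheduler.step() to be called after optimizer.step()
# and will emit a UserWarning for this ordering. The schedule is advanced once per
# minibatch here, matching the per-iteration T_max/stepsize values computed above.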
# Calling the step function on an Optimizer makes an update to its parameters
# applying the gradients we computed during back propagation
optimizer.step()
learning_rate = optimizer.param_groups[0]['lr']
if detail:
    log += [{'iteration': iteration, 'loss': loss.item(), 'learning_rate': learning_rate}]
iteration += 1
# Find the best prediction in each sequence and return its accuracy
passed, total, rate = self.evaluate(validation_data, batch_size, device)
learning_rate = optimizer.param_groups[0]['lr']
current_loss = float(loss.item())
print("Epoch {}, Loss {:.3f}, Validation Accuracy {:.3f}, Learning Rate {}".format(
epoch, current_loss, rate * 100, learning_rate))
log += [{'epoch': epoch, 'loss': current_loss, 'accuracy': rate, 'learning_rate': learning_rate}]
if run is not None:
run.log('progress', epoch / num_epochs)
run.log('epoch', epoch)
run.log('accuracy', rate)
run.log('loss', current_loss)
run.log('learning_rate', learning_rate)
end = time.time()
self.training = False
print("Trained in {:.2f} seconds".format(end - start))
return log
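# Hypothetical usage sketch (the names below are illustrative, not defined in this
# snippet): assuming `model` is an nn.Module subclass exposing this fit() method,
# training might look like
#
#   device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#   model.to(device)
#   log = model.fit(training_data, validation_data, options, model, device=device, detail=True)
#
# where training_data/validation_data expose num_rows, mean, std, num_keywords and
# get_data_loader(batch_size), and options carries the optimizer and learning-rate
# settings read at the top of fit().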