in courses/machine_learning/asl/open_project/ASL_energy_prediction_models/meta_models/model_module/trainer/model.py [0:0]
def encoder_decoder_stacked_lstm_regression(features, labels, mode, params):
    """Estimator model_fn: stacked-LSTM encoder-decoder with a dense regression head.

    The input sequence is encoded by a stack of LSTM cells; the encoder's final
    state initialises a decoder stack that is unrolled for
    ``params["output_sequence_length"]`` steps. Every decoder output is passed
    through a small DNN that emits one value per step.

    During TRAIN/EVAL the decoder is teacher-forced: its input at step ``t`` is
    the label of step ``t - 1`` (an all-zeros "go" token at step 0). During
    PREDICT the input at step ``t`` is the DNN's own prediction for step
    ``t - 1``. The DNN used for that feedback and the DNN used for the final
    predictions are the same variables.

    Args:
        features: dict of 2-D tensors, each of shape
            (current_batch_size, input_sequence_length). Must contain the key
            ``"price"``, which is used to read the dynamic batch size.
        labels: 2-D tensor of shape (current_batch_size, output_sequence_length)
            in TRAIN/EVAL; unused in PREDICT.
        mode: a ``tf.estimator.ModeKeys`` value.
        params: dict with keys ``input_sequence_length``,
            ``output_sequence_length``, ``lstm_hidden_units`` (list of ints),
            ``lstm_dropout_output_keep_probs`` (one entry per LSTM layer),
            ``dnn_hidden_units`` (list of ints) and ``learning_rate``.

    Returns:
        A ``tf.estimator.EstimatorSpec`` whose predictions dict holds
        ``"predicted"`` with shape (current_batch_size, output_sequence_length).
    """
    print("\nencoder_decoder_stacked_lstm_regression: features = {}".format(features))
    print("encoder_decoder_stacked_lstm_regression: labels = {}".format(labels))  # shape = (current_batch_size, output_sequence_length)
    print("encoder_decoder_stacked_lstm_regression: mode = {}".format(mode))
    print("encoder_decoder_stacked_lstm_regression: params = {}".format(params))

    is_training = mode == tf.estimator.ModeKeys.TRAIN
    has_labels = mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL)

    # 0. Get input sequence tensor into correct shape
    # Get dynamic batch size in case there was a partially filled batch
    current_batch_size = tf.shape(features["price"])[0]
    print("encoder_decoder_stacked_lstm_regression: current_batch_size = {}".format(current_batch_size))

    # Get the number of features
    number_of_features = len(features)
    print("encoder_decoder_stacked_lstm_regression: number_of_features = {}".format(number_of_features))

    # Stack all of the features into a 3-D tensor.
    # A real list is required (a Python 3 dict view is not one), and sorted-key
    # order keeps the feature axis identical no matter in which order the
    # training or serving input function happened to build the dict.
    feature_tensors = [features[key] for key in sorted(features)]
    X = tf.stack(values=feature_tensors, axis=2)  # shape = (current_batch_size, input_sequence_length, number_of_features)
    print("encoder_decoder_stacked_lstm_regression: X = {}".format(X))

    # Unstack the 3-D features tensor into a sequence (list) of 2-D tensors of shape = (current_batch_size, number_of_features)
    X_sequence = tf.unstack(value=X, num=params["input_sequence_length"], axis=1)
    print("encoder_decoder_stacked_lstm_regression: X_sequence = {}".format(X_sequence))

    ################################################################################
    # 1. Create encoder of encoder-decoder LSTM stacks
    # First create a list of LSTM cells using our list of lstm hidden unit sizes
    lstm_cells = [
        tf.contrib.rnn.BasicLSTMCell(num_units=units, forget_bias=1.0, state_is_tuple=True)
        for units in params["lstm_hidden_units"]
    ]
    print("encoder_decoder_stacked_lstm_regression: lstm_cells = {}".format(lstm_cells))

    # Next apply a dropout wrapper to each LSTM cell, on the outputs only.
    # Dropout is a training-time regulariser: outside TRAIN the keep probability
    # is forced to 1.0 so evaluation and serving are deterministic.
    dropout_lstm_cells = [
        tf.nn.rnn_cell.DropoutWrapper(
            cell=lstm_cell,
            input_keep_prob=1.0,
            output_keep_prob=(
                params["lstm_dropout_output_keep_probs"][cell_index] if is_training else 1.0),
            state_keep_prob=1.0)
        for cell_index, lstm_cell in enumerate(lstm_cells)
    ]
    print("encoder_decoder_stacked_lstm_regression: dropout_lstm_cells = {}".format(dropout_lstm_cells))

    # Create a stack of layers of LSTM cells
    stacked_lstm_cells = tf.contrib.rnn.MultiRNNCell(cells=dropout_lstm_cells, state_is_tuple=True)
    print("encoder_decoder_stacked_lstm_regression: stacked_lstm_cells = {}".format(stacked_lstm_cells))

    # Create the encoder variable scope
    with tf.variable_scope("encoder"):
        # Clone the (not yet built) stack: the encoder uses the copy, the decoder uses the original
        encoder_cells = copy.deepcopy(stacked_lstm_cells)
        print("encoder_decoder_stacked_lstm_regression: encoder_cells = {}".format(encoder_cells))

        # Encode the input sequence using our encoder stack of LSTMs
        encoder_outputs, encoder_final_state = tf.nn.static_rnn(
            cell=encoder_cells, inputs=X_sequence, dtype=tf.float32)
        print("encoder_decoder_stacked_lstm_regression: encoder_outputs = {}".format(encoder_outputs))  # list input_sequence_length long of shape = (current_batch_size, lstm_hidden_units[-1])
        print("encoder_decoder_stacked_lstm_regression: encoder_final_state = {}".format(encoder_final_state))  # tuple of final encoder c_state and h_state

    ################################################################################
    # 2. Shared DNN that maps a decoder hidden output to a single regression value
    # Capture the scope object once. Re-entering the *object* (rather than the
    # string "dnn") always lands in this exact scope, even from inside the
    # "decoder" scope, where the string would resolve to "decoder/dnn" and
    # silently create a second, never-trained set of weights.
    with tf.variable_scope("dnn", reuse=tf.AUTO_REUSE) as dnn_scope:
        pass

    def dnn_logits(network, log_prefix):
        """Apply the shared DNN to the last axis of `network`; returns shape (..., 1)."""
        print("{}network = {}".format(log_prefix, network))
        with tf.variable_scope(dnn_scope, reuse=tf.AUTO_REUSE):
            # Explicit layer names make the variable sharing independent of
            # TensorFlow's automatic "dense_N" numbering.
            for layer_index, units in enumerate(params["dnn_hidden_units"]):
                network = tf.layers.dense(
                    inputs=network,
                    units=units,
                    activation=tf.nn.relu,
                    name="hidden_{}".format(layer_index))
                print("{}network = {}, units = {}".format(log_prefix, network, units))
            # Final dense layer with no activation produces the regression output
            logits = tf.layers.dense(inputs=network, units=1, activation=None, name="logits")
        print("{}logits = {}\n".format(log_prefix, logits))
        return logits

    ################################################################################
    # 3. Create decoder of encoder-decoder LSTM stacks
    def rnn_decoder(decoder_inputs, initial_state, cell, inference):
        """Unroll `cell` once per element of `decoder_inputs`, starting from `initial_state`.

        When `inference` is true only `decoder_inputs[0]` is consumed; every
        later input is replaced by the shared DNN's prediction from the
        previous step's output. Returns (list of per-step outputs, final state).
        """
        # Create the decoder variable scope
        with tf.variable_scope("decoder"):
            # Load in our initial state from our encoder
            state = initial_state  # tuple of final encoder c_state and h_state
            print("encoder_decoder_stacked_lstm_regression: rnn_decoder: state = {}".format(state))

            # Hidden state output for every timestep
            outputs = []
            # Begin with no previous output
            previous_output = None

            # Loop over all of our decoder_inputs which will be output_sequence_length long
            for index, decoder_input in enumerate(decoder_inputs):
                # In inference, feed back the prediction made from the previous step's output
                if inference and previous_output is not None:
                    decoder_input = dnn_logits(
                        previous_output,
                        "encoder_decoder_stacked_lstm_regression: rnn_decoder: ")  # shape = (current_batch_size, 1)
                print("encoder_decoder_stacked_lstm_regression: rnn_decoder: decoder_input = {}\n".format(decoder_input))

                # After the first step, share the LSTM variables across timesteps
                if index > 0:
                    tf.get_variable_scope().reuse_variables()

                # Run the decoder input through the decoder stack picking up from the previous state
                output, state = cell(decoder_input, state)
                print("encoder_decoder_stacked_lstm_regression: rnn_decoder: output = {}".format(output))  # shape = (current_batch_size, lstm_hidden_units[-1])
                print("encoder_decoder_stacked_lstm_regression: rnn_decoder: state = {}".format(state))  # tuple of final decoder c_state and h_state

                # Append the current decoder hidden state output to the outputs list
                outputs.append(output)
                # Remember this output so the next step can derive its input from it
                previous_output = output

        return outputs, state

    # The decoder always starts from the same all-zeros "go" token
    go_token = tf.zeros(shape=[current_batch_size, 1])

    # Encoder-decoders work differently during training/evaluation and inference
    if has_labels:
        # Break 2-D labels tensor into a list of 1-D tensors
        unstacked_labels = tf.unstack(value=labels, num=params["output_sequence_length"], axis=1)  # list output_sequence_length long of shape = (current_batch_size,)
        print("encoder_decoder_stacked_lstm_regression: unstacked_labels = {}".format(unstacked_labels))

        # Expand each 1-D label tensor back into a 2-D tensor
        expanded_unstacked_labels = [tf.expand_dims(input=tensor, axis=-1) for tensor in unstacked_labels]  # list output_sequence_length long of shape = (current_batch_size, 1)
        print("encoder_decoder_stacked_lstm_regression: expanded_unstacked_labels = {}".format(expanded_unstacked_labels))

        # Teacher forcing, shifted right by one step: the step that must predict
        # label t sees label t-1, never label t itself (which would leak the
        # target into the input and disagree with the inference graph).
        teacher_forcing_inputs = [go_token] + expanded_unstacked_labels[:-1]

        decoder_outputs, decoder_states = rnn_decoder(
            decoder_inputs=teacher_forcing_inputs,
            initial_state=encoder_final_state,
            cell=stacked_lstm_cells,
            inference=False)
    else:
        # The list length must be output_sequence_length even though only the first element is used (as the go signal)
        fake_labels = [go_token for _ in range(params["output_sequence_length"])]
        print("encoder_decoder_stacked_lstm_regression: fake_labels = {}".format(fake_labels))

        decoder_outputs, decoder_states = rnn_decoder(
            decoder_inputs=fake_labels,
            initial_state=encoder_final_state,
            cell=stacked_lstm_cells,
            inference=True)
    print("encoder_decoder_stacked_lstm_regression: decoder_outputs = {}".format(decoder_outputs))  # list output_sequence_length long of shape = (current_batch_size, lstm_hidden_units[-1])
    print("encoder_decoder_stacked_lstm_regression: decoder_states = {}".format(decoder_states))  # tuple of final decoder c_state and h_state

    # Stack the per-step outputs batch-major (time on axis 1). Stacking on axis 0
    # would be time-major, and the reshape to (batch, time) below would then mix
    # values from different examples and time steps.
    stacked_decoder_outputs = tf.stack(values=decoder_outputs, axis=1)  # shape = (current_batch_size, output_sequence_length, lstm_hidden_units[-1])
    print("encoder_decoder_stacked_lstm_regression: stacked_decoder_outputs = {}".format(stacked_decoder_outputs))

    ################################################################################
    # 4. Apply the shared DNN to every decoder output
    logits = dnn_logits(
        stacked_decoder_outputs,
        "encoder_decoder_stacked_lstm_regression: ")  # shape = (current_batch_size, output_sequence_length, 1)

    # Drop the trailing unit axis so the predictions match the labels
    predictions = tf.reshape(
        tensor=logits,
        shape=[current_batch_size, params["output_sequence_length"]])  # shape = (current_batch_size, output_sequence_length)
    print("encoder_decoder_stacked_lstm_regression: predictions = {}\n".format(predictions))

    # 5. Loss function, training/eval ops
    if has_labels:
        loss = tf.losses.mean_squared_error(labels=labels, predictions=predictions)
        train_op = tf.contrib.layers.optimize_loss(
            loss=loss,
            global_step=tf.train.get_global_step(),
            learning_rate=params["learning_rate"],
            optimizer="Adam")
        eval_metric_ops = {
            "rmse": tf.metrics.root_mean_squared_error(labels=labels, predictions=predictions),
            "mae": tf.metrics.mean_absolute_error(labels=labels, predictions=predictions),
        }
    else:
        loss = None
        train_op = None
        eval_metric_ops = None

    # 6. Create predictions
    predictions_dict = {"predicted": predictions}

    # 7. Create export outputs
    export_outputs = {"predict_export_outputs": tf.estimator.export.PredictOutput(outputs=predictions)}

    # 8. Return EstimatorSpec
    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=predictions_dict,
        loss=loss,
        train_op=train_op,
        eval_metric_ops=eval_metric_ops,
        export_outputs=export_outputs)