def encoder_decoder_stacked_lstm_regression()

in courses/machine_learning/asl/open_project/ASL_energy_prediction_models/meta_models/model_module/trainer/model.py [0:0]


def encoder_decoder_stacked_lstm_regression(features, labels, mode, params):
    print("\nencoder_decoder_stacked_lstm_regression: features = {}".format(features))
    print("encoder_decoder_stacked_lstm_regression: labels = {}".format(labels)) # shape = (current_batch_size, output_sequence_length)
    print("encoder_decoder_stacked_lstm_regression: mode = {}".format(mode))
    print("encoder_decoder_stacked_lstm_regression: params = {}".format(params))

    # 0. Get input sequence tensor into correct shape
    # Get dynamic batch size in case there was a partially filled batch
    current_batch_size = tf.shape(features["price"])[0]
    print("encoder_decoder_stacked_lstm_regression: current_batch_size = {}".format(current_batch_size))

    # Get the number of features 
    number_of_features = len(features)
    print("encoder_decoder_stacked_lstm_regression: number_of_features = {}".format(number_of_features))

    # Stack all of the features into a 3-D tensor
    X = tf.stack(values = list(features.values()), axis = 2) # shape = (current_batch_size, input_sequence_length, number_of_features)
    print("encoder_decoder_stacked_lstm_regression: X = {}".format(X))

    # Unstack the 3-D features tensor into a sequence (list) of 2-D tensors of shape = (current_batch_size, number_of_features)
    X_sequence = tf.unstack(value = X, num = params['input_sequence_length'], axis = 1)
    print("encoder_decoder_stacked_lstm_regression: X_sequence = {}".format(X_sequence))

    ################################################################################

    # 1. Create encoder of encoder-decoder LSTM stacks
    # First create a list of LSTM cells using our list of lstm hidden unit sizes
    lstm_cells = [tf.contrib.rnn.BasicLSTMCell(num_units = units, forget_bias = 1.0, state_is_tuple = True) for units in params["lstm_hidden_units"]] # list of LSTM cells
    print("encoder_decoder_stacked_lstm_regression: lstm_cells = {}".format(lstm_cells))

    # Next apply a dropout wrapper to our stack of LSTM cells, in this case just on the outputs
    dropout_lstm_cells = [tf.nn.rnn_cell.DropoutWrapper(cell = lstm_cells[cell_index], 
                                                        input_keep_prob = 1.0, 
                                                        output_keep_prob = params["lstm_dropout_output_keep_probs"][cell_index], 
                                                        state_keep_prob = 1.0) for cell_index in range(len(lstm_cells))]
    print("encoder_decoder_stacked_lstm_regression: dropout_lstm_cells = {}".format(dropout_lstm_cells))

    # Create a stack of layers of LSTM cells
    stacked_lstm_cells = tf.contrib.rnn.MultiRNNCell(cells = dropout_lstm_cells, state_is_tuple = True) # combines list into MultiRNNCell object
    print("encoder_decoder_stacked_lstm_regression: stacked_lstm_cells = {}".format(stacked_lstm_cells))

    # Create the encoder variable scope
    with tf.variable_scope("encoder"):
        # Clone the stacked_lstm_cells subgraph since we will be using a copy for the encoder side and the decoder side
        encoder_cells = copy.deepcopy(stacked_lstm_cells)
        print("encoder_decoder_stacked_lstm_regression: encoder_cells = {}".format(encoder_cells))

        # Encode the input sequence using our encoder stack of LSTMs
        encoder_outputs, encoder_final_state = tf.nn.static_rnn(cell = encoder_cells, inputs = X_sequence, dtype = tf.float32)
        print("encoder_decoder_stacked_lstm_regression: encoder_outputs = {}".format(encoder_outputs)) # list input_sequence_length long of shape = (current_batch_size, lstm_hidden_units[-1])
        print("encoder_decoder_stacked_lstm_regression: encoder_final_state = {}".format(encoder_final_state)) # tuple of final encoder c_state and h_state

    ################################################################################

    # 2. Create decoder of encoder-decoder LSTM stacks
    # The rnn_decoder function takes labels during TRAIN/EVAL and a start token followed by its previous predictions during PREDICT
    # Starts with an initial state of the final encoder states
    def rnn_decoder(decoder_inputs, initial_state, cell, inference):
        # Create the decoder variable scope
        with tf.variable_scope("decoder"):
            # Load in our initial state from our encoder
            state = initial_state # tuple of final encoder c_state and h_state
            print("encoder_decoder_stacked_lstm_regression: rnn_decoder: state = {}".format(state))

            # Create an empty list to store our hidden state output for every timestep
            outputs = []

            # Begin with no previous output
            previous_output = None

            # Loop over all of our decoder_inputs which will be output_sequence_length long
            for index, decoder_input in enumerate(decoder_inputs):
                # If there has been a previous output then we will determine the next input
                if previous_output is not None:
                    # Create the input layer to our DNN
                    network = previous_output # shape = (current_batch_size, lstm_hidden_units[-1])
                    print("encoder_decoder_stacked_lstm_regression: rnn_decoder: network = {}".format(network))

                    # Create our dnn variable scope
                    with tf.variable_scope(name_or_scope = "dnn", reuse = tf.AUTO_REUSE):
                        # Add hidden layers with the given number of units/neurons per layer
                        for units in params['dnn_hidden_units']:
                            network = tf.layers.dense(inputs = network, units = units, activation = tf.nn.relu) # shape = (current_batch_size, dnn_hidden_units[i])
                            print("encoder_decoder_stacked_lstm_regression: rnn_decoder: network = {}, units = {}".format(network, units))
              
                        # Connect the final hidden layer to a dense layer with no activation to get the logits
                        logits = tf.layers.dense(inputs = network, units = 1, activation = None) # shape = (current_batch_size, 1)
                        print("encoder_decoder_stacked_lstm_regression: rnn_decoder: logits = {}\n".format(logits))
          
                    # If we are in inference then we will overwrite our next decoder_input with the logits we just calculated.
                    # Otherwise, we leave the decoder_input as it came from the enumerated list
                    # We have to calculate the logits even when not using them so that the correct dnn subgraph will be generated here and after the encoder-decoder for both training and inference
                    if inference:
                        decoder_input = logits # shape = (current_batch_size, 1)

                    print("encoder_decoder_stacked_lstm_regression: rnn_decoder: decoder_input = {}\n".format(decoder_input))
        
                # If this isn't our first time through the loop, just reuse (share) the same variables for each iteration within the current variable scope
                if index > 0:
                    tf.get_variable_scope().reuse_variables()

                # Run the decoder input through the decoder stack picking up from the previous state
                output, state = cell(decoder_input, state)
                print("encoder_decoder_stacked_lstm_regression: rnn_decoder: output = {}".format(output)) # shape = (current_batch_size, lstm_hidden_units[-1])
                print("encoder_decoder_stacked_lstm_regression: rnn_decoder: state = {}".format(state)) # tuple of final decoder c_state and h_state

                # Append the current decoder hidden state output to the outputs list
                outputs.append(output) # growing list eventually output_sequence_length long of shape = (current_batch_size, lstm_hidden_units[-1])

                # Set the previous output to the output just calculated
                previous_output = output # shape = (current_batch_size, lstm_hidden_units[-1])
            return outputs, state
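    # A sketch of the two behaviours of rnn_decoder (no additional logic, just a summary of the loop above):
    # - inference = False (TRAIN/EVAL): decoder_inputs is the label sequence, so every timestep consumes a ground-truth value; the per-step logits are still built so the "dnn" variables exist, but they are not fed back
    # - inference = True (PREDICT): only decoder_inputs[0] (the zero "go" tensor) is consumed as-is; from the second timestep onward the input is replaced by the DNN projection of the previous decoder output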
  
    # Encoder-decoders behave differently during training/evaluation and inference, so we build a separate decoder subgraph for each case
    if mode == tf.estimator.ModeKeys.TRAIN  or mode == tf.estimator.ModeKeys.EVAL:
        # Break 2-D labels tensor into a list of 1-D tensors
        unstacked_labels = tf.unstack(value = labels, num = params['output_sequence_length'], axis = 1) # list of output_sequence_length long of shape = (current_batch_size,)
        print("encoder_decoder_stacked_lstm_regression: unstacked_labels = {}".format(unstacked_labels))

        # Expand each 1-D label tensor back into a 2-D tensor
        expanded_unstacked_labels = [tf.expand_dims(input = tensor, axis = -1) for tensor in unstacked_labels] # list of output_sequence_length long of shape = (current_batch_size, 1)
        print("encoder_decoder_stacked_lstm_regression: expanded_unstacked_labels = {}".format(expanded_unstacked_labels))

        # Call our decoder using the labels as our inputs, the encoder final state as our initial state, our other LSTM stack as our cells, and inference set to false
        decoder_outputs, decoder_states = rnn_decoder(decoder_inputs = expanded_unstacked_labels, initial_state = encoder_final_state, cell = stacked_lstm_cells, inference = False)
    else:
        # Since this is inference, create fake labels. The list still needs to be output_sequence_length long even though only the first element is actually used (as our go signal)
        fake_labels = [tf.zeros(shape = [current_batch_size, 1]) for _ in range(params['output_sequence_length'])]
        print("encoder_decoder_stacked_lstm_regression: fake_labels = {}".format(fake_labels))

        # Call our decoder using fake labels as our inputs, the encoder final state as our initial state, our other LSTM stack as our cells, and inference set to true
        decoder_outputs, decoder_states = rnn_decoder(decoder_inputs = fake_labels, initial_state = encoder_final_state, cell = stacked_lstm_cells, inference = True)
    print("encoder_decoder_stacked_lstm_regression: decoder_outputs = {}".format(decoder_outputs)) # list output_sequence_length long of shape = (current_batch_size, lstm_hidden_units[-1])
    print("encoder_decoder_stacked_lstm_regression: decoder_states = {}".format(decoder_states)) # tuple of final decoder c_state and h_state

    # Stack the list of decoder output tensors into one tensor and flatten the batch and time dimensions together (batch-major) so the final reshape back to (current_batch_size, output_sequence_length) lines up with the labels
    stacked_decoder_outputs = tf.stack(values = decoder_outputs, axis = 1) # shape = (current_batch_size, output_sequence_length, lstm_hidden_units[-1])
    stacked_decoder_outputs = tf.reshape(tensor = stacked_decoder_outputs, shape = [-1, params["lstm_hidden_units"][-1]]) # shape = (current_batch_size * output_sequence_length, lstm_hidden_units[-1])
    print("encoder_decoder_stacked_lstm_regression: stacked_decoder_outputs = {}".format(stacked_decoder_outputs))

    ################################################################################

    # 3. Create the DNN structure now after the encoder-decoder LSTM stack
    # Create the input layer to our DNN
    network = stacked_decoder_outputs # shape = (current_batch_size * output_sequence_length, lstm_hidden_units[-1])
    print("encoder_decoder_stacked_lstm_regression: network = {}".format(network))

    # Reuse the same variable scope as we used within our decoder (for inference)
    with tf.variable_scope(name_or_scope = "dnn", reuse = tf.AUTO_REUSE):
        # Add hidden layers with the given number of units/neurons per layer
        for units in params['dnn_hidden_units']:
            network = tf.layers.dense(inputs = network, units = units, activation = tf.nn.relu) # shape = (current_batch_size * output_sequence_length, dnn_hidden_units[i])
            print("encoder_decoder_stacked_lstm_regression: network = {}, units = {}".format(network, units))

        # Connect the final hidden layer to a dense layer with no activation to get the logits
        logits = tf.layers.dense(inputs = network, units = 1, activation = None) # shape = (current_batch_size * output_sequence_length, 1)
        print("encoder_decoder_stacked_lstm_regression: logits = {}\n".format(logits))
  
    # Now that we are through the final DNN for each sequence element for each example in the batch, reshape the predictions to match our labels
    predictions = tf.reshape(tensor = logits, shape = [current_batch_size, params['output_sequence_length']]) # shape = (current_batch_size, output_sequence_length)
    print("encoder_decoder_stacked_lstm_regression: predictions = {}\n".format(predictions))

    # 4. Loss function, training/eval ops
    if mode == tf.estimator.ModeKeys.TRAIN or mode == tf.estimator.ModeKeys.EVAL:
        loss = tf.losses.mean_squared_error(labels = labels, predictions = predictions)
        train_op = tf.contrib.layers.optimize_loss(
            loss = loss,
            global_step = tf.train.get_global_step(),
            learning_rate = params['learning_rate'],
            optimizer = "Adam")
        eval_metric_ops = {
            "rmse": tf.metrics.root_mean_squared_error(labels = labels, predictions = predictions),
            "mae": tf.metrics.mean_absolute_error(labels = labels, predictions = predictions)
        }
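        # Note: each tf.metrics.* call above returns a (value_tensor, update_op) pair, which is the form
        # tf.estimator.EstimatorSpec expects for every entry of eval_metric_ops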
    else:
        loss = None
        train_op = None
        eval_metric_ops = None

    # 5. Create predictions
    predictions_dict = {"predicted": predictions}

    # 6. Create export outputs
    export_outputs = {"predict_export_outputs": tf.estimator.export.PredictOutput(outputs = predictions)}
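    # Serving-side sketch (an assumption about how the export is consumed, not code in this module): PredictOutput wraps the
    # (current_batch_size, output_sequence_length) predictions tensor, so each example sent to the exported SavedModel
    # comes back as output_sequence_length predicted values under the "predict_export_outputs" signature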

    # 7. Return EstimatorSpec
    return tf.estimator.EstimatorSpec(
        mode = mode,
        predictions = predictions_dict,
        loss = loss,
        train_op = train_op,
        eval_metric_ops = eval_metric_ops,
        export_outputs = export_outputs)