### IMPORTANT NOTE: 
- You need to restart the kernel under Kernel | Restart Kernel after installing TensorFlow
- Run the first cell ```!pip install -q tensorflow --user``` and then restart the kernel
- You can then run the entire notebook under Run | Run All Cells
   

### Use Cases:
    - Do you need to create custom models in TensorFlow and score your data in BigQuery?
    - BigQuery has BQML, but this might not cover all of your ML needs
    - You can create a custom model and then import the model into BigQuery and use it for scoring

### Note:
    - These models are samples and are not trained for accuracy or precision
    
### Description: 
    - Query data in BigQuery
    - Create a Linear Regression model
    - Export the model to storage
    - Create a Deep Neural Network model
    - Export the model to storage
    - Use the BigQuery stored procedure "sp_demo_machine_learning_import_tensorflow" to import the models and score data

### Reference:
    - https://www.tensorflow.org/tutorials/keras/regression
    - https://www.tensorflow.org/hub/exporting_tf2_saved_model
    - https://cloud.google.com/bigquery-ml/docs/making-predictions-with-imported-tensorflow-models

### Clean up / Reset script:
    n/a   


In [None]:
# Remove prior saved models
!gsutil rm -r gs://${bucket_name}/tensorflow

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Make NumPy printouts easier to read.
np.set_printoptions(precision=3, suppress=True)

In [None]:
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers

print(tf.__version__)

### Predict the fare amount based upon time (minutes) and distance

In [None]:
%%bigquery df_taxi_trips
SELECT Fare_Amount,
       Trip_Distance,
       CAST(DATETIME_DIFF(Dropoff_DateTime, Pickup_DateTime, MINUTE) AS FLOAT64) AS Minutes
  FROM `${project_id}.taxi_dataset.taxi_trips`
 -- Half-open range covers all of January; an upper bound of '2020-01-31' would
 -- stop at midnight and drop the trips picked up during Jan 31 itself.
 WHERE Pickup_DateTime >= '2020-01-01'
   AND Pickup_DateTime <  '2020-02-01'  -- Small dataset
   AND DATETIME_DIFF(Dropoff_DateTime, Pickup_DateTime, MINUTE) BETWEEN 5 AND 60  -- Somewhat of a normal time
   AND Fare_Amount > 0
   AND Trip_Distance > 0
LIMIT 10000;  -- not too many so we are not here all day

In [None]:
df_taxi_trips.head()

In [None]:
# Sample 80% of the rows for training (fixed seed so the split is repeatable);
# the rows whose index was not sampled become the test set.
train_dataset = df_taxi_trips.sample(frac=0.8, random_state=0)
test_dataset = df_taxi_trips.drop(train_dataset.index)

In [None]:
sns.pairplot(train_dataset[['Fare_Amount', 'Trip_Distance', 'Minutes']], diag_kind='kde')

In [None]:
train_dataset.describe().transpose()

In [None]:
# Work on copies so train_dataset / test_dataset keep their Fare_Amount column.
train_features = train_dataset.copy()
test_features = test_dataset.copy()

# pop() removes the label column from the feature frames and returns it;
# the remaining columns are the model inputs.
train_labels = train_features.pop('Fare_Amount')
test_labels = test_features.pop('Fare_Amount')

In [None]:
train_dataset.describe().transpose()[['mean', 'std']]

In [None]:
normalizer = tf.keras.layers.Normalization(axis=-1)

In [None]:
# Fit the normalization layer's statistics on the training features only.
normalizer.adapt(np.array(train_features))

In [None]:
tf.keras.layers.Normalization

In [None]:
print(normalizer.mean.numpy())

In [None]:
# Linear regression: the normalization layer followed by a single Dense unit
# with no activation, giving one linear output per input row.
taxi_fare_model = tf.keras.Sequential([
    normalizer,
    layers.Dense(units=1)
])

taxi_fare_model.summary()

In [None]:
taxi_fare_model.predict(train_features[:10])

In [None]:
taxi_fare_model.layers[1].kernel

In [None]:
# Configure training: Adam optimizer with learning rate 0.1, minimizing
# mean absolute error.
taxi_fare_model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.1),
    loss='mean_absolute_error')

In [None]:
%%time
history = taxi_fare_model.fit(
    train_features,
    train_labels,
    epochs=100,
    # Suppress logging.
    verbose=0,
    # Calculate validation results on 20% of the training data.
    validation_split = 0.2)

In [None]:
hist = pd.DataFrame(history.history)
hist['epoch'] = history.epoch
hist.tail()

In [None]:
def plot_loss(history):
    """Plot the training and validation loss curves of a fit run.

    Args:
        history: Object whose ``history`` attribute is a mapping that
            contains the ``'loss'`` and ``'val_loss'`` series.
    """
    # One curve per recorded series, labelled with its own key.
    for series_name in ('loss', 'val_loss'):
        plt.plot(history.history[series_name], label=series_name)

    plt.xlabel('Epoch')
    plt.ylabel('Error [Fare_Amount]')
    # Fixed y-range of 0 to 10.
    plt.ylim([0, 10])
    plt.grid(True)
    plt.legend()

In [None]:
plot_loss(history)

In [None]:
# Test-set evaluation results, keyed by model name.
test_results = {}

# evaluate() is run on the held-out test features/labels; the model was
# compiled with mean absolute error as its loss.
test_results['taxi_fare_model'] = taxi_fare_model.evaluate(
    test_features,
    test_labels, verbose=0)

In [None]:
# BigQuery: Run your model in BigQuery and here to see that they are the same

# Create a TensorFlow tensor with the correct shape
# One row with two values, in the same column order as the training features
# (Trip_Distance, then Minutes): distance 10, duration 20 minutes.
input_data = tf.constant([[10, 20]], dtype=tf.float32) # Adjust dtype if needed

prediction = taxi_fare_model.predict(input_data)
print(prediction)

In [None]:
# Generated test data
# 100 random rows of two features, each value drawn from [0, 10); no seed is
# set, so the points differ on every run.
x = 10 * np.random.random_sample((100, 2)) 
# Predicted fare for every generated row (linear model).
y = taxi_fare_model.predict(x,)
#print(x)
#print(y)

In [None]:
from mpl_toolkits import mplot3d

In [None]:
%matplotlib inline

In [None]:
# Set size, this works in notebooks
plt.rcParams['figure.figsize'] = [12, 12]

In [None]:
# 3D scatter of the generated inputs against the linear model's predictions.
ax = plt.axes(projection='3d')
ax.set_xlabel('Distance')
ax.set_ylabel('Time (minutes)')
ax.set_zlabel('Fare Amount');
ax.set_title('Fare Predictions');

# Data for three-dimensional scattered points
zdata = y[:,0]  # predicted fare (single output column)
xdata = x[:,0]  # first feature column, plotted on the Distance axis
ydata = x[:,1]  # second feature column, plotted on the Time axis
# Color the points by predicted fare.
ax.scatter3D(xdata, ydata, zdata, c=zdata, s=50, cmap='Blues')

plt.show()

In [None]:
# Write the trained linear model to Cloud Storage, where the BigQuery import
# procedure named in the notebook header picks it up.
# NOTE(review): Keras 3 (bundled with recent TensorFlow releases) reportedly
# requires a .keras/.h5 suffix for save() and uses export() for the SavedModel
# format - confirm against the TensorFlow version installed in this kernel.
taxi_fare_model.save('gs://${bucket_name}/tensorflow/taxi_fare_model/linear_regression')

In [None]:
def build_and_compile_model(norm):
    """Build and compile a small fully connected regression network.

    The network is ``norm`` followed by two 64-unit ReLU Dense layers and a
    single-unit Dense output. It is compiled with mean absolute error loss
    and an Adam optimizer constructed with 0.001.

    Args:
        norm: Layer placed first in the network (the caller passes a
            normalization layer).

    Returns:
        The compiled ``keras.Sequential`` model.
    """
    dense_stack = [
        layers.Dense(64, activation='relu'),
        layers.Dense(64, activation='relu'),
        layers.Dense(1),
    ]
    network = keras.Sequential([norm, *dense_stack])

    network.compile(
        optimizer=tf.keras.optimizers.Adam(0.001),
        loss='mean_absolute_error',
    )
    return network

In [None]:
# Build the deep model on top of the same `normalizer` layer instance that the
# linear model uses.
dnn_model = build_and_compile_model(normalizer)
dnn_model.summary()

In [None]:
%%time
history = dnn_model.fit(
    train_features,
    train_labels,
    validation_split=0.2,
    verbose=0, epochs=100)

In [None]:
# Set size, this works in notebooks
plt.rcParams['figure.figsize'] = [6, 6]

In [None]:
plot_loss(history)

In [None]:
# Record the DNN's test-set evaluation next to the linear model's result.
test_results['dnn_model'] = dnn_model.evaluate(test_features, test_labels, verbose=0)

In [None]:
# Compare the DNN's predictions with the true fares on the held-out test set.
# flatten() turns the predict() output into a 1-D array for plotting.
test_predictions = dnn_model.predict(test_features).flatten()

a = plt.axes(aspect='equal')
plt.scatter(test_labels, test_predictions)
# The plotted quantity is the fare amount (the label column), not MPG.
plt.xlabel('True Values [Fare_Amount]')
plt.ylabel('Predictions [Fare_Amount]')
lims = [0, 50]
plt.xlim(lims)
plt.ylim(lims)
# Diagonal reference line: points on it are exact predictions.
_ = plt.plot(lims, lims)

In [None]:
# Signed error per test row (prediction minus true fare), shown as a
# 25-bin histogram.
error = test_predictions - test_labels
plt.hist(error, bins=25)
plt.xlabel('Prediction Error [Fare_Amount]')
_ = plt.ylabel('Count')

In [None]:
# BigQuery: Run your model in BigQuery and here to see that they are the same
# One row in training-feature order (Trip_Distance, then Minutes).
input_data = tf.constant([[10.0,20.0]], dtype=tf.float32) # Adjust dtype if needed

# Score with the DNN model (the model saved at the end of this section) so the
# output can be compared with the imported DNN model in BigQuery.
prediction = dnn_model.predict(input_data)
print(prediction)

In [None]:
# Two rows in training-feature order (Trip_Distance, then Minutes) to show
# batch scoring.
input_data = tf.constant([[10.0,20.0], [5,6] ], dtype=tf.float32) # Adjust dtype if needed

# Score with the DNN model, which is the model this section trains and saves.
prediction = dnn_model.predict(input_data)
print(prediction)

In [None]:
# Write the trained DNN model to Cloud Storage, where the BigQuery import
# procedure named in the notebook header picks it up.
# NOTE(review): Keras 3 (bundled with recent TensorFlow releases) reportedly
# requires a .keras/.h5 suffix for save() and uses export() for the SavedModel
# format - confirm against the TensorFlow version installed in this kernel.
dnn_model.save('gs://${bucket_name}/tensorflow/taxi_fare_model/dnn')