# tools/agile-machine-learning-api/codes/trainer/models.py
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Common script for all estimator models."""
import tensorflow as tf
from utils import custom_utils_fns
class CannedModel(object):
    """Builds a model using tensorflow canned estimators.

    Maps a user-supplied model name to one of six supported canned
    estimators: linear / DNN / combined, each as classifier or regressor.
    """
    def __init__(
            self, model_name,
            feature_columns,
            deep_columns=None,
            hidden_units=None,
            model_dir=None,
            n_classes=2,
            label_dimension=1,
            weight_column=None,
            label_vocabulary=None,
            linear_optimizer='Ftrl',
            dnn_optimizer='Adagrad',
            activation_fn=None,
            dropout=None,
            batch_norm=None,
            partitioner=None,
            warm_start_from=None,
            input_layer_partitioner=None,
            loss_reduction=tf.losses.Reduction.MEAN,
            linear_sparse_combiner='sum',
            config=None):
        """Initializes the instance with parameters parsed from the user

        Args:
            model_name : string, Name of the model
            feature_columns : tf.feature_column object, Normal feature columns
            deep_columns : tf.feature_column object, DNN feature columns
            hidden_units : int, number of hidden units in neural network
            model_dir : str, directory to store model checkpoints
            n_classes : int, number of levels in target var
            label_dimension : int, dimensions of the label column
            weight_column : str, column defining feature column representing weights
            label_vocabulary : list of string, A list of strings represents possible label values
            linear_optimizer : str, An instance of tf.Optimizer used to train the model
            dnn_optimizer : str, An instance of tf.Optimizer used to train the deep learning model
            activation_fn : function, Activation function applied to each layer
            dropout : float, When not None, the probability we will drop out a given coordinate.
            batch_norm : boolean, Whether to use batch normalization after each hidden layer.
            partitioner : a partitioner function, Partitioner for input layer.
            warm_start_from : str, A string filepath to a checkpoint to warm-start from
            input_layer_partitioner : a partitioner function, Partitioner for input layer.
            loss_reduction : tf.losses.Reduction object, used to reduce training loss over batch.
            linear_sparse_combiner : str, A string specifying how to reduce
                the linear model if a categorical column is multivalent.
            config : tf.Config object, RunConfig object to configure the runtime settings.
        """
        self.model_name = model_name
        self.feature_columns = feature_columns
        self.deep_columns = deep_columns
        self.hidden_units = hidden_units
        self.model_dir = model_dir
        self.n_classes = n_classes
        self.label_dimension = label_dimension
        self.weight_column = weight_column
        self.label_vocabulary = label_vocabulary
        self.linear_optimizer = linear_optimizer
        self.dnn_optimizer = dnn_optimizer
        self.activation_fn = activation_fn
        self.dropout = dropout
        self.batch_norm = batch_norm
        self.input_layer_partitioner = input_layer_partitioner
        self.partitioner = partitioner
        self.warm_start_from = warm_start_from
        self.loss_reduction = loss_reduction
        self.linear_sparse_combiner = linear_sparse_combiner
        self.config = config

    def linear_classifier(self):
        """Builds the logistic regression model
        with the parameters parsed from the user input

        Returns : tf.estimator object, Canned estimator of Linear Classifier
        """
        return tf.estimator.LinearClassifier(
            config=self.config,
            feature_columns=self.feature_columns,
            label_vocabulary=self.label_vocabulary,
            loss_reduction=self.loss_reduction,
            n_classes=self.n_classes,
            optimizer=self.linear_optimizer,
            partitioner=self.partitioner,
            # Fix: weight_column was accepted in __init__ but never
            # forwarded here, unlike every other builder in this class.
            weight_column=self.weight_column,
            warm_start_from=self.warm_start_from
        )

    def linear_regressor(self):
        """Builds the linear regression model
        with the parameters parsed from the user input

        Returns : tf.estimator object, Canned estimator of Linear Regressor
        """
        return tf.estimator.LinearRegressor(
            config=self.config,
            feature_columns=self.feature_columns,
            label_dimension=self.label_dimension,
            optimizer=self.linear_optimizer,
            weight_column=self.weight_column,
            partitioner=self.partitioner,
            warm_start_from=self.warm_start_from,
            # Fix: was hard-coded to tf.losses.Reduction.MEAN, silently
            # ignoring the user-supplied loss_reduction.
            loss_reduction=self.loss_reduction
        )

    def dnn_classifier(self):
        """Builds the DNN model(classifier)
        with the parameters parsed from the user input

        Returns : tf.estimator object, Canned estimator of DNN Classifier
        """
        return tf.estimator.DNNClassifier(
            config=self.config,
            feature_columns=self.deep_columns,
            hidden_units=self.hidden_units,
            n_classes=self.n_classes,
            weight_column=self.weight_column,
            label_vocabulary=self.label_vocabulary,
            optimizer=self.dnn_optimizer,
            activation_fn=self.activation_fn,
            dropout=self.dropout,
            input_layer_partitioner=self.input_layer_partitioner,
            warm_start_from=self.warm_start_from,
            loss_reduction=self.loss_reduction
        )

    def dnn_regressor(self):
        """Builds the DNN model(regressor)
        with the parameters parsed from the user input

        Returns : tf.estimator object, Canned estimator of DNN Regressor
        """
        return tf.estimator.DNNRegressor(
            config=self.config,
            feature_columns=self.deep_columns,
            hidden_units=self.hidden_units,
            label_dimension=self.label_dimension,
            weight_column=self.weight_column,
            optimizer=self.dnn_optimizer,
            activation_fn=self.activation_fn,
            dropout=self.dropout,
            input_layer_partitioner=self.input_layer_partitioner,
            warm_start_from=self.warm_start_from,
            loss_reduction=self.loss_reduction
        )

    def combined_classifier(self):
        """Builds a combined DNN and linear classifier parsed from user input.

        Returns : tf.estimator object, Canned estimator of Combined Classifier
        """
        return tf.estimator.DNNLinearCombinedClassifier(
            config=self.config,
            linear_feature_columns=self.feature_columns,
            linear_optimizer=self.linear_optimizer,
            dnn_feature_columns=self.deep_columns,
            dnn_hidden_units=self.hidden_units,
            dnn_activation_fn=self.activation_fn,
            dnn_dropout=self.dropout,
            n_classes=self.n_classes,
            weight_column=self.weight_column,
            label_vocabulary=self.label_vocabulary,
            input_layer_partitioner=self.input_layer_partitioner,
            warm_start_from=self.warm_start_from,
            loss_reduction=self.loss_reduction,
            batch_norm=self.batch_norm,
            linear_sparse_combiner=self.linear_sparse_combiner
        )

    def combined_regressor(self):
        """Builds a combined DNN and linear regressor parsed from user input.

        Returns : tf.estimator object, Canned estimator of Combined Regressor
        """
        return tf.estimator.DNNLinearCombinedRegressor(
            config=self.config,
            linear_feature_columns=self.feature_columns,
            linear_optimizer=self.linear_optimizer,
            dnn_feature_columns=self.deep_columns,
            dnn_hidden_units=self.hidden_units,
            dnn_activation_fn=self.activation_fn,
            dnn_dropout=self.dropout,
            label_dimension=self.label_dimension,
            weight_column=self.weight_column,
            input_layer_partitioner=self.input_layer_partitioner,
            warm_start_from=self.warm_start_from,
            loss_reduction=self.loss_reduction,
            batch_norm=self.batch_norm,
            linear_sparse_combiner=self.linear_sparse_combiner
        )

    def build_model(self):
        """Builds one of the models from the above list.

        Returns : A Canned Estimator of the initiated model name

        Raises:
            ValueError : if model_name is not one of the supported models.
        """
        builders = {
            'linearclassifier': self.linear_classifier,
            'linearregressor': self.linear_regressor,
            'dnnclassifier': self.dnn_classifier,
            'dnnregressor': self.dnn_regressor,
            'combinedclassifier': self.combined_classifier,
            'combinedregressor': self.combined_regressor,
        }
        try:
            builder = builders[self.model_name]
        except KeyError:
            # Fix: an unrecognised name previously crashed with an
            # opaque UnboundLocalError; fail with an explicit message.
            raise ValueError(
                'Unknown model name: {}. Expected one of {}'.format(
                    self.model_name, sorted(builders)))
        return builder()
class CustomModel(object):
    """Builds a custom polynomial model (regressor or classifier)
    using tensorflow estimators.
    """
    def __init__(
            self,
            model_name,
            batch_size,
            optimizer,
            feature_names,
            model_dir=None,
            config=None,
            warm_start_from=None,
            learning_rate=0.03,
            polynomial_degree=2):
        """Initializes the classifier instance with parameters parsed from the user

        Args:
            model_name : str, name of the model
            batch_size : int, batch size
            optimizer : str, name of the optimizer to be used
            feature_names : list, names of the input feature columns
            model_dir : str, directory to store model checkpoints
            config : tf.Config object, RunConfig object to configure the runtime settings
            warm_start_from : str, A string filepath to a checkpoint to warm-start from
            learning_rate : float, learning rate for the optimizer
            polynomial_degree : int, degree to which polynomial model is to be used
        """
        self.model_name = model_name
        self.batch_size = batch_size
        self.model_dir = model_dir
        self.optimizer = optimizer
        self.config = config
        self.warm_start_from = warm_start_from
        self.polynomial_degree = polynomial_degree
        self.learning_rate = learning_rate
        self.feature_names = feature_names

    @staticmethod
    def poly_regression_model_fn(features, labels, mode, params):
        """Model function for the polynomial regression custom model

        Args:
            features : This is batch_features from input_fn
            labels : This is batch_labels from input_fn (None in PREDICT mode)
            mode : An instance of tf.estimator.ModeKeys
            params : dict, Additional configuration

        Returns: A Custom Estimator Spec of Polynomial regression
        """
        logits = custom_utils_fns.logits(features, params)
        # Prediction mode must be handled before any label-dependent op:
        # at serving/predict time `labels` is None and reshaping it crashed.
        if mode == tf.estimator.ModeKeys.PREDICT:
            predictions = {'prediction': logits}
            return tf.estimator.EstimatorSpec(
                mode=tf.estimator.ModeKeys.PREDICT,
                predictions=predictions,
                export_outputs={
                    'regress': tf.estimator.export.PredictOutput(predictions)
                }
            )
        # Fix: `params` is a plain dict (see polynomial_regressor), so it
        # must be indexed; the previous attribute access (params.batch_size)
        # raised AttributeError at graph-build time.
        labels = tf.reshape(labels, [1, params['batch_size']])
        loss = tf.losses.mean_squared_error(labels=labels, predictions=logits)
        # Regression metrics, shared by TRAIN (logging/summaries only)
        # and EVAL (also returned as eval_metric_ops).
        float_labels = tf.cast(labels, dtype=tf.float32)
        rmse = tf.metrics.root_mean_squared_error(float_labels, logits)
        mae = tf.metrics.mean_absolute_error(float_labels, logits)
        mse = tf.metrics.mean_squared_error(float_labels, logits)
        # Name tensors to be logged with LoggingTensorHook.
        tf.identity(loss, 'loss')
        tf.identity(rmse[1], name='rmse')
        tf.identity(mae[1], name='mae')
        tf.identity(mse[1], name='mse')
        # Save metric scalars to Tensorboard output.
        tf.summary.scalar('rmse', rmse[1])
        tf.summary.scalar('mae', mae[1])
        tf.summary.scalar('mse', mse[1])
        # Training mode
        if mode == tf.estimator.ModeKeys.TRAIN:
            optimizer = params['optimizer']
            return tf.estimator.EstimatorSpec(
                mode=tf.estimator.ModeKeys.TRAIN,
                loss=loss,
                train_op=optimizer.minimize(
                    loss,
                    tf.train.get_or_create_global_step()
                )
            )
        # Evaluation mode
        return tf.estimator.EstimatorSpec(
            mode=tf.estimator.ModeKeys.EVAL,
            loss=loss,
            eval_metric_ops={'rmse': rmse, 'mae': mae, 'mse': mse}
        )

    @staticmethod
    def poly_classification_model_fn(features, labels, mode, params):
        """Model function for classification custom models

        Args:
            features : This is batch_features from input_fn
            labels : This is batch_labels from input_fn (None in PREDICT mode)
            mode : An instance of tf.estimator.ModeKeys
            params : dict, Additional configuration

        Returns: A Custom Estimator Spec of Polynomial classification
        """
        logits = custom_utils_fns.logits(features, params)
        logits = tf.sigmoid(logits)
        # Prediction mode must be handled before any label-dependent op:
        # at serving/predict time `labels` is None and reshaping it crashed.
        if mode == tf.estimator.ModeKeys.PREDICT:
            predictions = {'prediction': logits}
            return tf.estimator.EstimatorSpec(
                mode=tf.estimator.ModeKeys.PREDICT,
                predictions=predictions,
                export_outputs={
                    'regress': tf.estimator.export.PredictOutput(predictions)
                }
            )
        labels = tf.reshape(labels, [1, params['batch_size']])
        # NOTE(review): the values fed to softmax_cross_entropy_with_logits_v2
        # have already been squashed by tf.sigmoid above, while that op
        # expects raw (unscaled) logits — confirm this is intended.
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(
                labels=tf.cast(labels, dtype=tf.float32),
                logits=logits
            )
        )
        # Classification metrics, shared by TRAIN (logging/summaries only)
        # and EVAL (also returned as eval_metric_ops).
        float_labels = tf.cast(labels, dtype=tf.float32)
        accuracy = tf.metrics.accuracy(
            labels=float_labels,
            predictions=logits
        )
        recall = tf.metrics.recall(
            labels=float_labels,
            predictions=logits
        )
        precision = tf.metrics.precision(
            labels=float_labels,
            predictions=logits
        )
        # Name tensors to be logged with LoggingTensorHook.
        tf.identity(loss, 'loss')
        tf.identity(accuracy[1], name='accuracy')
        tf.identity(recall[1], name='recall')
        tf.identity(precision[1], name='precision')
        # Save metric scalars to Tensorboard output.
        tf.summary.scalar('accuracy', accuracy[1])
        tf.summary.scalar('precision', precision[1])
        tf.summary.scalar('recall', recall[1])
        # Training mode
        if mode == tf.estimator.ModeKeys.TRAIN:
            optimizer = params['optimizer']
            return tf.estimator.EstimatorSpec(
                mode=tf.estimator.ModeKeys.TRAIN,
                loss=loss,
                train_op=optimizer.minimize(
                    loss,
                    tf.train.get_or_create_global_step()
                )
            )
        # Evaluation mode
        return tf.estimator.EstimatorSpec(
            mode=tf.estimator.ModeKeys.EVAL,
            loss=loss,
            eval_metric_ops={
                'accuracy': accuracy,
                'precision': precision,
                'recall': recall
            }
        )

    def polynomial_regressor(self):
        """Builds the polynomial regression model
        with the parameters parsed from the user input

        Returns: A Custom Estimator of Polynomial regression
        """
        return tf.estimator.Estimator(
            model_fn=self.poly_regression_model_fn,
            model_dir=self.model_dir, config=self.config,
            params={
                'batch_size': self.batch_size,
                'polynomial_degree': self.polynomial_degree,
                'feature_names': self.feature_names,
                'optimizer': self.optimizer
            },
            warm_start_from=self.warm_start_from
        )

    def polynomial_classifier(self):
        """Builds the logistic classification model
        with the parameters parsed from the user input

        Returns: A Custom Estimator of Polynomial classifier
        """
        return tf.estimator.Estimator(
            model_fn=self.poly_classification_model_fn,
            model_dir=self.model_dir,
            config=self.config,
            params={
                # NOTE(review): the regressor passes this value under the key
                # 'polynomial_degree' — verify against custom_utils_fns.logits
                # which key it actually reads; kept as-is to avoid breaking it.
                'degree': self.polynomial_degree,
                'feature_names': self.feature_names,
                'batch_size': self.batch_size,
                'optimizer': self.optimizer
            },
            # Fix: warm_start_from was accepted in __init__ and used by the
            # regressor, but never forwarded here.
            warm_start_from=self.warm_start_from
        )

    def build_model(self):
        """Builds one of the models from the above list.

        Returns: A Custom Estimator of the initiated model name

        Raises:
            ValueError : if model_name is not one of the supported models.
        """
        builders = {
            'polynomialregressor': self.polynomial_regressor,
            'polynomialclassifier': self.polynomial_classifier,
        }
        try:
            builder = builders[self.model_name]
        except KeyError:
            # Fix: an unrecognised name previously crashed with an
            # opaque UnboundLocalError; fail with an explicit message.
            raise ValueError(
                'Unknown model name: {}. Expected one of {}'.format(
                    self.model_name, sorted(builders)))
        return builder()