In [1]:
from urllib.parse import quote

from IPython.display import Markdown as md

### change to reflect your notebook
_nb_loc = "10_mlops/10a_mlpipeline.ipynb"
_nb_title = "ML Pipeline"

### no need to change any of this
# Values embedded inside the deploy-notebook query string must be fully
# percent-encoded (including '/' and spaces), hence safe=''.
_nb_safeloc = quote(_nb_loc, safe='')
_nb_safetitle = quote(_nb_title, safe='')

# Format slots: {0} = raw notebook path, {1} = encoded title, {2} = encoded path.
# The Markdown object is the last expression so the cell renders it as output.
md("""
<table class="tfo-notebook-buttons" align="left">
  <td>
    <a target="_blank" href="https://console.cloud.google.com/ai-platform/notebooks/deploy-notebook?name={1}&url=https%3A%2F%2Fgithub.com%2FGoogleCloudPlatform%2Fpractical-ml-vision-book%2Fblob%2Fmaster%2F{2}&download_url=https%3A%2F%2Fgithub.com%2FGoogleCloudPlatform%2Fpractical-ml-vision-book%2Fraw%2Fmaster%2F{2}">
    <img src="https://raw.githubusercontent.com/GoogleCloudPlatform/practical-ml-vision-book/master/logo-cloud.png"/> Run in AI Platform Notebook</a>
  </td>
  <td>
    <a target="_blank" href="https://colab.research.google.com/github/GoogleCloudPlatform/practical-ml-vision-book/blob/master/{0}">
    <img src="https://www.tensorflow.org/images/colab_logo_32px.png" />Run in Google Colab</a>
  </td>
  <td>
    <a target="_blank" href="https://github.com/GoogleCloudPlatform/practical-ml-vision-book/blob/master/{0}">
    <img src="https://www.tensorflow.org/images/GitHub-Mark-32px.png" />View source on GitHub</a>
  </td>
  <td>
    <a href="https://raw.githubusercontent.com/GoogleCloudPlatform/practical-ml-vision-book/master/{0}">
    <img src="https://www.tensorflow.org/images/download_logo_32px.png" />Download notebook</a>
  </td>
</table>
""".format(_nb_loc, _nb_safetitle, _nb_safeloc))


<table class="tfo-notebook-buttons" align="left">
  <td>
    <a target="_blank" href="https://console.cloud.google.com/ai-platform/notebooks/deploy-notebook?name=ML Pipeline&url=https%3A%2F%2Fgithub.com%2FGoogleCloudPlatform%2Fpractical-ml-vision-book%2Fblob%2Fmaster%2F10_mlops%2F10a_mlpipeline.ipynb&download_url=https%3A%2F%2Fgithub.com%2FGoogleCloudPlatform%2Fpractical-ml-vision-book%2Fraw%2Fmaster%2F10_mlops%2F10a_mlpipeline.ipynb">
    <img src="https://raw.githubusercontent.com/GoogleCloudPlatform/practical-ml-vision-book/master/logo-cloud.png"/> Run in AI Platform Notebook</a>
  </td>
  </td>
  <td>
    <a target="_blank" href="https://colab.research.google.com/github/GoogleCloudPlatform/practical-ml-vision-book/blob/master/10_mlops/10a_mlpipeline.ipynb">
    <img src="https://www.tensorflow.org/images/colab_logo_32px.png" />Run in Google Colab</a>
  </td>
  <td>
    <a target="_blank" href="https://github.com/GoogleCloudPlatform/practical-ml-vision-book/blob/master/10_mlops/10a_mlpipeline.ipynb">
    <img src="https://www.tensorflow.org/images/GitHub-Mark-32px.png" />View source on GitHub</a>
  </td>
  <td>
    <a href="https://raw.githubusercontent.com/GoogleCloudPlatform/practical-ml-vision-book/master/10_mlops/10a_mlpipeline.ipynb">
    <img src="https://www.tensorflow.org/images/download_logo_32px.png" />Download notebook</a>
  </td>
</table>


# Machine Learning Pipeline

In this notebook, we show how to run the flowers classification workflow as a pipeline.

## Set up

In [None]:
%pip install --upgrade --user kfp

In [1]:
# Settings to edit before running the rest of the notebook.

# Pick a region where you have quota.
REGION = "us-central1"

# Host URL of the Kubeflow Pipelines cluster you launched.
KFPHOST = "https://40e09ee3a33a422-dot-us-central1.pipelines.googleusercontent.com"

In [3]:
# `!cmd` captures the command's output as a list of lines; keep the first line,
# which is the active gcloud project id.
PROJECT = !gcloud config get-value project
PROJECT = PROJECT[0]
print(PROJECT)
# Export the settings as environment variables.
%env PROJECT = {PROJECT}
%env REGION = {REGION}
# The bucket name is derived from the project id.
BUCKET = PROJECT + "-flowers-pipeline"
%env BUCKET = {BUCKET}

ai-analytics-solutions
env: PROJECT=ai-analytics-solutions
env: REGION=us-central1
env: BUCKET=ai-analytics-solutions-flowers-pipeline


In [3]:
# Create the Cloud Storage bucket (in REGION) that the pipeline writes its data and model to.
# NOTE(review): presumably reports an error if the bucket already exists on a re-run — confirm.
!gsutil mb -l {REGION} gs://{BUCKET}

Creating gs://ai-analytics-solutions-flowers-pipeline/...


## Build the container

In [4]:
%%capture --no-stderr
# Run the repository's image build script from the parent directory; %%capture hides
# the script's stdout while --no-stderr leaves error output visible.
!../build_docker_image.sh

## Convert JPEG files to TF Records

In [5]:
%%writefile components/create_dataset.yaml
# Component spec: runs create_dataset.sh inside the book's container image.
name: create_dataset
description: Converts JPEG files to TensorFlow Records using Dataflow or Apache Beam
inputs:
- {name: runner, type: str, default: 'DirectRunner', description: 'DirectRunner or DataflowRunner'}
- {name: project_id, type: str, description: 'Project to bill Dataflow job to'}
- {name: region, type: str, description: 'Region to run Dataflow job in'}
- {name: input_csv, type: GCSPath, description: 'Path to CSV file'}
- {name: output_dir, type: GCSPath, description: 'Top-level directory for TF records'}
- {name: labels_dict, type: GCSPath, description: 'Dictionary file for class names'}
outputs:
- {name: tfrecords_topdir, type: GCSPath, description: 'Top-level directory for TF records'}
implementation:
  container:
    image: gcr.io/ai-analytics-solutions/practical-ml-vision-book:latest
    command: [
        "bash", "/src/practical-ml-vision-book/10_mlops/components/create_dataset.sh"
    ]
    # The first two arguments are positional: the output_dir value and the path of
    # the file that receives the tfrecords_topdir output. The remaining arguments
    # are flag/value pairs; output_dir is therefore passed twice.
    # NOTE(review): presumably the script consumes the two positionals and forwards
    # the flags to the conversion job — confirm against create_dataset.sh.
    args: [
        {inputValue: output_dir},
        {outputPath: tfrecords_topdir},
        "--all_data", {inputValue: input_csv},
        "--labels_file", {inputValue: labels_dict},
        "--project_id", {inputValue: project_id},
        "--output_dir", {inputValue: output_dir},
        "--runner", {inputValue: runner},
        "--region", {inputValue: region},
    ]

Overwriting components/create_dataset.yaml


In [6]:
%%writefile components/noop_create_dataset.yaml
# Component spec with the same inputs and outputs as create_dataset, but it runs
# noop_create_dataset.sh and passes it only output_dir and the output file path.
# NOTE(review): the description below is identical to create_dataset's; presumably
# this variant skips the conversion and just reports output_dir — confirm against
# noop_create_dataset.sh and reword the description if so.
name: noop_create_dataset
description: Converts JPEG files to TensorFlow Records using Dataflow or Apache Beam
inputs:
- {name: runner, type: str, default: 'DirectRunner', description: 'DirectRunner or DataflowRunner'}
- {name: project_id, type: str, description: 'Project to bill Dataflow job to'}
- {name: region, type: str, description: 'Region to run Dataflow job in'}
- {name: input_csv, type: GCSPath, description: 'Path to CSV file'}
- {name: output_dir, type: GCSPath, description: 'Top-level directory for TF records'}
- {name: labels_dict, type: GCSPath, description: 'Dictionary file for class names'}
outputs:
- {name: tfrecords_topdir, type: GCSPath, description: 'Top-level directory for TF records'}
implementation:
  container:
    image: gcr.io/ai-analytics-solutions/practical-ml-vision-book:latest
    command: [
        "bash", "/src/practical-ml-vision-book/10_mlops/components/noop_create_dataset.sh"
    ]
    # Only two positional arguments: the output_dir value and the path of the file
    # that receives the tfrecords_topdir output. The other declared inputs are unused.
    args: [
        {inputValue: output_dir},
        {outputPath: tfrecords_topdir}
    ]

Overwriting components/noop_create_dataset.yaml


## Train model

To train locally on the Kubeflow Pipelines cluster instead of on Cloud AI Platform (CAIP), we'll use gcloud local training:

<pre>
gcloud ai-platform local train --package-path $PACKAGE_PATH \
       --module-name $MODULE_NAME --job-dir ${JOB_DIR}_local \
       -- --num_training_examples 100 --with_color_distort False --crop_ratio 0.6
</pre>

In [7]:
%%writefile components/train_model_kfp.yaml
# Component spec: runs train_model_kfp.sh inside the book's container image.
# It has the same inputs and outputs as train_model_caip, so the two are interchangeable.
name: train_model_kfp
description: Trains an ML model on KFP
inputs:
- {name: input_topdir, type: GCSPath, description: 'Top-level directory for TF records'}
- {name: region, type: str, description: 'Region (ignored)'}
- {name: job_dir, type: GCSPath, description: 'Top-level output directory'}
outputs:
- {name: trained_model, type: GCSPath, description: 'location of trained model'}
implementation:
  container:
    image: gcr.io/ai-analytics-solutions/practical-ml-vision-book:latest
    command: [
        "bash", "/src/practical-ml-vision-book/10_mlops/components/train_model_kfp.sh", 
    ]
    # Positional arguments, in order: input_topdir, region, job_dir, then the path of
    # the file that receives the trained_model output. region is still passed even
    # though its description marks it as ignored.
    args: [
        {inputValue: input_topdir},
        {inputValue: region},
        {inputValue: job_dir},
        {outputPath: trained_model},
    ]

Overwriting components/train_model_kfp.yaml


In [8]:
%%writefile components/train_model_caip.yaml
# Component spec: runs train_model_caip.sh inside the book's container image.
# It has the same inputs and outputs as train_model_kfp, so the two are interchangeable.
name: train_model_caip
description: Trains an ML model on CAIP
inputs:
- {name: input_topdir, type: GCSPath, description: 'Top-level directory for TF records'}
- {name: region, type: str, description: 'Region'}
- {name: job_dir, type: GCSPath, description: 'Top-level output directory'}
outputs:
- {name: trained_model, type: GCSPath, description: 'location of trained model'}
implementation:
  container:
    image: gcr.io/ai-analytics-solutions/practical-ml-vision-book:latest
    command: [
        "bash", "/src/practical-ml-vision-book/10_mlops/components/train_model_caip.sh", 
    ]
    # Positional arguments, in order: input_topdir, region, job_dir, then the path of
    # the file that receives the trained_model output.
    args: [
        {inputValue: input_topdir},
        {inputValue: region},
        {inputValue: job_dir},
        {outputPath: trained_model},
    ]

Overwriting components/train_model_caip.yaml


## The pipeline

In [4]:
import kfp
import kfp.dsl as dsl
import json
import os

# Component factories used by the pipeline below. The two local specs are the
# files written by the %%writefile cells; the alternative named in each trailing
# comment has the same inputs and outputs and can be swapped in.
_load_from_file = kfp.components.load_component_from_file

create_dataset_op = _load_from_file(
    'components/create_dataset.yaml'  # or 'components/noop_create_dataset.yaml'
)
train_model_op = _load_from_file(
    'components/train_model_caip.yaml'  # or 'components/train_model_kfp.yaml'
)

# The deploy step is loaded from the Kubeflow Pipelines repository.
deploy_op = kfp.components.load_component_from_url(
    'https://raw.githubusercontent.com/kubeflow/pipelines/master/components/gcp/ml_engine/deploy/component.yaml'
)

# Three-step pipeline: create TF Records -> train model -> deploy model.
# Parameter defaults come from the notebook-level PROJECT, BUCKET and REGION.
@dsl.pipeline(
    name='Flowers Transfer Learning Pipeline',
    description='End-to-end pipeline'
)
def flowerstxf_pipeline(
    project_id=PROJECT,
    bucket=BUCKET,
    region=REGION,
):
    # Everything the pipeline produces is written under the pipeline's bucket.
    tfrecords_dir = f'gs://{bucket}/data/flower_tfrecords'
    training_job_dir = f'gs://{bucket}/trained_model'

    # Step 1: Create dataset (JPEG -> TF Records on Dataflow)
    dataset_task = create_dataset_op(
        runner='DataflowRunner',
        project_id=project_id,
        region=region,
        input_csv='gs://practical-ml-vision-book-data/flowers_5_jpeg/flower_photos/all_data.csv',
        output_dir=tfrecords_dir,
        labels_dict='gs://practical-ml-vision-book-data/flowers_5_jpeg/flower_photos/dict.txt',
    )
    # Cached results of this step may be reused for up to 7 days.
    dataset_task.execution_options.caching_strategy.max_cache_staleness = "P7D"

    # Step 2: Train model on the records produced by step 1
    training_task = train_model_op(
        input_topdir=dataset_task.outputs['tfrecords_topdir'],
        region=region,
        job_dir=training_job_dir,
    )
    # Zero staleness: never reuse a cached training result.
    training_task.execution_options.caching_strategy.max_cache_staleness = "P0D"

    # Step 3: Deploy trained model as version 'txf' of model 'flowers'
    deploy_op(
        model_uri=training_task.outputs['trained_model'],
        project_id=project_id,
        model_id='flowers',
        version_id='txf',
        runtime_version='2.3',
        python_version='3.7',
        version={},
        replace_existing_version='True',
        set_default='True',
        wait_interval='30',
    )

## Compile and submit pipeline

In [5]:
import kfp.compiler as compiler

# Compile the pipeline function into a package named after the function.
pipeline_func = flowerstxf_pipeline
pipeline_filename = f'{pipeline_func.__name__}.zip'
compiler.Compiler().compile(pipeline_func, pipeline_filename)

In [6]:
import kfp

# Connect to the Kubeflow Pipelines host and submit the compiled package as a run
# in the 'from_notebook' experiment.
client = kfp.Client(host=KFPHOST)
experiment = client.create_experiment('from_notebook')
run_name = f'{pipeline_func.__name__} run'

# Values for the pipeline's parameters.
pipeline_params = {
    'project_id': PROJECT,
    'bucket': BUCKET,
    'region': REGION,
}
run_result = client.run_pipeline(
    experiment.id,
    run_name,
    pipeline_filename,
    pipeline_params,
)

## License
Copyright 2020 Google Inc. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.