tools/agile-machine-learning-api/train.py (77 lines of code) (raw):

# Copyright 2019 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """ API framework to post a training job """ import os import yaml from googleapiclient import discovery def post( cfg, train_csv_path, eval_csv_path, task_type, target_var, data_type, column_name, na_values, condition, n_classes, to_drop, name, hidden_units, num_layers, lin_opt, deep_opt, train_steps, export_dir, jobid): """ Post request to submit the training job Args: cfg: dict, Configurations from yaml file train_csv_path: string, Path of the Train csv eval_csv_path: string, Path of the Eval csv task_type: string, Type of the task (eg LinearClassifier etc.) target_var: string, Target column name in the given data data_type: dict, A dictionary containing feature names as key and values as the types of the feature column_name: list of strings, Column names in the given data na_values: string, Null value character in the data condition: string, Condition to convert seperate classes in the target column n_classes: integer, Number of classes in target column to_drop: list of strings, Specific columns to drop name: string, Name of the model you want to use hidden_units: integer, No. of hidden units for deep classifiers and regressors num_layers: integer, No of layers for deep classifiers and regressors lin_opt: string, Linear Optimizer deep_opt: string, Deep Optimizer job_dir: string, Job directory for CMLE job train_steps: integer, No. of training steps export_dir: string, Export directory of trained model jobid: string, Job ID of the training Returns: Response of the Training job """ with open('config/train.yaml', 'rb') as config_yml: train_cfg = yaml.load(config_yml) project_id = 'projects/{}'.format(cfg['project_id']) cloudml = discovery.build('ml', 'v1') params = [ '--train_csv_path', train_csv_path, '--eval_csv_path', eval_csv_path, '--task_type', task_type, '--target_var', target_var, '--data_type', data_type, '--column_name', column_name, '--na_values', na_values, '--condition', condition, '--n_classes', n_classes, '--to_drop', to_drop, '--name', name, '--hidden_units', hidden_units, '--num_layers', num_layers, '--lin_opt', lin_opt, '--deep_opt', deep_opt, '--train_steps', train_steps, '--export_dir', export_dir ] current_models = [ 'linearclassifier', 'linearregressor', 'dnnclassifier', 'dnnregressor', 'combinedclassifier', 'combinedregressor' ] if name not in current_models: raise AssertionError( 'Please provide a model name from the following : {}'.format( str(current_models))) training_inputs = { 'scaleTier': train_cfg['scaleTier'], 'masterType': train_cfg['masterType'], 'workerType': train_cfg['workerType'], 'parameterServerType': train_cfg['parameterServerType'], 'workerCount': train_cfg['workerCount'], 'parameterServerCount': train_cfg['parameterServerCount'], 'packageUris': train_cfg['packageUris'], 'pythonModule': "trainer.launch_demo", 'args': params, 'region': train_cfg['region'], 'jobDir': os.path.join(train_cfg['jobDir'], jobid), 'runtimeVersion': train_cfg['runtimeVersion'], 'pythonVersion': train_cfg['pythonVersion'] } job_spec = {'jobId': jobid, 'trainingInput': training_inputs} response = cloudml.projects().jobs().create(body=job_spec, parent=project_id).execute() return response