# backend/training-pipeline/functions/api/start_job.py
# POST handler: starts the training Step Function execution.

def _common_hyperparameters(body):
    """Return the training hyper parameters shared by MODEL and HPO modes.

    All values are stringified because SageMaker expects hyper parameters
    as a string-to-string map.  Raises KeyError if a required key is
    missing from *body* (handled by the caller).
    """
    return {
        'target': str(body['target']),
        'batch_normalization': str(body['batchNormalization']),
        'include_dropout': str(body['includeDropout']),
        'loss_metric': str(body['lossMetric']),
        'monitor_metric': str(body['monitorMetric']),
        'lr_update_patience': str(body['lrUpdatePatience']),
        'early_stopping_patience': str(body['earlyStoppingPatience']),
    }


def post(event, context):
    """Lambda POST handler that starts a training Step Function execution.

    Builds the Step Function input from ``template_input`` according to the
    current ``MODE``:

    * ``Mode.MODEL`` — trains a final model and wires up the Create Model /
      Configure Endpoint / Deploy states, all sharing one name for tracing.
    * ``Mode.HPO`` — runs a hyper-parameter search only; the deployment
      states are removed from the input.

    Args:
        event: API Gateway proxy event; ``event['body']`` must be a JSON
            string with the training parameters.
        context: Lambda context object (unused).

    Returns:
        ``create_response_obj`` result: 400 for a missing/malformed body or
        missing parameter, 200 with the ``start_execution`` response on
        success.
    """
    print('===Starting start_job function ===')

    # Get parameters.  .get() avoids a KeyError (and thus a 500) when the
    # event carries no 'body' key at all.
    if event.get('body') is None:
        print('No parameter passed. Returning error.')
        return create_response_obj(400, {
            'errorMessage': 'No parameter passed in the POST body'
        })

    # Malformed JSON should be the caller's 400, not an unhandled 500.
    try:
        body = json.loads(event['body'])
    except json.JSONDecodeError:
        print('Malformed JSON in POST body. Returning error.')
        return create_response_obj(400, {
            'errorMessage': 'POST body is not valid JSON'
        })

    # Input for Step Function
    sf_input = copy.deepcopy(template_input)
    sf_input['Mode'] = MODE.value

    # Any missing body parameter below surfaces as KeyError; map it to a
    # 400 response naming the offending key instead of crashing the Lambda.
    try:
        # Input source for training
        s3_data_source = sf_input['Training']['InputDataConfig'][0]['DataSource']['S3DataSource']
        s3_filename = str(body['trainingDataS3Name'])
        s3_data_source['S3Uri'] = f"{s3_path}public/{s3_filename}"
        print(f"Training dataset is from: {s3_data_source['S3Uri']}")

        if MODE == Mode.MODEL:
            print('Training new model mode inited')

            name = MODE.value.lower() + '-' + body['modelName']
            # Hyper parameters
            sf_input['Training']['HyperParameters'] = {
                'final_training': 'True',
                # Common parameters
                **_common_hyperparameters(body),
                # Train new model specific parameters.  NOTE(review): the
                # layer count is len(nodes) while 'nodes' drops the last
                # entry — presumably the output layer is implied; confirm
                # against the training container.
                'nb_epochs_f': str(body['nbEpochsF']),
                'batch_size_f': str(body['batchSizeF']),
                'optimizer_f': str(body['optimizerF']),
                'last_activation_f': str(body['lastActivationF']),
                'num_layers_f': str(len(body['nodes'])),
                'nodes': str(body['nodes'][:-1])
            }

            # Use the same name for all components for ease of tracing
            sf_input['Training']['TrainingJobName'] = name
            sf_input['Create Model']['ModelName'] = name
            sf_input['Configure Endpoint']['EndpointConfigName'] = name
            sf_input['Configure Endpoint']['ProductionVariants'][0]['ModelName'] = name
            sf_input['Deploy']['EndpointConfigName'] = name
            sf_input['Deploy']['EndpointName'] = name
        elif MODE == Mode.HPO:
            print('HPO mode inited')

            # HPO jobs have no user-supplied name; timestamp keeps it unique.
            now = datetime.datetime.now()
            name = MODE.value.lower() + '-' + now.strftime("%Y-%m-%d-%H-%M-%S")

            # HPO only searches hyper parameters — no model is created or
            # deployed, so drop those states from the template.
            del sf_input['Create Model']
            del sf_input['Configure Endpoint']
            del sf_input['Deploy']

            sf_input['Training']['TrainingJobName'] = name
            sf_input['Training']['HyperParameters'] = {
                'final_training': 'False',
                # Common parameters
                **_common_hyperparameters(body),
                # HPO-specific parameters
                'dropout': str(body['dropout']),
                'train_validation_split': str(body['trainValidationSplit']),
                'used_data_percentage': str(body['usedDataPercentage']),
                'choice_of_node_numbers': str(body['choiceOfNodeNumbers']),
                'batch_size': str(body['batchSize']),
                'MAX_EVALS': str(body['maxEval']),
                'randstate': str(body['randomState']),
                'num_layers_low': str(body['numLayersLow']),
                'num_layers_high': str(body['numLayersHigh']),
                'nb_epochs': str(body['nbEpochs']),
                'optimizer': str(body['optimizers']),
                'last_activation': str(body['activationFunctions']),
            }
    except KeyError as err:
        print(f'Missing parameter {err}. Returning error.')
        return create_response_obj(400, {
            'errorMessage': f'Missing required parameter: {err}'
        })

    print('====sf_input===')
    print(json.dumps(sf_input, indent=1))

    print(f"TRAINING_STATE_MACHINE_ARN = {TRAINING_STATE_MACHINE_ARN}")

    sf_response = sf_client.start_execution(
        stateMachineArn=TRAINING_STATE_MACHINE_ARN,
        input=json.dumps(sf_input)
    )

    print(f"sf_response = {sf_response}")

    return create_response_obj(200, sf_response)