# generate_neptune_ml_parser() — defined in src/graph_notebook/magics/ml.py


def generate_neptune_ml_parser():
    """Build the argument parser for the Neptune ML line/cell magic.

    Creates one sub-command per stage of the Neptune ML workflow —
    ``export``, ``dataprocessing``, ``training``, ``modeltransform`` and
    ``endpoint`` — each with its own sub-sub-commands (``start``/``status``
    etc.). After parsing, the chosen command is available as ``args.which``
    and the sub-command as ``args.which_sub``.

    Returns:
        argparse.ArgumentParser: the fully configured parser.
    """
    parser = argparse.ArgumentParser()
    subparsers = parser.add_subparsers(help='sub-command help', dest='which')

    # Begin Export subparsers
    parser_export = subparsers.add_parser('export', help='')
    export_sub_parsers = parser_export.add_subparsers(help='', dest='which_sub')
    export_start_parser = export_sub_parsers.add_parser('start', help='start a new exporter job')
    export_start_parser.add_argument('--export-url', type=str,
                                     help='api gateway endpoint to call the exporter such as '
                                          'foo.execute-api.us-east-1.amazonaws.com/v1')
    export_start_parser.add_argument('--export-iam', action='store_true',
                                     help='flag for whether to sign requests to the export url with SigV4')
    export_start_parser.add_argument('--export-no-ssl', action='store_true',
                                     help='toggle ssl off when connecting to exporter')
    export_start_parser.add_argument('--wait', action='store_true', help='wait for the exporter to finish running')
    export_start_parser.add_argument('--wait-interval', default=DEFAULT_WAIT_INTERVAL, type=int,
                                     help=f'time in seconds between export status check. '
                                          f'default: {DEFAULT_WAIT_INTERVAL}')
    export_start_parser.add_argument('--wait-timeout', default=DEFAULT_WAIT_TIMEOUT, type=int,
                                     help=f'time in seconds to wait for a given export job to complete before '
                                          f'returning most recent status. default: {DEFAULT_WAIT_TIMEOUT}')
    export_start_parser.add_argument('--store-to', default='', dest='store_to',
                                     help='store result to this variable. If --wait is specified, will store the '
                                          'final status.')

    export_status_parser = export_sub_parsers.add_parser('status', help='obtain status of exporter job')
    export_status_parser.add_argument('--job-id', type=str, help='job id to check the status of')
    export_status_parser.add_argument('--export-url', type=str,
                                      help='api gateway endpoint to call the exporter such as '
                                           'foo.execute-api.us-east-1.amazonaws.com/v1')
    export_status_parser.add_argument('--export-iam', action='store_true',
                                      help='flag for whether to sign requests to the export url with SigV4')
    export_status_parser.add_argument('--export-no-ssl', action='store_true',
                                      help='toggle ssl off when connecting to exporter')
    export_status_parser.add_argument('--store-to', default='', dest='store_to',
                                      help='store result to this variable')
    export_status_parser.add_argument('--wait', action='store_true', help='wait for the exporter to finish running')
    export_status_parser.add_argument('--wait-interval', default=DEFAULT_WAIT_INTERVAL, type=int,
                                      help=f'time in seconds between export status check. '
                                           f'default: {DEFAULT_WAIT_INTERVAL}')
    export_status_parser.add_argument('--wait-timeout', default=DEFAULT_WAIT_TIMEOUT, type=int,
                                      help=f'time in seconds to wait for a given export job to complete before '
                                           f'returning most recent status. default: {DEFAULT_WAIT_TIMEOUT}')

    # Begin dataprocessing subparsers
    parser_dataprocessing = subparsers.add_parser('dataprocessing', help='')
    dataprocessing_subparsers = parser_dataprocessing.add_subparsers(help='dataprocessing sub-command',
                                                                     dest='which_sub')
    dataprocessing_start_parser = dataprocessing_subparsers.add_parser('start', help='start a new dataprocessing job')
    dataprocessing_start_parser.add_argument('--job-id', type=str, default='',
                                             help='the unique identifier for this processing job')
    dataprocessing_start_parser.add_argument('--prev-job-id', type=str, default='',
                                             help='the job ID of a completed data processing job run on an earlier '
                                                  'version of the data.')
    dataprocessing_start_parser.add_argument('--s3-input-uri', type=str, default='',
                                             help='input data location in s3')
    dataprocessing_start_parser.add_argument('--s3-processed-uri', type=str, default='',
                                             help='processed data location in s3')
    dataprocessing_start_parser.add_argument('--sagemaker-iam-role-arn', type=str, default='',
                                             help='The ARN of an IAM role for SageMaker execution. '
                                                  'This must be listed in your DB cluster parameter group or an error '
                                                  'will occur. ')
    dataprocessing_start_parser.add_argument('--neptune-iam-role-arn', type=str, default='',
                                             help='The Amazon Resource Name (ARN) of an IAM role that SageMaker can '
                                                  'assume to perform tasks on your behalf. This must be listed in your '
                                                  'DB cluster parameter group or an error will occur.')
    dataprocessing_start_parser.add_argument('--instance-type', type=str, default='',
                                             help='The type of ML instance used during data processing.')
    dataprocessing_start_parser.add_argument('--instance-volume-size-in-gb', type=int, default=0,
                                             help='The disk volume size of the processing instance.')
    dataprocessing_start_parser.add_argument('--timeout-in-seconds', type=int, default=86400,
                                             help='Timeout in seconds for the data processing job.')
    dataprocessing_start_parser.add_argument('--model-type', type=str, default='',
                                             help='One of the two model types that Neptune ML currently supports: '
                                                  'heterogeneous graph models (heterogeneous), '
                                                  'and knowledge graph (kge).')
    # NOTE: nargs='+' collects each ID as its own list element; the previous
    # type=list would have split a single argument into individual characters.
    dataprocessing_start_parser.add_argument('--subnets', type=str, nargs='+', default=[],
                                             help='The IDs of the subnets in the Neptune VPC')
    dataprocessing_start_parser.add_argument('--security-group-ids', type=str, nargs='+', default=[],
                                             help='The VPC security group IDs.')
    dataprocessing_start_parser.add_argument('--volume-encryption-kms-key', type=str, default='',
                                             help='The AWS Key Management Service (AWS KMS) key that SageMaker uses to '
                                                  'encrypt data on the storage volume attached to the ML compute '
                                                  'instances that run the processing job.')
    dataprocessing_start_parser.add_argument('--s3-output-encryption-kms-key', type=str, default='',
                                             help='The AWS Key Management Service (AWS KMS) key that SageMaker uses to '
                                                  'encrypt the output of the processing job.')
    dataprocessing_start_parser.add_argument('--config-file-name', type=str, default='')
    dataprocessing_start_parser.add_argument('--store-to', type=str, default='',
                                             help='store result to this variable')
    dataprocessing_start_parser.add_argument('--wait', action='store_true',
                                             help='wait for the exporter to finish running')
    dataprocessing_start_parser.add_argument('--wait-interval', default=DEFAULT_WAIT_INTERVAL, type=int,
                                             help='wait interval between checks for export status')
    dataprocessing_start_parser.add_argument('--wait-timeout', default=DEFAULT_WAIT_TIMEOUT, type=int,
                                             help='timeout while waiting for export job to complete')

    dataprocessing_status_parser = dataprocessing_subparsers.add_parser('status',
                                                                        help='obtain the status of an existing '
                                                                             'dataprocessing job')
    dataprocessing_status_parser.add_argument('--job-id', type=str)
    dataprocessing_status_parser.add_argument('--store-to', type=str, default='',
                                              help='store result to this variable')
    dataprocessing_status_parser.add_argument('--wait', action='store_true',
                                              help='wait for the exporter to finish running')
    dataprocessing_status_parser.add_argument('--wait-interval', default=DEFAULT_WAIT_INTERVAL, type=int,
                                              help='wait interval between checks for export status')
    dataprocessing_status_parser.add_argument('--wait-timeout', default=DEFAULT_WAIT_TIMEOUT, type=int,
                                              help='timeout while waiting for export job to complete')

    # Begin training subparsers
    parser_training = subparsers.add_parser('training', help='training command help')
    training_subparsers = parser_training.add_subparsers(help='training sub-command help',
                                                         dest='which_sub')
    training_start_parser = training_subparsers.add_parser('start', help='start a new training job')
    training_start_parser.add_argument('--job-id', type=str, default='')
    training_start_parser.add_argument('--data-processing-id', type=str, default='')
    training_start_parser.add_argument('--s3-output-uri', type=str, default='')
    training_start_parser.add_argument('--prev-job-id', type=str, default='',
                                       help='The job ID of a completed model-training job that you want to update '
                                            'incrementally based on updated data.')
    training_start_parser.add_argument('--sagemaker-iam-role-arn', type=str, default='',
                                       help='The ARN of an IAM role for SageMaker execution.')
    training_start_parser.add_argument('--neptune-iam-role-arn', type=str, default='',
                                       help='The ARN of an IAM role that provides Neptune access to SageMaker '
                                            'and Amazon S3 resources.')
    training_start_parser.add_argument('--model_name', type=str, default='',
                                       help='The model type for training. By default the ML model is automatically '
                                            'based on the modelType used in data processing, but you can specify a '
                                            'different model type here.')
    training_start_parser.add_argument('--base-processing-instance-type', type=str, default='',
                                       help='The type of ML instance used in preparing and managing training of '
                                            'ML models.')
    training_start_parser.add_argument('--instance-type', type=str, default='')
    training_start_parser.add_argument('--instance-volume-size-in-gb', type=int, default=0,
                                       help='The disk volume size of the training instance.')
    training_start_parser.add_argument('--timeout-in-seconds', type=int, default=86400,
                                       help='Timeout in seconds for the training job.')
    training_start_parser.add_argument('--max-hpo-number', type=int, default=2,
                                       help='Maximum total number of training jobs to start for the hyperparameter '
                                            'tuning job.')
    training_start_parser.add_argument('--max-hpo-parallel', type=int, default=2,
                                       help='Maximum number of parallel training jobs to start for the hyperparameter '
                                            'tuning job.')
    # nargs='+' (not type=list) so multiple IDs parse as a list of strings.
    training_start_parser.add_argument('--subnets', type=str, nargs='+', default=[],
                                       help='The IDs of the subnets in the Neptune VPC')
    training_start_parser.add_argument('--security-group-ids', type=str, nargs='+', default=[],
                                       help='The VPC security group IDs.')
    training_start_parser.add_argument('--volume-encryption-kms-key', type=str, default='',
                                       help='The AWS Key Management Service (AWS KMS) key that SageMaker uses to '
                                            'encrypt data on the storage volume attached to the ML compute '
                                            'instances that run the training job.')
    training_start_parser.add_argument('--s3-output-encryption-kms-key', type=str, default='',
                                       help='The AWS Key Management Service (AWS KMS) key that SageMaker uses to '
                                            'encrypt the output of the training job.')
    training_start_parser.add_argument('--store-to', type=str, default='', help='store result to this variable')
    training_start_parser.add_argument('--wait', action='store_true',
                                       help='wait for the exporter to finish running')
    training_start_parser.add_argument('--wait-interval', default=DEFAULT_WAIT_INTERVAL, type=int,
                                       help='wait interval between checks for export status')
    training_start_parser.add_argument('--wait-timeout', default=DEFAULT_WAIT_TIMEOUT, type=int,
                                       help='timeout while waiting for export job to complete')

    training_status_parser = training_subparsers.add_parser('status',
                                                            help='obtain the status of an existing training job')
    training_status_parser.add_argument('--job-id', type=str)
    training_status_parser.add_argument('--store-to', type=str, default='', help='store result to this variable')
    training_status_parser.add_argument('--wait', action='store_true',
                                        help='wait for the exporter to finish running')
    training_status_parser.add_argument('--wait-interval', default=DEFAULT_WAIT_INTERVAL, type=int,
                                        help='wait interval between checks for export status')
    training_status_parser.add_argument('--wait-timeout', default=DEFAULT_WAIT_TIMEOUT, type=int,
                                        help='timeout while waiting for export job to complete')

    # Begin modeltransform subparsers
    parser_modeltransform = subparsers.add_parser('modeltransform', help='modeltransform command help')

    # create
    modeltransform_subparsers = parser_modeltransform.add_subparsers(help='modeltransform subcommand help',
                                                                     dest='which_sub')
    modeltransform_start_parser = modeltransform_subparsers.add_parser('start', help='start a new modeltransform job')

    modeltransform_start_parser.add_argument('--job-id', type=str, default='',
                                             help='A unique identifier for the new job.')
    # The description below was previously passed as the default value by
    # mistake; the default is an empty string and the sentence is the help.
    modeltransform_start_parser.add_argument('--s3-output-uri', type=str, default='',
                                             help='The URI of the S3 bucket/location to store your transform '
                                                  'result.')
    modeltransform_start_parser.add_argument('--data-processing-job-id', type=str, default='',
                                             help='The job Id of a completed data-processing job. NOTE: You must '
                                                  'include either both the dataProcessingJobId and the '
                                                  'mlModelTrainingJobId parameters, or the trainingJobName parameter.')
    modeltransform_start_parser.add_argument('--model-training-job-id', type=str, default='',
                                             help='The job Id of a completed model-training job. NOTE: You must include'
                                                  ' either both the dataProcessingJobId and the mlModelTrainingJobId '
                                                  'parameters, or the trainingJobName parameter.')
    modeltransform_start_parser.add_argument('--training-job-name', type=str, default='',
                                             help='The name of a completed SageMaker training job. NOTE: You must '
                                                  'include either both the dataProcessingJobId and the '
                                                  'mlModelTrainingJobId parameters, or the trainingJobName parameter.')
    modeltransform_start_parser.add_argument('--sagemaker-iam-role-arn', type=str, default='',
                                             help='The ARN of an IAM role for SageMaker execution.')
    modeltransform_start_parser.add_argument('--neptune-iam-role-arn', type=str, default='',
                                             help='The ARN of an IAM role that provides Neptune access to SageMaker '
                                                  'and Amazon S3 resources.')
    modeltransform_start_parser.add_argument('--base-processing-instance-type', type=str, default='',
                                             help='The type of ML instance used in preparing and managing training of '
                                                  'ML models.')
    modeltransform_start_parser.add_argument('--base-processing-instance-volume-size-in-gb', type=int, default=0,
                                             help='The disk volume size of the training instance.')
    # nargs='+' (not type=list) so multiple IDs parse as a list of strings.
    modeltransform_start_parser.add_argument('--subnets', type=str, nargs='+', default=[],
                                             help='The IDs of the subnets in the Neptune VPC')
    modeltransform_start_parser.add_argument('--security-group-ids', type=str, nargs='+', default=[],
                                             help='The VPC security group IDs.')
    modeltransform_start_parser.add_argument('--volume-encryption-kms-key', type=str, default='',
                                             help='The AWS Key Management Service (AWS KMS) key that SageMaker uses to '
                                                  'encrypt data on the storage volume attached to the ML compute '
                                                  'instances that run the transform job.')
    modeltransform_start_parser.add_argument('--s3-output-encryption-kms-key', type=str, default='',
                                             help='The AWS Key Management Service (AWS KMS) key that SageMaker uses to '
                                                  'encrypt the output of the transform job.')
    modeltransform_start_parser.add_argument('--wait', action='store_true')
    modeltransform_start_parser.add_argument('--store-to', default='', dest='store_to',
                                             help='store result to this variable. '
                                                  'If --wait is specified, will store the final status.')

    # status
    modeltransform_status_subparser = modeltransform_subparsers.add_parser('status',
                                                                           help='get status of a modeltransform job')
    modeltransform_status_subparser.add_argument('--job-id', type=str, required=True,
                                                 help='modeltransform job-id to obtain the status of')
    modeltransform_status_subparser.add_argument('--iam-role-arn', '-i', type=str, default='',
                                                 help='iam role arn to use for modeltransform')
    modeltransform_status_subparser.add_argument('--wait', action='store_true')
    modeltransform_status_subparser.add_argument('--store-to', default='', dest='store_to',
                                                 help='store result to this variable. If --wait is specified, '
                                                      'will store the final status.')

    # list
    modeltransform_list_subparser = modeltransform_subparsers.add_parser('list',
                                                                         help='obtain list of modeltransform jobs')
    modeltransform_list_subparser.add_argument('--max-items', type=int, help='max number of items to obtain',
                                               default=10)
    modeltransform_list_subparser.add_argument('--iam-role-arn', '-i', type=str, default='',
                                               help='iam role arn to use for modeltransform')
    modeltransform_list_subparser.add_argument('--store-to', default='', dest='store_to',
                                               help='store result to this variable.')

    # stop
    modeltransform_stop_subparser = modeltransform_subparsers.add_parser('stop', help='stop a modeltransform job')
    modeltransform_stop_subparser.add_argument('--job-id', type=str, help='modeltransform job id', default='')
    modeltransform_stop_subparser.add_argument('--clean', action='store_true',
                                               help='flag indicating whether to clean up artifacts of the stopped job')
    modeltransform_stop_subparser.add_argument('--iam-role-arn', '-i', type=str, default='',
                                               help='iam role arn to use for modeltransform')
    modeltransform_stop_subparser.add_argument('--store-to', default='', dest='store_to',
                                               help='store result to this variable.')

    # Begin endpoint subparsers
    parser_endpoint = subparsers.add_parser('endpoint', help='endpoint command help')
    endpoint_subparsers = parser_endpoint.add_subparsers(help='endpoint sub-command help',
                                                         dest='which_sub')
    endpoint_start_parser = endpoint_subparsers.add_parser('create', help='create a new endpoint')
    # The description below was previously passed as the default value by
    # mistake; the default is an empty string and the sentence is the help.
    endpoint_start_parser.add_argument('--id', type=str, default='',
                                       help='A unique identifier for the new inference endpoint.')
    endpoint_start_parser.add_argument('--model-training-job-id', type=str, default='',
                                       help='The job Id of the completed model-training job. '
                                            'You must supply either model-training-job-id or model-transform-job-id.')
    endpoint_start_parser.add_argument('--model-transform-job-id', type=str, default='',
                                       help='The job Id of the completed model-transform job. '
                                            'You must supply either model-training-job-id or model-transform-job-id.')
    endpoint_start_parser.add_argument('--update', action='store_true', default=False,
                                       help='If present, this parameter indicates that this is an update request.')
    endpoint_start_parser.add_argument('--neptune-iam-role-arn', type=str, default='',
                                       help='The ARN of an IAM role providing Neptune access to SageMaker and Amazon '
                                            'S3 resources.')
    endpoint_start_parser.add_argument('--model-name', type=str, default='',
                                       help='Model type for training.')
    endpoint_start_parser.add_argument('--instance-type', type=str, default='ml.r5.xlarge',
                                       help='The type of ML instance used for online servicing.')
    endpoint_start_parser.add_argument('--instance-count', type=int, default=1,
                                       help='The minimum number of Amazon EC2 instances to deploy to an endpoint for '
                                            'prediction.')
    endpoint_start_parser.add_argument('--volume-encryption-kms-key', type=str, default='',
                                       help='The AWS Key Management Service (AWS KMS) key that SageMaker uses to '
                                            'encrypt data on the storage volume attached to the ML compute instance(s) '
                                            'that run the endpoints.')
    endpoint_start_parser.add_argument('--store-to', type=str, default='', help='store result to this variable')
    endpoint_start_parser.add_argument('--wait', action='store_true',
                                       help='wait for the exporter to finish running')
    endpoint_start_parser.add_argument('--wait-interval', default=DEFAULT_WAIT_INTERVAL, type=int,
                                       help='wait interval between checks for export status')
    endpoint_start_parser.add_argument('--wait-timeout', default=DEFAULT_WAIT_TIMEOUT, type=int,
                                       help='timeout while waiting for export job to complete')

    endpoint_status_parser = endpoint_subparsers.add_parser('status',
                                                            help='obtain the status of an existing endpoint '
                                                                 'creation job')
    # Same help-as-default bug fixed here: default is now an empty string.
    endpoint_status_parser.add_argument('--id', type=str, default='',
                                        help='The ID of an existing inference endpoint.')
    endpoint_status_parser.add_argument('--store-to', type=str, default='', help='store result to this variable')
    endpoint_status_parser.add_argument('--wait', action='store_true',
                                        help='wait for the exporter to finish running')
    endpoint_status_parser.add_argument('--wait-interval', default=DEFAULT_WAIT_INTERVAL, type=int,
                                        help='wait interval between checks for export status')
    endpoint_status_parser.add_argument('--wait-timeout', default=DEFAULT_WAIT_TIMEOUT, type=int,
                                        help='timeout while waiting for export job to complete')

    return parser