in sdks/python/apache_beam/options/pipeline_options.py [0:0]
def _add_argparse_args(cls, parser):
  """Registers Dataflow worker-pool command-line flags on ``parser``.

  Invoked by the pipeline-options framework so this options class can
  contribute its arguments. Almost every flag defaults to None, meaning
  "unset" — the Dataflow service then picks its own default; the one
  exception is --max_cache_memory_usage_mb, which defaults to 0
  (cache disabled).

  Args:
    parser: an argparse-compatible parser to add the arguments to.
  """
  # --- Worker count and autoscaling -----------------------------------
  parser.add_argument(
      '--num_workers',
      type=int,
      default=None,
      help=(
          'Number of workers to use when executing the Dataflow job. If not '
          'set, the Dataflow service will use a reasonable default.'))
  parser.add_argument(
      '--max_num_workers',
      type=int,
      default=None,
      help=(
          'Maximum number of workers to use when executing the Dataflow job.'
      ))
  parser.add_argument(
      '--autoscaling_algorithm',
      type=str,
      choices=['NONE', 'THROUGHPUT_BASED'],
      default=None,  # Meaning unset, distinct from 'NONE' meaning don't scale
      help=('If and how to autoscale the workerpool.'))
  # --- Worker VM shape (machine type and disks) -----------------------
  # Two spellings are accepted for the flag; both land in dest='machine_type'.
  parser.add_argument(
      '--worker_machine_type',
      '--machine_type',
      dest='machine_type',
      default=None,
      help=(
          'Machine type to create Dataflow worker VMs as. See '
          'https://cloud.google.com/compute/docs/machine-types '
          'for a list of valid options. If not set, '
          'the Dataflow service will choose a reasonable '
          'default.'))
  parser.add_argument(
      '--disk_size_gb',
      type=int,
      default=None,
      help=(
          'Remote worker disk size, in gigabytes, or 0 to use the default '
          'size. If not set, the Dataflow service will use a reasonable '
          'default.'))
  # Same aliasing pattern as machine_type: both spellings map to 'disk_type'.
  parser.add_argument(
      '--worker_disk_type',
      '--disk_type',
      dest='disk_type',
      default=None,
      help=('Specifies what type of persistent disk should be used.'))
  # --- Worker placement (region / zone) -------------------------------
  # worker_region and worker_zone are mutually exclusive; --zone is the
  # deprecated predecessor of worker_zone (see help texts below).
  parser.add_argument(
      '--worker_region',
      default=None,
      help=(
          'The Compute Engine region (https://cloud.google.com/compute/docs/'
          'regions-zones/regions-zones) in which worker processing should '
          'occur, e.g. "us-west1". Mutually exclusive with worker_zone. If '
          'neither worker_region nor worker_zone is specified, default to '
          'same value as --region.'))
  parser.add_argument(
      '--worker_zone',
      default=None,
      help=(
          'The Compute Engine zone (https://cloud.google.com/compute/docs/'
          'regions-zones/regions-zones) in which worker processing should '
          'occur, e.g. "us-west1-a". Mutually exclusive with worker_region. '
          'If neither worker_region nor worker_zone is specified, the '
          'Dataflow service will choose a zone in --region based on '
          'available capacity.'))
  parser.add_argument(
      '--zone',
      default=None,
      help=(
          'GCE availability zone for launching workers. Default is up to the '
          'Dataflow service. This flag is deprecated, and will be replaced '
          'by worker_zone.'))
  # --- Worker networking ----------------------------------------------
  parser.add_argument(
      '--network',
      default=None,
      help=(
          'GCE network for launching workers. Default is up to the Dataflow '
          'service.'))
  parser.add_argument(
      '--subnetwork',
      default=None,
      help=(
          'GCE subnetwork for launching workers. Default is up to the '
          'Dataflow service. Expected format is '
          'regions/REGION/subnetworks/SUBNETWORK or the fully qualified '
          'subnetwork name. For more information, see '
          'https://cloud.google.com/compute/docs/vpc/'))
  # --- Container images -----------------------------------------------
  # worker_harness_container_image is deprecated in favor of
  # sdk_container_image (see help texts).
  parser.add_argument(
      '--worker_harness_container_image',
      default=None,
      help=(
          'Docker registry location of container image to use for the '
          'worker harness. If not set, an appropriate approved Google Cloud '
          'Dataflow image will be used based on the version of the '
          'SDK. Note: This flag is deprecated and only supports '
          'approved Google Cloud Dataflow container images. To provide a '
          'custom container image, use sdk_container_image instead.'))
  parser.add_argument(
      '--sdk_container_image',
      default=None,
      help=(
          'Docker registry location of container image to use for the '
          'worker harness. If not set, an appropriate approved Google Cloud '
          'Dataflow image will be used based on the version of the '
          'SDK. If set for a non-portable pipeline, only official '
          'Google Cloud Dataflow container images may be used here.'))
  # action='append' with default=None: argparse starts a fresh list on the
  # first occurrence, so repeated flags accumulate override entries.
  parser.add_argument(
      '--sdk_harness_container_image_overrides',
      action='append',
      default=None,
      help=(
          'Overrides for SDK harness container images. Could be for the '
          'local SDK or for a remote SDK that pipeline has to support due '
          'to a cross-language transform. Each entry consist of two values '
          'separated by a comma where first value gives a regex to '
          'identify the container image to override and the second value '
          'gives the replacement container image.'))
  # --- SDK harness logging --------------------------------------------
  parser.add_argument(
      '--default_sdk_harness_log_level',
      default=None,
      help=(
          'Controls the default log level of all loggers without a log level '
          'override. Values can be either a labeled level or a number '
          '(See https://docs.python.org/3/library/logging.html#levels). '
          'Default log level is INFO.'))
  # Value is parsed as JSON into a dict; _DictUnionAction (defined elsewhere
  # in this module) presumably merges dicts across repeated occurrences of
  # the flag — verify against its definition.
  parser.add_argument(
      '--sdk_harness_log_level_overrides',
      type=json.loads,
      action=_DictUnionAction,
      default=None,
      help=(
          'Controls the log levels for specifically named loggers. The '
          'expected format is a json string: \'{"module":"log_level",...}\'. '
          'For example, by specifying the value \'{"a.b.c":"DEBUG"}\', '
          'the logger underneath the module "a.b.c" will be configured to '
          'output logs at the DEBUG level. Similarly, by specifying the '
          'value \'{"a.b.c":"WARNING"}\' all loggers underneath the "a.b.c" '
          'module will be configured to output logs at the WARNING level. '
          'Also, note that when multiple overrides are specified, the exact '
          'name followed by the closest parent takes precedence.'))
  # --- Public vs. private worker IPs ----------------------------------
  # Tri-state flag pair sharing dest='use_public_ips':
  #   unset -> None, --use_public_ips -> True, --no_use_public_ips -> False.
  parser.add_argument(
      '--use_public_ips',
      default=None,
      action='store_true',
      help='Whether to assign public IP addresses to the worker VMs.')
  parser.add_argument(
      '--no_use_public_ips',
      dest='use_public_ips',
      default=None,
      action='store_false',
      help='Whether to assign only private IP addresses to the worker VMs.')
  # --- Misc worker settings -------------------------------------------
  # No explicit default here; argparse supplies None when the flag is unset.
  parser.add_argument(
      '--min_cpu_platform',
      dest='min_cpu_platform',
      type=str,
      help='GCE minimum CPU platform. Default is determined by GCP.')
  # Default of 0 disables the cache (per help text below).
  parser.add_argument(
      '--max_cache_memory_usage_mb',
      dest='max_cache_memory_usage_mb',
      type=int,
      default=0,
      help=(
          'Size of the SDK Harness cache to store user state and side '
          'inputs in MB. The cache is disabled by default. Increasing '
          'cache size might improve performance of some pipelines, such as '
          'pipelines that use iterable side input views, but can '
          'lead to an increase in memory consumption and OOM errors if '
          'workers are not appropriately provisioned. '
          'Using the cache might decrease performance pipelines using '
          'materialized side inputs. '
          'If the cache is full, least '
          'recently used elements will be evicted. This cache is per '
          'each SDK Harness instance. SDK Harness is a component '
          'responsible for executing the user code and communicating with '
          'the runner. Depending on the runner, there may be more than one '
          'SDK Harness process running on the same worker node.'))