def _add_argparse

def _add_argparse_args()

in sdks/python/apache_beam/options/pipeline_options.py [0:0]
183 lines of code
32 McCabe index (conditional complexity)

  def _add_argparse_args(cls, parser):
    """Registers dependency, staging and SDK-container options on ``parser``.

    Every option is added through ``parser.add_argument``; nothing is parsed
    here. The help texts are assembled from adjacent string literals, so each
    fragment must carry its own trailing space (or the next one a leading
    space), otherwise words run together in the rendered ``--help`` output.

    Args:
      cls: Not used in the body of this method.
      parser: An argparse-style parser exposing ``add_argument``.
    """
    # Options for installing dependencies in the worker.
    parser.add_argument(
        '--requirements_file',
        default=None,
        help=(
            'Path to a requirements file containing package dependencies. '
            'Typically it is produced by a pip freeze command. More details: '
            'https://pip.pypa.io/en/latest/reference/pip_freeze.html. '
            'If used, all the packages specified will be downloaded, '
            'cached (use --requirements_cache to change default location), '
            'and then staged so that they can be automatically installed in '
            'workers during startup. The cache is refreshed as needed '
            'avoiding extra downloads for existing packages. Typically the '
            'file is named requirements.txt.'))
    parser.add_argument(
        '--requirements_cache',
        default=None,
        help=(
            'Path to a folder to cache the packages specified in '
            'the requirements file using the --requirements_file option. '
            'If you want to skip populating requirements cache, please '
            'specify --requirements_cache="skip".'))
    parser.add_argument(
        '--requirements_cache_only_sources',
        action='store_true',
        help=(
            'Enable this flag to populate requirements cache only '
            'with Source distributions(sdists) of the dependencies '
            'mentioned in the --requirements_file. '
            'Note: (BEAM-4032): This flag may significantly slow down '
            'the pipeline submission. It is added to preserve the requirements'
            ' cache behavior prior to 2.37.0 and will likely be removed in '
            'future releases.'))
    parser.add_argument(
        '--setup_file',
        default=None,
        help=(
            'Path to a setup Python file containing package dependencies. If '
            'specified, the file\'s containing folder is assumed to have the '
            'structure required for a setuptools setup package. The file must '
            'be named setup.py. More details: '
            'https://pythonhosted.org/an_example_pypi_project/setuptools.html. '
            'During job submission a source distribution will be built and '
            'the worker will install the resulting package before running any '
            'custom code.'))
    # Both spellings feed the same list-valued destination.
    parser.add_argument(
        '--beam_plugin',
        '--beam_plugins',
        dest='beam_plugins',
        action='append',
        default=None,
        help=(
            'Bootstrap the python process before executing any code by '
            'importing all the plugins used in the pipeline. Please pass a '
            'comma separated list of import paths to be included. This is '
            'currently an experimental flag and provides no stability. '
            'Multiple --beam_plugin options can be specified if more than '
            'one plugin is needed.'))
    parser.add_argument(
        '--pickle_library',
        default='default',
        help=(
            'Chooses which pickle library to use. Options are dill, '
            'dill_unsafe, cloudpickle or default.'),
        choices=['cloudpickle', 'default', 'dill', 'dill_unsafe'])
    # --save_main_session / --no_save_main_session share one destination;
    # the None default lets callers tell "not specified" apart from an
    # explicit True or False.
    parser.add_argument(
        '--save_main_session',
        default=None,
        action='store_true',
        help=(
            'Save the main session state so that pickled functions and classes '
            'defined in __main__ (e.g. interactive session) can be unpickled. '
            'Some workflows do not need the session state if for instance all '
            'their functions/classes are defined in proper modules '
            '(not __main__) and the modules are importable in the worker. '
            'It is disabled by default except for cloudpickle as pickle '
            'library on Dataflow runner.'))
    parser.add_argument(
        '--no_save_main_session',
        default=None,
        action='store_false',
        dest='save_main_session',
        help=(
            'Disable saving the main session state. See "save_main_session".'))

    parser.add_argument(
        '--sdk_location',
        default='default',
        help=(
            'Path to a custom Beam SDK package to install and use on the '
            'runner. It can be a URL, a GCS path, or a local path to an '
            'SDK tarball. Workflow submissions will download or copy an SDK '
            'tarball from here. If set to "default", '
            'runners will use the SDK provided in the default environment. '
            'Use this flag when running pipelines with an unreleased or '
            'manually patched version of Beam SDK.'))
    # Both spellings feed the same list-valued destination.
    parser.add_argument(
        '--extra_package',
        '--extra_packages',
        dest='extra_packages',
        action='append',
        default=None,
        help=(
            'Local path to a Python package file. The file is expected to be '
            '(1) a package tarball (".tar"), (2) a compressed package tarball '
            '(".tar.gz"), (3) a Wheel file (".whl") or (4) a compressed '
            'package zip file (".zip") which can be installed using the '
            '"pip install" command of the standard pip package. Multiple '
            '--extra_package options can be specified if more than one '
            'package is needed. During job submission, the files will be '
            'staged in the staging area (--staging_location option) and the '
            'workers will install them in same order they were specified on '
            'the command line.'))
    parser.add_argument(
        '--files_to_stage',
        dest='files_to_stage',
        action='append',
        default=None,
        help=(
            'Local path to a file. During job submission, the files will be '
            'staged in the staging area (--staging_location option) and then '
            'workers will upload them to the worker specific staging location '
            '(e.g. $SEMI_PERSISTENT_DIRECTORY/staged/ for portable runner).'))
    parser.add_argument(
        '--prebuild_sdk_container_engine',
        help=(
            'Prebuild sdk worker container image before job submission. If '
            'enabled, SDK invokes the boot sequence in SDK worker '
            'containers to install all pipeline dependencies in the '
            'container, and uses the prebuilt image in the pipeline '
            'environment. This may speed up pipeline execution. To enable, '
            'select the Docker build engine: local_docker using '
            'locally-installed Docker or cloud_build for using Google Cloud '
            'Build (requires a GCP project with Cloud Build API enabled). You '
            'can also subclass SdkContainerImageBuilder and use that to build '
            'in other environments.'))
    parser.add_argument(
        '--prebuild_sdk_container_base_image',
        default=None,
        help=('Deprecated. Use --sdk_container_image instead.'))
    parser.add_argument(
        '--cloud_build_machine_type',
        default=None,
        help=(
            'If specified, use the machine type explicitly when prebuilding '
            'SDK container image on Google Cloud Build.'))
    parser.add_argument(
        '--docker_registry_push_url',
        default=None,
        help=(
            'Docker registry url to use for tagging and pushing the prebuilt '
            'sdk worker container image.'))
    parser.add_argument(
        '--gbek',
        default=None,
        help=(
            'When set, will replace all GroupByKey transforms in the pipeline '
            'with EncryptedGroupByKey transforms using the secret passed in '
            'the option. Beam will infer the secret type and value based on '
            'secret itself. This guarantees that any data at rest during the '
            'GBK will be encrypted. Many runners only store data at rest when '
            'performing a GBK, so this can be used to guarantee that data is '
            'not unencrypted. The secret should be a url safe base64 encoded '
            '32 byte value. To generate a secret in this format, you can use '
            'Secret.generate_secret_bytes(). For an example of this, see '
            'https://github.com/apache/beam/blob/c8df4da229da49d533491857e1bb4ab5dbf4fd37/sdks/python/apache_beam/transforms/util_test.py#L356. '  # pylint: disable=line-too-long
            'Runners with this behavior include the Dataflow, '
            'Flink, and Spark runners. The option should be '
            'structured like: '
            '--gbek=type:<secret_type>;<secret_param>:<value>, for example '
            '--gbek=type:GcpSecret;version_name:my_secret/versions/latest'))
    parser.add_argument(
        '--user_agent',
        default=None,
        help=(
            'A user agent string describing the pipeline to external services. '
            'The format should follow RFC2616.'))
    parser.add_argument(
        '--maven_repository_url',
        default=None,
        help=(
            'Custom Maven repository URL to use for downloading JAR files. '
            'If not specified, the default Maven Central repository will be '
            'used.'))