in sdks/python/apache_beam/options/pipeline_options.py [0:0]
def _add_argparse_args(cls, parser):
  """Register the worker-setup command-line options on ``parser``.

  Adds the flags that control how dependencies are staged and installed
  (requirements file and cache, setup file, extra packages, files to stage,
  SDK location), session pickling, SDK container prebuilding, GroupByKey
  encryption, the user agent and the Maven repository URL.

  Args:
    cls: The class this hook is invoked on; not used by the body.
    parser: An argparse-compatible parser; only ``add_argument`` is called
      on it.

  Returns:
    None. The parser is modified in place.
  """
  # Options for installing dependencies in the worker.
  parser.add_argument(
      '--requirements_file',
      default=None,
      help=(
          'Path to a requirements file containing package dependencies. '
          'Typically it is produced by a pip freeze command. More details: '
          'https://pip.pypa.io/en/latest/reference/pip_freeze.html. '
          'If used, all the packages specified will be downloaded, '
          'cached (use --requirements_cache to change default location), '
          'and then staged so that they can be automatically installed in '
          'workers during startup. The cache is refreshed as needed '
          'avoiding extra downloads for existing packages. Typically the '
          'file is named requirements.txt.'))
  parser.add_argument(
      '--requirements_cache',
      default=None,
      help=(
          'Path to a folder to cache the packages specified in '
          'the requirements file using the --requirements_file option. '
          'If you want to skip populating requirements cache, please '
          'specify --requirements_cache="skip".'))
  parser.add_argument(
      '--requirements_cache_only_sources',
      action='store_true',
      help=(
          'Enable this flag to populate requirements cache only '
          'with Source distributions(sdists) of the dependencies '
          'mentioned in the --requirements_file. '
          'Note: (BEAM-4032): This flag may significantly slow down '
          'the pipeline submission. It is added to preserve the requirements'
          ' cache behavior prior to 2.37.0 and will likely be removed in '
          'future releases.'))
  parser.add_argument(
      '--setup_file',
      default=None,
      help=(
          'Path to a setup Python file containing package dependencies. If '
          'specified, the file\'s containing folder is assumed to have the '
          'structure required for a setuptools setup package. The file must '
          'be named setup.py. More details: '
          'https://pythonhosted.org/an_example_pypi_project/setuptools.html '
          'During job submission a source distribution will be built and '
          'the worker will install the resulting package before running any '
          'custom code.'))
  # Both spellings are accepted and accumulate into the same list.
  parser.add_argument(
      '--beam_plugin',
      '--beam_plugins',
      dest='beam_plugins',
      action='append',
      default=None,
      help=(
          'Bootstrap the python process before executing any code by '
          'importing all the plugins used in the pipeline. Please pass a '
          'comma separated list of import paths to be included. This is '
          'currently an experimental flag and provides no stability. '
          'Multiple --beam_plugin options can be specified if more than '
          'one plugin is needed.'))
  parser.add_argument(
      '--pickle_library',
      default='default',
      help=(
          'Chooses which pickle library to use. Options are dill, '
          'dill_unsafe, cloudpickle or default.'),
      choices=['cloudpickle', 'default', 'dill', 'dill_unsafe'])
  # --save_main_session and --no_save_main_session share one destination.
  # default=None keeps it tri-state: None means neither flag was passed.
  parser.add_argument(
      '--save_main_session',
      default=None,
      action='store_true',
      help=(
          'Save the main session state so that pickled functions and classes '
          'defined in __main__ (e.g. interactive session) can be unpickled. '
          'Some workflows do not need the session state if for instance all '
          'their functions/classes are defined in proper modules '
          '(not __main__) and the modules are importable in the worker. '
          'It is disabled by default except for cloudpickle as pickle '
          'library on Dataflow runner.'))
  parser.add_argument(
      '--no_save_main_session',
      default=None,
      action='store_false',
      dest='save_main_session',
      help=(
          'Disable saving the main session state. See "save_main_session".'))
  parser.add_argument(
      '--sdk_location',
      default='default',
      help=(
          'Path to a custom Beam SDK package to install and use on the '
          'runner. It can be a URL, a GCS path, or a local path to an '
          'SDK tarball. Workflow submissions will download or copy an SDK '
          'tarball from here. If set to "default", '
          'runners will use the SDK provided in the default environment. '
          'Use this flag when running pipelines with an unreleased or '
          'manually patched version of Beam SDK.'))
  # Both spellings are accepted and accumulate into the same list.
  parser.add_argument(
      '--extra_package',
      '--extra_packages',
      dest='extra_packages',
      action='append',
      default=None,
      help=(
          'Local path to a Python package file. The file is expected to be '
          '(1) a package tarball (".tar"), (2) a compressed package tarball '
          '(".tar.gz"), (3) a Wheel file (".whl") or (4) a compressed '
          'package zip file (".zip") which can be installed using the '
          '"pip install" command of the standard pip package. Multiple '
          '--extra_package options can be specified if more than one '
          'package is needed. During job submission, the files will be '
          'staged in the staging area (--staging_location option) and the '
          'workers will install them in same order they were specified on '
          'the command line.'))
  parser.add_argument(
      '--files_to_stage',
      dest='files_to_stage',
      action='append',
      default=None,
      help=(
          'Local path to a file. During job submission, the files will be '
          'staged in the staging area (--staging_location option) and then '
          'workers will upload them to the worker specific staging location '
          '(e.g. $SEMI_PERSISTENT_DIRECTORY/staged/ for portable runner).'))
  parser.add_argument(
      '--prebuild_sdk_container_engine',
      help=(
          'Prebuild sdk worker container image before job submission. If '
          'enabled, SDK invokes the boot sequence in SDK worker '
          'containers to install all pipeline dependencies in the '
          'container, and uses the prebuilt image in the pipeline '
          'environment. This may speed up pipeline execution. To enable, '
          'select the Docker build engine: local_docker using '
          'locally-installed Docker or cloud_build for using Google Cloud '
          'Build (requires a GCP project with Cloud Build API enabled). You '
          'can also subclass SdkContainerImageBuilder and use that to build '
          'in other environments.'))
  parser.add_argument(
      '--prebuild_sdk_container_base_image',
      default=None,
      help=('Deprecated. Use --sdk_container_image instead.'))
  parser.add_argument(
      '--cloud_build_machine_type',
      default=None,
      help=(
          'If specified, use the machine type explicitly when prebuilding '
          'SDK container image on Google Cloud Build.'))
  parser.add_argument(
      '--docker_registry_push_url',
      default=None,
      help=(
          'Docker registry url to use for tagging and pushing the prebuilt '
          'sdk worker container image.'))
  parser.add_argument(
      '--gbek',
      default=None,
      help=(
          'When set, will replace all GroupByKey transforms in the pipeline '
          'with EncryptedGroupByKey transforms using the secret passed in '
          'the option. Beam will infer the secret type and value based on '
          'secret itself. This guarantees that any data at rest during the '
          'GBK will be encrypted. Many runners only store data at rest when '
          'performing a GBK, so this can be used to guarantee that data is '
          'not unencrypted. The secret should be a url safe base64 encoded '
          '32 byte value. To generate a secret in this format, you can use '
          'Secret.generate_secret_bytes(). For an example of this, see '
          'https://github.com/apache/beam/blob/c8df4da229da49d533491857e1bb4ab5dbf4fd37/sdks/python/apache_beam/transforms/util_test.py#L356. '  # pylint: disable=line-too-long
          'Runners with this behavior include the Dataflow, '
          'Flink, and Spark runners. The option should be '
          'structured like: '
          '--gbek=type:<secret_type>;<secret_param>:<value>, for example '
          '--gbek=type:GcpSecret;version_name:my_secret/versions/latest'))
  parser.add_argument(
      '--user_agent',
      default=None,
      help=(
          'A user agent string describing the pipeline to external services. '
          'The format should follow RFC2616.'))
  parser.add_argument(
      '--maven_repository_url',
      default=None,
      help=(
          'Custom Maven repository URL to use for downloading JAR files. '
          'If not specified, the default Maven Central repository will be '
          'used.'))