in tensorflow_datasets/scripts/cli/build.py [0:0]
import argparse
import os

import tensorflow_datasets as tfds


def register_subparser(parsers: argparse._SubParsersAction) -> None:  # pylint: disable=protected-access
"""Add subparser for `build` command."""
build_parser = parsers.add_parser(
'build', help='Commands for downloading and preparing datasets.')
build_parser.add_argument(
'datasets', # Positional arguments
type=str,
nargs='*',
help='Name(s) of the dataset(s) to build. Default to current dir. '
'See https://www.tensorflow.org/datasets/cli for accepted values.',
)
  build_parser.add_argument(  # Also accept keyword arguments
      '--datasets',
      type=str,
      nargs='+',
      dest='datasets_keyword',
      help='Datasets can also be provided as a keyword argument.',
  )
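  # Illustrative invocations (assuming the `tfds` console entry point from the
  # CLI docs linked above; dataset names are placeholders):
  #   tfds build mnist coco             # positional form
  #   tfds build --datasets mnist coco  # keyword form, same effect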
  # **** Debug options ****
  debug_group = build_parser.add_argument_group(
      'Debug & tests',
      description='--pdb Enter post-mortem debugging mode '
      'if an exception is raised.')
  debug_group.add_argument(
      '--overwrite',
      action='store_true',
      help='Delete pre-existing dataset if it exists.',
  )
  debug_group.add_argument(
      '--max_examples_per_split',
      type=int,
      nargs='?',
      const=1,
      help='When set, only generate the first X examples (defaults to 1), '
      'rather than the full dataset. '
      'If set to 0, only execute `_split_generators` (which downloads the '
      'original data), but skip `_generate_examples`.',
  )
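  # Illustrative debug runs (dataset name is a placeholder):
  #   tfds build mnist --overwrite                 # wipe and regenerate
  #   tfds build mnist --max_examples_per_split    # 1 example per split
  #   tfds build mnist --max_examples_per_split=0  # download only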
  # **** Path options ****
  path_group = build_parser.add_argument_group('Paths')
  path_group.add_argument(
      '--data_dir',
      type=tfds.core.as_path,
      # Should match tfds.core.constants.DATA_DIR!
      default=tfds.core.as_path(
          os.environ.get('TFDS_DATA_DIR',
                         os.path.join('~', 'tensorflow_datasets'))),
      help='Where to place datasets. Defaults to '
      '`~/tensorflow_datasets/` or the `TFDS_DATA_DIR` environment variable.',
  )
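  # The flag takes precedence over the `TFDS_DATA_DIR` environment variable,
  # which only feeds the default above. Illustrative:
  #   TFDS_DATA_DIR=/tmp/tfds tfds build mnist
  #   tfds build mnist --data_dir=/tmp/tfds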
  path_group.add_argument(
      '--download_dir',
      type=tfds.core.as_path,
      help='Where to place downloads. Defaults to `<data_dir>/downloads/`.',
  )
  path_group.add_argument(
      '--extract_dir',
      type=tfds.core.as_path,
      help='Where to extract files. Defaults to `<download_dir>/extracted/`.',
  )
  path_group.add_argument(
      '--manual_dir',
      type=tfds.core.as_path,
      help='Where to manually download data (required for some datasets). '
      'Defaults to `<download_dir>/manual/`.',
  )
  path_group.add_argument(
      '--add_name_to_manual_dir',
      action='store_true',
      help='If true, append the dataset name to the `manual_dir` (e.g. '
      '`<download_dir>/manual/<dataset_name>/`). Useful to avoid collisions '
      'if many datasets are generated.')
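  # Illustrative (paths and dataset name are placeholders): with
  # `--add_name_to_manual_dir`, manually downloaded data for `foo` is looked
  # up under `/data/manual/foo/`:
  #   tfds build foo --manual_dir=/data/manual --add_name_to_manual_dir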
  # **** Generation options ****
  generation_group = build_parser.add_argument_group('Generation')
  generation_group.add_argument(
      '--config',
      '-c',
      type=str,
      help='Config name to build. Build all configs if not set.')
  # We are forced to have 2 flags to avoid ambiguity when the config name is
  # a number (e.g. `voc/2017`).
  generation_group.add_argument(
      '--config_idx',
      type=int,
      help='Config index to build (`builder_cls.BUILDER_CONFIGS[config_idx]`). '
      'Mutually exclusive with `--config`.')
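  # Illustrative (dataset/config names are placeholders, except `voc`, whose
  # year-named configs motivate the second flag, as noted above):
  #   tfds build my_dataset --config=my_config  # select by name
  #   tfds build voc --config_idx=0             # BUILDER_CONFIGS[0]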
  generation_group.add_argument(
      '--imports',
      '-i',
      type=str,
      help='Comma-separated list of modules to import to register datasets.')
  generation_group.add_argument(
      '--register_checksums',
      action='store_true',
      help='If True, store size and checksum of downloaded files.',
  )
  generation_group.add_argument(
      '--force_checksums_validation',
      action='store_true',
      help='If True, raise an error if the checksums are not found.',
  )
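  # Illustrative workflow (dataset name is a placeholder): record checksums on
  # the first run, then enforce validation on subsequent runs:
  #   tfds build my_dataset --register_checksums
  #   tfds build my_dataset --force_checksums_validation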
  generation_group.add_argument(
      '--beam_pipeline_options',
      type=str,
      # nargs='+',
      help='A (comma-separated) list of flags to pass to `PipelineOptions` '
      'when preparing with Apache Beam '
      '(see: https://www.tensorflow.org/datasets/beam_datasets). '
      'Example: `--beam_pipeline_options=job_name=my-job,project=my-project`.',
  )
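  # Illustrative Beam run (dataset name is a placeholder); the comma-separated
  # value is forwarded to Apache Beam's `PipelineOptions`:
  #   tfds build my_beam_dataset \
  #       --beam_pipeline_options=job_name=my-job,project=my-project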
  format_values = [f.value for f in tfds.core.FileFormat]
  generation_group.add_argument(
      '--file_format',
      type=str,
      help='File format in which to generate the tf-examples. '
      f'Available values: {format_values} (see `tfds.core.FileFormat`).',
  )
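  # Illustrative (assuming `tfrecord`, the default TFDS format, is among the
  # `tfds.core.FileFormat` values):
  #   tfds build mnist --file_format=tfrecord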
  # **** Automation options ****
  automation_group = build_parser.add_argument_group(
      'Automation', description='Used by automated scripts.')
  automation_group.add_argument(
      '--exclude_datasets',
      type=str,
      help='If set, generate all datasets except the ones defined here. '
      'Comma-separated list of datasets to exclude.')
  automation_group.add_argument(
      '--experimental_latest_version',
      action='store_true',
      help='Build the latest `Version(experiments=...)` available rather than '
      'the default version.')
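  # Illustrative automated run (dataset names are placeholders): build every
  # registered dataset except the excluded ones, at the latest version:
  #   tfds build --exclude_datasets=mnist,cifar10 --experimental_latest_version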
  build_parser.set_defaults(subparser_fn=_build_datasets)
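
# Minimal sketch of how this registration is typically consumed (hypothetical
# driver, not part of this excerpt). `set_defaults(subparser_fn=...)` lets the
# caller dispatch on the parsed namespace without checking which subcommand ran:
#
#   parser = argparse.ArgumentParser()
#   register_subparser(parser.add_subparsers())
#   args = parser.parse_args(['build', 'mnist'])
#   args.subparser_fn(args)  # -> _build_datasets(args)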