in tensorflow_datasets/scripts/cli/build.py [0:0]
import argparse
import os

import tensorflow_datasets as tfds


def register_subparser(parsers: argparse._SubParsersAction) -> None:  # pylint: disable=protected-access
"""Add subparser for `build` command."""
build_parser = parsers.add_parser(
'build', help='Commands for downloading and preparing datasets.')
build_parser.add_argument(
'datasets', # Positional arguments
type=str,
nargs='*',
help='Name(s) of the dataset(s) to build. Default to current dir. '
'See https://www.tensorflow.org/datasets/cli for accepted values.',
)
  build_parser.add_argument(  # Also accept keyword arguments
      '--datasets',
      type=str,
      nargs='+',
      dest='datasets_keyword',
      help='Datasets can also be provided as a keyword argument.',
  )
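  # Illustrative invocations (assuming the `tfds` console entry point from the
  # CLI docs linked above; dataset names are placeholders):
  #   tfds build mnist coco             # positional form
  #   tfds build --datasets mnist coco  # keyword form, same effect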
  # **** Debug options ****
  debug_group = build_parser.add_argument_group(
      'Debug & tests',
      description='--pdb Enter post-mortem debugging mode '
      'if an exception is raised.')
  debug_group.add_argument(
      '--overwrite',
      action='store_true',
      help='Delete pre-existing dataset if it exists.',
  )
  debug_group.add_argument(
      '--max_examples_per_split',
      type=int,
      nargs='?',
      const=1,
      help='When set, only generate the first X examples (defaults to 1), '
      'rather than the full dataset. '
      'If set to 0, only execute `_split_generators` (which downloads the '
      'original data), but skip `_generate_examples`.',
  )
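  # Illustrative debug runs (dataset name is a placeholder):
  #   tfds build mnist --overwrite                 # wipe and regenerate
  #   tfds build mnist --max_examples_per_split    # 1 example per split
  #   tfds build mnist --max_examples_per_split=0  # download only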
  # **** Path options ****
  path_group = build_parser.add_argument_group('Paths')
  path_group.add_argument(
      '--data_dir',
      type=tfds.core.as_path,
      # Should match tfds.core.constants.DATA_DIR!
      default=tfds.core.as_path(
          os.environ.get('TFDS_DATA_DIR',
                         os.path.join('~', 'tensorflow_datasets'))),
      help='Where to place datasets. Defaults to '
      '`~/tensorflow_datasets/` or the `TFDS_DATA_DIR` environment variable.',
  )
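  # The flag takes precedence over the `TFDS_DATA_DIR` environment variable,
  # which only feeds the default above. Illustrative:
  #   TFDS_DATA_DIR=/tmp/tfds tfds build mnist
  #   tfds build mnist --data_dir=/tmp/tfds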
  path_group.add_argument(
      '--download_dir',
      type=tfds.core.as_path,
      help='Where to place downloads. Defaults to `<data_dir>/downloads/`.',
  )
  path_group.add_argument(
      '--extract_dir',
      type=tfds.core.as_path,
      help='Where to extract files. Defaults to `<download_dir>/extracted/`.',
  )
  path_group.add_argument(
      '--manual_dir',
      type=tfds.core.as_path,
      help='Where to manually download data (required for some datasets). '
      'Defaults to `<download_dir>/manual/`.',
  )
  path_group.add_argument(
      '--add_name_to_manual_dir',
      action='store_true',
      help='If true, append the dataset name to the `manual_dir` (e.g. '
      '`<download_dir>/manual/<dataset_name>/`). Useful to avoid collisions '
      'if many datasets are generated.')
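  # Illustrative (paths and dataset name are placeholders): with
  # `--add_name_to_manual_dir`, manually downloaded data for `foo` is looked
  # up under `/data/manual/foo/`:
  #   tfds build foo --manual_dir=/data/manual --add_name_to_manual_dir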
  # **** Generation options ****
  generation_group = build_parser.add_argument_group('Generation')
  generation_group.add_argument(
      '--config',
      '-c',
      type=str,
      help='Config name to build. Build all configs if not set.')
  # We are forced to have 2 flags to avoid ambiguity when the config name is
  # a number (e.g. `voc/2017`).
  generation_group.add_argument(
      '--config_idx',
      type=int,
      help='Config index to build (`builder_cls.BUILDER_CONFIGS[config_idx]`). '
      'Mutually exclusive with `--config`.')
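  # Illustrative (dataset/config names are placeholders, except `voc`, whose
  # year-named configs motivate the second flag, as noted above):
  #   tfds build my_dataset --config=my_config  # select by name
  #   tfds build voc --config_idx=0             # BUILDER_CONFIGS[0]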
  generation_group.add_argument(
      '--imports',
      '-i',
      type=str,
      help='Comma-separated list of modules to import to register datasets.')
  generation_group.add_argument(
      '--register_checksums',
      action='store_true',
      help='If True, store size and checksum of downloaded files.',
  )
  generation_group.add_argument(
      '--force_checksums_validation',
      action='store_true',
      help='If True, raise an error if the checksums are not found.',
  )
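  # Illustrative workflow (dataset name is a placeholder): record checksums on
  # the first run, then enforce validation on subsequent runs:
  #   tfds build my_dataset --register_checksums
  #   tfds build my_dataset --force_checksums_validation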
  generation_group.add_argument(
      '--beam_pipeline_options',
      type=str,
      # nargs='+',
      help='A (comma-separated) list of flags to pass to `PipelineOptions` '
      'when preparing with Apache Beam '
      '(see: https://www.tensorflow.org/datasets/beam_datasets). '
      'Example: `--beam_pipeline_options=job_name=my-job,project=my-project`.',
  )
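  # Illustrative Beam run (dataset name is a placeholder); the comma-separated
  # value is forwarded to Apache Beam's `PipelineOptions`:
  #   tfds build my_beam_dataset \
  #       --beam_pipeline_options=job_name=my-job,project=my-project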
  format_values = [f.value for f in tfds.core.FileFormat]
  generation_group.add_argument(
      '--file_format',
      type=str,
      help='File format in which to generate the tf-examples. '
      f'Available values: {format_values} (see `tfds.core.FileFormat`).',
  )
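  # Illustrative (assuming `tfrecord`, the default TFDS format, is among the
  # `tfds.core.FileFormat` values):
  #   tfds build mnist --file_format=tfrecord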
  # **** Automation options ****
  automation_group = build_parser.add_argument_group(
      'Automation', description='Used by automated scripts.')
  automation_group.add_argument(
      '--exclude_datasets',
      type=str,
      help='If set, generate all datasets except the ones defined here. '
      'Comma-separated list of datasets to exclude.')
  automation_group.add_argument(
      '--experimental_latest_version',
      action='store_true',
      help='Build the latest `Version(experiments=...)` available rather than '
      'the default version.')
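  # Illustrative automated run (dataset names are placeholders): build every
  # registered dataset except the excluded ones, at the latest version:
  #   tfds build --exclude_datasets=mnist,cifar10 --experimental_latest_version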
  build_parser.set_defaults(subparser_fn=_build_datasets)
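
# Minimal sketch of how this registration is typically consumed (hypothetical
# driver, not part of this excerpt). `set_defaults(subparser_fn=...)` lets the
# caller dispatch on the parsed namespace without checking which subcommand ran:
#
#   parser = argparse.ArgumentParser()
#   register_subparser(parser.add_subparsers())
#   args = parser.parse_args(['build', 'mnist'])
#   args.subparser_fn(args)  # -> _build_datasets(args)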