def _get_arg_parser()

in pathology/transformation_pipeline/local/start_local.py [0:0]
386 lines of code
3 McCabe index (conditional complexity)

def _get_arg_parser() -> argparse.Namespace:
  """Parses commandline arguments."""
  arg_parser = argparse.ArgumentParser(
      description='Local Transformation Pipeline Configuration.'
  )
  # ----------------------------------------------------------------------------
  # General Container Authentication Configuration
  # ----------------------------------------------------------------------------
  arg_parser.add_argument('-H', action='help')

  arg_parser.add_argument(
      '-log',
      default=os.getenv('LOG', ''),
      help='File path to write transformation pipeline output to.',
  )

  arg_parser.add_argument(
      '-user_id',
      default=os.getenv('USER_ID', str(os.getuid())),
      help='Container user id; defaults to user id of the current user.',
  )

  arg_parser.add_argument(
      '-group_id',
      default=os.getenv('GROUP_ID', str(os.getgid())),
      help='Container group id; defaults to group id of the current user.',
  )

  arg_parser.add_argument(
      '-google_application_credentials',
      default=os.getenv(
          'GOOGLE_APPLICATION_CREDENTIALS', _discover_adc_credentials()
      ),
      help=(
          'File path to user credential json file that container will user to'
          ' authenticate to GCP. Only required if GCP services are used.'
      ),
  )

  arg_parser.add_argument(
      '-google_cloud_project',
      default=os.getenv('GOOGLE_CLOUD_PROJECT', ''),
      help=(
          'Google Cloud project hosting used DICOM store and other GCP'
          ' services. Only needed if GCP services are used.'
      ),
  )

  # ----------------------------------------------------------------------------
  # Input, Output, and DICOM store configurations.
  # ----------------------------------------------------------------------------
  arg_parser.add_argument(
      '-input_images',
      nargs='*',
      default=json.loads(os.getenv('INPUT_IMAGES', '[]')),
      help=(
          'Paths to image files to ingest. Path may describe either file on the'
          ' local file system or a file within a cloud bucket (gs style path).'
      ),
  )

  arg_parser.add_argument(
      '-poll',
      default=os.getenv('POLL_IMAGE_INGESTION_DIR', 'False'),
      type=_str_to_bool,
      help=(
          'Poll image ingestion directory for new images, container run'
          ' indefinitely.'
      ),
  )

  arg_parser.add_argument(
      '-metadata_dir',
      default=os.getenv('METADATA_DIR', ''),
      help='Local directory metadata is ingested from.',
  )

  arg_parser.add_argument(
      '-image_ingestion_dir',
      default=os.getenv('IMAGE_INGESTION_DIR', ''),
      help='Local directory containing imaging to be ingested.',
  )

  arg_parser.add_argument(
      '-processed_image_dir',
      default=os.getenv('PROCESSED_IMAGE_DIR', ''),
      help='Local directory images are copied/moved to after being processed.',
  )

  arg_parser.add_argument(
      '-dicom_store',
      default=os.getenv('DICOM_STORE'),
      help='Local directory or GCP DICOM store to write DICOM files.',
  )

  arg_parser.add_argument(
      '-pyramid_generation_config_path',
      default=os.getenv('PYRAMID_LAYER_GENERATION_CONFIG_PATH', ''),
      help='Path to file defining pyramid generation config.',
  )

  # ----------------------------------------------------------------------------
  # Metadata Input Configuration
  # ----------------------------------------------------------------------------
  arg_parser.add_argument(
      '-metadata_free_ingestion',
      default=os.getenv('METADATA_FREE_INGESTION', 'True'),
      type=_str_to_bool,
      help=(
          'If True pipeline will ingest images without requiring metadata. Each'
          ' image will be ingested into unique study and series instance uid.'
          ' DICOM PatientID will be set to the ingested file name.'
      ),
  )

  arg_parser.add_argument(
      '-create_missing_study_instance_uid',
      default=os.getenv('CREATE_MISSING_STUDY_INSTANCE_UID', 'True'),
      type=_str_to_bool,
      help=(
          'If CREATE_MISSING_STUDY_INSTANCE_UID="True" then the pipeline will'
          ' attempt to create missing Study Instance UID using slide accession'
          ' number. Requires Slide Accession numbers have a 1:1 mapping with'
          ' StudyInstanceUID.'
      ),
  )

  arg_parser.add_argument(
      '-dicom_study_instance_uid_source',
      default=os.getenv('DICOM_STUDY_INSTANCE_UID_SOURCE', 'DICOM'),
      choices=['DICOM', 'METADATA'],
      help=(
          'Defines the source of the DICOM Study Instance UID for image'
          ' transformation triggered using DICOM imaging. DICOM: Sets the study'
          ' instance uid in generated imaging to the value encoded in the'
          ' source imaging. METADATA: Sets the study instance uid in generated'
          ' imaging to the value encoded in the source imaging metadata.'
      ),
  )

  arg_parser.add_argument(
      '-whole_filename_metadata_primary_key',
      default=os.getenv('WHOLE_FILENAME_METADATA_PRIMARY_KEY', 'True'),
      type=_str_to_bool,
      help=(
          'If True then the whole file name will be tested as candidate'
          ' metadata primary key.'
      ),
  )

  arg_parser.add_argument(
      '-include_upload_path_in_whole_filename_metadata_primary_key',
      default=os.getenv(
          'INCLUDE_UPLOAD_PATH_IN_WHOLE_FILENAME_METADATA_PRIMARY_KEY', 'False'
      ),
      type=_str_to_bool,
      help=(
          'If true and file is placed within a sub-directory in the image'
          ' ingestion bucket then whole filename metadata primary key will'
          ' include the sub-directorys. If false the the metadata primary key'
          ' is just the base portion of the filename.'
      ),
  )

  arg_parser.add_argument(
      '-filename_metadata_primary_key_split_str',
      default=os.getenv('FILENAME_METADATA_PRIMARY_KEY_SPLIT_STR', '_'),
      help=(
          'Character or string that file name will split on to identify'
          ' candidate slide metadata primary keys.'
      ),
  )

  arg_parser.add_argument(
      '-metadata_primary_key_regex',
      default=os.getenv(
          'METADATA_PRIMARY_KEY_REGEX',
          '^[a-zA-Z0-9]+-[a-zA-Z0-9]+(-[a-zA-Z0-9]+)+',
      ),
      help=(
          'Regular expression used to validate candidate metadata primary keys'
          ' in filename. Whole file names are not tested against the regular'
          ' expression. Default regular expression matches 3 or more hyphen'
          ' separated alpha-numeric blocks, e.g. SR-21-2 and SR-21-2-B1-5.'
      ),
  )

  arg_parser.add_argument(
      '-metadata_primary_key_column_name',
      default=os.getenv('METADATA_PRIMARY_KEY_COLUMN_NAME', 'Bar Code Value'),
      help=(
          'Column name used as primary key for joining  BigQuery or CSV'
          ' metadata with imaging.'
      ),
  )

  arg_parser.add_argument(
      '-barcode_decoder',
      default=os.getenv('BARCODE_DECODER', 'True'),
      type=_str_to_bool,
      help=(
          'If the metadata primary key cannot be identified using other'
          ' mechanisms attempts to identify key by decoding barcodes placed in'
          ' slide label imaging.'
      ),
  )

  arg_parser.add_argument(
      '-cloud_vision_barcode_segmentation',
      default=os.getenv('CLOUD_VISION_BARCODE_SEGMENTATION', 'False'),
      type=_str_to_bool,
      help=(
          'Use cloud vision barcode segmentation to improve barcode decoding.'
          ' Requires GCP connectivitiy. mechanisms attempts to identify key by'
          ' decoding barcodes placed in slide label imaging.'
      ),
  )

  arg_parser.add_argument(
      '-big_query',
      default=os.getenv('BIG_QUERY', ''),
      help=(
          'Sets Big Query as the source for slide metadata. Requires GCP'
          ' connectivity To configure set value to: '
          ' "project_id.dataset_id.table_name" of table holding metadata.'
          ' Disabled if initalized to "". If undefined, slide metadata defined'
          ' by placing CSV files in metadata ingestion directory.'
      ),
  )

  # ----------------------------------------------------------------------------
  # Metadata Validation Configuration
  # ----------------------------------------------------------------------------

  arg_parser.add_argument(
      '-metadata_uid_validation',
      default=os.getenv('METADATA_UID_VALIDATION', 'LOG_WARNING'),
      choices=['NONE', 'LOG_WARNING', 'ERROR'],
      help=(
          'Test and optionally error if UID values defined in metadata are'
          ' formatted incorrectly.'
          ' https://dicom.nema.org/dicom/2013/output/chtml/part05/chapter_9.html'
      ),
  )

  arg_parser.add_argument(
      '-metadata_tag_length_validation',
      default=os.getenv('METADATA_TAG_LENGTH_VALIDATION', 'LOG_WARNING'),
      choices=['NONE', 'LOG_WARNING', 'LOG_WARNING_AND_CLIP', 'ERROR'],
      help=(
          'Test and optionally error if values defined in metadata exceed dicom'
          ' standard length limits.'
          ' https://dicom.nema.org/dicom/2013/output/chtml/part05/sect_6.2.html'
      ),
  )

  arg_parser.add_argument(
      '-require_type1_dicom_tag_metadata_are_defined',
      default=os.getenv(
          'REQUIRE_TYPE1_DICOM_TAG_METADATA_ARE_DEFINED', 'False'
      ),
      type=_str_to_bool,
      help=(
          'Require all type one tags defined in the metadata schema have'
          ' defined values; if not raise MissingRequiredMetadataValueError'
          ' exception and fail ingestion.'
      ),
  )

  # ----------------------------------------------------------------------------
  # WSI Pyramid Generation Configuration.
  # ----------------------------------------------------------------------------
  # When imaging is ingested using pixel equivalent methods frame dimensions are
  # set to match those in the source imaging. When non-pixel equivalent methods
  # are used frame dimensions in pixels are defined by FRAME_HEIGHT
  # and FRAME_WIDTH. It is generally recommended that frame
  # dimensions be defined as being square with, edge dimensions 256 to 512
  # pixels.
  arg_parser.add_argument(
      '-frame_height',
      default=os.getenv('FRAME_HEIGHT', '256'),
      type=int,
      help=(
          'DICOM frame height in generated imaging when non-pixel equivalent'
          ' transform is used'
      ),
  )

  arg_parser.add_argument(
      '-frame_width',
      default=os.getenv('FRAME_WIDTH', '256'),
      type=int,
      help=(
          'DICOM frame width in generated imaging when non-pixel equivalent'
          ' transform is used'
      ),
  )

  arg_parser.add_argument(
      '-compression',
      default=os.getenv('COMPRESSION', 'JPEG'),
      choices=['JPEG', 'JPEG2000', 'RAW'],
      help=(
          'Image compression to use when encoding pixels; supported formats:'
          ' JPEG, JPEG2000, or RAW (uncompressed).'
      ),
  )

  arg_parser.add_argument(
      '-jpeg_quality',
      default=os.getenv('JPEG_QUALITY', '95'),
      type=int,
      help='JPEG compression quality range 1 - 100.',
  )

  arg_parser.add_argument(
      '-jpeg_subsampling',
      default=os.getenv('JPEG_SUBSAMPLING', 'SUBSAMPLE_444'),
      choices=[
          'SUBSAMPLE_444',
          'SUBSAMPLE_440',
          'SUBSAMPLE_442',
          'SUBSAMPLE_420',
      ],
      help='JPEG compression subsampling.',
  )

  arg_parser.add_argument(
      '-icc_profile',
      default=os.getenv('ICC_PROFILE', 'True'),
      type=_str_to_bool,
      help='Embed source imaging ICC profile in generated imaging.',
  )

  arg_parser.add_argument(
      '-pixel_equivalent_transform',
      default=os.getenv('PIXEL_EQUIVALENT_TRANSFORM', 'HIGHEST_MAGNIFICATION'),
      choices=['DISABLED', 'HIGHEST_MAGNIFICATION', 'ALL_LEVELS'],
      help='Levels are processed using pixel equivalent transformation.',
  )

  # ----------------------------------------------------------------------------
  # MISC WSI Pyramid Configuration.
  # ----------------------------------------------------------------------------
  arg_parser.add_argument(
      '-uid_prefix',
      default=os.getenv('UID_PREFIX', '1.3.6.1.4.1.11129.5.7'),
      help=(
          'DICOM UID prefix to prefix generated DICOM with. The prefix is'
          ' required to'
          ' start with the Google Digital Pathology prefix'
          ' "1.3.6.1.4.1.11129.5.7"'
          ' the prefix may include an optional customer suffix, additional 7'
          ' characters. Characters must conform to the DICOM standard UID'
          ' requirements. '
          ' https://dicom.nema.org/dicom/2013/output/chtml/part05/chapter_9.html'
      ),
  )

  arg_parser.add_argument(
      '-ignore_root_dirs',
      default=json.loads(
          os.getenv('IGNORE_ROOT_DIRS', '["cloud-ingest", "storage-transfer"]')
      ),
      nargs='*',
      help=(
          'Files placed in listed root directories within the ingestion'
          ' directory will be ignored.'
      ),
  )

  arg_parser.add_argument(
      '-ignore_file_exts',
      default=json.loads(os.getenv('IGNORE_FILE_EXT', '[]')),
      nargs='*',
      help=(
          'list of file extensions (e.g., ".json") which will be ignored by the'
          ' transformation pipeline.'
      ),
  )

  arg_parser.add_argument(
      '-move_image_on_ingest_sucess_or_failure',
      default=os.getenv('MOVE_IMAGE_ON_INGEST_SUCCESS_OR_FAILURE', 'True'),
      type=_str_to_bool,
      help=(
          'If true imaging will be moved from ingestion bucket to success or'
          ' failure folder in the output folder when transformation completes.'
          ' If False, the file will be copied to the success/failure and not'
          ' removed from the ingestion bucket. This setting is to support RSYNC'
          ' or similar driven streaming pipeline execution which require copied'
          ' files to remain in place to avoid repeated file transfers.'
      ),
  )

  # ----------------------------------------------------------------------------
  # MISC Local host runner configuration.
  # ----------------------------------------------------------------------------

  arg_parser.add_argument(
      '-cloud_ops_logging',
      default=os.getenv('CLOUD_OPS_LOGGING', 'False'),
      type=_str_to_bool,
      help='Enables publishing transformation logs to cloud operations.',
  )

  arg_parser.add_argument(
      '-ops_log_name',
      default=os.getenv('CLOUD_OPS_LOG_NAME', 'transformation_pipeline'),
      help='Cloud ops log name to write logs to.',
  )

  arg_parser.add_argument(
      '-ops_log_project',
      default=os.getenv('CLOUD_OPS_LOG_PROJECT'),
      help='GCP project name to write cloud ops log to. Undefined = default',
  )

  # ----------------------------------------------------------------------------
  # MISC Local host runner configuration.
  # ----------------------------------------------------------------------------

  arg_parser.add_argument(
      '-docker_container_name',
      default=os.getenv(
          'DOCKER_CONTAINER_NAME', 'local_transform_pipeline_docker_container'
      ),
      help='Name of the docker container.',
  )

  arg_parser.add_argument(
      '-running_docker_instance_name',
      default=os.getenv(
          'RUNNING_DOCKER_INSTANCE_NAME', 'local_transform_pipeline'
      ),
      help='Name of the running docker instance.',
  )

  arg_parser.add_argument(
      '-max_file_copy_threads',
      default=os.getenv('MAX_FILE_COPY_THREADS', '3'),
      type=int,
      help='Maximum number of file copy threads.',
  )

  arg_parser.add_argument(
      '-log_environment_variables',
      default=os.getenv('LOG_ENVIRONMENT_VARIABLES', 'False'),
      type=_str_to_bool,
      help=(
          'Echo enviromental variable settings used to start transform'
          ' pipeline; does not start pipleine.'
      ),
  )

  arg_parser.add_argument(
      '-write_docker_run_shellscript',
      default=os.getenv('WRITE_DOCKER_RUN_SHELLSCRIPT', ''),
      help=(
          'Name of path to a file to write a shell script which launches the'
          ' configured localhost transformation pipeline; does'
          ' not start pipleine.'
      ),
  )
  return arg_parser.parse_args()