in pathology/transformation_pipeline/local/start_local.py [0:0]
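# Note: _str_to_bool and _discover_adc_credentials are referenced by the
# parser below but are defined elsewhere in this module. The commented-out
# sketches below illustrate their assumed behavior only; the actual
# implementations may differ.
#
# def _str_to_bool(val: str) -> bool:
#   """Converts a flag/env string such as 'True' or 'false' to a bool."""
#   if val.strip().lower() in ('true', '1', 'yes'):
#     return True
#   if val.strip().lower() in ('false', '0', 'no', ''):
#     return False
#   raise argparse.ArgumentTypeError(f'Cannot parse boolean value: {val!r}')
#
# def _discover_adc_credentials() -> str:
#   """Returns a best-guess path to application default credentials."""
#   path = os.path.join(  # Conventional gcloud ADC location (assumption).
#       os.path.expanduser('~'), '.config', 'gcloud',
#       'application_default_credentials.json',
#   )
#   return path if os.path.isfile(path) else ''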
def _get_arg_parser() -> argparse.Namespace:
"""Parses commandline arguments."""
arg_parser = argparse.ArgumentParser(
description='Local Transformation Pipeline Configuration.'
)
# ----------------------------------------------------------------------------
# General Container Authentication Configuration
# ----------------------------------------------------------------------------
arg_parser.add_argument('-H', action='help')
arg_parser.add_argument(
'-log',
default=os.getenv('LOG', ''),
help='File path to write transformation pipeline output to.',
)
arg_parser.add_argument(
'-user_id',
default=os.getenv('USER_ID', str(os.getuid())),
help='Container user id; defaults to user id of the current user.',
)
arg_parser.add_argument(
'-group_id',
default=os.getenv('GROUP_ID', str(os.getgid())),
help='Container group id; defaults to group id of the current user.',
)
arg_parser.add_argument(
'-google_application_credentials',
default=os.getenv(
'GOOGLE_APPLICATION_CREDENTIALS', _discover_adc_credentials()
),
help=(
'File path to the user credential JSON file that the container will'
' use to authenticate to GCP. Only required if GCP services are used.'
),
)
arg_parser.add_argument(
'-google_cloud_project',
default=os.getenv('GOOGLE_CLOUD_PROJECT', ''),
help=(
'Google Cloud project hosting the DICOM store and other GCP services'
' used. Only needed if GCP services are used.'
),
)
# ----------------------------------------------------------------------------
# Input, Output, and DICOM store configurations.
# ----------------------------------------------------------------------------
arg_parser.add_argument(
'-input_images',
nargs='*',
default=json.loads(os.getenv('INPUT_IMAGES', '[]')),
help=(
'Paths to image files to ingest. Each path may reference either a'
' file on the local file system or a file within a cloud bucket'
' (gs:// style path).'
),
)
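# INPUT_IMAGES is parsed as a JSON list. Example environment setting
# (illustrative values, not defaults):
#   export INPUT_IMAGES='["gs://my-bucket/slide1.svs", "/data/slide2.svs"]'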
arg_parser.add_argument(
'-poll',
default=os.getenv('POLL_IMAGE_INGESTION_DIR', 'False'),
type=_str_to_bool,
help=(
'Poll the image ingestion directory for new images; the container'
' runs indefinitely.'
),
)
arg_parser.add_argument(
'-metadata_dir',
default=os.getenv('METADATA_DIR', ''),
help='Local directory metadata is ingested from.',
)
arg_parser.add_argument(
'-image_ingestion_dir',
default=os.getenv('IMAGE_INGESTION_DIR', ''),
help='Local directory containing imaging to be ingested.',
)
arg_parser.add_argument(
'-processed_image_dir',
default=os.getenv('PROCESSED_IMAGE_DIR', ''),
help='Local directory images are copied/moved to after being processed.',
)
arg_parser.add_argument(
'-dicom_store',
default=os.getenv('DICOM_STORE'),
help='Local directory or GCP DICOM store to write DICOM files to.',
)
arg_parser.add_argument(
'-pyramid_generation_config_path',
default=os.getenv('PYRAMID_LAYER_GENERATION_CONFIG_PATH', ''),
help='Path to file defining pyramid generation config.',
)
# ----------------------------------------------------------------------------
# Metadata Input Configuration
# ----------------------------------------------------------------------------
arg_parser.add_argument(
'-metadata_free_ingestion',
default=os.getenv('METADATA_FREE_INGESTION', 'True'),
type=_str_to_bool,
help=(
'If True, the pipeline will ingest images without requiring metadata.'
' Each image will be ingested into a unique study and series instance'
' UID. DICOM PatientID will be set to the ingested file name.'
),
)
arg_parser.add_argument(
'-create_missing_study_instance_uid',
default=os.getenv('CREATE_MISSING_STUDY_INSTANCE_UID', 'True'),
type=_str_to_bool,
help=(
'If CREATE_MISSING_STUDY_INSTANCE_UID="True" then the pipeline will'
' attempt to create a missing Study Instance UID using the slide'
' accession number. Requires that slide accession numbers have a 1:1'
' mapping with StudyInstanceUID.'
),
)
arg_parser.add_argument(
'-dicom_study_instance_uid_source',
default=os.getenv('DICOM_STUDY_INSTANCE_UID_SOURCE', 'DICOM'),
choices=['DICOM', 'METADATA'],
help=(
'Defines the source of the DICOM Study Instance UID for image'
' transformation triggered using DICOM imaging. DICOM: Sets the study'
' instance uid in generated imaging to the value encoded in the'
' source imaging. METADATA: Sets the study instance uid in generated'
' imaging to the value defined in the slide metadata.'
),
)
arg_parser.add_argument(
'-whole_filename_metadata_primary_key',
default=os.getenv('WHOLE_FILENAME_METADATA_PRIMARY_KEY', 'True'),
type=_str_to_bool,
help=(
'If True, the whole file name will be tested as a candidate metadata'
' primary key.'
),
)
arg_parser.add_argument(
'-include_upload_path_in_whole_filename_metadata_primary_key',
default=os.getenv(
'INCLUDE_UPLOAD_PATH_IN_WHOLE_FILENAME_METADATA_PRIMARY_KEY', 'False'
),
type=_str_to_bool,
help=(
'If True and a file is placed within a sub-directory in the image'
' ingestion bucket, then the whole-filename metadata primary key will'
' include the sub-directories. If False, the metadata primary key is'
' just the base portion of the filename.'
),
)
arg_parser.add_argument(
'-filename_metadata_primary_key_split_str',
default=os.getenv('FILENAME_METADATA_PRIMARY_KEY_SPLIT_STR', '_'),
help=(
'Character or string that the file name will be split on to identify'
' candidate slide metadata primary keys.'
),
)
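# Example (illustrative): with the default split string '_', the filename
# 'SR-21-2_scan1.svs' is split into parts such as 'SR-21-2' and 'scan1',
# each of which may be tested as a candidate metadata primary key.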
arg_parser.add_argument(
'-metadata_primary_key_regex',
default=os.getenv(
'METADATA_PRIMARY_KEY_REGEX',
'^[a-zA-Z0-9]+-[a-zA-Z0-9]+(-[a-zA-Z0-9]+)+',
),
help=(
'Regular expression used to validate candidate metadata primary keys'
' in the filename. Whole file names are not tested against the'
' regular expression. The default regular expression matches 3 or'
' more hyphen-separated alphanumeric blocks, e.g., SR-21-2 and'
' SR-21-2-B1-5.'
),
)
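# Illustration of the default regex (assuming re.match semantics):
# 'SR-21-2' and 'SR-21-2-B1-5' are accepted (3+ hyphen-separated
# alphanumeric blocks); 'SR-21' (2 blocks) and 'SR21' (no hyphens) are
# rejected.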
arg_parser.add_argument(
'-metadata_primary_key_column_name',
default=os.getenv('METADATA_PRIMARY_KEY_COLUMN_NAME', 'Bar Code Value'),
help=(
'Column name used as primary key for joining BigQuery or CSV'
' metadata with imaging.'
),
)
arg_parser.add_argument(
'-barcode_decoder',
default=os.getenv('BARCODE_DECODER', 'True'),
type=_str_to_bool,
help=(
'If the metadata primary key cannot be identified using other'
' mechanisms, attempt to identify the key by decoding barcodes placed'
' in the slide label imaging.'
),
)
arg_parser.add_argument(
'-cloud_vision_barcode_segmentation',
default=os.getenv('CLOUD_VISION_BARCODE_SEGMENTATION', 'False'),
type=_str_to_bool,
help=(
'Use Cloud Vision barcode segmentation to improve barcode decoding.'
' Requires GCP connectivity.'
),
)
arg_parser.add_argument(
'-big_query',
default=os.getenv('BIG_QUERY', ''),
help=(
'Sets BigQuery as the source for slide metadata. Requires GCP'
' connectivity. To configure, set the value to'
' "project_id.dataset_id.table_name" of the table holding metadata.'
' Disabled if initialized to "". If undefined, slide metadata is'
' defined by placing CSV files in the metadata ingestion directory.'
),
)
# ----------------------------------------------------------------------------
# Metadata Validation Configuration
# ----------------------------------------------------------------------------
arg_parser.add_argument(
'-metadata_uid_validation',
default=os.getenv('METADATA_UID_VALIDATION', 'LOG_WARNING'),
choices=['NONE', 'LOG_WARNING', 'ERROR'],
help=(
'Test and optionally error if UID values defined in metadata are'
' formatted incorrectly.'
' https://dicom.nema.org/dicom/2013/output/chtml/part05/chapter_9.html'
),
)
arg_parser.add_argument(
'-metadata_tag_length_validation',
default=os.getenv('METADATA_TAG_LENGTH_VALIDATION', 'LOG_WARNING'),
choices=['NONE', 'LOG_WARNING', 'LOG_WARNING_AND_CLIP', 'ERROR'],
help=(
'Test and optionally error if values defined in metadata exceed DICOM'
' standard length limits.'
' https://dicom.nema.org/dicom/2013/output/chtml/part05/sect_6.2.html'
),
)
arg_parser.add_argument(
'-require_type1_dicom_tag_metadata_are_defined',
default=os.getenv(
'REQUIRE_TYPE1_DICOM_TAG_METADATA_ARE_DEFINED', 'False'
),
type=_str_to_bool,
help=(
'Require that all type 1 tags defined in the metadata schema have'
' defined values; if not, raise a MissingRequiredMetadataValueError'
' exception and fail ingestion.'
),
)
# ----------------------------------------------------------------------------
# WSI Pyramid Generation Configuration.
# ----------------------------------------------------------------------------
# When imaging is ingested using pixel equivalent methods, frame dimensions
# are set to match those in the source imaging. When non-pixel equivalent
# methods are used, frame dimensions in pixels are defined by FRAME_HEIGHT
# and FRAME_WIDTH. It is generally recommended that frames be square, with
# edge dimensions of 256 to 512 pixels.
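# Illustrative arithmetic: a 100,000 x 80,000 pixel level tiled with
# 256 x 256 frames yields ceil(100000/256) * ceil(80000/256) =
# 391 * 313 = 122,383 frames.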
arg_parser.add_argument(
'-frame_height',
default=os.getenv('FRAME_HEIGHT', '256'),
type=int,
help=(
'DICOM frame height in generated imaging when a non-pixel equivalent'
' transform is used.'
),
)
arg_parser.add_argument(
'-frame_width',
default=os.getenv('FRAME_WIDTH', '256'),
type=int,
help=(
'DICOM frame width in generated imaging when a non-pixel equivalent'
' transform is used.'
),
)
arg_parser.add_argument(
'-compression',
default=os.getenv('COMPRESSION', 'JPEG'),
choices=['JPEG', 'JPEG2000', 'RAW'],
help=(
'Image compression to use when encoding pixels; supported formats:'
' JPEG, JPEG2000, or RAW (uncompressed).'
),
)
arg_parser.add_argument(
'-jpeg_quality',
default=os.getenv('JPEG_QUALITY', '95'),
type=int,
help='JPEG compression quality; range 1 - 100.',
)
arg_parser.add_argument(
'-jpeg_subsampling',
default=os.getenv('JPEG_SUBSAMPLING', 'SUBSAMPLE_444'),
choices=[
'SUBSAMPLE_444',
'SUBSAMPLE_440',
'SUBSAMPLE_422',
'SUBSAMPLE_420',
],
help='JPEG compression subsampling.',
)
arg_parser.add_argument(
'-icc_profile',
default=os.getenv('ICC_PROFILE', 'True'),
type=_str_to_bool,
help='Embed source imaging ICC profile in generated imaging.',
)
arg_parser.add_argument(
'-pixel_equivalent_transform',
default=os.getenv('PIXEL_EQUIVALENT_TRANSFORM', 'HIGHEST_MAGNIFICATION'),
choices=['DISABLED', 'HIGHEST_MAGNIFICATION', 'ALL_LEVELS'],
help='Pyramid levels that are generated using the pixel equivalent transformation.',
)
# ----------------------------------------------------------------------------
# MISC WSI Pyramid Configuration.
# ----------------------------------------------------------------------------
arg_parser.add_argument(
'-uid_prefix',
default=os.getenv('UID_PREFIX', '1.3.6.1.4.1.11129.5.7'),
help=(
'DICOM UID prefix to prefix generated DICOM instances with. The'
' prefix is required to start with the Google Digital Pathology'
' prefix "1.3.6.1.4.1.11129.5.7". The prefix may include an optional'
' customer suffix of 7 additional characters. Characters must conform'
' to the DICOM standard UID requirements.'
' https://dicom.nema.org/dicom/2013/output/chtml/part05/chapter_9.html'
),
)
arg_parser.add_argument(
'-ignore_root_dirs',
default=json.loads(
os.getenv('IGNORE_ROOT_DIRS', '["cloud-ingest", "storage-transfer"]')
),
nargs='*',
help=(
'Files placed in listed root directories within the ingestion'
' directory will be ignored.'
),
)
arg_parser.add_argument(
'-ignore_file_exts',
default=json.loads(os.getenv('IGNORE_FILE_EXT', '[]')),
nargs='*',
help=(
'List of file extensions (e.g., ".json") which will be ignored by the'
' transformation pipeline.'
),
)
arg_parser.add_argument(
'-move_image_on_ingest_success_or_failure',
default=os.getenv('MOVE_IMAGE_ON_INGEST_SUCCESS_OR_FAILURE', 'True'),
type=_str_to_bool,
help=(
'If True, imaging will be moved from the ingestion bucket to a'
' success or failure folder in the output folder when the'
' transformation completes. If False, the file will be copied to the'
' success/failure folder and not removed from the ingestion bucket.'
' This setting supports RSYNC or similar streaming pipeline'
' execution, which requires copied files to remain in place to avoid'
' repeated file transfers.'
),
)
# ----------------------------------------------------------------------------
# Cloud Operations Logging Configuration.
# ----------------------------------------------------------------------------
arg_parser.add_argument(
'-cloud_ops_logging',
default=os.getenv('CLOUD_OPS_LOGGING', 'False'),
type=_str_to_bool,
help='Enables publishing transformation logs to cloud operations.',
)
arg_parser.add_argument(
'-ops_log_name',
default=os.getenv('CLOUD_OPS_LOG_NAME', 'transformation_pipeline'),
help='Cloud ops log name to write logs to.',
)
arg_parser.add_argument(
'-ops_log_project',
default=os.getenv('CLOUD_OPS_LOG_PROJECT'),
help='GCP project name to write cloud ops logs to; if undefined, the default project is used.',
)
# ----------------------------------------------------------------------------
# MISC Local host runner configuration.
# ----------------------------------------------------------------------------
arg_parser.add_argument(
'-docker_container_name',
default=os.getenv(
'DOCKER_CONTAINER_NAME', 'local_transform_pipeline_docker_container'
),
help='Name of the docker container.',
)
arg_parser.add_argument(
'-running_docker_instance_name',
default=os.getenv(
'RUNNING_DOCKER_INSTANCE_NAME', 'local_transform_pipeline'
),
help='Name of the running docker instance.',
)
arg_parser.add_argument(
'-max_file_copy_threads',
default=os.getenv('MAX_FILE_COPY_THREADS', '3'),
type=int,
help='Maximum number of file copy threads.',
)
arg_parser.add_argument(
'-log_environment_variables',
default=os.getenv('LOG_ENVIRONMENT_VARIABLES', 'False'),
type=_str_to_bool,
help=(
'Echo environment variable settings used to start the transform'
' pipeline; does not start the pipeline.'
),
)
arg_parser.add_argument(
'-write_docker_run_shellscript',
default=os.getenv('WRITE_DOCKER_RUN_SHELLSCRIPT', ''),
help=(
'Path to a file to write a shell script that launches the configured'
' localhost transformation pipeline; does not start the pipeline.'
),
)
return arg_parser.parse_args()
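# Example (illustrative) command line for a metadata-driven local run; flag
# values below are assumptions for demonstration, not defaults:
#
#   python3 start_local.py \
#       -input_images /data/slides/SR-21-2_scan1.svs \
#       -metadata_dir /data/metadata \
#       -dicom_store /data/dicom_out \
#       -metadata_free_ingestion False \
#       -create_missing_study_instance_uid True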