in pathology/orchestrator/filter_file_generator.py [0:0]
def get_instances_for_deid(cohort: cohorts_pb2.PathologyCohort) -> List[str]:
  """Returns list of valid Instance URIs in a cohort for a Deid operation.

  Queries the DICOM store for the metadata of every slide in the cohort and
  keeps only the instances accepted by `_should_deid_instance`, i.e. removes
  any instances with metadata tags that would indicate it contains a label or
  PHI.

  If the metadata response of a slide cannot be decoded as JSON, the failure
  is logged at critical level and processing continues with the next slide;
  instances already collected are kept.

  Args:
    cohort: Cohort to get instances from.

  Returns:
    Instance paths of the form `<slide path>/instances/<instance UID>`. The
    list is empty if no instance qualified.

  Raises:
    requests.exceptions.HTTPError: Metadata retrieval for a slide returned an
      HTTP error status.
    ValueError: Invalid env setting DEID_MAX_MAGNIFICATION.
  """
  # Imported locally under an alias: the module-level name `requests` is
  # already used for `requests.AuthorizedSession` (presumably
  # google.auth.transport.requests - confirm against the file's imports), so
  # the HTTP library's exception types are not reachable through it.
  from requests import exceptions as requests_exceptions  # pylint: disable=g-import-not-at-top

  cloud_logging_client.info(
      f'Generating Deid filter file for cohort {cohort.name}.'
  )
  # Gets credentials from the environment.
  scoped_credentials = google.auth.default(scopes=_SCOPES)[0]
  # Creates a requests Session object with the credentials.
  session = requests.AuthorizedSession(scoped_credentials)
  # URL to the Cloud Healthcare API endpoint and version
  base_url = healthcare_api_const.HEALTHCARE_API_BASE_URL_FLG.value
  # Validate the magnification setting before issuing any request.
  if (
      _max_magnification_flg_val() not in _SUPPORTED_MAGNIFICATIONS
      and _is_deid_max_magnification_flag_enabled()
  ):
    cloud_logging_client.critical(
        'DEID_MAX_MAGNIFICATION does not define supported magnification'
        f'; DEID_MAX_MAGNIFICATION={_DEID_MAX_MAGNIFICATION_FLG.value}; '
        f'supported values={str(_SUPPORTED_MAGNIFICATIONS)}'
    )
    raise ValueError(
        'DEID_MAX_MAGNIFICATION does not define supported magnification'
    )
  min_pixel_spacing_threshold = _get_deid_min_pixel_spacing()
  instances = []
  for slide in cohort.slides:
    slide_path = get_full_path_from_dicom_uri(slide.dicom_uri)
    query_url = f'{base_url}/{slide.dicom_uri}/{_LABEL_METADATA_QUERY}'
    response = session.get(query_url, headers=_HEADERS)
    try:
      response.raise_for_status()
    except (requests_exceptions.HTTPError, urllib.error.HTTPError) as exc:
      # raise_for_status() on a requests-based session raises
      # requests.exceptions.HTTPError, which is unrelated to
      # urllib.error.HTTPError; catching only the latter meant this log was
      # never written. Log, then let the error propagate to the caller.
      cloud_logging_client.critical(
          'Exception occurred in querying the DICOM Store.', exc
      )
      raise
    try:
      for metadata in response.json():
        instance_path = (
            f'{slide_path}/instances/'
            f'{metadata[_DICOM_INSTANCE_UID_TAG][_VALUE][0]}'
        )
        if _should_deid_instance(
            metadata, instance_path, min_pixel_spacing_threshold
        ):
          instances.append(instance_path)
    except decoder.JSONDecodeError as exc:
      # Best effort: an undecodable response body skips this slide only; the
      # loop continues with the remaining slides.
      cloud_logging_client.critical(
          'Exception occurred in retrieving instances from cases in cohort.',
          exc,
          {'response_text': response.text, 'status_code': response.status_code},
      )
  # Summarize the outcome once, after every slide has been processed.
  if instances:
    cloud_logging_client.info(
        'Instances listed in DeID filter file.',
        {'cohort name': cohort.name, 'instance_list': str(instances)},
    )
  else:
    cloud_logging_client.warning(
        'No instances listed in DeID filter file.', {'cohort name': cohort.name}
    )
  return instances