def get_instances_for_deid()

in pathology/orchestrator/filter_file_generator.py [0:0]


def get_instances_for_deid(cohort: cohorts_pb2.PathologyCohort) -> List[str]:
  """Returns list of valid Instance URIs in a cohort for a Deid operation.

  Queries the DICOM store for the metadata of every slide in the cohort and
  removes any instances with metadata tags that would indicate it contains a
  label or PHI.

  Args:
    cohort: Cohort to get instances from.

  Returns:
    Full paths of the instances that passed the Deid filter. The list is empty
    if no instance in the cohort qualifies.

  Raises:
    HTTPError: Metadata retrieval from the DICOM store fails.
    JSONDecodeError: Metadata response cannot be decoded as JSON (e.g., case is
      missing instances).
    ValueError: Invalid env setting DEID_MAX_MAGNIFICATION.
  """
  cloud_logging_client.info(
      f'Generating Deid filter file for cohort {cohort.name}.'
  )

  # Gets credentials from the environment.
  scoped_credentials = google.auth.default(scopes=_SCOPES)[0]

  # Creates a requests Session object with the credentials.
  session = requests.AuthorizedSession(scoped_credentials)

  # URL to the Cloud Healthcare API endpoint and version
  base_url = healthcare_api_const.HEALTHCARE_API_BASE_URL_FLG.value

  # Fail before issuing any query if the magnification limit is enabled but
  # set to a value that is not supported.
  if (
      _max_magnification_flg_val() not in _SUPPORTED_MAGNIFICATIONS
      and _is_deid_max_magnification_flag_enabled()
  ):
    cloud_logging_client.critical(
        'DEID_MAX_MAGNIFICATION does not define supported magnification'
        f'; DEID_MAX_MAGNIFICATION={_DEID_MAX_MAGNIFICATION_FLG.value}; '
        f'supported values={str(_SUPPORTED_MAGNIFICATIONS)}'
    )
    raise ValueError(
        'DEID_MAX_MAGNIFICATION does not define supported magnification'
    )
  min_pixel_spacing_threshold = _get_deid_min_pixel_spacing()

  instances = []
  for slide in cohort.slides:
    slide_path = get_full_path_from_dicom_uri(slide.dicom_uri)
    query_url = f'{base_url}/{slide.dicom_uri}/{_LABEL_METADATA_QUERY}'
    response = session.get(query_url, headers=_HEADERS)
    try:
      response.raise_for_status()
    except urllib.error.HTTPError as exc:
      # NOTE(review): if `session` is a requests-based session,
      # raise_for_status() raises requests.exceptions.HTTPError, which this
      # handler does not catch; that error propagates without being logged.
      # Confirm the intended exception type.
      cloud_logging_client.critical(
          'Exception occurred in querying the DICOM Store.', exc
      )
      # Propagate as documented instead of parsing an error response below.
      raise
    try:
      for metadata in response.json():
        instance_path = (
            f'{slide_path}/instances/'
            f'{metadata[_DICOM_INSTANCE_UID_TAG][_VALUE][0]}'
        )
        if _should_deid_instance(
            metadata, instance_path, min_pixel_spacing_threshold
        ):
          instances.append(instance_path)
    except decoder.JSONDecodeError as exc:
      cloud_logging_client.critical(
          'Exception occurred in retrieving instances from cases in cohort.',
          exc,
          {'response_text': response.text, 'status_code': response.status_code},
      )
      # Propagate as documented; returning a partial instance list would
      # silently omit this slide from the Deid filter file.
      raise

  if instances:
    cloud_logging_client.info(
        'Instances listed in DeID filter file.',
        {'cohort name': cohort.name, 'instance_list': str(instances)},
    )
  else:
    cloud_logging_client.warning(
        'No instances listed in DeID filter file.', {'cohort name': cohort.name}
    )
  return instances