def _sample_negative_patches()

in tensorflow_datasets/image_classification/cbis_ddsm.py [0:0]


def _sample_negative_patches(image,
                             image_filepath,
                             abnormalities_masks,
                             abnormalities_areas,
                             patch_size,
                             number_of_patches=10,
                             min_breast_overlap_threshold=0.75,
                             max_abnorm_overlap_threshold=0.35,
                             max_number_of_trials_per_threshold=100):
  """Sample background (negative) patches from the image.

  Candidate top-left corners are drawn uniformly at random from a region built
  around the bounding box of the breast mask. A candidate is accepted when:

  * the number of breast-mask pixels inside the patch, relative to the nominal
    patch area (`patch_size[0] * patch_size[1]`), is strictly greater than
    `min_breast_overlap_threshold`. This is to prevent too easy negative
    examples; and
  * `_patch_overlaps_any_abnormality_above_threshold` does not report an
    overlap with any abnormality for `max_abnorm_overlap_threshold`.

  If a round of trials does not produce all the requested patches, the
  constraints are relaxed (`min_breast_overlap_threshold` is multiplied by
  0.95 and `max_abnorm_overlap_threshold` by 1.05) and a new round starts.
  Patches yielded in previous rounds count towards `number_of_patches`.
  Rounds continue while `min_breast_overlap_threshold > 0.1` and
  `max_abnorm_overlap_threshold < 0.9`.

  Args:
    image: Image to patch from.
    image_filepath: Only used for logging.
    abnormalities_masks: List of binary mask of each abnormality in the image.
    abnormalities_areas: List of precomputed area of each abnormality.
    patch_size: Size of the patch to extract, indexed as (height, width).
    number_of_patches: Number of negative patches to sample from the image.
      If it is not positive, nothing is yielded.
    min_breast_overlap_threshold: Minimum (relative) number of breast pixels in
      the patch.
    max_abnorm_overlap_threshold: Maximum (relative) number of abnormal pixels
      in the patch.
    max_number_of_trials_per_threshold: Maximum number of random samples to try
      before reducing the `min_breast_overlap_threshold` by 5% and increasing
      the `max_abnorm_overlap_threshold` by 5%.

  Yields:
    The patch cropped from the input image.

  Raises:
    ValueError: If the thresholds were relaxed past their limits before
      `number_of_patches` patches could be yielded. Since this is a generator,
      the error is raised while iterating, not when the function is called.
  """
  # Nothing was requested: do not sample (and do not yield) anything.
  if number_of_patches <= 0:
    return

  cv2 = tfds.core.lazy_imports.cv2

  breast_mask = _get_breast_mask(image)
  patch_h, patch_w = patch_size[0], patch_size[1]
  patch_area = patch_h * patch_w

  def patch_overlapping_breast_is_feasible(y, x):
    """Return True if the patch contains enough breast pixels."""
    breast_in_patch = breast_mask[y:(y + patch_h), x:(x + patch_w)]
    # The threshold is read at call time, so relaxations made by the outer
    # loop are taken into account.
    return (np.sum(breast_in_patch > 0) / patch_area >
            min_breast_overlap_threshold)

  breast_roi = _get_roi_from_mask(breast_mask)
  breast_x, breast_y, breast_w, breast_h = cv2.boundingRect(breast_roi)

  # These only depend on the breast bounding box and the patch size, so they
  # are computed once instead of on every relaxation round.
  max_h, min_h = max(breast_h, patch_h), min(breast_h, patch_h)
  max_w, min_w = max(breast_w, patch_w), min(breast_w, patch_w)

  number_of_yielded_patches = 0
  while (min_breast_overlap_threshold > 0.1 and
         max_abnorm_overlap_threshold < 0.9):
    # Determine the region where random samples should be sampled from.
    min_y = breast_y - int((1.0 - min_breast_overlap_threshold) * max_h)
    min_x = breast_x - int((1.0 - min_breast_overlap_threshold) * max_w)
    max_y = breast_y + breast_h - int(min_breast_overlap_threshold * min_h)
    max_x = breast_x + breast_w - int(min_breast_overlap_threshold * min_w)
    # Ensure that all sampled patches are within the image.
    min_y = max(min_y, 0)
    min_x = max(min_x, 0)
    max_y = max(min(max_y, image.shape[0] - patch_h - 1), min_y)
    max_x = max(min(max_x, image.shape[1] - patch_w - 1), min_x)
    # Cap the number of trials if the sampling region is too small.
    sampling_region_size = (max_y - min_y + 1) * (max_x - min_x + 1)
    effective_range_size = max_number_of_trials_per_threshold
    if sampling_region_size < effective_range_size:
      logging.debug(
          'The sampling region for negative patches of size %r with '
          'min_breast_overlap_threshold=%f contains less possible patches '
          'than max_number_of_trials_per_threshold=%d, in mammography %s',
          patch_size, min_breast_overlap_threshold,
          max_number_of_trials_per_threshold, image_filepath)
      effective_range_size = sampling_region_size
    for _ in range(effective_range_size):
      # `np.random.randint` excludes the upper bound, hence the `+ 1`.
      patch_y = np.random.randint(min_y, max_y + 1)
      patch_x = np.random.randint(min_x, max_x + 1)
      if (patch_overlapping_breast_is_feasible(patch_y, patch_x) and
          not _patch_overlaps_any_abnormality_above_threshold(
              patch_y, patch_x, patch_size, abnormalities_masks,
              abnormalities_areas, max_abnorm_overlap_threshold)):
        number_of_yielded_patches += 1
        yield image[patch_y:(patch_y + patch_h),
                    patch_x:(patch_x + patch_w)]
        # If we have yielded all requested patches return. The counter only
        # changes right after a yield, so this is the only place to check it.
        if number_of_yielded_patches >= number_of_patches:
          return
    # We failed to produce patches with the given overlapping requirements.
    # Relax the requirements and try again.
    min_breast_overlap_threshold = min_breast_overlap_threshold * 0.95
    max_abnorm_overlap_threshold = max_abnorm_overlap_threshold * 1.05
    logging.debug(
        'Overlapping constraints relaxed to min_breast_overlap_threshold=%f '
        'and max_abnorm_overlap_threshold=%f while sampling negative '
        'patches for the mammography %s', min_breast_overlap_threshold,
        max_abnorm_overlap_threshold,
        image_filepath)

  # This should not happen ever.
  raise ValueError(
      'Only %d negative patches of size %r could be sampled satisfying the '
      'current conditions (min. relative overlapping area with breast = %f, '
      'max. relative overlapping area with abnormalities = %f) for the '
      'mammography %s' %
      (number_of_yielded_patches, patch_size, min_breast_overlap_threshold,
       max_abnorm_overlap_threshold, image_filepath))