def _get_data_as_datasets()

in tensorflow_hub/tools/make_image_classifier/make_image_classifier_lib.py


import tensorflow as tf


def _get_data_as_datasets(image_dir, image_size, hparams):
  """Gets training and validation data via tf.data.Dataset.

  Args:
    image_dir: A Python string with the name of a directory that contains
      subdirectories of images, one per class.
    image_size: A list or tuple with 2 Python integers specifying the fixed
      height and width to which input images are resized.
    hparams: A HParams object with hyperparameters controlling the training.

  Returns:
    A nested tuple ((train_data, train_size),
                    (valid_data, valid_size), labels) where:
    train_data, valid_data: tf.data.Dataset objects for use with Model.fit,
      each yielding batches of (images, labels) tuples where
        images is a float32 Tensor of shape [batch_size, height, width, 3]
          with pixel values in the range [0, 1],
        labels is a float32 Tensor of shape [batch_size, num_classes]
          with one-hot encoded classes.
    train_size, valid_size: Python integers with the numbers of training
      and validation examples, respectively.
    labels: A tuple of strings with the class labels (subdirectory names).
      The index of a label in this tuple is the numeric class id.
  """
  # Reject a non-zero hparams.shear_range up front: shear is not supported
  # when using preprocessing layers.
  if hparams.shear_range != 0:
    raise ValueError("Found non-zero value for shear_range. Shear is not "
                     "supported when reading input with tf.data.Dataset "
                     "and using preprocessing layers.")

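  # image_dataset_from_directory infers one class per subdirectory; with
  # label_mode="categorical", labels are one-hot encoded.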
  train_ds = tf.keras.preprocessing.image_dataset_from_directory(
      image_dir,
      validation_split=hparams.validation_split,
      subset="training",
      label_mode="categorical",
      # A seed needs to be provided when using validation_split with
      # shuffle=True (the default).
      # A fixed seed is used so that the validation set is stable across runs.
      seed=123,
      image_size=image_size,
      batch_size=1)
  class_names = tuple(train_ds.class_names)
  train_size = train_ds.cardinality().numpy()
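  # With batch_size=1 above, cardinality() counts individual examples;
  # unbatching and rebatching below yields the configured batch size.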
  train_ds = train_ds.unbatch().batch(hparams.batch_size)
  train_ds = train_ds.repeat()

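  # Keras preprocessing layers run inside the tf.data pipeline; Rescaling
  # maps pixel values from [0, 255] to floats in [0, 1], as promised in the
  # docstring.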
  normalization_layer = tf.keras.layers.experimental.preprocessing.Rescaling(
      1. / 255)
  preprocessing_model = tf.keras.Sequential([normalization_layer])
  if hparams.do_data_augmentation:
    preprocessing_model.add(
        tf.keras.layers.experimental.preprocessing.RandomRotation(
            hparams.rotation_range))
    preprocessing_model.add(
        tf.keras.layers.experimental.preprocessing.RandomTranslation(
            0, hparams.width_shift_range))
    preprocessing_model.add(
        tf.keras.layers.experimental.preprocessing.RandomTranslation(
            hparams.height_shift_range, 0))
    # Like the old tf.keras.preprocessing.image.ImageDataGenerator(),
    # image sizes are fixed when reading, and then a random zoom is applied.
    # If all training inputs are larger than image_size, one could also use
    # RandomCrop with a batch size of 1 and rebatch later.
    preprocessing_model.add(
        tf.keras.layers.experimental.preprocessing.RandomZoom(
            hparams.zoom_range, hparams.zoom_range))
    if hparams.horizontal_flip:
      preprocessing_model.add(
          tf.keras.layers.experimental.preprocessing.RandomFlip(
              mode="horizontal"))
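  # Apply rescaling (and augmentation, if enabled) to each batch as it is
  # read; labels pass through unchanged.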
  train_ds = train_ds.map(lambda images, labels:
                          (preprocessing_model(images), labels))

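  # The same seed and validation_split make this the exact complement of the
  # training subset. Validation images are only rescaled, never augmented.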
  val_ds = tf.keras.preprocessing.image_dataset_from_directory(
      image_dir,
      validation_split=hparams.validation_split,
      subset="validation",
      label_mode="categorical",
      seed=123,
      shuffle=False,
      image_size=image_size,
      batch_size=1)
  valid_size = val_ds.cardinality().numpy()
  val_ds = val_ds.unbatch().batch(hparams.batch_size)
  val_ds = val_ds.map(lambda images, labels:
                      (normalization_layer(images), labels))

  return ((train_ds, train_size), (val_ds, valid_size), class_names)
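
For illustration, here is a minimal sketch of how the returned values might be consumed. The HParams stand-in, the image directory path, and the TF Hub module handle below are assumptions for this example, not part of this module:

import collections

import tensorflow as tf
import tensorflow_hub as hub

# Hypothetical stand-in for the module's HParams, with only the fields the
# function above reads.
HParams = collections.namedtuple(
    "HParams", ["batch_size", "validation_split", "do_data_augmentation",
                "shear_range", "rotation_range", "width_shift_range",
                "height_shift_range", "zoom_range", "horizontal_flip"])
hparams = HParams(batch_size=32, validation_split=0.2,
                  do_data_augmentation=False, shear_range=0,
                  rotation_range=0.1, width_shift_range=0.1,
                  height_shift_range=0.1, zoom_range=0.1,
                  horizontal_flip=True)

image_size = (224, 224)
(train_ds, train_size), (val_ds, valid_size), labels = _get_data_as_datasets(
    "/tmp/flower_photos", image_size, hparams)  # hypothetical image_dir

model = tf.keras.Sequential([
    tf.keras.layers.InputLayer(input_shape=image_size + (3,)),
    hub.KerasLayer(
        "https://tfhub.dev/google/imagenet/mobilenet_v2_100_224/feature_vector/4",
        trainable=False),
    tf.keras.layers.Dense(len(labels), activation="softmax"),
])
model.compile(optimizer="adam", loss="categorical_crossentropy",
              metrics=["accuracy"])

# train_ds repeats indefinitely, so steps_per_epoch must be passed; val_ds
# is finite and can be consumed in full each epoch.
model.fit(train_ds,
          epochs=5,
          steps_per_epoch=train_size // hparams.batch_size,
          validation_data=val_ds)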