scripts/tf_cnn_benchmarks/preprocessing.py [650:686]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  def create_dataset(self,
                     batch_size,
                     num_splits,
                     batch_size_per_split,
                     dataset,
                     subset,
                     train,
                     datasets_repeat_cached_sample,
                     num_threads=None,
                     datasets_use_caching=False,
                     datasets_parallel_interleave_cycle_length=None,
                     datasets_sloppy_parallel_interleave=False,
                     datasets_parallel_interleave_prefetch=None):
    """Creates a dataset for the benchmark."""
    assert self.supports_datasets()
    glob_pattern = dataset.tf_record_pattern(subset)
    file_names = gfile.Glob(glob_pattern)
    if not file_names:
      raise ValueError('Found no files in --data_dir matching: {}'
                       .format(glob_pattern))
    ds = tf.data.TFRecordDataset.list_files(file_names, shuffle=train)
    ds = ds.apply(
        tf.data.experimental.parallel_interleave(
            tf.data.TFRecordDataset,
            cycle_length=datasets_parallel_interleave_cycle_length or 10,
            sloppy=datasets_sloppy_parallel_interleave,
            prefetch_input_elements=datasets_parallel_interleave_prefetch))
    if datasets_repeat_cached_sample:
      # Repeat a single sample element indefinitely to emulate memory-speed IO.
      ds = ds.take(1).cache().repeat()
    # Zip each record with a cycling 0..batch_size-1 counter; the map function
    # downstream receives this value as the element's position in the batch.
    counter = tf.data.Dataset.range(batch_size)
    counter = counter.repeat()
    ds = tf.data.Dataset.zip((ds, counter))
    ds = ds.prefetch(buffer_size=batch_size)
    if datasets_use_caching:
      ds = ds.cache()
    if train:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
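Note: tf.data.experimental.parallel_interleave, used in both copies of this
excerpt, is deprecated in newer TensorFlow releases in favor of the built-in
Dataset.interleave. A minimal sketch of the same fan-out read with the
non-deprecated API (assumes TF >= 2.2, where interleave accepts a
`deterministic` argument; `make_record_dataset` and its parameters are
illustrative names, not part of the benchmark code):

import tensorflow as tf

def make_record_dataset(file_names, train, cycle_length=10, sloppy=False):
  # Shuffle the file list only when training, as in the excerpt above.
  ds = tf.data.Dataset.list_files(file_names, shuffle=train)
  # Read `cycle_length` files concurrently; `sloppy=True` corresponds to
  # allowing nondeterministic element order for extra throughput.
  return ds.interleave(
      tf.data.TFRecordDataset,
      cycle_length=cycle_length,
      num_parallel_calls=tf.data.experimental.AUTOTUNE,
      deterministic=not sloppy)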



scripts/tf_cnn_benchmarks/preprocessing.py [1205:1244]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  def create_dataset(self,
                     batch_size,
                     num_splits,
                     batch_size_per_split,
                     dataset,
                     subset,
                     train,
                     datasets_repeat_cached_sample,
                     num_threads=None,
                     datasets_use_caching=False,
                     datasets_parallel_interleave_cycle_length=None,
                     datasets_sloppy_parallel_interleave=False,
                     datasets_parallel_interleave_prefetch=None):
    """Creates a dataset for the benchmark."""
    # TODO(laigd): currently the only difference between this and the one in
    # BaseImagePreprocessor is that this one uses map() and padded_batch()
    # while the latter uses tf.data.experimental.map_and_batch(). Try to merge
    # them.
    assert self.supports_datasets()
    glob_pattern = dataset.tf_record_pattern(subset)
    file_names = gfile.Glob(glob_pattern)
    if not file_names:
      raise ValueError('Found no files in --data_dir matching: {}'
                       .format(glob_pattern))
    ds = tf.data.TFRecordDataset.list_files(file_names, shuffle=train)
    ds = ds.apply(
        tf.data.experimental.parallel_interleave(
            tf.data.TFRecordDataset,
            cycle_length=datasets_parallel_interleave_cycle_length or 10,
            sloppy=datasets_sloppy_parallel_interleave,
            prefetch_input_elements=datasets_parallel_interleave_prefetch))
    if datasets_repeat_cached_sample:
      # Repeat a single sample element indefinitely to emulate memory-speed IO.
      ds = ds.take(1).cache().repeat()
    # Zip each record with a cycling 0..batch_size-1 counter; the map function
    # downstream receives this value as the element's position in the batch.
    counter = tf.data.Dataset.range(batch_size)
    counter = counter.repeat()
    ds = tf.data.Dataset.zip((ds, counter))
    ds = ds.prefetch(buffer_size=batch_size)
    if datasets_use_caching:
      ds = ds.cache()
    if train:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
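Note: per the TODO above, the two copies diverge only after this point, in how
they batch. A hypothetical side-by-side of the two batching styles the TODO
refers to (`parse_fn`, and the exact arguments, are illustrative placeholders,
not the benchmark's real values; the padded_batch() call assumes TF >= 2.2,
where `padded_shapes` defaults to each component's shape):

# Style of the variant above: map() then padded_batch(), which suits
# elements with variable-length components by padding each component
# to the largest size in the batch.
ds = ds.map(parse_fn, num_parallel_calls=num_threads)
ds = ds.padded_batch(batch_size_per_split, drop_remainder=True)

# Style of BaseImagePreprocessor: a single fused transformation that
# maps and batches fixed-shape elements in one step.
ds = ds.apply(
    tf.data.experimental.map_and_batch(
        map_func=parse_fn,
        batch_size=batch_size_per_split,
        num_parallel_batches=num_splits))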



