def _generate_examples()

in tensorflow_datasets/audio/speech_commands.py [0:0]


  def _generate_examples(self, archive, file_list):
    """Yields `(key, example)` pairs decoded from the wav files of an archive.

    The label of each file is derived from the name of the directory that
    directly contains it:
      * a directory listed in `WORDS` is used as the label itself;
      * `SILENCE` and `BACKGROUND_NOISE` directories are labeled `SILENCE`;
      * every other directory is labeled `UNKNOWN`.

    Files located in a `BACKGROUND_NOISE` directory are not emitted whole:
    they are cut into overlapping windows of `SAMPLE_RATE` samples, and each
    window becomes its own example.

    Args:
      archive: Iterable of `(path, file_obj)` pairs. `file_obj` is handed to
        `pydub.AudioSegment.from_file` and decoded as wav.
      file_list: Container of archive paths to keep (tested with `in`), or
        `None` to keep every file of the archive.

    Yields:
      `(example_id, {'audio': samples, 'label': label})` tuples, where
      `samples` is a numpy array built from the decoded audio samples. Keys
      have the form `<dir>_<filename>`, with `_<start>` appended for
      background-noise windows.
    """
    for path, file_obj in archive:
      if file_list is not None and path not in file_list:
        continue
      relpath, wavname = os.path.split(path)
      _, word = os.path.split(relpath)
      example_id = '{}_{}'.format(word, wavname)
      if word in WORDS:
        label = word
      elif word in (SILENCE, BACKGROUND_NOISE):
        # The main tar file already contains all of the test files, except for
        # the silence ones. In fact it does not contain silence files at all.
        # So for the test set we take the silence files from the test tar file,
        # while for train and validation we build them from the
        # _background_noise_ folder.
        label = SILENCE
      else:
        # Note that in the train and validation there are a lot more _unknown_
        # labels than any of the other ones.
        label = UNKNOWN

      # Resolved only once a file is actually going to be decoded, so the lazy
      # pydub import is still not triggered for files that are filtered out.
      pydub = lazy_imports_lib.lazy_imports.pydub

      if word == BACKGROUND_NOISE:
        # Special handling of background noise. We need to cut these files to
        # many small files with 1 seconds length, and transform it to silence.
        audio_samples = np.array(
            pydub.AudioSegment.from_file(
                file_obj, format='wav').get_array_of_samples())

        # Windows are SAMPLE_RATE samples long and start every half window.
        # NOTE: the stop bound is exclusive, so a window that would end exactly
        # on the last sample is not emitted. Kept as is on purpose: changing it
        # would change the set of generated examples.
        for start in range(0,
                           len(audio_samples) - SAMPLE_RATE, SAMPLE_RATE // 2):
          audio_segment = audio_samples[start:start + SAMPLE_RATE]
          cur_id = '{}_{}'.format(example_id, start)
          yield cur_id, {'audio': audio_segment, 'label': label}
        continue

      # Only the decoding is guarded: keeping the `yield` outside of the `try`
      # ensures that an exception raised at the suspension point is never
      # mistaken for a decoding failure and silently dropped.
      try:
        audio_samples = np.array(
            pydub.AudioSegment.from_file(
                file_obj, format='wav').get_array_of_samples())
      except pydub.exceptions.CouldntDecodeError:
        # Best effort: files that cannot be decoded are skipped.
        continue
      yield example_id, {'audio': audio_samples, 'label': label}