in soundspaces/simulator.py [0:0]
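# Module-level imports assumed by this excerpt (defined elsewhere in simulator.py):
#   import logging
#   import os
#   import numpy as np
#   from scipy.io import wavfile
#   from scipy.signal import fftconvolve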
def _compute_audiogoal(self):
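    """Render the agent's binaural audio for the current simulation step.

    Convolves the current source sound with the binaural RIR recorded for the
    current receiver/source position pair and azimuth angle, and returns a
    (2, sampling_rate) waveform, i.e. one second of left/right audio.
    """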
    sampling_rate = self.config.AUDIO.RIR_SAMPLING_RATE
    if self._episode_step_count > self._duration:
        logging.debug('Step count is greater than duration. Empty spectrogram.')
        audiogoal = np.zeros((2, sampling_rate))
    else:
        binaural_rir_file = os.path.join(self.binaural_rir_dir, str(self.azimuth_angle), '{}_{}.wav'.format(
            self._receiver_position_index, self._source_position_index))
        try:
            sampling_freq, binaural_rir = wavfile.read(binaural_rir_file)  # float32
        except ValueError:
            logging.warning("{} file is not readable".format(binaural_rir_file))
            binaural_rir = np.zeros((sampling_rate, 2)).astype(np.float32)
        if len(binaural_rir) == 0:
            logging.debug("Empty RIR file at {}".format(binaural_rir_file))
            binaural_rir = np.zeros((sampling_rate, 2)).astype(np.float32)
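        # Two cases below: if the source clip is exactly one second long it is
        # convolved whole at every step; otherwise the clip is streamed one
        # one-second chunk per step, tracked by self._audio_index.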
        # by default, convolve in full mode, which preserves the direct sound
        if self.current_source_sound.shape[0] == sampling_rate:
            binaural_convolved = np.array([fftconvolve(self.current_source_sound, binaural_rir[:, channel])
                                           for channel in range(binaural_rir.shape[-1])])
            audiogoal = binaural_convolved[:, :sampling_rate]
        else:
            index = self._audio_index
            self._audio_index = (self._audio_index + 1) % self._audio_length
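            # Early chunks: not enough past samples to cover the RIR length, so
            # convolve from the start of the clip in full mode. Later chunks
            # prepend (rir_length - 1) samples of history and use 'valid' mode,
            # which carries the reverberation tail over from the previous step.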
            if index * sampling_rate - binaural_rir.shape[0] < 0:
                source_sound = self.current_source_sound[: (index + 1) * sampling_rate]
                binaural_convolved = np.array([fftconvolve(source_sound, binaural_rir[:, channel])
                                               for channel in range(binaural_rir.shape[-1])])
                audiogoal = binaural_convolved[:, index * sampling_rate: (index + 1) * sampling_rate]
            else:
                # include reverb from previous time step
                source_sound = self.current_source_sound[index * sampling_rate - binaural_rir.shape[0] + 1:
                                                         (index + 1) * sampling_rate]
                binaural_convolved = np.array([fftconvolve(source_sound, binaural_rir[:, channel], mode='valid')
                                               for channel in range(binaural_rir.shape[-1])])
                audiogoal = binaural_convolved
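        # Optionally render a distractor source with its own binaural RIR and
        # mix it into the agent's audio for this step.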
        if self.config.AUDIO.HAS_DISTRACTOR_SOUND:
            binaural_rir_file = os.path.join(self.binaural_rir_dir, str(self.azimuth_angle), '{}_{}.wav'.format(
                self._receiver_position_index, self._distractor_position_index))
            try:
                sampling_freq, distractor_rir = wavfile.read(binaural_rir_file)
            except ValueError:
                logging.warning("{} file is not readable".format(binaural_rir_file))
                distractor_rir = np.zeros((self.config.AUDIO.RIR_SAMPLING_RATE, 2)).astype(np.float32)
            if len(distractor_rir) == 0:
                logging.debug("Empty RIR file at {}".format(binaural_rir_file))
                distractor_rir = np.zeros((self.config.AUDIO.RIR_SAMPLING_RATE, 2)).astype(np.float32)
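            # Full-mode convolution again; only the first second of the
            # distractor is mixed into this step's audio.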
            distractor_convolved = np.array([fftconvolve(self._source_sound_dict[self._current_distractor_sound],
                                                         distractor_rir[:, channel])
                                             for channel in range(distractor_rir.shape[-1])])
            audiogoal += distractor_convolved[:, :sampling_rate]

    return audiogoal