demo.py [77:90]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
		# Load the video frame for this window: the file name is frame_index
		# zero-padded to 6 digits plus '.png', looked up under
		# opt.video_frame_path, and decoded as RGB.
		image = Image.open(os.path.join(opt.video_frame_path, str(frame_index).zfill(6) + '.png')).convert('RGB')
		#image = image.transpose(Image.FLIP_LEFT_RIGHT)
		frame = vision_transform(image).unsqueeze(0) #unsqueeze to add a batch dimension
		data['frame'] = frame

		# Run the network on the assembled inputs and pull the predicted
		# spectrogram out as a NumPy array on the CPU. Index 0 selects the
		# first entry along the leading axis (presumably the batch dimension
		# added above -- confirm against the model's output layout).
		output = model.forward(data)
		predicted_spectrogram = output['binaural_spectrogram'][0,:,:,:].data[:].cpu().numpy()

		#ISTFT to convert back to audio
		# Channel 0 is used as the real part and channel 1 as the imaginary
		# part of the complex STFT.
		reconstructed_stft_diff = predicted_spectrogram[0,:,:] + (1j * predicted_spectrogram[1,:,:])
		# length=samples_per_window asks librosa to pad/trim the output to that
		# many samples so it can be combined element-wise with audio_segment_mix.
		# NOTE(review): hop_length/win_length must match the forward STFT used
		# to build the model input -- that call is outside this excerpt.
		reconstructed_signal_diff = librosa.istft(reconstructed_stft_diff, hop_length=160, win_length=400, center=True, length=samples_per_window)
		# Half the sum and half the difference of the mix and the predicted
		# signal. Presumably mix = left + right and diff = left - right, which
		# makes these the two individual channels -- confirm against training.
		reconstructed_signal_left = (audio_segment_mix + reconstructed_signal_diff) / 2
		reconstructed_signal_right = (audio_segment_mix - reconstructed_signal_diff) / 2
		# Stack left then right along a new leading axis and scale the result
		# by normalizer (defined outside this excerpt).
		reconstructed_binaural = np.concatenate((np.expand_dims(reconstructed_signal_left, axis=0), np.expand_dims(reconstructed_signal_right, axis=0)), axis=0) * normalizer
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -


demo.py [104:115]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
	# Load the video frame for this window: the file name is frame_index
	# zero-padded to 6 digits plus '.png', looked up under
	# opt.video_frame_path, and decoded as RGB.
	image = Image.open(os.path.join(opt.video_frame_path, str(frame_index).zfill(6) + '.png')).convert('RGB')
	#image = image.transpose(Image.FLIP_LEFT_RIGHT)
	frame = vision_transform(image).unsqueeze(0) #unsqueeze to add a batch dimension
	data['frame'] = frame
	# Run the network and pull the predicted spectrogram out as a NumPy array
	# on the CPU. Index 0 selects the first entry along the leading axis
	# (presumably the batch dimension added above -- confirm against the
	# model's output layout).
	output = model.forward(data)
	predicted_spectrogram = output['binaural_spectrogram'][0,:,:,:].data[:].cpu().numpy()
	#ISTFT to convert back to audio
	# Channel 0 is used as the real part and channel 1 as the imaginary part
	# of the complex STFT.
	reconstructed_stft_diff = predicted_spectrogram[0,:,:] + (1j * predicted_spectrogram[1,:,:])
	# length=samples_per_window asks librosa to pad/trim the output to that
	# many samples so it can be combined element-wise with audio_segment_mix.
	# NOTE(review): hop_length/win_length must match the forward STFT used to
	# build the model input -- that call is outside this excerpt.
	reconstructed_signal_diff = librosa.istft(reconstructed_stft_diff, hop_length=160, win_length=400, center=True, length=samples_per_window)
	# Half the sum and half the difference of the mix and the predicted
	# signal. Presumably mix = left + right and diff = left - right, which
	# makes these the two individual channels -- confirm against training.
	reconstructed_signal_left = (audio_segment_mix + reconstructed_signal_diff) / 2
	reconstructed_signal_right = (audio_segment_mix - reconstructed_signal_diff) / 2
	# Stack left then right along a new leading axis and scale the result by
	# normalizer (defined outside this excerpt).
	reconstructed_binaural = np.concatenate((np.expand_dims(reconstructed_signal_left, axis=0), np.expand_dims(reconstructed_signal_right, axis=0)), axis=0) * normalizer
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -