in python/mxboard/summary.py [0:0]
def audio_summary(tag, audio, sample_rate=44100):
    """Outputs a `Summary` protocol buffer with audio data.

    The samples are scaled to signed 16-bit integers and stored as a
    single-channel, 2-bytes-per-sample WAV stream inside the protobuf.

    Parameters
    ----------
    tag : str
        A name for the generated summary. Will also serve as a series name in TensorBoard.
    audio : MXNet `NDArray` or `numpy.ndarray`
        Audio data that can be squeezed into 1D array. The values are expected to be
        in the range [-1, 1]; values outside that range are clipped to it.
    sample_rate : int
        Sampling frequency. 44,100Hz is a common sampling frequency.

    Returns
    -------
    A `Summary` protobuf of the audio data.

    Raises
    ------
    ValueError
        If `audio` does not have exactly one dimension after squeezing.
    """
    audio = audio.squeeze()
    if audio.ndim != 1:
        raise ValueError('input audio must be squeezable to 1D, input audio squeezed '
                         'shape is {}'.format(audio.shape))
    # NOTE(review): assumes `_make_numpy_array` returns a `numpy.ndarray` -- confirm.
    samples = _make_numpy_array(audio)

    # A sample outside [-1, 1] would scale past the int16 range and make
    # `struct.pack` fail with an opaque error, so clamp before scaling.
    samples = samples.clip(-1.0, 1.0)
    pcm_values = [int(32767.0 * x) for x in samples]

    # Encode every sample with one call instead of growing a bytes object one
    # sample at a time, which copies the whole buffer on each iteration.
    pcm_bytes = struct.pack('<{}h'.format(len(pcm_values)), *pcm_values)

    with io.BytesIO() as fio:
        wave_writer = wave.open(fio, 'wb')
        wave_writer.setnchannels(1)
        wave_writer.setsampwidth(2)
        wave_writer.setframerate(sample_rate)
        wave_writer.writeframes(pcm_bytes)
        # Closing the writer finalizes the WAV header; read the buffer afterwards.
        wave_writer.close()
        audio_string = fio.getvalue()

    audio_proto = Summary.Audio(sample_rate=sample_rate,
                                num_channels=1,
                                length_frames=len(pcm_values),
                                encoded_audio_string=audio_string,
                                content_type='audio/wav')
    return Summary(value=[Summary.Value(tag=tag, audio=audio_proto)])