in ludwig/features/audio_feature.py [0:0]
def _read_audio_and_transform_to_feature(filepath, audio_feature_dict,
                                         feature_dim, max_length,
                                         padding_value, normalization_type,
                                         audio_stats):
    """
    Load one audio file and convert it into a fixed-size feature matrix.

    The file is read with ``soundfile``, turned into either the raw signal
    or a 2D feature computed by ``AudioFeatureMixin._get_2D_feature``,
    optionally normalized, and finally truncated / padded along the first
    axis to exactly ``max_length`` rows.

    :param filepath: path to the audio file, passed to ``soundfile.read``
    :param audio_feature_dict: dictionary describing the audio feature; its
        ``TYPE`` entry selects the feature ('raw', 'stft', 'stft_phase',
        'group_delay' or 'fbank') and the whole dictionary is forwarded to
        ``AudioFeatureMixin._get_2D_feature`` for the non-raw types
    :param feature_dim: dimension of each feature frame, i.e. the number of
        columns of the returned array
    :param max_length: number of rows of the returned array; longer
        features are cut off, shorter ones are filled with
        ``padding_value``
    :param padding_value: value used to fill rows beyond the feature length
    :param normalization_type: 'per_file' normalizes the feature with its
        own mean and standard deviation taken along axis 0; 'global' is not
        implemented; any other value leaves the feature unchanged
    :param audio_stats: statistics object handed to
        ``AudioFeatureMixin._update`` together with the decoded audio and
        its sampling rate
    :return: ``np.float32`` array of shape ``(max_length, feature_dim)``
    :raises ValueError: if the feature type is not recognized or if
        ``normalization_type`` is 'global'

    If ``soundfile`` cannot be imported an error is logged and the process
    exits with status -1.
    """
    try:
        import soundfile
    except ImportError:
        logger.error(
            ' soundfile is not installed. '
            'In order to install all audio feature dependencies run '
            'pip install ludwig[audio]'
        )
        sys.exit(-1)

    kind = audio_feature_dict[TYPE]
    samples, sample_rate_hz = soundfile.read(filepath)

    # The stats are updated before the feature type is validated, so they
    # reflect every file that could be read, even if extraction fails below.
    AudioFeatureMixin._update(audio_stats, samples, sample_rate_hz)

    spectral_kinds = ['stft', 'stft_phase', 'group_delay', 'fbank']
    if kind == 'raw':
        # Append a trailing axis so the raw signal is indexable as 2D below.
        frames = np.expand_dims(samples, axis=-1)
    elif kind in spectral_kinds:
        spectral = AudioFeatureMixin._get_2D_feature(
            samples, kind, audio_feature_dict, sample_rate_hz
        )
        frames = np.transpose(spectral)
    else:
        raise ValueError('{} is not recognized.'.format(kind))

    if normalization_type == 'global':
        raise ValueError('not implemented yet')
    if normalization_type == 'per_file':
        frame_mean = np.mean(frames, axis=0)
        frame_std = np.std(frames, axis=0)
        centered = frames - frame_mean
        # The small epsilon keeps constant columns from dividing by zero.
        frames = np.divide(centered, frame_std + 1.0e-10)

    # Start from an all-padding matrix and overwrite its leading rows.
    padded = np.full((max_length, feature_dim), padding_value,
                     dtype=np.float32)
    rows_to_copy = min(frames.shape[0], max_length)
    padded[:rows_to_copy, :] = frames[:max_length, :]
    return padded