in dataspeech/gpu_enrichments/pitch.py [0:0]
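import penn
import torch

# NOTE: the module-level penn settings referenced below (hopsize, fmin, fmax,
# checkpoint, center, interp_unvoiced_at) are not shown in this excerpt. The
# values here are an assumed, illustrative configuration: a 10 ms hop, a pitch
# range wide enough for speech, penn's default pretrained checkpoint, half-hop
# frame centering, and interpolation of unvoiced frames below a periodicity
# threshold. Replace them with the actual module constants if they differ.
hopsize = 0.01              # hop size in seconds between pitch frames
fmin = 30.0                 # lower bound of the pitch search range, in Hz
fmax = 1000.0               # upper bound of the pitch search range, in Hz
checkpoint = None           # None is assumed to select penn's default pretrained checkpoint
center = "half-hop"         # frame-centering strategy passed through to penn
interp_unvoiced_at = 0.065  # interpolate pitch where periodicity falls below this value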
def pitch_apply(batch, rank=None, audio_column_name="audio", output_column_name="utterance_pitch", penn_batch_size=4096):
    # When the dataset is mapped with batched=True, the audio column is a list
    # of samples; otherwise it is a single sample dict.
    if isinstance(batch[audio_column_name], list):
        utterance_pitch_mean = []
        utterance_pitch_std = []
        for sample in batch[audio_column_name]:
            # Infer pitch and periodicity
            pitch, periodicity = penn.from_audio(
                torch.tensor(sample["array"][None, :]).float(),
                sample["sampling_rate"],
                hopsize=hopsize,
                fmin=fmin,
                fmax=fmax,
                checkpoint=checkpoint,
                batch_size=penn_batch_size,
                center=center,
                interp_unvoiced_at=interp_unvoiced_at,
                # Map the process rank onto an available GPU; fall back to the
                # raw rank (typically None, i.e. CPU) when no GPU is visible.
                gpu=(rank or 0) % torch.cuda.device_count() if torch.cuda.device_count() > 0 else rank,
            )
            utterance_pitch_mean.append(pitch.mean().cpu())
            utterance_pitch_std.append(pitch.std().cpu())
        batch[f"{output_column_name}_mean"] = utterance_pitch_mean
        batch[f"{output_column_name}_std"] = utterance_pitch_std
    else:
        sample = batch[audio_column_name]
        pitch, periodicity = penn.from_audio(
            torch.tensor(sample["array"][None, :]).float(),
            sample["sampling_rate"],
            hopsize=hopsize,
            fmin=fmin,
            fmax=fmax,
            checkpoint=checkpoint,
            batch_size=penn_batch_size,
            center=center,
            interp_unvoiced_at=interp_unvoiced_at,
            gpu=(rank or 0) % torch.cuda.device_count() if torch.cuda.device_count() > 0 else rank,
        )
        batch[f"{output_column_name}_mean"] = pitch.mean().cpu()
        batch[f"{output_column_name}_std"] = pitch.std().cpu()
    return batch
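
A minimal usage sketch (not part of the file), assuming a Hugging Face datasets Dataset with an "audio" column; the dataset name and map parameters below are hypothetical, chosen only to show how with_rank feeds the rank argument that selects a GPU:

from datasets import load_dataset

# Hypothetical dataset; any dataset whose "audio" column yields dicts with
# "array" and "sampling_rate" fields works the same way.
dataset = load_dataset("some/speech-dataset", split="train")

dataset = dataset.map(
    pitch_apply,
    batched=True,
    batch_size=16,
    with_rank=True,  # passes the process rank as the `rank` argument
    num_proc=torch.cuda.device_count() or 1,
    fn_kwargs={"penn_batch_size": 4096},
)
# The mapped dataset gains "utterance_pitch_mean" and "utterance_pitch_std" columns.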