def pitch_apply()

in dataspeech/gpu_enrichments/pitch.py [0:0]
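
The function reads penn, torch, and several module-level pitch settings (hopsize, fmin, fmax, checkpoint, center, interp_unvoiced_at) that live at the top of pitch.py but are not shown in this excerpt. A minimal sketch of that preamble, assuming values along the lines of penn's documented example configuration (the repository's actual values may differ):

import penn
import torch

# Assumed hop size in seconds between pitch frames
hopsize = .01

# Assumed pitch search range in Hz, wide enough for speech
fmin = 30.
fmax = 1000.

# None tells penn to download and use its default pretrained checkpoint
checkpoint = None

# Frame-centering mode understood by penn
center = 'half-hop'

# Assumed periodicity threshold below which unvoiced pitch is interpolated
interp_unvoiced_at = .065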


def pitch_apply(batch, rank=None, audio_column_name="audio", output_column_name="utterance_pitch", penn_batch_size=4096):
    """Estimate pitch with penn and add utterance-level pitch mean/std columns to the batch."""
    if isinstance(batch[audio_column_name], list):
        # Batched mapping: the audio column holds a list of samples.
        utterance_pitch_mean = []
        utterance_pitch_std = []
        for sample in batch[audio_column_name]:
            # Infer pitch and periodicity
            pitch, periodicity = penn.from_audio(
                torch.tensor(sample["array"][None, :]).float(),
                sample["sampling_rate"],
                hopsize=hopsize,
                fmin=fmin,
                fmax=fmax,
                checkpoint=checkpoint,
                batch_size=penn_batch_size,
                center=center,
                interp_unvoiced_at=interp_unvoiced_at,
                # Round-robin GPU assignment by process rank; without CUDA, rank is passed through unchanged (typically None, i.e. CPU).
                gpu=(rank or 0) % torch.cuda.device_count() if torch.cuda.device_count() > 0 else rank
                )
            
            # Summarize each utterance by the mean and standard deviation of its pitch contour
            utterance_pitch_mean.append(pitch.mean().cpu())
            utterance_pitch_std.append(pitch.std().cpu())
            
        batch[f"{output_column_name}_mean"] = utterance_pitch_mean 
        batch[f"{output_column_name}_std"] = utterance_pitch_std 
    else:
        # Single (non-batched) sample
        sample = batch[audio_column_name]
        pitch, periodicity = penn.from_audio(
            torch.tensor(sample["array"][None, :]).float(),
            sample["sampling_rate"],
            hopsize=hopsize,
            fmin=fmin,
            fmax=fmax,
            checkpoint=checkpoint,
            batch_size=penn_batch_size,
            center=center,
            interp_unvoiced_at=interp_unvoiced_at,
            gpu=(rank or 0) % torch.cuda.device_count() if torch.cuda.device_count() > 0 else rank,
        )
        batch[f"{output_column_name}_mean"] = pitch.mean().cpu()
        batch[f"{output_column_name}_std"] = pitch.std().cpu()

    return batch
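
Downstream, dataspeech applies this function over a Hugging Face datasets dataset. A sketch of such an invocation, assuming a placeholder dataset name and illustrative batch_size/num_proc values; with_rank=True hands each worker a rank so the gpu argument above can round-robin over available GPUs:

import torch
from datasets import load_dataset

# Placeholder dataset name; any dataset with an "audio" column works the same way.
dataset = load_dataset("my-username/my-speech-dataset", split="train")

dataset = dataset.map(
    pitch_apply,
    batched=True,
    batch_size=16,
    with_rank=True,
    num_proc=max(1, torch.cuda.device_count()),
)

print(dataset[0]["utterance_pitch_mean"], dataset[0]["utterance_pitch_std"])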