docker_images/asteroid/app/pipelines/audio_source_separation.py (12 lines of code) (raw):

from typing import Tuple

import numpy as np
from app.pipelines import Pipeline
from asteroid import separate
from asteroid.models import BaseModel


class AudioSourceSeparationPipeline(Pipeline):
    """Audio source separation pipeline backed by a pretrained Asteroid model."""

    def __init__(self, model_id: str):
        """
        Load the pretrained model and record its sampling rate.

        Args:
            model_id (:obj:`str`):
                Identifier handed to :meth:`BaseModel.from_pretrained`.
        """
        self.model = BaseModel.from_pretrained(model_id)
        # Kept as an attribute next to the model; presumably read by the
        # serving layer to resample incoming audio -- verify against
        # `app.validation`.
        self.sampling_rate = self.model.sample_rate

    def __call__(self, inputs: np.ndarray) -> Tuple[np.ndarray, int]:
        """
        Args:
            inputs (:obj:`np.ndarray`):
                The raw waveform of audio received. By default at 16KHz.
                Check `app.validation` if a different sample rate is required
                or if it depends on the model
        Return:
            A :obj:`np.ndarray` and a :obj:`int`: The raw waveform as a numpy
            array, and the sampling rate as an int.
        """
        # Pass wav as [batch, n_chan, time]; here: [1, 1, time]
        batched = inputs.reshape((1, 1, -1))
        separated = separate.numpy_separate(self.model, batched)
        # FIXME: how to deal with multiple sources?
        # Only the entry at index [0, 0] is returned; any further entries
        # along the second axis (presumably the other estimated sources) are
        # dropped.
        return separated[0, 0], int(self.model.sample_rate)