in demucs/audio.py [0:0]
def read(self,
seek_time=None,
duration=None,
streams=slice(None),
samplerate=None,
channels=None,
temp_folder=None):
"""
Slightly more efficient implementation than stempeg,
in particular, this will extract all stems at once
rather than having to loop over one file multiple times
for each stream.
Args:
seek_time (float): seek time in seconds or None if no seeking is needed.
duration (float): duration in seconds to extract or None to extract until the end.
streams (slice, int or list): streams to extract, can be a single int, a list or
a slice. If it is a slice or list, the output will be of size [S, C, T]
with S the number of streams, C the number of channels and T the number of samples.
If it is an int, the output will be [C, T].
samplerate (int): if provided, will resample on the fly. If None, no resampling will
be done. Original sampling rate can be obtained with :method:`samplerate`.
channels (int): if 1, will convert to mono. We do not rely on ffmpeg for that
as ffmpeg automatically scale by +3dB to conserve volume when playing on speakers.
See https://sound.stackexchange.com/a/42710.
Our definition of mono is simply the average of the two channels. Any other
value will be ignored.
temp_folder (str or Path or None): temporary folder to use for decoding.
"""
streams = np.array(range(len(self)))[streams]
single = not isinstance(streams, np.ndarray)
if single:
streams = [streams]
if duration is None:
target_size = None
query_duration = None
else:
target_size = int((samplerate or self.samplerate()) * duration)
query_duration = float((target_size + 1) / (samplerate or self.samplerate()))
with temp_filenames(len(streams)) as filenames:
command = ['ffmpeg', '-y']
command += ['-loglevel', 'panic']
if seek_time:
command += ['-ss', str(seek_time)]
command += ['-i', str(self.path)]
for stream, filename in zip(streams, filenames):
command += ['-map', f'0:{self._audio_streams[stream]}']
if query_duration is not None:
command += ['-t', str(query_duration)]
command += ['-threads', '1']
command += ['-f', 'f32le']
if samplerate is not None:
command += ['-ar', str(samplerate)]
command += [filename]
sp.run(command, check=True)
wavs = []
for filename in filenames:
wav = np.fromfile(filename, dtype=np.float32)
wav = torch.from_numpy(wav)
wav = wav.view(-1, self.channels()).t()
if channels is not None:
wav = convert_audio_channels(wav, channels)
if target_size is not None:
wav = wav[..., :target_size]
wavs.append(wav)
wav = torch.stack(wavs, dim=0)
if single:
wav = wav[0]
return wav