def read()

in torchbenchmark/models/demucs/demucs/audio.py [0:0]
53 lines of code
19 McCabe index (conditional complexity)

    def read(self,
             seek_time=None,
             duration=None,
             streams=slice(None),
             samplerate=None,
             channels=None,
             temp_folder=None):
        """
        Slightly more efficient implementation than stempeg,
        in particular, this will extract all stems at once
        rather than having to loop over one file multiple times
        for each stream.

        Args:
            seek_time (float):  seek time in seconds or None if no seeking is needed.
            duration (float): duration in seconds to extract or None to extract until the end.
            streams (slice, int or list): streams to extract, can be a single int, a list or
                a slice. If it is a slice or list, the output will be of size [S, C, T]
                with S the number of streams, C the number of channels and T the number of samples.
                If it is an int, the output will be [C, T].
            samplerate (int): if provided, will resample on the fly. If None, no resampling will
                be done. Original sampling rate can be obtained with :method:`samplerate`.
            channels (int): if 1, will convert to mono. We do not rely on ffmpeg for that
                as ffmpeg automatically scale by +3dB to conserve volume when playing on speakers.
                See https://sound.stackexchange.com/a/42710.
                Our definition of mono is simply the average of the two channels. Any other
                value will be ignored.
            temp_folder (str or Path or None): temporary folder to use for decoding.


        """
        streams = np.array(range(len(self)))[streams]
        single = not isinstance(streams, np.ndarray)
        if single:
            streams = [streams]

        if duration is None:
            target_size = None
            query_duration = None
        else:
            target_size = int((samplerate or self.samplerate()) * duration)
            query_duration = float((target_size + 1) / (samplerate or self.samplerate()))

        with temp_filenames(len(streams)) as filenames:
            command = ['ffmpeg', '-y']
            command += ['-loglevel', 'panic']
            if seek_time:
                command += ['-ss', str(seek_time)]
            command += ['-i', str(self.path)]
            for stream, filename in zip(streams, filenames):
                command += ['-map', f'0:{self._audio_streams[stream]}']
                if query_duration is not None:
                    command += ['-t', str(query_duration)]
                command += ['-threads', '1']
                command += ['-f', 'f32le']
                if samplerate is not None:
                    command += ['-ar', str(samplerate)]
                command += [filename]

            sp.run(command, check=True)
            wavs = []
            for filename in filenames:
                wav = np.fromfile(filename, dtype=np.float32)
                wav = torch.from_numpy(wav)
                wav = wav.view(-1, self.channels()).t()
                if channels == 1:
                    # Case 1:
                    # The caller asked 1-channel audio, but the stream have multiple
                    # channels, downmix all channels.
                    # We do mono convertion here as ffmpeg mess up the volume of mono output
                    # otherwise. See https://sound.stackexchange.com/a/42710.
                    wav = wav.mean(dim=0, keepdim=True)
                elif self.channels() == 1 and channels != 1:
                    # Case 2:
                    # The caller asked for multiple channels, but the input file have
                    # one single channel, replicate the audio over all channels.
                    wav = wav.as_strided(size=(channels, wav.shape[1]), stride=(0, 1))
                elif self.channels() >= channels:
                    # Case 3:
                    # The caller asked for multiple channels, and the input file have
                    # more channels than requested. In that case return the first channels.
                    wav = wav[:channels, :]
                else:
                    # Case 4: What is a reasonable choice here?
                    raise ValueError('The input file has less channels than requested')
                if target_size is not None:
                    wav = wav[..., :target_size]
                wavs.append(wav)
        wav = torch.stack(wavs, dim=0)
        if single:
            wav = wav[0]
        return wav