def __init__()

in src/dataset.py [0:0]


    def __init__(self,
                 dataset_directory,
                 chunk_size_ms=200,
                 overlap=0.5
                 ):
        """
        Load the recordings of all subjects and index them as fixed-size, overlapping chunks.

        :param dataset_directory: directory containing the sub-directories subject1 ... subject8, each holding
                                  mono.wav, binaural.wav and tx_positions.txt
        :param chunk_size_ms: requested chunk length in milliseconds; converted to samples at 48 samples/ms
                              and rounded up to the next multiple of 400 samples
        :param overlap: fraction of a chunk shared with the following chunk; the hop between consecutive
                        chunk offsets is int((1 - overlap) * chunk_size) samples
        :raises ValueError: if chunk_size_ms and overlap result in a hop length smaller than one sample
        """
        super().__init__()
        num_subjects = 8
        # ensure that chunk_size is a multiple of 400 to match audio (48kHz) and receiver/transmitter positions (120Hz)
        self.chunk_size = chunk_size_ms * 48
        remainder = self.chunk_size % 400
        if remainder > 0:
            self.chunk_size += 400 - remainder
        # the hop is identical for every subject, so compute and validate it once, before any file is read;
        # a hop of zero would make range() fail with an unrelated message and a negative hop would silently
        # produce a dataset without any chunks
        hop_length = int((1 - overlap) * self.chunk_size)
        if hop_length <= 0:
            raise ValueError(
                f"chunk_size_ms={chunk_size_ms} and overlap={overlap} result in a hop length of "
                f"{hop_length} samples, but at least 1 sample is required"
            )
        # load audio data and relative transmitter/receiver position/orientation
        self.mono, self.binaural, self.view = [], [], []
        for subject_id in range(num_subjects):
            # subject directories are numbered starting at 1
            subject_dir = f"{dataset_directory}/subject{subject_id + 1}"
            # second return value of ta.load is discarded (presumably the sample rate - the code assumes
            # 48kHz audio and does not verify it)
            mono, _ = ta.load(f"{subject_dir}/mono.wav")
            binaural, _ = ta.load(f"{subject_dir}/binaural.wav")
            # receiver is fixed at origin in this dataset, so we only need transmitter view
            # NOTE(review): after the transpose the last axis is presumably time (120Hz frames) - confirm
            tx_view = np.loadtxt(f"{subject_dir}/tx_positions.txt").transpose()
            self.mono.append(mono)
            self.binaural.append(binaural)
            self.view.append(tx_view.astype(np.float32))
        # compute chunks: one entry per (subject, start offset in samples)
        self.chunks = []
        for subject_id, mono in enumerate(self.mono):
            # + 1 because range() excludes its stop value and the last full chunk may start exactly at
            # length - chunk_size
            last_chunk_start_frame = mono.shape[-1] - self.chunk_size + 1
            self.chunks.extend(
                {'subject': subject_id, 'offset': offset}
                for offset in range(0, last_chunk_start_frame, hop_length)
            )