def __getitem__()

in mico/dataloader/query_doc_pair.py [0:0]


    def __getitem__(self, idx):
        """Fetch one sample (from the train/val/test dataset) from the CSV file.

        If it is a training or validation dataset, `idx` is first remapped
        block-wise through `self.idx_mapping` to find the line number. The
        line is then reached by seeking to a saved file offset and reading
        forward line by line.

        Parameters
        ----------
        idx : int
            Which sample we are fetching.

        Returns
        -------
        parsed_list : list
            If is_get_all_info is True, return a list of strings: [query, ID, document, click, purchase]
            If is_get_all_info is False (by default), return a list of strings: [query, document]

        Raises
        ------
        IOError
            If opening, reading or parsing the requested CSV line fails. The
            underlying error is chained as ``__cause__``.
        """
        if not self.is_test_data:
            # Split the index into (block, position inside the block), remap
            # only the block number, then rebuild the line number.
            block_idx = idx // self.train_val_subblock_size
            in_block_idx = idx % self.train_val_subblock_size
            block_idx = self.idx_mapping[block_idx]
            idx = block_idx * self.train_val_subblock_size + in_block_idx
        idx += self._is_csv_header  # the CSV header is not a sample
        # Offsets are only looked up once per `offset_save_per_line` lines
        # (presumably that is how `offset_data` was built -- confirm against
        # the code that fills it); the remainder is covered by reading forward.
        # These lookups stay outside the `try`, so their errors are not wrapped.
        offset_idx = idx // self.offset_save_per_line
        offset = self.offset_data[offset_idx]
        try:
            with open(self._filename, 'r') as csv_file:
                csv_file.seek(offset)
                # The last line read is the requested one.
                for _ in range(1 + idx % self.offset_save_per_line):
                    line = csv_file.readline()
            # NUL characters are stripped before handing the line to `csv`.
            line = line.replace('\0', '')
            csv_line = csv.reader([line], **self.csv_reader_setting)
            parsed_list = next(csv_line)  # [query, ID, document, click, purchase]
            if self.is_get_all_info:
                return parsed_list
            return [parsed_list[0], parsed_list[2]]
        except Exception as err:
            # This is for quick inspection about which part goes wrong.
            # `Exception` (not a bare `except`) keeps KeyboardInterrupt and
            # SystemExit from being disguised as a read error.
            error_message = "Something wrong when reading CSV samples.\n Details (filename, line index): {}, \t {}".format(self._filename, idx)
            raise IOError(error_message) from err