def __getitem__()

in data/dataset.py [0:0]


    def __getitem__(self, idx):
        """Assemble one sample: a cropped input image and depthmap targets for sampled cameras.

        Args:
            idx: index into ``self.framelist``.

        Returns:
            A tuple ``(inputs, targets, meta_info)`` where
            ``inputs`` is ``{'img': ...}`` (the transformed input crop divided by 255),
            ``targets`` is ``{'depthmap': [...], 'joint': ...}`` with one transformed
            depthmap crop per sampled camera plus the frame's joint annotation, and
            ``meta_info`` is ``{'cam_param': [...], 'affine_trans': [...]}`` with the
            camera parameters and the float32 patch transform of each sampled camera.
        """
        sample = self.framelist[idx]
        img_path = sample['img_path']
        input_cam = sample['cam']
        frame_idx = sample['frame_idx']
        joint = sample['joint']
        joint_coord = joint['world_coord']
        joint_valid = joint['valid']

        def clamped_bbox(cam_key):
            # Box returned by get_bbox for this camera, clamped to
            # [0, W-1] x [0, H-1] of self.original_img_shape; returned as (xmin, ymin, xmax, ymax).
            left, top, right, bottom = get_bbox(joint_coord, joint_valid, self.camrot[cam_key], self.campos[cam_key], self.focal[cam_key], self.princpt[cam_key])
            left = max(left, 0)
            top = max(top, 0)
            right = min(right, self.original_img_shape[1] - 1)
            bottom = min(bottom, self.original_img_shape[0] - 1)
            return np.array([left, top, right, bottom])

        def rescaled_xywh(box, height, width):
            # The stored array may have a different resolution than original_img_shape,
            # so scale the corners to it and convert to (x, y, w, h).
            left, top, right, bottom = box
            left, right = np.array([left, right]) / self.original_img_shape[1] * width
            top, bottom = np.array([top, bottom]) / self.original_img_shape[0] * height
            return np.array([left, top, right - left + 1, bottom - top + 1])

        # input image: crop around the bbox seen from the frame's own camera
        input_box = clamped_bbox(input_cam)
        img = load_img(img_path)
        input_patch_box = rescaled_xywh(input_box, img.shape[0], img.shape[1])
        img = generate_patch_image(img, input_patch_box, False, 1.0, 0.0, cfg.input_img_shape)
        input_img = self.transform(img) / 255.

        # targets: depthmap crops from a random subset of the selected cameras
        target_depthmaps = []
        cam_params = []
        affine_transes = []
        for view_cam in random.sample(self.selected_cameras, cfg.render_view_num):
            view_box = clamped_bbox(view_cam)

            # NOTE: pickle.load can execute arbitrary code; the depthmap files are
            # assumed to be trusted dataset artifacts.
            depthmap_path = osp.join(self.annot_path, 'depthmaps', 'subject_' + cfg.subject, "{:06d}".format(frame_idx), 'depthmap' + view_cam + '.pkl')
            with open(depthmap_path, 'rb') as f:
                depthmap = pickle.load(f).astype(np.float32)

            view_patch_box = rescaled_xywh(view_box, depthmap.shape[0], depthmap.shape[1])
            depthmap = generate_patch_image(depthmap[:, :, None], view_patch_box, False, 1.0, 0.0, cfg.rendered_img_shape)
            target_depthmaps.append(self.transform(depthmap))

            # The patch transform is built from the clamped bbox at original resolution,
            # not from the rescaled depthmap box.
            left, top, right, bottom = view_box
            affine = gen_trans_from_patch_cv((left + right + 1) / 2., (top + bottom + 1) / 2., right - left + 1, bottom - top + 1, cfg.rendered_img_shape[1], cfg.rendered_img_shape[0], 1.0, 0.0)
            affine_transes.append(affine.astype(np.float32))

            cam_params.append({
                'camrot': self.camrot[view_cam],
                'campos': self.campos[view_cam],
                'focal': self.focal[view_cam],
                'princpt': self.princpt[view_cam],
            })

        inputs = {'img': input_img}
        targets = {'depthmap': target_depthmaps, 'joint': joint}
        meta_info = {'cam_param': cam_params, 'affine_trans': affine_transes}

        return inputs, targets, meta_info