# easycv/datasets/pose/data_sources/oc_human.py

# Copyright (c) OpenMMLab. All rights reserved.
# Adapt from
# https://github.com/open-mmlab/mmpose/blob/master/mmpose/datasets/datasets/top_down/topdown_coco_dataset.py
import json
import logging
import os

import numpy as np

from easycv.datasets.registry import DATASOURCES
from easycv.framework.errors import ValueError
from .top_down import PoseTopDownSource

OC_HUMAN_DATASET_INFO = dict(
    dataset_name='OC HUMAN',
    paper_info=dict(
        author='Song-Hai Zhang, Ruilong Li, Xin Dong, Paul L. Rosin, '
        'Zixi Cai, Han Xi, Dingcheng Yang, Hao-Zhi Huang, Shi-Min Hu',
        title='Pose2Seg: Detection Free Human Instance Segmentation',
        container='Computer Vision and Pattern Recognition',
        year='2019',
        homepage='https://github.com/liruilong940607/OCHumanApi'),
    keypoint_info={
        0: dict(name='nose', id=0, color=[51, 153, 255], type='upper',
                swap=''),
        1: dict(name='left_eye', id=1, color=[51, 153, 255], type='upper',
                swap='right_eye'),
        2: dict(name='right_eye', id=2, color=[51, 153, 255], type='upper',
                swap='left_eye'),
        3: dict(name='left_ear', id=3, color=[51, 153, 255], type='upper',
                swap='right_ear'),
        4: dict(name='right_ear', id=4, color=[51, 153, 255], type='upper',
                swap='left_ear'),
        5: dict(name='left_shoulder', id=5, color=[0, 255, 0], type='upper',
                swap='right_shoulder'),
        6: dict(name='right_shoulder', id=6, color=[255, 128, 0],
                type='upper', swap='left_shoulder'),
        7: dict(name='left_elbow', id=7, color=[0, 255, 0], type='upper',
                swap='right_elbow'),
        8: dict(name='right_elbow', id=8, color=[255, 128, 0], type='upper',
                swap='left_elbow'),
        9: dict(name='left_wrist', id=9, color=[0, 255, 0], type='upper',
                swap='right_wrist'),
        10: dict(name='right_wrist', id=10, color=[255, 128, 0], type='upper',
                 swap='left_wrist'),
        11: dict(name='left_hip', id=11, color=[0, 255, 0], type='lower',
                 swap='right_hip'),
        12: dict(name='right_hip', id=12, color=[255, 128, 0], type='lower',
                 swap='left_hip'),
        13: dict(name='left_knee', id=13, color=[0, 255, 0], type='lower',
                 swap='right_knee'),
        14: dict(name='right_knee', id=14, color=[255, 128, 0], type='lower',
                 swap='left_knee'),
        15: dict(name='left_ankle', id=15, color=[0, 255, 0], type='lower',
                 swap='right_ankle'),
        16: dict(name='right_ankle', id=16, color=[255, 128, 0], type='lower',
                 swap='left_ankle')
    },
    skeleton_info={
        0: dict(link=('left_ankle', 'left_knee'), id=0, color=[0, 255, 0]),
        1: dict(link=('left_knee', 'left_hip'), id=1, color=[0, 255, 0]),
        2: dict(link=('right_ankle', 'right_knee'), id=2,
                color=[255, 128, 0]),
        3: dict(link=('right_knee', 'right_hip'), id=3, color=[255, 128, 0]),
        4: dict(link=('left_hip', 'right_hip'), id=4, color=[51, 153, 255]),
        5: dict(link=('left_shoulder', 'left_hip'), id=5,
                color=[51, 153, 255]),
        6: dict(link=('right_shoulder', 'right_hip'), id=6,
                color=[51, 153, 255]),
        7: dict(link=('left_shoulder', 'right_shoulder'), id=7,
                color=[51, 153, 255]),
        8: dict(link=('left_shoulder', 'left_elbow'), id=8,
                color=[0, 255, 0]),
        9: dict(link=('right_shoulder', 'right_elbow'), id=9,
                color=[255, 128, 0]),
        10: dict(link=('left_elbow', 'left_wrist'), id=10, color=[0, 255, 0]),
        11: dict(link=('right_elbow', 'right_wrist'), id=11,
                 color=[255, 128, 0]),
        12: dict(link=('left_eye', 'right_eye'), id=12, color=[51, 153, 255]),
        13: dict(link=('nose', 'left_eye'), id=13, color=[51, 153, 255]),
        14: dict(link=('nose', 'right_eye'), id=14, color=[51, 153, 255]),
        15: dict(link=('left_eye', 'left_ear'), id=15, color=[51, 153, 255]),
        16: dict(link=('right_eye', 'right_ear'), id=16,
                 color=[51, 153, 255]),
        17: dict(link=('left_ear', 'left_shoulder'), id=17,
                 color=[51, 153, 255]),
        18: dict(link=('right_ear', 'right_shoulder'), id=18,
                 color=[51, 153, 255])
    },
    joint_weights=[
        1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.5, 1.5, 1., 1., 1.2, 1.2,
        1.5, 1.5
    ],
    sigmas=[
        0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072,
        0.062, 0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089
    ])
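
# The ``swap`` entries above pair each left keypoint with its right
# counterpart; top-down pipelines use these pairs when horizontally flipping
# an image. A small illustrative sketch (this helper is ours, not an EasyCV
# API) of deriving flip-pair indices from ``keypoint_info``:
def _flip_pairs_from_keypoint_info(keypoint_info):
    """Return [[left_id, right_id], ...] for all swappable keypoints."""
    name2id = {info['name']: idx for idx, info in keypoint_info.items()}
    pairs = []
    for idx, info in keypoint_info.items():
        swap = info['swap']
        # keep each pair once, anchored at the smaller index
        if swap and idx < name2id[swap]:
            pairs.append([idx, name2id[swap]])
    return pairs


# _flip_pairs_from_keypoint_info(OC_HUMAN_DATASET_INFO['keypoint_info'])
# -> [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14], [15, 16]]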


@DATASOURCES.register_module
class PoseTopDownSourceChHuman(PoseTopDownSource):
    """OC Human source for top-down pose estimation.

    "Pose2Seg: Detection Free Human Instance Segmentation", CVPR'2019.
    More details can be found in the `paper
    <https://arxiv.org/abs/1803.10683>`__.

    The source loads raw features to build a data meta object containing the
    image info, annotation info and others.

    OC Human keypoint indexes::

        0: 'nose',
        1: 'left_eye',
        2: 'right_eye',
        3: 'left_ear',
        4: 'right_ear',
        5: 'left_shoulder',
        6: 'right_shoulder',
        7: 'left_elbow',
        8: 'right_elbow',
        9: 'left_wrist',
        10: 'right_wrist',
        11: 'left_hip',
        12: 'right_hip',
        13: 'left_knee',
        14: 'right_knee',
        15: 'left_ankle',
        16: 'right_ankle'

    Args:
        ann_file (str): Path to the annotation file.
        img_prefix (str): Path to a directory where images are held.
            Default: None.
        data_cfg (dict): Data config.
        subset (str): If subset is 'train', 'val' or 'test', the annotation
            file is parsed as OCHuman (non-coco) style and split accordingly;
            if subset is None, it is parsed as coco style.
        dataset_info (DatasetInfo): A class containing all dataset info.
        test_mode (bool): Store True when building test or validation
            dataset. Default: False.
    """

    def __init__(self,
                 ann_file,
                 img_prefix,
                 data_cfg,
                 subset=None,
                 dataset_info=None,
                 test_mode=False,
                 **kwargs):
        if dataset_info is None:
            logging.info(
                'dataset_info is missing, use default OC Human dataset info')
            dataset_info = OC_HUMAN_DATASET_INFO
        self.subset = subset
        super().__init__(
            ann_file,
            img_prefix,
            data_cfg,
            # parse OCHuman-style annotations when a subset is specified
            coco_style=not bool(subset),
            dataset_info=dataset_info,
            test_mode=test_mode)

    def _get_db(self):
        """Load dataset."""
        # ground truth bbox
        if self.subset:
            gt_db = self._load_keypoint_annotations()
        else:
            gt_db = super()._load_keypoint_annotations()
        return gt_db

    def _load_keypoint_annotations(self):
        self._load_annofile()
        gt_db = list()
        for img_id in self.imgIds:
            gt_db.extend(self._oc_load_keypoint_annotation_kernel(img_id))
        return gt_db

    def _load_annofile(self):
        self.human = json.load(open(self.ann_file, 'r'))
        self.keypoint_names = self.human['keypoint_names']
        self.keypoint_visible = self.human['keypoint_visible']
        self.images = {}
        self.imgIds = []
        for imgItem in self.human['images']:
            # keep only the instances that carry keypoint annotations
            annos = [
                anno for anno in imgItem['annotations'] if anno['keypoints']
            ]
            imgItem['annotations'] = annos
            self.imgIds.append(imgItem['image_id'])
            self.images[imgItem['image_id']] = imgItem
        assert len(self.imgIds) > 0, \
            f'{self.ann_file} contains no image annotations'
        # split the image ids 3:1 into a train subset and a val/test subset
        if self.subset == 'train':
            self.imgIds = self.imgIds[:int(len(self.imgIds) * 0.75)]
        else:
            self.imgIds = self.imgIds[int(len(self.imgIds) * 0.75):]
        self.num_images = len(self.imgIds)
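
    # OCHuman marks each keypoint with
    # {'missing': 0, 'vis': 1, 'self_occluded': 2, 'others_occluded': 3},
    # whereas COCO uses {0: not labeled, 1: labeled but not visible,
    # 2: labeled and visible}. The kernel below folds OCHuman 1/2 into
    # COCO 2 and OCHuman 3 into COCO 1, e.g. an 'others_occluded' wrist
    # (v=3) becomes a COCO 'labeled but not visible' wrist (v=1), so every
    # annotated keypoint still counts towards num_keypoints.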
    def _oc_load_keypoint_annotation_kernel(self,
                                            img_id,
                                            maxIouRange=(0., 1.)):
        """Load annotation from OCHumanAPI.

        Note:
            bbox: [x1, y1, w, h]

        Args:
            img_id: OCHuman image id
            maxIouRange (tuple): only instances whose ``max_iou`` lies in
                [maxIouRange[0], maxIouRange[1]) are kept.

        Returns:
            list[dict]: db entries
        """
        data = self.images[img_id]
        file_name = data['file_name']
        width = data['width']
        height = data['height']
        num_joints = self.ann_info['num_joints']

        bbox_id = 0
        rec = []
        for anno in data['annotations']:
            kpt = anno['keypoints']
            max_iou = anno['max_iou']
            if max_iou < maxIouRange[0] or max_iou >= maxIouRange[1]:
                continue

            # coco box: xyxy -> xywh
            x1, y1, x2, y2 = anno['bbox']
            x, y, w, h = [x1, y1, x2 - x1, y2 - y1]
            area = (x2 - x1) * (y2 - y1)
            # clip the box to the image boundaries
            x1 = max(0, x)
            y1 = max(0, y)
            x2 = min(width - 1, x1 + max(0, w - 1))
            y2 = min(height - 1, y1 + max(0, h - 1))
            if area > 0 and x2 > x1 and y2 > y1:
                bbox = [x1, y1, x2 - x1, y2 - y1]

                # coco kpt: vis 2, not vis 1, missing 0.
                # 'keypoint_visible': {'missing': 0, 'vis': 1,
                #                      'self_occluded': 2,
                #                      'others_occluded': 3}
                kptDef = self.human['keypoint_names']
                kptDefCoco = [
                    'nose', 'left_eye', 'right_eye', 'left_ear', 'right_ear',
                    'left_shoulder', 'right_shoulder', 'left_elbow',
                    'right_elbow', 'left_wrist', 'right_wrist', 'left_hip',
                    'right_hip', 'left_knee', 'right_knee', 'left_ankle',
                    'right_ankle'
                ]

                kptCoco = []
                num_keypoints = 0
                for i in range(len(kptDefCoco)):
                    # reorder keypoints to coco order and remap visibility
                    idx = kptDef.index(kptDefCoco[i])
                    x, y, v = kpt[idx * 3:idx * 3 + 3]
                    if v == 1 or v == 2:
                        v = 2
                        num_keypoints += 1
                    elif v == 3:
                        v = 1
                        num_keypoints += 1
                    kptCoco += [x, y, v]
                assert len(kptCoco) == 17 * 3

                joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
                joints_3d_visible = np.zeros((num_joints, 3),
                                             dtype=np.float32)
                keypoints = np.array(kptCoco).reshape(-1, 3)
                joints_3d[:, :2] = keypoints[:, :2]
                joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3])

                center, scale = super()._xywh2cs(*bbox)

                # image path
                image_file = os.path.join(self.img_prefix, file_name)
                rec.append({
                    'image_file': image_file,
                    'image_id': img_id,
                    'center': center,
                    'scale': scale,
                    'bbox': bbox,
                    'rotation': 0,
                    'joints_3d': joints_3d,
                    'joints_3d_visible': joints_3d_visible,
                    'dataset': self.dataset_name,
                    'bbox_score': 1,
                    'bbox_id': bbox_id
                })
                bbox_id = bbox_id + 1
        return rec
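
# A minimal usage sketch. The paths are hypothetical and the ``data_cfg``
# keys below follow the usual mmpose-style top-down conventions that
# ``PoseTopDownSource`` was adapted from; the exact set of required keys
# depends on your EasyCV version, so adjust as needed.
if __name__ == '__main__':
    data_cfg = dict(
        image_size=[288, 384],
        heatmap_size=[72, 96],
        num_output_channels=17,
        num_joints=17,
        dataset_channel=[list(range(17))],
        inference_channel=list(range(17)),
        use_gt_bbox=True,
        bbox_file='')
    source = PoseTopDownSourceChHuman(
        ann_file='data/ochuman/ochuman.json',  # hypothetical path
        img_prefix='data/ochuman/images/',  # hypothetical path
        data_cfg=data_cfg,
        subset='train')  # OCHuman-style parsing with the 75% train split
    print('num images:', source.num_images)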