in libs/solaris/data/coco.py [0:0]
def df_to_coco_annos(df, output_path=None, geom_col='geometry',
                     image_id_col=None, category_col=None, score_col=None,
                     preset_categories=None, supercategory_col=None,
                     include_other=True, starting_id=1, verbose=0):
    """Extract COCO-formatted annotations from a pandas ``DataFrame``.

    This function assumes that *annotations are already in pixel coordinates.*
    If this is not the case, you can transform them using
    :func:`solaris.vector.polygon.geojson_to_px_gdf`.

    Note that this function generates annotations formatted per the COCO object
    detection specification. For additional information, see
    `the COCO dataset specification`_.

    .. _the COCO dataset specification: http://cocodataset.org/#format-data

    Arguments
    ---------
    df : :class:`pandas.DataFrame`
        A :class:`pandas.DataFrame` containing geometries to store as annos.
    output_path : str, optional
        If provided, the output dictionary is also written to this path as
        JSON.
    geom_col : str, optional
        The name of the column in `df` that contains geometries. The geometries
        should either be shapely :class:`shapely.geometry.Polygon` s or WKT
        strings. Defaults to ``"geometry"``.
    image_id_col : str, optional
        The column containing image IDs. If not provided, it's assumed that
        all are in the same image, which will be assigned the ID of ``1``.
    category_col : str, optional
        The name of the column that specifies categories for each object. If
        not provided, all objects will be placed in a single category named
        ``"other"``.
    score_col : str, optional
        The name of the column that specifies the output confidence of a model.
        If not provided, will not be output.
    preset_categories : :class:`list` of :class:`dict`s, optional
        A pre-set list of categories to use for labels. These categories should
        be formatted per
        `the COCO category specification`_. Requires `category_col`. Note
        that when `include_other` is ``True`` and no ``"other"`` category is
        present, an ``"other"`` entry is appended to this list in place.
    supercategory_col : str, optional
        The name of the column that specifies supercategories. It is passed
        through to :func:`coco_categories_dict_from_df`.
    include_other : bool, optional
        Only used with `preset_categories`. If ``True`` (the default), objects
        whose category is not in `preset_categories` are relabeled
        ``"other"``; if ``False``, those objects are dropped.
    starting_id : int, optional
        The number to start numbering annotation IDs at. Defaults to ``1``.
    verbose : int, optional
        Verbose text output. By default, none is provided; if ``True`` or
        ``1``, information-level outputs are provided; if ``2``, extremely
        verbose text is output.

    .. _the COCO category specification: http://cocodataset.org/#format-data

    Returns
    -------
    output_dict : dict
        A dictionary containing COCO-formatted annotation and category entries
        (under the ``"annotations"`` and ``"categories"`` keys) per
        `the COCO dataset specification`_

    Raises
    ------
    ValueError
        If `preset_categories` is provided without `category_col`.
    """
    logger = logging.getLogger(__name__)
    logger.setLevel(_get_logging_level(int(verbose)))
    logger.debug('Checking that df is loaded.')
    df = _check_df_load(df)
    temp_df = df.copy()  # for manipulation

    if preset_categories is not None and category_col is None:
        logger.debug('preset_categories has a value, category_col is None.')
        raise ValueError('category_col must be specified if using'
                         ' preset_categories.')
    elif preset_categories is not None and category_col is not None:
        logger.debug('Both preset_categories and category_col have values.')
        logger.debug('Getting list of category names.')
        category_dict = _coco_category_name_id_dict_from_list(
            preset_categories)
        category_names = list(category_dict.keys())
        if not include_other:
            logger.info('Filtering out objects not contained in '
                        ' preset_categories')
            # Copy so later column assignments act on an independent frame
            # rather than on a slice of the unfiltered one.
            temp_df = temp_df.loc[
                temp_df[category_col].isin(category_names), :].copy()
        else:
            logger.info('Setting category to "other" for objects outside of '
                        'preset category list.')
            temp_df.loc[~temp_df[category_col].isin(category_names),
                        category_col] = 'other'
            if 'other' not in category_dict:
                logger.debug('Adding "other" to category_dict.')
                # Builtin max keeps the ID a plain int and tolerates an
                # empty preset list (first ID becomes 1).
                other_id = max(category_dict.values(), default=0) + 1
                category_dict['other'] = other_id
                # NOTE: this mutates the caller's list; kept for backward
                # compatibility with callers that read it afterwards.
                preset_categories.append({'id': other_id,
                                          'name': 'other',
                                          'supercategory': 'other'})
    elif preset_categories is None and category_col is not None:
        logger.debug('No preset_categories, have category_col.')
        logger.info(f'Collecting unique category names from {category_col}.')
        category_names = list(temp_df[category_col].unique())
        logger.info('Generating category ID numbers arbitrarily.')
        category_dict = dict(zip(category_names,
                                 range(1, len(category_names) + 1)))
    else:
        logger.debug('No category column or preset categories.')
        logger.info('Setting category to "other" for all objects.')
        category_col = 'category_col'
        temp_df[category_col] = 'other'
        category_names = ['other']
        category_dict = {'other': 1}

    # Always materialize an 'image_id' column so the record builder can read
    # a single, known column regardless of whether image_id_col was given.
    if image_id_col is None:
        temp_df['image_id'] = 1
    else:
        temp_df['image_id'] = temp_df[image_id_col]

    logger.debug('Checking geometries.')
    temp_df[geom_col] = temp_df[geom_col].apply(_check_geom)
    logger.info('Getting area of geometries.')
    temp_df['area'] = temp_df[geom_col].apply(lambda x: x.area)
    logger.info('Getting geometry bounding boxes.')
    temp_df['bbox'] = temp_df[geom_col].apply(
        lambda x: bbox_corners_to_coco(x.bounds))
    temp_df['category_id'] = temp_df[category_col].map(category_dict)
    temp_df['annotation_id'] = list(range(starting_id,
                                          starting_id + len(temp_df)))
    if score_col is not None:
        # Read from temp_df (not df) so rows dropped by category filtering
        # never require index re-alignment.
        temp_df['score'] = temp_df[score_col]

    def _row_to_coco(row):
        "get a single annotation record from a row of temp_df."
        record = {'id': int(row['annotation_id']),
                  'image_id': int(row['image_id']),
                  'category_id': int(row['category_id']),
                  'segmentation': [polygon_to_coco(row[geom_col])]}
        if score_col is not None:
            record['score'] = float(row['score'])
        record['area'] = row['area']
        record['bbox'] = row['bbox']
        record['iscrowd'] = 0
        return record

    # Iterate rows explicitly: DataFrame.apply(axis=1) returns a DataFrame
    # (which has no .tolist()) when the frame is empty.
    coco_annotations = [_row_to_coco(row) for _, row in temp_df.iterrows()]

    coco_categories = coco_categories_dict_from_df(
        temp_df, category_id_col='category_id',
        category_name_col=category_col,
        supercategory_col=supercategory_col)

    output_dict = {'annotations': coco_annotations,
                   'categories': coco_categories}

    if output_path is not None:
        with open(output_path, 'w') as outfile:
            json.dump(output_dict, outfile)

    return output_dict