in siammot/data/video_dataset.py [0:0]
def entity2target(self, im: Image, entities: [AnnoEntity]):
    """
    Convert annotation entities into a maskrcnn-benchmark compatible BoxList.

    Bounding boxes arrive in xywh format and are converted to xyxy.
    Every entity is assigned class label 1 (person tracking only);
    class 0 is reserved for background during training. Track ids are
    attached as an extra 'ids' field on the returned BoxList.
    """
    bbox_list = []
    id_list = []
    label_list = []
    for entity in entities:
        bbox_list.append(entity.bbox)
        id_list.append(int(entity.id))
        # Person-only tracking for now, so every entity is foreground (1).
        label_list.append(1)

    target = BoxList(
        torch.as_tensor(bbox_list).reshape(-1, 4), im.size, mode='xywh'
    ).convert('xyxy')
    if not self.amodal:
        # Amodal boxes may extend past the frame; otherwise clamp to image.
        target = target.clip_to_image(remove_empty=False)
    target.add_field('labels', torch.as_tensor(label_list, dtype=torch.int64))
    target.add_field('ids', torch.as_tensor(id_list, dtype=torch.int64))
    return target