in datasets.py [0:0]
def convert_bio_to_entities(self, bio_tag: List[str]) -> List["Entity"]:
    """Convert a sequence of BIO tags into a list of entity spans.

    Each tag is expected to look like ``B-<type>``, ``I-<type>`` or any
    other string (e.g. ``O``) for tokens outside an entity. Every produced
    ``Entity`` covers the half-open token range ``[start, end)``.

    The natural-language name of a type is looked up in
    ``self.natural_entity_types``; types containing dots are looked up
    part by part and the parts are joined with spaces. Parts (or whole
    types) missing from the mapping are kept verbatim.

    Args:
        bio_tag: One BIO tag per token, in token order.

    Returns:
        The entities in order of appearance. Empty when no ``B-``/``I-``
        tag is present.
    """
    entities = []
    current_entity = None
    for ii, el in enumerate(bio_tag):
        is_inside = el.startswith('I-')
        # An ``I-`` tag with no open entity (sequence start, or right after
        # an outside tag) is treated as the beginning of a new entity
        # instead of crashing on ``None`` or extending a span across a gap.
        if el.startswith('B-') or (is_inside and current_entity is None):
            tag_type = el[2:]
            # Splitting on '.' also covers undotted types: a single part is
            # looked up and returned as-is by the join.
            natural = ' '.join(
                self.natural_entity_types.get(tag_part, tag_part)
                for tag_part in tag_type.split('.')
            )
            current_entity = Entity(
                type=EntityType(
                    short=tag_type,
                    natural=natural,
                ),
                start=ii,
                end=ii + 1,
            )
            entities.append(current_entity)
        elif is_inside:
            # Continuation of the open entity: grow its exclusive end.
            # NOTE(review): the I- tag's type is not compared with the open
            # entity's type, matching the previous behaviour.
            current_entity.end = ii + 1
        else:
            # Any tag that is neither B- nor I- closes the open entity so a
            # later I- cannot stretch it over the outside tokens in between.
            current_entity = None
    return entities