in src/engine/step1/_extraction_helpers.py [0:0]
def _add_cm_entity(raw_entity, entities_by_category, seen_names):
    """Route one CM-detected entity into the matching tool category.

    The raw entity is first normalised with ``_reformat_cm_entity``. If its
    ``"Text"`` is already in ``seen_names`` nothing is recorded. Otherwise the
    CM ``"Category"`` / ``"Type"`` pair selects one of the tool's categories,
    and the entity is handed to an update helper together with that
    category's entry of ``entities_by_category`` and ``seen_names``. Entities
    that match no routing rule are ignored.

    Routing (CM category / CM type -> tool category):
        MEDICATION / DURATION                   -> TIMEDAYS (time helper)
        PROTECTED_HEALTH_INFORMATION / DATE     -> TIMEYEARS
        PROTECTED_HEALTH_INFORMATION / AGE      -> AGE
        PROTECTED_HEALTH_INFORMATION / ADDRESS  -> STATE
        MEDICATION / GENERIC_NAME or BRAND_NAME -> DRUG
        MEDICAL_CONDITION / any type            -> CONDITION

    NOTE(review): "CM" is presumably Amazon Comprehend Medical, judging by
    the category and type names -- confirm.

    Args:
        raw_entity (dict): Raw entity as detected by CM.
        entities_by_category (dict): Previously detected entities, keyed by
            tool category.
        seen_names (set): Names of entities that were already detected.

    Returns:
        tuple: ``(entities_by_category, seen_names)`` -- the same two objects
        that were passed in. Any recording is done by the update helpers
        (presumably in place -- confirm against their definitions).
    """
    record = _reformat_cm_entity(raw_entity)

    # Guard: a name that was already recorded is never processed twice.
    if record["Text"] in seen_names:
        return entities_by_category, seen_names

    cm_category = record["Category"]
    cm_type = record["Type"]

    phi = "PROTECTED_HEALTH_INFORMATION"
    # Each rule: (CM category, accepted CM types or None for "any type",
    #             tool category key, whether the time-specific helper is used).
    routing_rules = (
        ("MEDICATION", ("DURATION",), "TIMEDAYS", True),
        (phi, ("DATE",), "TIMEYEARS", False),
        (phi, ("AGE",), "AGE", False),
        (phi, ("ADDRESS",), "STATE", False),
        ("MEDICATION", ("GENERIC_NAME", "BRAND_NAME"), "DRUG", False),
        ("MEDICAL_CONDITION", None, "CONDITION", False),
    )

    for wanted_category, accepted_types, bucket_key, is_time_rule in routing_rules:
        if cm_category != wanted_category:
            continue
        if accepted_types is not None and cm_type not in accepted_types:
            continue
        # Only durations go through the time-specific helper; every other
        # routed entity uses the generic one.
        if is_time_rule:
            record_entity = _update_time_category_with_cm_record
        else:
            record_entity = _update_category_with_cm_record
        record_entity(record, entities_by_category[bucket_key], seen_names)
        break  # at most one rule applies per entity

    return entities_by_category, seen_names