in src/dfcx_scrapi/core/entity_types.py [0:0]
def entity_types_to_df(
    self,
    agent_id: str = None,
    mode: str = "basic",
    entity_type_subset: List[str] = None) -> pd.DataFrame:
    """Extracts Entity Types of an agent into Pandas DataFrame(s).

    Args:
        agent_id: (Optional) Agent to pull the list of entity types from.
            Falls back to `self.agent_id` when not provided.
        mode: (Optional) "basic" returns display_name, value of entity type
            and its synonyms.
            "advanced" returns entity types and excluded phrases in a
            comprehensive format.
        entity_type_subset: (Optional) A list of entity type display names
            to pull. If it's None (or empty), grab all the entity_types.

    Returns:
        In basic mode, a Pandas Dataframe for all entity types in the agent,
        sorted by display_name and entity_value, with schema:
            display_name: str
            entity_value: str
            synonyms: str
        In advanced mode, a dictionary with keys entity_types and
        excluded_phrases.
            The value of entity_types is a Pandas Dataframe with columns:
                entity_type_id, display_name, kind, auto_expansion_mode,
                fuzzy_extraction, redact, entity_value, synonyms
            The value of excluded_phrases is a Dataframe with columns:
                entity_type_id, display_name, excluded_phrase
        When no entity type is selected, the DataFrames are empty but still
        carry the columns listed above.

    Raises:
        ValueError: If `mode` is neither "basic" nor "advanced".
    """
    # Reject an unsupported mode up front, before listing entity types.
    if mode not in ("basic", "advanced"):
        raise ValueError("Mode types: [basic, advanced]")

    if not agent_id:
        agent_id = self.agent_id

    # Convert only the requested entity types; a falsy subset means "all".
    converted = [
        self.entity_type_proto_to_dataframe(obj, mode=mode)
        for obj in self.list_entity_types(agent_id)
        if not entity_type_subset
        or obj.display_name in entity_type_subset
    ]

    if mode == "basic":
        if not converted:
            # pd.concat rejects an empty list, and sorting a column-less
            # frame raises KeyError, so return the documented schema.
            return pd.DataFrame(
                columns=["display_name", "entity_value", "synonyms"])

        # Concatenate once instead of growing a frame inside the loop.
        return pd.concat(converted).sort_values(
            ["display_name", "entity_value"])

    # Advanced mode: every converted item is a dict holding two DataFrames.
    if converted:
        main_df = pd.concat(
            [item["entity_types"] for item in converted])
        excl_phrases_df = pd.concat(
            [item["excluded_phrases"] for item in converted])
    else:
        # Keep the documented columns so the astype below and any caller
        # selecting columns keep working when nothing was selected.
        main_df = pd.DataFrame(columns=[
            "entity_type_id", "display_name", "kind",
            "auto_expansion_mode", "fuzzy_extraction", "redact",
            "entity_value", "synonyms",
        ])
        excl_phrases_df = pd.DataFrame(
            columns=["entity_type_id", "display_name", "excluded_phrase"])

    type_map = {
        "auto_expansion_mode": bool,
        "fuzzy_extraction": bool,
        "redact": bool,
    }
    main_df = main_df.astype(type_map)

    return {
        "entity_types": main_df, "excluded_phrases": excl_phrases_df
    }