in src/dfcx_scrapi/tools/validation_util.py [0:0]
def intent_disambiguation(self, agent_id, refresh=False, flow=None):
    """Obtains the intent disambiguation tasks from the validation tool.

    Args:
      agent_id: Agent identifier handed to the agents client when the
        validation results are requested.
      refresh: (optional) False means validation results are pulled
        as is. True means the validation tool is refreshed then
        results are pulled.
      flow: (optional) If specified, results are returned only for the
        indicated flow display name.

    Returns:
      None (after logging an info message) when the validation results
      contain no clashing intent training phrases. Otherwise a
      dictionary of intent disambiguation validation results in two
      dataframes:
        extended: All intent disambiguation validation results as
          separate instances. If 5 training phrases conflict
          in 5 intents they will be shown as 5 rows.
        compact: Only showing the first instance of a conflict
          for each grouping. If 5 training phrases conflict in
          5 intents only the first training phrase will show.
    """
    if refresh:
        validation = self.agents.validate_agent(agent_id)
    else:
        validation = self.agents.get_validation_result(agent_id=agent_id)

    validation_df = self.validation_results_to_dataframe(validation)
    if flow:
        validation_df = validation_df[validation_df["flow"] == flow]

    marker = "Multiple intents share training phrases which are too similar"
    # Intent and training phrase are captured together so the two values
    # can never get out of step. The groups stay greedy on purpose: "."
    # does not cross newlines, and a greedy match keeps training phrases
    # that contain an apostrophe intact.
    pair_pattern = re.compile(r"Intent '(.*)': training phrase '(.*)'")

    rows = []
    disambig_id = 0
    flow_details = zip(validation_df["flow"], validation_df["detail"])
    for flow_name, detail in flow_details:
        # Skip non-string details (e.g. NaN) and unrelated messages.
        if not isinstance(detail, str) or marker not in detail:
            continue
        for intent, training_phrase in pair_pattern.findall(detail):
            rows.append((flow_name, disambig_id, intent, training_phrase))
        # Every matching validation message gets its own group id.
        disambig_id += 1

    extraction = pd.DataFrame(
        rows, columns=["flow", "disambig_id", "intent", "training_phrase"]
    )
    if extraction.empty:
        logging.info(
            "Validation results do not contain clashing intent phrases.")
        return None

    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # "intents" lists are identical from one run to the next.
    intent_options = (
        extraction.groupby(["disambig_id"])["intent"]
        .apply(lambda names: list(dict.fromkeys(names)))
        .reset_index()
        .rename(columns={"intent": "intents"})
    )
    extraction = pd.merge(
        extraction, intent_options, on=["disambig_id"], how="left"
    )

    internal = extraction.copy()
    internal["intent_count"] = internal["intents"].map(len)

    external = (
        extraction.groupby(["flow", "disambig_id"])
        .agg(
            {
                "training_phrase": "first",
                "intents": "first",
                "intent": "count",
            }
        )
        .reset_index()
        .rename(columns={"intent": "conflicting_tp_count"})
    )
    external["intent_count"] = external["intents"].map(len)

    return {"extended": internal, "compact": external}