in services/skills.py [0:0]
def _build_patterns(self, skills: dict, create: bool = False) -> list:
    """Build, or load from cache, the matcher patterns for every skill.

    Patterns are cached in ``skill_patterns.jsonl`` under ``self.data_path``.
    They are rebuilt (and the cache file rewritten) when that file does not
    exist or when ``create`` is true; otherwise the cached file is read back.

    Args:
        skills: Mapping of skill id to a skill record. Each record is indexed
            with ``"aliases"`` (an iterable of names) and ``"sources"`` (an
            iterable of mappings that may contain a ``"displayName"``).
        create: Force a rebuild even if the cache file already exists.

    Returns:
        A list of ``{"label": "SKILL|<skill_id>", "pattern": ...}`` dicts,
        where each pattern is whatever ``self._skill_pattern`` produced.

    Raises:
        KeyError: If a skill record lacks ``"aliases"`` or ``"sources"``.
    """
    patterns_path = self.data_path / "skill_patterns.jsonl"

    if patterns_path.exists() and not create:
        # srsly.read_jsonl yields records lazily; materialise them so this
        # branch returns a reusable list, exactly like the build branch.
        return list(srsly.read_jsonl(patterns_path))

    # Build up the list of token patterns for the matcher.
    patterns = []
    # Separators on which a name gets an additional, split-up pattern.
    split_tokens = (".", "/", "-")

    for skill_id, skill_info in skills.items():
        aliases = skill_info["aliases"]
        sources = skill_info["sources"]

        # A set removes names that appear both as alias and display name.
        skill_names = set(aliases)
        skill_names.update(
            source["displayName"] for source in sources if "displayName" in source
        )

        label = f"SKILL|{skill_id}"
        # Sorted so the generated cache file does not depend on set
        # iteration order, which varies between interpreter runs.
        for name in sorted(skill_names):
            # Names unchanged by upper() (e.g. all-caps) are kept verbatim;
            # everything else is lower-cased and stripped.
            skill_name = name if name.upper() == name else name.lower().strip()
            if skill_name in STOP_WORDS:
                continue

            pattern = self._skill_pattern(skill_name)
            if not pattern:
                continue
            patterns.append({"label": label, "pattern": pattern})

            for token in split_tokens:
                if token not in skill_name:
                    continue
                split_pattern = self._skill_pattern(skill_name, token)
                # Same guard as the primary pattern: never emit an empty one.
                if split_pattern:
                    patterns.append({"label": label, "pattern": split_pattern})

    srsly.write_jsonl(patterns_path, patterns)
    return patterns