in expanded_checklist/checklist/viewer/template_editor.py [0:0]
def tokenize_template_str(self, template_str, tagged_keys, tag_dict, max_count=5):
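    """Tokenize a template string, expanding each tagged "{key}" placeholder.

    Returns a list whose items are plain string tokens, except that each tagged
    key becomes a tuple of (candidate fill-in texts, the tagged key, and the
    normalized key with any "[...]" or "." suffix stripped).
    """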
    tagged_keys = list(tagged_keys)
    trans_keys = ["{" + key + "}" for key in tagged_keys]
    item_keys = [x[0] for x in tag_dict.items()]
    # Wrap scalar values in a single-item list; cap list/tuple values at max_count.
    item_vals = [[x[1]] if type(x[1]) not in [list, tuple] else x[1][:max_count]
                 for x in tag_dict.items()]
    local_items = []
    # Register each "{key}" placeholder with the tokenizer so it is kept as a
    # single token instead of being split into subwords.
    self.tokenizer.add_tokens(trans_keys)
    # Enumerate every combination of fill-in values, skipping combinations that
    # repeat the same value for two different keys.
    for item_val in itertools.product(*item_vals):
        if len(item_val) != len(set([str(x) for x in item_val])):
            continue
        local_item = {item_keys[i]: item_val[i] for i, _ in enumerate(item_val)}
        local_items.append(local_item)
    def _tokenize(text):
        # Encode then decode piece by piece so each element is a readable token;
        # placeholders registered via add_tokens come back as single pieces.
        tokens = [self.tokenizer.decode(x)
                  for x in self.tokenizer.encode(text, add_special_tokens=False)]
        return [t for t in tokens if t]
    def get_meta(text):
        # Plain tokens pass through unchanged; placeholder tokens expand into
        # (candidate fill-in texts, tagged key, normalized key).
        if text in trans_keys:
            idx = trans_keys.index(text)
            norm = tagged_keys[idx]
            lemma = norm.split(":")[-1]
            normalized_key = lemma.split('[')[0].split('.')[0]
            texts = []
            for local_item in local_items:
                try:
                    texts.append(self.format_fn(["{" + lemma + "}"], local_item)[0])
                except Exception:
                    # Ignore fill-in dicts that fail to format for this key.
                    pass
            return (texts, norm, normalized_key)
        else:
            return text
    template_tokens = [get_meta(t) for t in _tokenize(template_str)]
    return template_tokens
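
# Illustrative usage sketch (an assumption for demonstration, not code from this
# repo): it presumes `editor` is an instance of this class, self.tokenizer is a
# HuggingFace-style tokenizer, and self.format_fn fills "{first_name}"-style
# templates with values from a dict.
#
#   tokens = editor.tokenize_template_str(
#       "{first_name} is happy.",
#       tagged_keys=["first_name"],
#       tag_dict={"first_name": ["Alice", "Bob"]},
#   )
#   # tokens would look roughly like (exact word pieces depend on the tokenizer):
#   #   [(["Alice", "Bob"], "first_name", "first_name"), "is", "happy", "."]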