def tokenize_template_str()

in expanded_checklist/checklist/viewer/template_editor.py [0:0]


    def tokenize_template_str(self, template_str, tagged_keys, tag_dict, max_count=5):
        # Needs `import itertools` at module level (used for the product expansion below).
        tagged_keys = list(tagged_keys)
        # Each tagged key as it appears in the template string, e.g. "name" -> "{name}".
        trans_keys = ["{" + key + "}" for key in tagged_keys]
        item_keys = list(tag_dict.keys())
        # Keep at most max_count candidate values per key; wrap scalar values in a one-element list.
        item_vals = [vals[:max_count] if isinstance(vals, (list, tuple)) else [vals]
                     for vals in tag_dict.values()]
        local_items = []
        # Register each placeholder as a single token so the tokenizer never splits it.
        for trans_key in trans_keys:
            self.tokenizer.add_tokens(trans_key)
        # Expand tag_dict into concrete assignments: one dict per combination of candidate
        # values, skipping combinations that repeat the same value for different keys.
        for item_val in itertools.product(*item_vals):
            if len(item_val) != len({str(x) for x in item_val}):
                continue
            local_items.append(dict(zip(item_keys, item_val)))
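        # Illustration (values assumed): tag_dict = {"name": ["Ada", "Max"], "city": ["Paris"]}
        # expands to local_items == [{"name": "Ada", "city": "Paris"},
        #                            {"name": "Max", "city": "Paris"}].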
            
        def _tokenize(text):
            # Round-trip through the tokenizer so every id maps back to its surface string;
            # the placeholders registered above come back intact as single tokens.
            tokens = [self.tokenizer.decode(x) for x in self.tokenizer.encode(text, add_special_tokens=False)]
            # Drop empty strings left over from decoding.
            return [t for t in tokens if t]
        def get_meta(text):
            # Plain tokens pass through unchanged; placeholder tokens are replaced by a
            # (candidate_texts, tagged_key, normalized_key) tuple.
            if text in trans_keys:
                idx = trans_keys.index(text)
                norm = tagged_keys[idx]
                lemma = norm.split(":")[-1]
                # Strip any "[...]" index or "." attribute suffix to get the bare key name.
                normalized_key = lemma.split('[')[0].split('.')[0]
                texts = []
                for local_item in local_items:
                    try:
                        texts.append(self.format_fn(["{" + lemma + "}"], local_item)[0])
                    except Exception:
                        # Skip assignments the format function cannot fill for this key.
                        pass
                return (texts, norm, normalized_key)
            else:
                return text
        
        # Tokenize the template and attach fill-in metadata to every placeholder token.
        template_tokens = [get_meta(t) for t in _tokenize(template_str)]
        return template_tokens
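
Below is a minimal, self-contained sketch of how this method might be called. The `_StubTokenizer`, the `host` namespace, the `format_fn` lambda, and the example template and values are all assumptions for illustration; the class name `TemplateEditor` is assumed from the file name, and the real class supplies its own tokenizer and format function.

    from types import SimpleNamespace

    # Assumed import path (from the file location above) and assumed class name.
    from expanded_checklist.checklist.viewer.template_editor import TemplateEditor

    class _StubTokenizer:
        # Hypothetical whitespace tokenizer standing in for self.tokenizer.
        def add_tokens(self, token):
            pass  # a real tokenizer would register the placeholder as a single token
        def encode(self, text, add_special_tokens=False):
            return text.split()  # "ids" are just the surface tokens in this stub
        def decode(self, token):
            return token

    host = SimpleNamespace(
        tokenizer=_StubTokenizer(),
        # Assumed format_fn contract: fill "{key}" templates from a dict of values.
        format_fn=lambda templates, item: [t.format(**item) for t in templates],
    )

    tokens = TemplateEditor.tokenize_template_str(  # call the method unbound on the stub host
        host,
        "I met {first_name} in {city} .",
        tagged_keys=["first_name", "city"],
        tag_dict={"first_name": ["Ada", "Max"], "city": ["Paris"]},
    )
    # Plain words come back as strings; each placeholder becomes a tuple:
    # (["Ada", "Max"], "first_name", "first_name") for "{first_name}",
    # (["Paris", "Paris"], "city", "city") for "{city}".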