def split()

in augly/text/augmenters/words_augmenter.py [0:0]


    def split(self, data: str) -> str:
        """Split randomly selected words of ``data`` in two.

        The text is tokenized, the number of words to augment is drawn with
        ``self._generate_aug_cnt`` (from ``self.aug_min``, ``self.aug_max``
        and ``self.aug_p``), and the concrete token indices are chosen by
        ``get_aug_idxes``. Each chosen token is cut at a random interior
        position, so both resulting pieces are non-empty.

        Args:
            data: the text to augment.

        Returns:
            The detokenized text in which every selected word has been
            replaced by its two pieces, or ``data`` itself (untouched) when
            no word was selected for augmentation.
        """
        tokens = tokenize(data)
        aug_word_cnt = self._generate_aug_cnt(
            len(tokens), self.aug_min, self.aug_max, self.aug_p
        )
        filtered_word_idxes = self.pre_skip_aug(tokens)
        # A set gives O(1) membership checks in the loop below.
        aug_word_idxes = set(
            get_aug_idxes(
                self,
                tokens,
                filtered_word_idxes,
                aug_word_cnt,
                Method.WORD,
                self.min_char,
            )
        )

        if not aug_word_idxes:
            return data

        results = []
        for t_i, token in enumerate(tokens):
            # Tokens shorter than two characters have no interior split point:
            # random.randint(1, 0) would raise ValueError. Keep them as-is.
            # NOTE(review): presumably get_aug_idxes already filters by
            # self.min_char, but that is not guaranteed from here (e.g. a
            # min_char of 0 or 1) -- confirm against its implementation.
            if t_i not in aug_word_idxes or len(token) < 2:
                results.append(token)
                continue

            # randint is inclusive on both ends, so the split position lies in
            # [1, len - 1] and neither piece is empty.
            split_position = random.randint(1, len(token) - 1)
            results.extend([token[:split_position], token[split_position:]])

        return detokenize(results)