def convert_unicode_word()

in torchmoji/word_generator.py [0:0]


    def convert_unicode_word(self, word):
        """ Converts Unicode words to ASCII using unidecode. If Unicode is not
            allowed (set as a variable during initialization), then only
            punctuation that can be converted to ASCII will be allowed.
        """
        if self.check_ascii(word):
            return True, word

        # First we ensure that the Unicode is normalized so it's
        # always a single character.
        word = unicodedata.normalize("NFKC", word)

        # Convert Unicode punctuation to ASCII equivalent. We want
        # e.g. "\u203c" (double exclamation mark) to be treated the same
        # as "!!" no matter if we allow other Unicode characters or not.
        word = self.convert_unicode_punctuation(word)

        if self.ignore_emojis:
            _, word = separate_emojis_and_text(word)

        # If conversion of punctuation and removal of emojis took care
        # of all the Unicode or if we allow Unicode then everything is fine
        if self.check_ascii(word) or self.allow_unicode_text:
            return True, word
        else:
            # Sometimes we might want to simply ignore Unicode sentences
            # (e.g. for vocabulary creation). This is another way to prevent
            # "polution" of strange Unicode tokens from low quality datasets
            return False, ''