in mlops-template-gitlab/lambda_functions/lambda-seedcode-checkin-gitlab/charset_normalizer/md.py [0:0]
def feed(self, character: str) -> None:
if character.isalpha():
self._buffer = "".join([self._buffer, character])
if is_accentuated(character):
self._buffer_accent_count += 1
if (
self._foreign_long_watch is False
and is_latin(character) is False
and is_cjk(character) is False
and is_hangul(character) is False
and is_katakana(character) is False
and is_hiragana(character) is False
and is_thai(character) is False
):
self._foreign_long_watch = True
return
if not self._buffer:
return
if (
character.isspace() or is_punctuation(character) or is_separator(character)
) and self._buffer:
self._word_count += 1
buffer_length = len(self._buffer) # type: int
self._character_count += buffer_length
if buffer_length >= 4 and self._buffer_accent_count / buffer_length >= 0.3:
self._is_current_word_bad = True
if buffer_length >= 24 and self._foreign_long_watch:
self._is_current_word_bad = True
if self._is_current_word_bad:
self._bad_word_count += 1
self._bad_character_count += len(self._buffer)
self._is_current_word_bad = False
self._foreign_long_watch = False
self._buffer = ""
self._buffer_accent_count = 0
elif (
character not in {"<", ">", "-", "="}
and character.isdigit() is False
and is_symbol(character)
):
self._is_current_word_bad = True
self._buffer += character