in pyrit/prompt_converter/token_smuggling/ascii_smuggler_converter.py [0:0]
def encode_message(self, *, message: str) -> tuple[str, str]:
    """
    Encode a message as Unicode Tag characters.

    Each printable ASCII character (0x20-0x7E) is mapped to the Unicode Tag
    code point obtained by adding 0xE0000 to its ordinal. Characters outside
    that range are left out of the output and reported together in a single
    error log entry. When ``self.unicode_tags`` is truthy, the output is
    wrapped between U+E0001 and U+E007F.

    Args:
        message (str): The message to encode.

    Returns:
        Tuple[str, str]: A tuple with a summary of code points ("U+XXXX"
        entries separated by spaces) and the encoded message.
    """
    # Collect pieces in lists and join once at the end; repeated ``+=`` on
    # strings is only linear thanks to a CPython-specific optimisation.
    encoded_parts = []
    summary_parts = []
    rejected = []

    if self.unicode_tags:
        encoded_parts.append(chr(0xE0001))
        summary_parts.append("U+E0001 ")

    for char in message:
        ordinal = ord(char)
        if 0x20 <= ordinal <= 0x7E:
            code_point = 0xE0000 + ordinal
            encoded_parts.append(chr(code_point))
            # Every per-character entry carries a trailing space, so the
            # summary ends with a space when no closing tag is appended.
            summary_parts.append(f"U+{code_point:X} ")
        else:
            rejected.append(char)

    if self.unicode_tags:
        encoded_parts.append(chr(0xE007F))
        summary_parts.append("U+E007F")

    if rejected:
        invalid_chars = "".join(rejected)
        logger.error(f"Invalid characters detected: {invalid_chars}")

    return "".join(summary_parts), "".join(encoded_parts)