in pyrit/prompt_converter/token_smuggling/variation_selector_smuggler_converter.py [0:0]
def decode_message(self, message: str) -> str:
    """
    Recover the hidden text carried by Unicode variation selectors in ``message``.

    Each selector in U+FE00..U+FE0F contributes a byte value 0-15, and each selector
    in U+E0100..U+E01EF contributes a byte value 16-255. Characters that appear before
    the first selector are ignored; once a selector has been seen, the first
    non-selector character ends the scan. When ``self.embed_in_base`` is falsy, space
    characters are skipped wherever they occur and never end the scan.

    Args:
        message: Text that may contain variation selectors.

    Returns:
        The collected bytes decoded as UTF-8. If the bytes are not valid UTF-8, an
        error is logged and the invalid sequences are replaced rather than raising.
    """
    payload = bytearray()
    seen_selector = False

    for symbol in message:
        # Without base embedding, a space is a visible separator, not payload.
        if not self.embed_in_base and symbol == " ":
            continue

        codepoint = ord(symbol)
        if 0xFE00 <= codepoint <= 0xFE0F:
            # Basic variation selectors carry the low 16 byte values.
            value = codepoint - 0xFE00
        elif 0xE0100 <= codepoint <= 0xE01EF:
            # Supplementary selectors carry the remaining byte values (16-255).
            value = codepoint - 0xE0100 + 16
        elif seen_selector:
            # The payload is one contiguous run; anything else terminates it.
            break
        else:
            # Still in the leading visible text; keep looking for the payload.
            continue

        seen_selector = True
        payload.append(value)

    try:
        text = payload.decode("utf-8")
    except UnicodeDecodeError:
        # Fall back to a lossy decode so callers still get a string back.
        text = payload.decode("utf-8", errors="replace")
        logger.error("Decoded byte sequence is not valid UTF-8; some characters may be replaced.")

    logger.info(f"Variation Selector Smuggler decoding complete: {len(text)} characters decoded.")
    return text