in pyrit/prompt_converter/token_smuggling/variation_selector_smuggler_converter.py [0:0]
def encode_message(self, message: str) -> Tuple[str, str]:
    """
    Encode the message using Unicode variation selectors.

    The message is converted to UTF-8 bytes, and each byte is mapped to a variation selector:
        - 0x00-0x0F => U+FE00 to U+FE0F.
        - 0x10-0xFF => U+E0100 to U+E01EF.

    If ``self.embed_in_base`` is true, the payload is appended directly to the base character
    (``self.utf8_base_char``); otherwise, a visible separator (a space) is inserted between
    the base character and the payload.

    Args:
        message: The text to encode.

    Returns:
        A ``(summary, encoded)`` tuple. ``summary`` is a space-separated, human-readable
        listing that starts with ``Base char: U+XXXX`` followed by one ``U+XXXX`` entry per
        encoded byte. ``encoded`` is the base character, the optional space separator, and
        the variation-selector payload.
    """
    data = message.encode("utf-8")

    # Map every byte to its selector code point exactly once; the payload and the summary
    # are both derived from this list so the two can never disagree.
    code_points = [0xFE00 + byte if byte < 16 else 0xE0100 + (byte - 16) for byte in data]

    payload = "".join(map(chr, code_points))
    separator = "" if self.embed_in_base else " "
    encoded = self.utf8_base_char + separator + payload

    # ord() only accepts a one-character string, so utf8_base_char must be a single code point.
    summary_parts = [f"Base char: U+{ord(self.utf8_base_char):X}"]
    summary_parts.extend(f"U+{code_point:X}" for code_point in code_points)
    code_points_summary = " ".join(summary_parts)

    logger.info("Variation Selector Smuggler encoding complete: %d bytes encoded.", len(data))
    return code_points_summary, encoded