def encode_message()

in pyrit/prompt_converter/token_smuggling/variation_selector_smuggler_converter.py [0:0]


    def encode_message(self, message: str) -> Tuple[str, str]:
        """
        Encode a message as a run of Unicode variation selectors.

        The message is converted to UTF-8 bytes, and each byte is mapped to one
        variation selector code point:
          - 0x00-0x0F => U+FE00 to U+FE0F.
          - 0x10-0xFF => U+E0100 to U+E01EF.

        If ``self.embed_in_base`` is true, the payload is appended directly to
        ``self.utf8_base_char``; otherwise a visible separator (a space) is
        inserted between the base character and the payload.

        Args:
            message (str): The text to encode.

        Returns:
            Tuple[str, str]: ``(summary, encoded)``. ``summary`` is a
            space-separated, human-readable listing of the code points used,
            starting with the base character. ``encoded`` is the base character
            followed by the (optionally space-separated) selector payload.
        """
        data = message.encode("utf-8")

        # Map every byte exactly once. The payload and the summary are both
        # derived from this single list, so they cannot disagree.
        code_points = [
            0xFE00 + byte if byte < 16 else 0xE0100 + (byte - 16)
            for byte in data
        ]

        # join() avoids repeated string reallocation for long messages.
        payload = "".join(map(chr, code_points))
        separator = "" if self.embed_in_base else " "
        encoded = self.utf8_base_char + separator + payload

        # ord() requires utf8_base_char to be a single character.
        summary_parts = [f"Base char: U+{ord(self.utf8_base_char):X}"]
        summary_parts.extend(f"U+{code_point:X}" for code_point in code_points)
        # No part starts or ends with whitespace, so no strip() is needed.
        code_points_summary = " ".join(summary_parts)

        logger.info(f"Variation Selector Smuggler encoding complete: {len(data)} bytes encoded.")
        return code_points_summary, encoded