def decode_message()

in pyrit/prompt_converter/token_smuggling/variation_selector_smuggler_converter.py [0:0]


    def decode_message(self, message: str) -> str:
        """
        Recover the text hidden in ``message`` as Unicode variation selectors.

        Every variation selector found is turned back into one byte:

        * U+FE00..U+FE0F map to byte values 0..15.
        * U+E0100..U+E01EF map to byte values 16..255.

        Characters in front of the first selector are ignored. Once at least one
        selector has been read, the first character that is not a selector ends
        the scan. When ``self.embed_in_base`` is falsy, space characters are
        skipped wherever they occur and therefore never end the scan.

        The collected bytes are decoded as UTF-8. If they are not valid UTF-8,
        they are decoded again with ``errors="replace"`` and an error is logged.

        Args:
            message: The string to scan for variation selectors.

        Returns:
            The decoded text; an empty string when no selector is present.
        """
        payload = bytearray()
        seen_selector = False

        for symbol in message:
            # Without base-character embedding, spaces act as visible
            # separators and are never part of the payload.
            if not self.embed_in_base:
                if symbol == " ":
                    continue

            codepoint = ord(symbol)
            if 0xFE00 <= codepoint <= 0xFE0F:
                # First selector block: byte values 0..15.
                value = codepoint - 0xFE00
            elif 0xE0100 <= codepoint <= 0xE01EF:
                # Supplementary selector block: byte values 16..255.
                value = codepoint - 0xE0100 + 16
            elif seen_selector:
                # The run of selectors is over; nothing further is read.
                break
            else:
                # Still in front of the payload; keep looking.
                continue

            seen_selector = True
            payload.append(value)

        try:
            text = payload.decode("utf-8")
        except UnicodeDecodeError:
            # Fall back to a lossy decode rather than failing outright.
            text = payload.decode("utf-8", errors="replace")
            logger.error("Decoded byte sequence is not valid UTF-8; some characters may be replaced.")

        logger.info(f"Variation Selector Smuggler decoding complete: {len(text)} characters decoded.")
        return text