in jdbc/driver/mysql_util.cpp [2275:2377]
/*
  Decode a single UTF-8 sequence into a code point.

  pwc  [out] receives the decoded value on success
  s    [in]  first byte of the sequence to decode
  e    [in]  end of the input buffer (exclusive); bytes at or past e
             are never read

  Returns the number of bytes consumed (1..3, or up to 6 when
  UNICODE_32BIT is defined) on success.
  Returns MY_CS_ILSEQ if the bytes at s do not form an accepted sequence.
  Returns MY_CS_TOOSMALL / MY_CS_TOOSMALLn if fewer than the required
  number of bytes are available before e.
  *pwc is left untouched on every error return.

  NOTE: overlong encodings are rejected, but the decoded value itself is
  not range-checked: 3-byte sequences that decode to U+D800..U+DFFF are
  accepted, and the UNICODE_32BIT branches accept values above U+10FFFF.
*/
static int my_utf8_uni(unsigned long * pwc, const unsigned char *s, const unsigned char *e)
{
  unsigned char c;

  if (s >= e)
    return MY_CS_TOOSMALL;

  c= s[0];

  /* Single byte (ASCII). */
  if (c < 0x80)
  {
    *pwc = c;
    return 1;
  }

  /*
    0x80..0xBF are continuation bytes and cannot start a sequence;
    0xC0 and 0xC1 could only start an overlong 2-byte encoding.
  */
  if (c < 0xc2)
    return MY_CS_ILSEQ;

  /*
    From here on s < e holds, so (e - s) is a positive, well-defined
    byte count. Length checks compare that count instead of forming
    s + n, which would be undefined behaviour whenever it points more
    than one element past the end of the buffer.
  */

  if (c < 0xe0)
  {
    if (e - s < 2) /* We need 2 characters */
      return MY_CS_TOOSMALL2;

    /* (b ^ 0x80) < 0x40 is true exactly when b is in 0x80..0xBF. */
    if (!((s[1] ^ 0x80) < 0x40))
      return MY_CS_ILSEQ;

    *pwc = ((unsigned long) (c & 0x1f) << 6) | (unsigned long) (s[1] ^ 0x80);
    return 2;
  }

  if (c < 0xf0)
  {
    if (e - s < 3) /* We need 3 characters */
      return MY_CS_TOOSMALL3;

    /*
      Both trailing bytes must be continuation bytes. With lead 0xE0 the
      second byte must be >= 0xA0, otherwise the value would be < 0x800
      (an overlong encoding).
    */
    if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 &&
          (c >= 0xe1 || s[1] >= 0xa0)))
      return MY_CS_ILSEQ;

    *pwc = ((unsigned long) (c & 0x0f) << 12) |
           ((unsigned long) (s[1] ^ 0x80) << 6) |
           (unsigned long) (s[2] ^ 0x80);
    return 3;
  }

#ifdef UNICODE_32BIT
  if (c < 0xf8 && sizeof(my_wc_t)*8 >= 32)
  {
    if (e - s < 4) /* We need 4 characters */
      return MY_CS_TOOSMALL4;

    /* Lead 0xF0 requires s[1] >= 0x90 to exclude overlong encodings. */
    if (!((s[1] ^ 0x80) < 0x40 &&
          (s[2] ^ 0x80) < 0x40 &&
          (s[3] ^ 0x80) < 0x40 &&
          (c >= 0xf1 || s[1] >= 0x90)))
      return MY_CS_ILSEQ;

    *pwc = ((unsigned long) (c & 0x07) << 18) |
           ((unsigned long) (s[1] ^ 0x80) << 12) |
           ((unsigned long) (s[2] ^ 0x80) << 6) |
           (unsigned long) (s[3] ^ 0x80);
    return 4;
  }

  if (c < 0xfc && sizeof(my_wc_t)*8 >= 32)
  {
    if (e - s < 5) /* We need 5 characters */
      return MY_CS_TOOSMALL5;

    /* Lead 0xF8 requires s[1] >= 0x88 to exclude overlong encodings. */
    if (!((s[1] ^ 0x80) < 0x40 &&
          (s[2] ^ 0x80) < 0x40 &&
          (s[3] ^ 0x80) < 0x40 &&
          (s[4] ^ 0x80) < 0x40 &&
          (c >= 0xf9 || s[1] >= 0x88)))
      return MY_CS_ILSEQ;

    *pwc = ((unsigned long) (c & 0x03) << 24) |
           ((unsigned long) (s[1] ^ 0x80) << 18) |
           ((unsigned long) (s[2] ^ 0x80) << 12) |
           ((unsigned long) (s[3] ^ 0x80) << 6) |
           (unsigned long) (s[4] ^ 0x80);
    return 5;
  }

  if (c < 0xfe && sizeof(my_wc_t)*8 >= 32)
  {
    if (e - s < 6) /* We need 6 characters */
      return MY_CS_TOOSMALL6;

    /* Lead 0xFC requires s[1] >= 0x84 to exclude overlong encodings. */
    if (!((s[1] ^ 0x80) < 0x40 &&
          (s[2] ^ 0x80) < 0x40 &&
          (s[3] ^ 0x80) < 0x40 &&
          (s[4] ^ 0x80) < 0x40 &&
          (s[5] ^ 0x80) < 0x40 &&
          (c >= 0xfd || s[1] >= 0x84)))
      return MY_CS_ILSEQ;

    *pwc = ((unsigned long) (c & 0x01) << 30)
         | ((unsigned long) (s[1] ^ 0x80) << 24)
         | ((unsigned long) (s[2] ^ 0x80) << 18)
         | ((unsigned long) (s[3] ^ 0x80) << 12)
         | ((unsigned long) (s[4] ^ 0x80) << 6)
         | (unsigned long) (s[5] ^ 0x80);
    return 6;
  }
#endif

  /* Lead byte not handled by any branch above. */
  return MY_CS_ILSEQ;
}