sal_Size ImplConvertIso2022CnToUnicode()

in main/sal/textenc/convertiso2022cn.c [91:412]


/**
 * Converts ISO-2022-CN encoded bytes to UTF-16 code units.
 *
 * The decoder is a byte-at-a-time state machine.  Its state (current state,
 * the pending row byte of a two-byte character, whether SO is active, and
 * whether the SO set is CNS 11643 plane 1 instead of GB 2312) is loaded from
 * pContext on entry and written back on exit when pContext is non-null, so a
 * multi-byte sequence may be split across calls.
 *
 * Recognized controls:
 *   SO (0x0E) / SI (0x0F)   enter / leave two-byte mode
 *   ESC $ ) A               two-byte mode uses the GB 2312 table
 *   ESC $ ) G               two-byte mode uses CNS 11643 plane index 0
 *   ESC $ * H               accepted; no state is recorded for it
 *   ESC N                   the next two bytes use CNS 11643 plane index 1
 *
 * @param pData         converter data; cast to ImplIso2022CnConverterData to
 *                      obtain the GB 2312 and CNS 11643 lookup tables.
 * @param pContext      optional ImplIso2022CnToUnicodeContext carrying state
 *                      between calls; may be null.
 * @param pSrcBuf       source bytes.
 * @param nSrcBytes     number of bytes available at pSrcBuf.
 * @param pDestBuf      destination buffer for sal_Unicode units.
 * @param nDestChars    capacity of pDestBuf in sal_Unicode units.
 * @param nFlags        RTL_TEXTTOUNICODE_FLAGS_* bits; forwarded to the
 *                      bad-input handler and checked for FLUSH at the end.
 * @param pInfo         if non-null, receives the accumulated
 *                      RTL_TEXTTOUNICODE_INFO_* bits.
 * @param pSrcCvtBytes  if non-null, receives the number of source bytes whose
 *                      loop iteration completed (the byte that caused a stop
 *                      or a full destination buffer is not counted).
 *
 * @return number of sal_Unicode units written to pDestBuf.
 */
sal_Size ImplConvertIso2022CnToUnicode(ImplTextConverterData const * pData,
                                       void * pContext,
                                       sal_Char const * pSrcBuf,
                                       sal_Size nSrcBytes,
                                       sal_Unicode * pDestBuf,
                                       sal_Size nDestChars,
                                       sal_uInt32 nFlags,
                                       sal_uInt32 * pInfo,
                                       sal_Size * pSrcCvtBytes)
{
    ImplDBCSToUniLeadTab const * pGb2312Data
        = ((ImplIso2022CnConverterData const *) pData)->
              m_pGb2312ToUnicodeData;
    sal_uInt16 const * pCns116431992Data
        = ((ImplIso2022CnConverterData const *) pData)->
              m_pCns116431992ToUnicodeData;
    sal_Int32 const * pCns116431992RowOffsets
        = ((ImplIso2022CnConverterData const *) pData)->
              m_pCns116431992ToUnicodeRowOffsets;
    sal_Int32 const * pCns116431992PlaneOffsets
        = ((ImplIso2022CnConverterData const *) pData)->
              m_pCns116431992ToUnicodePlaneOffsets;
    ImplIso2022CnToUnicodeState eState
        = IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII;
    sal_uInt32 nRow = 0;
    sal_Bool bSo = sal_False;
    sal_Bool b116431 = sal_False;
    sal_uInt32 nInfo = 0;
    sal_Size nConverted = 0;
    sal_Unicode * pDestBufPtr = pDestBuf;
    sal_Unicode * pDestBufEnd = pDestBuf + nDestChars;

    /* Resume from where a previous call left off, if a context is given. */
    if (pContext)
    {
        eState = ((ImplIso2022CnToUnicodeContext *) pContext)->m_eState;
        nRow = ((ImplIso2022CnToUnicodeContext *) pContext)->m_nRow;
        bSo = ((ImplIso2022CnToUnicodeContext *) pContext)->m_bSo;
        b116431 = ((ImplIso2022CnToUnicodeContext *) pContext)->m_b116431;
    }

    for (; nConverted < nSrcBytes; ++nConverted)
    {
        /* bUndefined is passed to the bad-input handler; it is cleared on
           the paths where the byte itself is not acceptable in the current
           state and left set where a well-formed code has no mapping. */
        sal_Bool bUndefined = sal_True;
        sal_uInt32 nChar = *(sal_uChar const *) pSrcBuf++;
        sal_uInt32 nPlane;
        switch (eState)
        {
        case IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII:
            if (nChar == 0x0E) /* SO */
            {
                bSo = sal_True;
                eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_SO;
            }
            else if (nChar == 0x1B) /* ESC */
                eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC;
            else if (nChar < 0x80)
                if (pDestBufPtr != pDestBufEnd)
                    *pDestBufPtr++ = (sal_Unicode) nChar;
                else
                    goto no_output;
            else
            {
                bUndefined = sal_False;
                goto bad_input;
            }
            break;

        case IMPL_ISO_2022_CN_TO_UNICODE_STATE_SO:
            if (nChar == 0x0F) /* SI */
            {
                bSo = sal_False;
                eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII;
            }
            else if (nChar == 0x1B) /* ESC */
                eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC;
            else if (nChar >= 0x21 && nChar <= 0x7E)
            {
                /* First byte of a two-byte character; remember it. */
                nRow = nChar;
                eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_SO_2;
            }
            else
            {
                bUndefined = sal_False;
                goto bad_input;
            }
            break;

        case IMPL_ISO_2022_CN_TO_UNICODE_STATE_SO_2:
            if (nChar >= 0x21 && nChar <= 0x7E)
                if (b116431)
                {
                    nPlane = 0;
                    goto transform;
                }
                else
                {
                    /* The GB 2312 table is indexed with both bytes offset by
                       0x80.  The offsets are applied to local copies only:
                       nRow must keep its original value, because when the
                       destination buffer is full the state stays SO_2, nRow
                       is saved to the context, and this same trail byte is
                       processed again on the next call.  Modifying nRow here
                       would apply the offset twice on that retry. */
                    sal_uInt16 nUnicode = 0;
                    sal_uInt32 nLead = nRow + 0x80;
                    sal_uInt32 nTrail = nChar + 0x80;
                    sal_uInt32 nFirst = pGb2312Data[nLead].mnTrailStart;
                    if (nTrail >= nFirst
                        && nTrail <= pGb2312Data[nLead].mnTrailEnd)
                        nUnicode = pGb2312Data[nLead].
                                       mpToUniTrailTab[nTrail - nFirst];
                    /* A zero entry (or a trail byte outside the row's range)
                       is treated as "no mapping". */
                    if (nUnicode != 0)
                        if (pDestBufPtr != pDestBufEnd)
                        {
                            *pDestBufPtr++ = (sal_Unicode) nUnicode;
                            eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_SO;
                        }
                        else
                            goto no_output;
                    else
                        goto bad_input;
                }
            else
            {
                bUndefined = sal_False;
                goto bad_input;
            }
            break;

        case IMPL_ISO_2022_CN_TO_UNICODE_STATE_116432:
            /* First byte after ESC N. */
            if (nChar >= 0x21 && nChar <= 0x7E)
            {
                nRow = nChar;
                eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_116432_2;
            }
            else
            {
                bUndefined = sal_False;
                goto bad_input;
            }
            break;

        case IMPL_ISO_2022_CN_TO_UNICODE_STATE_116432_2:
            /* Second byte after ESC N. */
            if (nChar >= 0x21 && nChar <= 0x7E)
            {
                nPlane = 1;
                goto transform;
            }
            else
            {
                bUndefined = sal_False;
                goto bad_input;
            }
            break;

        case IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC:
            if (nChar == 0x24) /* $ */
                eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC_DOLLAR;
            else if (nChar == 0x4E) /* N */
                eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_116432;
            else
            {
                bUndefined = sal_False;
                goto bad_input;
            }
            break;

        case IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC_DOLLAR:
            if (nChar == 0x29) /* ) */
                eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC_DOLLAR_RPAREN;
            else if (nChar == 0x2A) /* * */
                eState
                    = IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC_DOLLAR_ASTERISK;
            else
            {
                bUndefined = sal_False;
                goto bad_input;
            }
            break;

        case IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC_DOLLAR_RPAREN:
            /* After the sequence, return to whichever of ASCII / SO was
               active before the ESC, as tracked by bSo. */
            if (nChar == 0x41) /* A */
            {
                b116431 = sal_False;
                eState = bSo ? IMPL_ISO_2022_CN_TO_UNICODE_STATE_SO :
                               IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII;
            }
            else if (nChar == 0x47) /* G */
            {
                b116431 = sal_True;
                eState = bSo ? IMPL_ISO_2022_CN_TO_UNICODE_STATE_SO :
                               IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII;
            }
            else
            {
                bUndefined = sal_False;
                goto bad_input;
            }
            break;

        case IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC_DOLLAR_ASTERISK:
            if (nChar == 0x48) /* H */
                eState = bSo ? IMPL_ISO_2022_CN_TO_UNICODE_STATE_SO :
                               IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII;
            else
            {
                bUndefined = sal_False;
                goto bad_input;
            }
            break;
        }
        continue;

    transform:
        /* CNS 11643 lookup: plane -> row -> cell.  Reached with nPlane set,
           nRow holding the first byte and nChar the second byte. */
        {
            sal_Int32 nPlaneOffset = pCns116431992PlaneOffsets[nPlane];
            if (nPlaneOffset == -1)
                goto bad_input;
            else
            {
                sal_Int32 nOffset
                    = pCns116431992RowOffsets[nPlaneOffset + (nRow - 0x21)];
                if (nOffset == -1)
                    goto bad_input;
                else
                {
                    /* The row's first table word packs the covered cell
                       range: low byte = first cell, high byte = last cell. */
                    sal_uInt32 nFirstLast = pCns116431992Data[nOffset++];
                    sal_uInt32 nFirst = nFirstLast & 0xFF;
                    sal_uInt32 nLast = nFirstLast >> 8;
                    nChar -= 0x20;
                    if (nChar >= nFirst && nChar <= nLast)
                    {
                        sal_uInt32 nUnicode
                            = pCns116431992Data[nOffset + (nChar - nFirst)];
                        if (nUnicode == 0xFFFF)
                            goto bad_input;
                        else if (ImplIsHighSurrogate(nUnicode))
                            if (pDestBufEnd - pDestBufPtr >= 2)
                            {
                                /* Skip past this row's cells to a second
                                   run, read its own first-cell word, and
                                   take the second unit of the pair from
                                   there. */
                                nOffset += nLast - nFirst + 1;
                                nFirst = pCns116431992Data[nOffset++];
                                *pDestBufPtr++ = (sal_Unicode) nUnicode;
                                *pDestBufPtr++
                                    = (sal_Unicode)
                                          pCns116431992Data[
                                              nOffset + (nChar - nFirst)];
                            }
                            else
                                goto no_output;
                        else
                            if (pDestBufPtr != pDestBufEnd)
                                *pDestBufPtr++ = (sal_Unicode) nUnicode;
                            else
                                goto no_output;
                    }
                    else
                        goto bad_input;
                    eState = bSo ? IMPL_ISO_2022_CN_TO_UNICODE_STATE_SO :
                                   IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII;
                }
            }
            continue;
        }

    bad_input:
        /* NOTE(review): recovery resets eState and b116431 but leaves bSo
           untouched, so a later escape sequence or ESC N may return to state
           SO if SO was active when the error occurred -- confirm that this
           is intended. */
        switch (ImplHandleBadInputTextToUnicodeConversion(
                    bUndefined, sal_True, 0, nFlags, &pDestBufPtr, pDestBufEnd,
                    &nInfo))
        {
        case IMPL_BAD_INPUT_STOP:
            eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII;
            b116431 = sal_False;
            break;

        case IMPL_BAD_INPUT_CONTINUE:
            eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII;
            b116431 = sal_False;
            continue;

        case IMPL_BAD_INPUT_NO_OUTPUT:
            goto no_output;
        }
        /* Only IMPL_BAD_INPUT_STOP reaches here: leave the loop without
           counting the offending byte in nConverted. */
        break;

    no_output:
        /* Destination is full.  eState and nRow are left as they were so the
           current byte can be processed again on a later call; it is not
           counted in nConverted. */
        --pSrcBuf;
        nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
        break;
    }

    /* Input ended in the middle of a multi-byte or escape sequence and no
       error / buffer-full condition was already reported.
       NOTE(review): the comparison assumes ASCII and SO are the two lowest
       enumerators of ImplIso2022CnToUnicodeState -- confirm against the
       enum declaration. */
    if (eState > IMPL_ISO_2022_CN_TO_UNICODE_STATE_SO
        && (nInfo & (RTL_TEXTTOUNICODE_INFO_ERROR
                         | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL))
               == 0)
    {
        if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0)
            /* More input may follow: report that the source was too short. */
            nInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
        else
            /* Flushing: the truncated sequence is handled as bad input. */
            switch (ImplHandleBadInputTextToUnicodeConversion(
                        sal_False, sal_True, 0, nFlags, &pDestBufPtr, pDestBufEnd,
                        &nInfo))
            {
            case IMPL_BAD_INPUT_STOP:
            case IMPL_BAD_INPUT_CONTINUE:
                eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII;
                b116431 = sal_False;
                break;

            case IMPL_BAD_INPUT_NO_OUTPUT:
                nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
                break;
            }
    }

    /* Persist the decoder state for the next call. */
    if (pContext)
    {
        ((ImplIso2022CnToUnicodeContext *) pContext)->m_eState = eState;
        ((ImplIso2022CnToUnicodeContext *) pContext)->m_nRow = nRow;
        ((ImplIso2022CnToUnicodeContext *) pContext)->m_bSo = bSo;
        ((ImplIso2022CnToUnicodeContext *) pContext)->m_b116431 = b116431;
    }
    if (pInfo)
        *pInfo = nInfo;
    if (pSrcCvtBytes)
        *pSrcCvtBytes = nConverted;

    return pDestBufPtr - pDestBuf;
}