private boolean hasNoFollowingBinData()

in pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java [329:401]


    /**
     * Looks ahead at the bytes that follow a candidate "EI" token and decides whether they look
     * like ordinary content stream syntax rather than binary inline image data (PDFBOX-1164).
     * <p>
     * Up to {@code MAX_BIN_CHAR_TEST_LENGTH} bytes are read from {@code source} into
     * {@code binCharTestArr}; if any bytes were read, the source is rewound by that same amount
     * before returning. The following heuristics are applied to the bytes read:
     * <ul>
     * <li>a byte above 0x7f, or a control byte other than NUL, TAB, LF or CR, means binary
     * data;</li>
     * <li>if a first token (run of non-blank bytes terminated by a blank) is found, it must be
     * {@code Q}, {@code EMC}, {@code S} or a number (PDFBOX-3742, PDFBOX-2376, PDFBOX-3784,
     * PDFBOX-5957);</li>
     * <li>if the buffer was filled completely (i.e. not close to EOF), a first token longer than
     * three bytes that is not a number means binary data.</li>
     * </ul>
     * If nothing could be read, no binary data is assumed.
     *
     * @return {@code true} if the bytes after "EI" do not look like binary data, {@code false}
     * if the "EI" is assumed to be in the middle of the inline image data.
     * @throws IOException if accessing the source fails.
     */
    private boolean hasNoFollowingBinData() throws IOException
    {
        // as suggested in PDFBOX-1164
        final int readBytes = source.read(binCharTestArr, 0, MAX_BIN_CHAR_TEST_LENGTH);
        // ISO-8859-1 maps every byte to exactly one char, so decoding never depends on the
        // platform default charset
        final java.nio.charset.Charset latin1 = java.nio.charset.StandardCharsets.ISO_8859_1;
        boolean noBinData = true;
        int startOpIdx = -1;
        int endOpIdx = -1;
        String s = "";

        // only the bytes delivered by this read are meaningful, anything beyond them in the
        // buffer was not filled by it (readBytes may also be <= 0 when nothing could be read)
        LOG.debug("String after EI: '{}'",
                () -> new String(binCharTestArr, 0, Math.max(readBytes, 0), latin1));

        if (readBytes > 0)
        {
            for (int bIdx = 0; bIdx < readBytes; bIdx++)
            {
                final byte b = binCharTestArr[bIdx];
                // bytes are signed: "b < 0x09" also catches everything > 0x7f
                if (b != 0 && b < 0x09 || b > 0x0a && b < 0x20 && b != 0x0d)
                {
                    // control character or > 0x7f -> we have binary data
                    noBinData = false;
                    break;
                }
                final boolean blank = b == 0 || b == 0x09 || b == 0x0a || b == 0x0d || b == 0x20;
                if (startOpIdx == -1)
                {
                    // find the start of a PDF operator
                    if (!blank)
                    {
                        startOpIdx = bIdx;
                    }
                }
                else if (endOpIdx == -1 && blank)
                {
                    // first blank after the operator start marks its (exclusive) end
                    endOpIdx = bIdx;
                }
            }

            // PDFBOX-3742: just assuming that 1-3 non blanks is a PDF operator isn't enough
            if (noBinData && endOpIdx != -1 && startOpIdx != -1)
            {
                // usually, the operator here is Q, sometimes EMC (PDFBOX-2376), S (PDFBOX-3784),
                // or a number (PDFBOX-5957)
                s = new String(binCharTestArr, startOpIdx, endOpIdx - startOpIdx, latin1);
                if (!"Q".equals(s) && !"EMC".equals(s) && !"S".equals(s) &&
                    !s.matches("^\\d*\\.?\\d*$"))
                {
                    // operator is not Q, not EMC, not S, nor a number -> assume binary data
                    noBinData = false;
                }
            }

            // only if not close to EOF
            if (noBinData && startOpIdx != -1 && readBytes == MAX_BIN_CHAR_TEST_LENGTH)
            {
                if (endOpIdx == -1)
                {
                    // the token runs up to the end of the buffer without a terminating blank
                    endOpIdx = MAX_BIN_CHAR_TEST_LENGTH;
                    s = new String(binCharTestArr, startOpIdx, endOpIdx - startOpIdx, latin1);
                }
                LOG.debug("startOpIdx: {} endOpIdx: {} s = '{}'", startOpIdx, endOpIdx, s);
                // look for token of 3 chars max or a number
                if (endOpIdx - startOpIdx > 3 && !s.matches("^\\d*\\.?\\d*$"))
                {
                    noBinData = false; // "operator" too long, assume binary data
                }
            }
            // undo the look-ahead read
            source.rewind(readBytes);
        }
        if (!noBinData)
        {
            LOG.warn(
                    "ignoring 'EI' assumed to be in the middle of inline image at stream offset {}, s = '{}'",
                    source.getPosition(), s);
        }
        return noBinData;
    }