public Object parseNextToken()

in pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java [96:319]


    /**
     * Parses the next token from the content stream.
     *
     * <p>After {@code skipSpaces()}, the first pending character selects how the token is read:</p>
     * <ul>
     * <li>{@code <} - {@code parseCOSDictionary(true)} if a second {@code <} follows, otherwise
     * {@code parseCOSString()}</li>
     * <li>{@code [} - {@code parseCOSArray()}</li>
     * <li>{@code (} - {@code parseCOSString()}</li>
     * <li>{@code /} - {@code parseCOSName()}</li>
     * <li>{@code n}, {@code t}, {@code f} - the word is read with {@code readString()};
     * {@code null}, {@code true} and {@code false} map to {@code COSNull.NULL},
     * {@code COSBoolean.TRUE} and {@code COSBoolean.FALSE}, any other word becomes an
     * {@code Operator}</li>
     * <li>a digit, {@code -}, {@code +} or {@code .} - a number obtained from
     * {@code COSNumber.get(String)}; an isolated {@code +} yields {@code COSNull.NULL}</li>
     * <li>{@code B} - an {@code Operator}; for the begin-inline-image operator the following
     * name/value pairs and the image data are attached to it</li>
     * <li>{@code I} - must be the begin-inline-image-data operator; the raw bytes up to the
     * terminating {@code EI} are attached to the returned {@code Operator}</li>
     * <li>{@code ]} - a stray closing bracket is consumed and {@code COSNull.NULL} is returned</li>
     * <li>anything else - an {@code Operator} built from {@code readOperator()}</li>
     * </ul>
     *
     * @return the next token, or {@code null} if the source is already closed, if the end of the
     * source has been reached, if an invalid dictionary or array was encountered (the parser is
     * closed in the latter two cases), or if no operator text could be read.
     * @throws IOException if a token starting with {@code I} is not the begin-inline-image-data
     * operator (the parser is closed before throwing), or if one of the underlying read or parse
     * calls throws.
     */
    public Object parseNextToken() throws IOException
    {
        if (source.isClosed())
        {
            return null;
        }
        skipSpaces();
        if (source.isEOF())
        {
            close();
            return null;
        }
        char c = (char) source.peek();
        switch (c)
        {
            case '<':
                // dictionary or string, decided by the character after the bracket
                return parseAngleBracketToken();
            case '[':
                // array
                try
                {
                    return parseCOSArray();
                }
                catch (IOException exception)
                {
                    // parsing stops here, the cause is passed on to the log
                    LOG.warn("Stop reading invalid array from content stream at offset {}",
                            source.getPosition(), exception);
                    close();
                    return null;
                }
            case '(':
                // string
                return parseCOSString();
            case '/':
                // name
                return parseCOSName();
            case 'n':
                // null, or an operator starting with 'n'
                String nullString = readString();
                if (nullString.equals("null"))
                {
                    return COSNull.NULL;
                }
                return Operator.getOperator(nullString);
            case 't':
            case 'f':
                // true / false, or an operator starting with 't' or 'f'
                String next = readString();
                if (next.equals("true"))
                {
                    return COSBoolean.TRUE;
                }
                if (next.equals("false"))
                {
                    return COSBoolean.FALSE;
                }
                return Operator.getOperator(next);
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
            case '-':
            case '+':
            case '.':
                return parseNumberToken(c);
            case 'B':
                return parseInlineImageBegin();
            case 'I':
                //Special case for ID operator
                return parseInlineImageData();
            case ']':
                // some ']' around without its previous '['
                // this means a PDF is somewhat corrupt but we will continue to parse.
                source.read();

                // must be a better solution than null...
                return COSNull.NULL;
            default:
                // we must be an operator
                String operator = readOperator().trim();
                if (!operator.isEmpty())
                {
                    return Operator.getOperator(operator);
                }
        }
        return null;
    }

    /**
     * Handles a token starting with '&lt;': "&lt;&lt;" is parsed with
     * parseCOSDictionary(true), anything else with parseCOSString().
     *
     * @return the parsed token, or null if the dictionary could not be parsed (the parser is
     * closed in that case).
     * @throws IOException if reading from the source or parsing the string fails.
     */
    private Object parseAngleBracketToken() throws IOException
    {
        // pull off first left bracket
        source.read();

        // check for second left bracket
        char second = (char) source.peek();

        // put back first bracket
        source.rewind(1);

        if (second != '<')
        {
            return parseCOSString();
        }
        try
        {
            return parseCOSDictionary(true);
        }
        catch (IOException exception)
        {
            // parsing stops here, the cause is passed on to the log
            LOG.warn("Stop reading invalid dictionary from content stream at offset {}",
                    source.getPosition(), exception);
            close();
            return null;
        }
    }

    /**
     * Reads a numeric token whose first character is still pending in the source.
     *
     * @param first the already peeked first character: a digit, '-', '+' or '.'.
     * @return the result of COSNumber.get(String) for the collected text, or COSNull.NULL if
     * the token consists of an isolated '+'.
     * @throws IOException if reading from the source or converting the number fails.
     */
    private Object parseNumberToken(char first) throws IOException
    {
        StringBuilder buf = new StringBuilder();
        buf.append(first);
        source.read();

        // Ignore double negative (this is consistent with Adobe Reader)
        if (first == '-' && source.peek() == '-')
        {
            source.read();
        }

        // at most one '.' is accepted; if the token starts with one, no further dot is taken
        boolean dotNotRead = first != '.';
        while (true)
        {
            char current = (char) source.peek();
            boolean acceptedDot = dotNotRead && current == '.';
            if (!Character.isDigit(current) && !acceptedDot && current != '-')
            {
                break;
            }
            if (current != '-')
            {
                // PDFBOX-4064: ignore "-" in the middle of a number
                buf.append(current);
            }
            source.read();
            if (acceptedDot)
            {
                dotNotRead = false;
            }
        }

        String s = buf.toString();
        if ("+".equals(s))
        {
            // PDFBOX-5906
            LOG.warn("isolated '+' is ignored");
            return COSNull.NULL;
        }
        return COSNumber.get(s);
    }

    /**
     * Reads an operator starting with 'B'. For the begin-inline-image operator the following
     * name/value pairs are collected as image parameters and, if the token that ends the pairs
     * is an operator, its image data is copied to the returned operator.
     *
     * @return the operator that was read.
     * @throws IOException if reading one of the tokens fails.
     */
    private Operator parseInlineImageBegin() throws IOException
    {
        String nextOperator = readString();
        Operator beginImageOP = Operator.getOperator(nextOperator);
        if (!nextOperator.equals(OperatorName.BEGIN_INLINE_IMAGE))
        {
            return beginImageOP;
        }

        COSDictionary imageParams = new COSDictionary();
        beginImageOP.setImageParameters(imageParams);
        Object nextToken;
        while ((nextToken = parseNextToken()) instanceof COSName)
        {
            Object value = parseNextToken();
            if (!(value instanceof COSBase))
            {
                // the source may have been closed by the nested call, so guard getPosition()
                LOG.warn("Unexpected token in inline image dictionary at offset {}",
                        source.isClosed() ? "EOF" : source.getPosition());
                break;
            }
            imageParams.setItem((COSName) nextToken, (COSBase) value);
        }
        //final token will be the image data, maybe??
        if (nextToken instanceof Operator)
        {
            Operator imageData = (Operator) nextToken;
            if (imageData.getImageData() == null || imageData.getImageData().length == 0)
            {
                LOG.warn("empty inline image at stream offset {}",
                        source.getPosition());
            }
            beginImageOP.setImageData(imageData.getImageData());
        }
        return beginImageOP;
    }

    /**
     * Reads the begin-inline-image-data operator and the raw bytes following it up to the
     * terminating "EI".
     *
     * @return the operator with the collected bytes set as its image data.
     * @throws IOException if the two characters read are not the expected operator (the parser
     * is closed before throwing), or if reading from the source fails.
     */
    private Operator parseInlineImageData() throws IOException
    {
        String id = Character.toString((char) source.read()) + (char) source.read();
        if (!id.equals(OperatorName.BEGIN_INLINE_IMAGE_DATA))
        {
            // remember the position before closing, it is needed for the message
            long currentPosition = source.getPosition();
            close();
            throw new IOException( "Error: Expected operator 'ID' actual='" + id +
                    "' at stream offset " + currentPosition);
        }
        ByteArrayOutputStream imageData = new ByteArrayOutputStream();
        if (isWhitespace())
        {
            //pull off the whitespace character
            source.read();
        }
        int lastByte = source.read();
        int currentByte = source.read();
        // PDF spec is kinda unclear about this. Should a whitespace
        // always appear before EI? Not sure, so that we just read
        // until EI<whitespace>.
        // Be aware not all kind of whitespaces are allowed here. see PDFBOX-1561
        while (!(lastByte == 'E' &&
                 currentByte == 'I' &&
                 hasNextSpaceOrReturn() &&
                 hasNoFollowingBinData()) &&
               !isEOF())
        {
            // lastByte is only written once it is known not to be the 'E' of the final "EI"
            imageData.write(lastByte);
            lastByte = currentByte;
            currentByte = source.read();
        }
        // the EI operator isn't unread, as it won't be processed anyway
        Operator beginImageDataOP = Operator
                .getOperator(OperatorName.BEGIN_INLINE_IMAGE_DATA);
        // save the image data to the operator, so that it can be accessed later
        beginImageDataOP.setImageData(imageData.toByteArray());
        return beginImageDataOP;
    }