You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ti...@apache.org on 2015/04/24 23:39:39 UTC

svn commit: r1675962 - in /pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser: BaseParser.java COSParser.java

Author: tilman
Date: Fri Apr 24 21:39:39 2015
New Revision: 1675962

URL: http://svn.apache.org/r1675962
Log:
PDFBOX-2768: delete sequential parseCOSStream, no longer needed

Modified:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java?rev=1675962&r1=1675961&r2=1675962&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java Fri Apr 24 21:39:39 2015
@@ -38,7 +38,6 @@ import org.apache.pdfbox.cos.COSNumber;
 import org.apache.pdfbox.cos.COSObject;
 import org.apache.pdfbox.cos.COSStream;
 import org.apache.pdfbox.cos.COSString;
-import org.apache.pdfbox.io.IOUtils;
 import org.apache.pdfbox.io.PushBackInputStream;
 import org.apache.pdfbox.cos.COSObjectKey;
 import static org.apache.pdfbox.util.Charsets.ISO_8859_1;
@@ -350,172 +349,6 @@ public abstract class BaseParser impleme
         return obj;
     }
 
-    /**
-     * This will read a COSStream from the input stream.
-     *
-     * @param dic The dictionary that goes with this stream.
-     *
-     * @return The parsed pdf stream.
-     *
-     * @throws IOException If there is an error reading the stream.
-     */
-    protected COSStream parseCOSStream( COSDictionary dic ) throws IOException
-    {
-        COSStream stream = createCOSStream( dic );
-        OutputStream out = null;
-        try
-        {
-            readExpectedString(STREAM_STRING);
-
-            skipWhiteSpaces();
-
-            // This needs to be dic.getItem because when we are parsing, the underlying object
-            // might still be null.
-            COSBase streamLength = dic.getItem(COSName.LENGTH);
-
-            //Need to keep track of the
-            out = stream.createFilteredStream( streamLength );
-
-            // try to read stream length - even if it is an indirect object
-            int length = -1;
-            if ( streamLength instanceof COSNumber )
-            {
-                length = ( (COSNumber) streamLength).intValue();
-            }
-            if ( length == -1 )
-            {
-                // Couldn't determine length from dict: just
-                // scan until we find endstream:
-                readUntilEndStream( new EndstreamOutputStream(out) );
-            }
-            else
-            {
-                // Copy length bytes over:
-                int left = length;
-                while ( left > 0 )
-                {
-                    final int chunk = Math.min( left, STRMBUFLEN );
-                    final int readCount = pdfSource.read( strmBuf, 0, chunk );
-                    if ( readCount == -1 )
-                    {
-                        break;
-                    }
-                    out.write( strmBuf, 0, readCount );
-                    left -= readCount;
-                }
-                
-                // in order to handle broken documents we test if 'endstream' is reached
-                // if not, length value possibly was wrong, fall back to scanning for endstream
-                
-                // fill buffer with next bytes and test for 'endstream' (with leading whitespaces)
-                int readCount = pdfSource.read( strmBuf, 0, 20 );
-                if ( readCount > 0 )
-                {
-                    boolean foundEndstream    = false;
-                    int     nextEndstreamCIdx = 0;
-                    for ( int cIdx = 0; cIdx < readCount; cIdx++ )
-                    {
-                        final int ch = strmBuf[ cIdx ] & 0xff; 
-                        if ( ch == ENDSTREAM[ nextEndstreamCIdx ] )
-                        {
-                            if ( ++nextEndstreamCIdx >= ENDSTREAM.length )
-                            {
-                                foundEndstream = true;
-                                break;
-                            }
-                        }
-                        else if ( ( nextEndstreamCIdx > 0 ) || ( ! isWhitespace( ch ) ) )
-                        {
-                            // not found
-                            break;
-                        }
-                    }
-                    
-                    // push back test bytes
-                    pdfSource.unread( strmBuf, 0, readCount );
-                    
-                    // if 'endstream' was not found fall back to scanning
-                    if ( ! foundEndstream )
-                    {
-                        LOG.warn("Specified stream length " + length 
-                                + " is wrong. Fall back to reading stream until 'endstream'.");
-                        
-                        // push back all read stream bytes
-                        // we got a buffered stream wrapper around filteredStream thus first flush to underlying stream
-                        out.flush();
-                        InputStream writtenStreamBytes = stream.getFilteredStream();
-                        ByteArrayOutputStream bout = new ByteArrayOutputStream( length );
-
-                        IOUtils.copy(writtenStreamBytes, bout);
-                        IOUtils.closeQuietly(writtenStreamBytes);
-                        try
-                        {
-                            pdfSource.unread( bout.toByteArray() );
-                        }
-                        catch ( IOException ioe )
-                        {
-                            throw new IOException( "Could not push back " + bout.size() +
-                                                   " bytes in order to reparse stream. " +
-                                                   "Try increasing push back buffer using system property " +
-                                                   PROP_PUSHBACK_SIZE, ioe );
-                        }
-                        // close and create new filtered stream
-                        IOUtils.closeQuietly(out);
-                        out = stream.createFilteredStream();
-                        // scan until we find endstream:
-                        readUntilEndStream( new EndstreamOutputStream(out) );
-                    }
-                }
-            }
-            
-            skipSpaces();
-            String endStream = readString();
-
-            if (!endStream.equals(ENDSTREAM_STRING))
-            {
-                /*
-                 * Sometimes stream objects don't have an endstream tag so readUntilEndStream(out)
-                 * also can stop on endobj tags. If that's the case we need to make sure to unread
-                 * the endobj so parseObject() can handle that case normally.
-                 */
-                if (endStream.startsWith(ENDOBJ_STRING))
-                {
-                    byte[] endobjarray = endStream.getBytes(ISO_8859_1);
-                    pdfSource.unread(endobjarray);
-                }
-                /*
-                 * Some PDF files don't contain a new line after endstream so we
-                 * need to make sure that the next object number is getting read separately
-                 * and not part of the endstream keyword. Ex. Some files would have "endstream8"
-                 * instead of "endstream"
-                 */
-                else if(endStream.startsWith(ENDSTREAM_STRING))
-                {
-                    String extra = endStream.substring(9, endStream.length());
-                    byte[] array = extra.getBytes(ISO_8859_1);
-                    pdfSource.unread(array);
-                }
-                else
-                {
-                    /*
-                     * If for some reason we get something else here, Read until we find the next
-                     * "endstream"
-                     */
-                    readUntilEndStream( new EndstreamOutputStream(out) );
-                    readExpectedString(ENDSTREAM_STRING);
-                }
-            }
-        }
-        finally
-        {
-            if( out != null )
-            {
-                out.close();
-            }
-        }
-        return stream;
-    }
-
     protected void skipWhiteSpaces() throws IOException
     {
         //PDF Ref 3.2.7 A stream must be followed by either

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java?rev=1675962&r1=1675961&r2=1675962&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java Fri Apr 24 21:39:39 2015
@@ -891,7 +891,6 @@ public class COSParser extends BaseParse
      * @throws IOException if an error occurred reading the stream, like problems with reading length attribute, stream
      * does not end with 'endstream' after data read, stream too short etc.
      */
-    @Override
     protected COSStream parseCOSStream(COSDictionary dic) throws IOException
     {
         final COSStream stream = createCOSStream(dic);