You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ti...@apache.org on 2015/04/24 23:39:39 UTC
svn commit: r1675962 - in /pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser: BaseParser.java COSParser.java
Author: tilman
Date: Fri Apr 24 21:39:39 2015
New Revision: 1675962
URL: http://svn.apache.org/r1675962
Log:
PDFBOX-2768: delete sequential parseCOSStream, no longer needed
Modified:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java?rev=1675962&r1=1675961&r2=1675962&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java Fri Apr 24 21:39:39 2015
@@ -38,7 +38,6 @@ import org.apache.pdfbox.cos.COSNumber;
import org.apache.pdfbox.cos.COSObject;
import org.apache.pdfbox.cos.COSStream;
import org.apache.pdfbox.cos.COSString;
-import org.apache.pdfbox.io.IOUtils;
import org.apache.pdfbox.io.PushBackInputStream;
import org.apache.pdfbox.cos.COSObjectKey;
import static org.apache.pdfbox.util.Charsets.ISO_8859_1;
@@ -350,172 +349,6 @@ public abstract class BaseParser impleme
return obj;
}
- /**
- * This will read a COSStream from the input stream.
- *
- * @param dic The dictionary that goes with this stream.
- *
- * @return The parsed pdf stream.
- *
- * @throws IOException If there is an error reading the stream.
- */
- protected COSStream parseCOSStream( COSDictionary dic ) throws IOException
- {
- COSStream stream = createCOSStream( dic );
- OutputStream out = null;
- try
- {
- readExpectedString(STREAM_STRING);
-
- skipWhiteSpaces();
-
- // This needs to be dic.getItem because when we are parsing, the underlying object
- // might still be null.
- COSBase streamLength = dic.getItem(COSName.LENGTH);
-
- //Need to keep track of the
- out = stream.createFilteredStream( streamLength );
-
- // try to read stream length - even if it is an indirect object
- int length = -1;
- if ( streamLength instanceof COSNumber )
- {
- length = ( (COSNumber) streamLength).intValue();
- }
- if ( length == -1 )
- {
- // Couldn't determine length from dict: just
- // scan until we find endstream:
- readUntilEndStream( new EndstreamOutputStream(out) );
- }
- else
- {
- // Copy length bytes over:
- int left = length;
- while ( left > 0 )
- {
- final int chunk = Math.min( left, STRMBUFLEN );
- final int readCount = pdfSource.read( strmBuf, 0, chunk );
- if ( readCount == -1 )
- {
- break;
- }
- out.write( strmBuf, 0, readCount );
- left -= readCount;
- }
-
- // in order to handle broken documents we test if 'endstream' is reached
- // if not, length value possibly was wrong, fall back to scanning for endstream
-
- // fill buffer with next bytes and test for 'endstream' (with leading whitespaces)
- int readCount = pdfSource.read( strmBuf, 0, 20 );
- if ( readCount > 0 )
- {
- boolean foundEndstream = false;
- int nextEndstreamCIdx = 0;
- for ( int cIdx = 0; cIdx < readCount; cIdx++ )
- {
- final int ch = strmBuf[ cIdx ] & 0xff;
- if ( ch == ENDSTREAM[ nextEndstreamCIdx ] )
- {
- if ( ++nextEndstreamCIdx >= ENDSTREAM.length )
- {
- foundEndstream = true;
- break;
- }
- }
- else if ( ( nextEndstreamCIdx > 0 ) || ( ! isWhitespace( ch ) ) )
- {
- // not found
- break;
- }
- }
-
- // push back test bytes
- pdfSource.unread( strmBuf, 0, readCount );
-
- // if 'endstream' was not found fall back to scanning
- if ( ! foundEndstream )
- {
- LOG.warn("Specified stream length " + length
- + " is wrong. Fall back to reading stream until 'endstream'.");
-
- // push back all read stream bytes
- // we got a buffered stream wrapper around filteredStream thus first flush to underlying stream
- out.flush();
- InputStream writtenStreamBytes = stream.getFilteredStream();
- ByteArrayOutputStream bout = new ByteArrayOutputStream( length );
-
- IOUtils.copy(writtenStreamBytes, bout);
- IOUtils.closeQuietly(writtenStreamBytes);
- try
- {
- pdfSource.unread( bout.toByteArray() );
- }
- catch ( IOException ioe )
- {
- throw new IOException( "Could not push back " + bout.size() +
- " bytes in order to reparse stream. " +
- "Try increasing push back buffer using system property " +
- PROP_PUSHBACK_SIZE, ioe );
- }
- // close and create new filtered stream
- IOUtils.closeQuietly(out);
- out = stream.createFilteredStream();
- // scan until we find endstream:
- readUntilEndStream( new EndstreamOutputStream(out) );
- }
- }
- }
-
- skipSpaces();
- String endStream = readString();
-
- if (!endStream.equals(ENDSTREAM_STRING))
- {
- /*
- * Sometimes stream objects don't have an endstream tag so readUntilEndStream(out)
- * also can stop on endobj tags. If that's the case we need to make sure to unread
- * the endobj so parseObject() can handle that case normally.
- */
- if (endStream.startsWith(ENDOBJ_STRING))
- {
- byte[] endobjarray = endStream.getBytes(ISO_8859_1);
- pdfSource.unread(endobjarray);
- }
- /*
- * Some PDF files don't contain a new line after endstream so we
- * need to make sure that the next object number is getting read separately
- * and not part of the endstream keyword. Ex. Some files would have "endstream8"
- * instead of "endstream"
- */
- else if(endStream.startsWith(ENDSTREAM_STRING))
- {
- String extra = endStream.substring(9, endStream.length());
- byte[] array = extra.getBytes(ISO_8859_1);
- pdfSource.unread(array);
- }
- else
- {
- /*
- * If for some reason we get something else here, Read until we find the next
- * "endstream"
- */
- readUntilEndStream( new EndstreamOutputStream(out) );
- readExpectedString(ENDSTREAM_STRING);
- }
- }
- }
- finally
- {
- if( out != null )
- {
- out.close();
- }
- }
- return stream;
- }
-
protected void skipWhiteSpaces() throws IOException
{
//PDF Ref 3.2.7 A stream must be followed by either
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java?rev=1675962&r1=1675961&r2=1675962&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java Fri Apr 24 21:39:39 2015
@@ -891,7 +891,6 @@ public class COSParser extends BaseParse
* @throws IOException if an error occurred reading the stream, like problems with reading length attribute, stream
* does not end with 'endstream' after data read, stream too short etc.
*/
- @Override
protected COSStream parseCOSStream(COSDictionary dic) throws IOException
{
final COSStream stream = createCOSStream(dic);