You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2019/06/30 13:43:51 UTC

svn commit: r1862347 - in /pdfbox/trunk/preflight/src: main/java/org/apache/pdfbox/preflight/ main/java/org/apache/pdfbox/preflight/parser/ main/java/org/apache/pdfbox/preflight/process/ test/java/org/apache/pdfbox/preflight/action/pdfa1b/

Author: lehmi
Date: Sun Jun 30 13:43:51 2019
New Revision: 1862347

URL: http://svn.apache.org/viewvc?rev=1862347&view=rev
Log:
PDFBOX-4588: move stream length validation to PreflightParser

Modified:
    pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightContext.java
    pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java
    pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/process/StreamValidationProcess.java
    pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/action/pdfa1b/AbstractTestAction.java

Modified: pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightContext.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightContext.java?rev=1862347&r1=1862346&r2=1862347&view=diff
==============================================================================
--- pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightContext.java (original)
+++ pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightContext.java Sun Jun 30 13:43:51 2019
@@ -35,7 +35,6 @@ import org.apache.pdfbox.pdmodel.common.
 import org.apache.pdfbox.preflight.ValidationResult.ValidationError;
 import org.apache.pdfbox.preflight.font.container.FontContainer;
 import org.apache.pdfbox.preflight.graphic.ICCProfileWrapper;
-import org.apache.pdfbox.preflight.utils.DataSource;
 import org.apache.xmpbox.XMPMetadata;
 
 public class PreflightContext implements Closeable
@@ -51,11 +50,6 @@ public class PreflightContext implements
     private PreflightDocument document = null;
 
     /**
-     * The datasource to load the document from. Needed by StreamValidationProcess.
-     */
-    private DataSource dataSource = null;
-
-    /**
      * Contains all Xref/trailer objects and resolves them into single object using startxref reference.
      */
     private XrefTrailerResolver xrefTrailerResolver;
@@ -90,14 +84,12 @@ public class PreflightContext implements
      * 
      * @param dataSource
      */
-    public PreflightContext(DataSource dataSource)
+    public PreflightContext()
     {
-        this.dataSource = dataSource;
     }
 
-    public PreflightContext(DataSource dataSource, PreflightConfiguration configuration)
+    public PreflightContext(PreflightConfiguration configuration)
     {
-        this.dataSource = dataSource;
         this.config = configuration;
     }
 
@@ -147,20 +139,6 @@ public class PreflightContext implements
     }
 
     /**
-     * 
-     * @return The datasource of the pdf document
-     */
-    public DataSource getDataSource()
-    {
-        return dataSource;
-    }
-
-    public boolean isComplete()
-    {
-        return (document != null) && (dataSource != null);
-    }
-
-    /**
      * Add a FontContainer to allow TextObject validation.
      * 
      * @param cBase the COSBase for the font container.

Modified: pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java?rev=1862347&r1=1862346&r2=1862347&view=diff
==============================================================================
--- pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java (original)
+++ pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java Sun Jun 30 13:43:51 2019
@@ -70,6 +70,7 @@ import static org.apache.pdfbox.prefligh
 import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_NUMERIC_RANGE;
 import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_OBJ_DELIMITER;
 import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_STREAM_DELIMITER;
+import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_STREAM_LENGTH_INVALID;
 import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_TOO_MANY_ENTRIES;
 import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_TRAILER_EOF;
 import static org.apache.pdfbox.preflight.PreflightConstants.MAX_ARRAY_ELEMENTS;
@@ -277,7 +278,7 @@ public class PreflightParser extends PDF
      */
     protected void createContext()
     {
-        this.ctx = new PreflightContext(this.dataSource);
+        ctx = new PreflightContext();
         ctx.setDocument(preflightDocument);
         preflightDocument.setContext(ctx);
         ctx.setXrefTrailerResolver(xrefTrailerResolver);
@@ -510,9 +511,9 @@ public class PreflightParser extends PDF
     @Override
     protected COSStream parseCOSStream(COSDictionary dic) throws IOException
     {
-        checkStreamKeyWord();
+        long startOffset = checkStreamKeyWord();
         COSStream result = super.parseCOSStream(dic);
-        checkEndstreamKeyWord();
+        checkEndstreamKeyWord(dic, startOffset);
         return result;
     }
 
@@ -521,7 +522,7 @@ public class PreflightParser extends PDF
      * 
      * @throws IOException
      */
-    protected void checkStreamKeyWord() throws IOException
+    private long checkStreamKeyWord() throws IOException
     {
         String streamV = readString();
         if (!streamV.equals("stream"))
@@ -529,14 +530,24 @@ public class PreflightParser extends PDF
             addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER,
                     "Expected 'stream' keyword but found '" + streamV + "' at offset "+source.getPosition()));
         }
+        long startOffset = source.getPosition();
         int nextChar = source.read();
-        if (!((nextChar == 13 && source.peek() == 10) || nextChar == 10))
+        if (nextChar == 13 && source.peek() == 10)
+        {
+            startOffset += 2;
+        }
+        else if (nextChar == 10)
+        {
+            startOffset++;
+        }
+        else
         {
             addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER,
                     "Expected 'EOL' after the stream keyword at offset "+source.getPosition()));
         }
         // set the offset before stream
         source.seek(source.getPosition() - 7);
+        return startOffset;
     }
 
     /**
@@ -544,19 +555,52 @@ public class PreflightParser extends PDF
      * 
      * @throws IOException
      */
-    protected void checkEndstreamKeyWord() throws IOException
+    private void checkEndstreamKeyWord(COSDictionary dic, long startOffset)
+            throws IOException
     {
         source.seek(source.getPosition() - 10);
-        if (!nextIsEOL())
+        long endOffset = source.getPosition();
+        int nextChar = source.read();
+        boolean eolFound = false;
+        boolean crlfFound = false;
+        // LF found
+        if (nextChar == '\n')
+        {
+            eolFound = true;
+            // check if the LF is part of a CRLF
+            source.rewind(2);
+            if (source.read() == '\r')
+            {
+                endOffset--;
+                crlfFound = true;
+            }
+            source.read();
+        }
+        boolean addStreamLengthErrorMessage = false;
+        long actualLength = endOffset - startOffset;
+        if (!eolFound)
         {
             addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER,
                     "Expected 'EOL' before the endstream keyword at offset "+source.getPosition()+" but found '"+source.peek()+"'"));
+            addStreamLengthErrorMessage = true;
         }
         String endstreamV = readString();
         if (!endstreamV.equals("endstream"))
         {
             addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER,
                     "Expected 'endstream' keyword at offset "+source.getPosition()+" but found '" + endstreamV + "'"));
+            addStreamLengthErrorMessage = true;
+        }
+
+        int length = dic.getInt(COSName.LENGTH);
+        if (addStreamLengthErrorMessage || //
+                (length > -1 && ((!crlfFound && length - actualLength != 0)
+                        || (crlfFound && length - actualLength > 1))))
+        {
+            addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_LENGTH_INVALID,
+                    "Stream length is invalid [dic=" + dic + "; defined length=" + length
+                            + "; actual length=" + actualLength + ", starting offset="
+                            + startOffset));
         }
     }
 

Modified: pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/process/StreamValidationProcess.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/process/StreamValidationProcess.java?rev=1862347&r1=1862346&r2=1862347&view=diff
==============================================================================
--- pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/process/StreamValidationProcess.java (original)
+++ pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/process/StreamValidationProcess.java Sun Jun 30 13:43:51 2019
@@ -21,15 +21,10 @@
 
 package org.apache.pdfbox.preflight.process;
 
-import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_STREAM_DAMAGED;
-import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_STREAM_DELIMITER;
 import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_STREAM_FX_KEYS;
 import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_STREAM_INVALID_FILTER;
-import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_STREAM_LENGTH_INVALID;
 import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_STREAM_LENGTH_MISSING;
 
-import java.io.IOException;
-import java.io.InputStream;
 import java.util.List;
 
 import org.apache.pdfbox.cos.COSArray;
@@ -37,20 +32,15 @@ import org.apache.pdfbox.cos.COSBase;
 import org.apache.pdfbox.cos.COSName;
 import org.apache.pdfbox.cos.COSObject;
 import org.apache.pdfbox.cos.COSStream;
-import org.apache.pdfbox.io.IOUtils;
 import org.apache.pdfbox.pdmodel.PDDocument;
-import org.apache.pdfbox.cos.COSObjectKey;
 import org.apache.pdfbox.preflight.PreflightContext;
 import org.apache.pdfbox.preflight.ValidationResult.ValidationError;
 import org.apache.pdfbox.preflight.exception.ValidationException;
 import org.apache.pdfbox.preflight.utils.FilterHelper;
-import org.apache.pdfbox.util.Charsets;
 
 public class StreamValidationProcess extends AbstractProcess
 {
 
-    private static final String ENDSTREAM = "endstream";
-
     @Override
     public void validate(PreflightContext ctx) throws ValidationException
     {
@@ -76,8 +66,6 @@ public class StreamValidationProcess ext
         // ---- Only the Length entry is mandatory
         // ---- In a PDF/A file, F, FFilter and FDecodeParms are forbidden
         checkDictionaryEntries(context, streamObj);
-        // ---- check stream length
-        checkStreamLength(context, cObj);
         // ---- Check the Filter value(s)
         checkFilters(streamObj, context);
     }
@@ -117,184 +105,6 @@ public class StreamValidationProcess ext
         // else Filter entry is optional
     }
 
-    private boolean readUntilStream(InputStream ra) throws IOException
-    {
-        boolean search = true;
-        boolean maybe = false;
-        int lastChar = -1;
-        do
-        {
-            int c = ra.read();
-            switch (c)
-            {
-            case 's':
-                maybe = true;
-                lastChar = c;
-                break;
-            case 't':
-                if (maybe && lastChar == 's')
-                {
-                    lastChar = c;
-                }
-                else
-                {
-                    maybe = false;
-                    lastChar = -1;
-                }
-                break;
-            case 'r':
-                if (maybe && lastChar == 't')
-                {
-                    lastChar = c;
-                }
-                else
-                {
-                    maybe = false;
-                    lastChar = -1;
-                }
-                break;
-            case 'e':
-                if (maybe && lastChar == 'r')
-                {
-                    lastChar = c;
-                }
-                else
-                {
-                    maybe = false;
-                }
-                break;
-            case 'a':
-                if (maybe && lastChar == 'e')
-                {
-                    lastChar = c;
-                }
-                else
-                {
-                    maybe = false;
-                }
-                break;
-            case 'm':
-                if (maybe && lastChar == 'a')
-                {
-                    return true;
-                }
-                else
-                {
-                    maybe = false;
-                }
-                break;
-            case -1:
-                search = false;
-                break;
-            default:
-                maybe = false;
-                break;
-            }
-        } while (search);
-        return false;
-    }
-
-    protected void checkStreamLength(PreflightContext context, COSObject cObj) throws ValidationException
-    {
-        COSStream streamObj = (COSStream) cObj.getObject();
-        int length = streamObj.getInt(COSName.LENGTH);
-        InputStream ra = null;
-        try
-        {
-            ra = context.getDataSource().getInputStream();
-            Long offset = context.getDocument().getDocument().getXrefTable().get(new COSObjectKey(cObj));
-
-            // ---- go to the beginning of the object
-            long skipped = 0;
-            if (offset != null)
-            {
-                while (skipped != offset)
-                {
-                    long curSkip = ra.skip(offset - skipped);
-                    if (curSkip < 0)
-                    {
-                        addValidationError(context, new ValidationError(ERROR_SYNTAX_STREAM_DAMAGED, "Unable to skip bytes in the PDFFile to check stream length"));
-                        return;
-                    }
-                    skipped += curSkip;
-                }
-
-                // ---- go to the stream key word
-                if (readUntilStream(ra))
-                {
-                    int c = ra.read();
-                    // "stream" has to be followed by a LF or CRLF
-                    if ((c != '\r' && c != '\n') //
-                            || (c == '\r' && ra.read() != '\n'))
-                    {
-                        addValidationError(context,
-                                new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER,
-                                        "Expected 'EOL' after the stream keyword not found"));
-                        return;
-                    }
-                    // ---- Here is the true beginning of the Stream Content.
-                    // ---- Read the given length of bytes and check the 10 next bytes
-                    // ---- to see if there are endstream.
-                    byte[] buffer = new byte[1024];
-                    int nbBytesToRead = length;
-
-                    do
-                    {
-                        int cr;
-                        if (nbBytesToRead > buffer.length)
-                        {
-                            cr = ra.read(buffer);
-                        }
-                        else
-                        {
-                            cr = ra.read(buffer, 0, nbBytesToRead);
-                        }
-                        if (cr == -1)
-                        {
-                            addStreamLengthValidationError(context, cObj, length, "");
-                            return;
-                        }
-                        else
-                        {
-                            nbBytesToRead -= cr;
-                        }
-                    }
-                    while (nbBytesToRead > 0);
-
-                    int len = ENDSTREAM.length() + 2;
-                    byte[] buffer2 = new byte[len];
-                    ra.read(buffer2);
-
-                    // ---- check the content of 10 last characters
-                    // there has to be an proceeding EOL (LF or CRLF)
-                    String endStream = new String(buffer2, Charsets.ISO_8859_1);
-                    if ((buffer2[0] != '\r' && buffer2[0] != '\n') //
-                            || (buffer2[0] == '\r' && buffer2[1] != '\n') //
-                            || (buffer2[0] == '\n' && buffer2[1] != 'e') //
-                            || !endStream.contains(ENDSTREAM))
-                    {
-                        // TODO in some cases it is hard to say if the reason for this issue is a missing EOL or a wrong
-                        // stream length, see isartor-6-1-7-t03-fail-a.pdf
-                        // the implementation has to be adjusted similar to PreflightParser#parseCOSStream
-                        addStreamLengthValidationError(context, cObj, length, endStream);
-                    }
-                }
-                else
-                {
-                    addStreamLengthValidationError(context, cObj, length, "");
-                }
-            }
-        }
-        catch (IOException e)
-        {
-            throw new ValidationException("Unable to read a stream to validate: " + e.getMessage(), e);
-        }
-        finally
-        {
-            IOUtils.closeQuietly(ra);
-        }
-    }
-
     /**
      * Check dictionary entries. Only the Length entry is mandatory. In a PDF/A file, F, FFilter and FDecodeParms are
      * forbidden
@@ -322,10 +132,4 @@ public class StreamValidationProcess ext
         }
     }
     
-    private void addStreamLengthValidationError(PreflightContext context, COSObject cObj, int length, String endStream)
-    {
-        addValidationError(context, new ValidationError(ERROR_SYNTAX_STREAM_LENGTH_INVALID,
-                "Stream length is invalid [cObj=" + cObj + "; defined length=" + length + "; buffer2=" + endStream + "]"));
-    }
-
 }

Modified: pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/action/pdfa1b/AbstractTestAction.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/action/pdfa1b/AbstractTestAction.java?rev=1862347&r1=1862346&r2=1862347&view=diff
==============================================================================
--- pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/action/pdfa1b/AbstractTestAction.java (original)
+++ pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/action/pdfa1b/AbstractTestAction.java Sun Jun 30 13:43:51 2019
@@ -34,8 +34,6 @@ import org.apache.pdfbox.preflight.Prefl
 import org.apache.pdfbox.preflight.ValidationResult.ValidationError;
 import org.apache.pdfbox.preflight.action.AbstractActionManager;
 import org.apache.pdfbox.preflight.action.ActionManagerFactory;
-import org.apache.pdfbox.preflight.utils.DataSource;
-import org.apache.pdfbox.preflight.utils.FileDataSource;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 
@@ -50,10 +48,10 @@ public abstract class AbstractTestAction
      */
     protected PreflightContext createContext() throws Exception
     {
-        DataSource ds = new FileDataSource(new File("src/test/resources/pdfa-with-annotations-square.pdf"));
-        PDDocument doc = PDDocument.load(ds.getInputStream());
+        PDDocument doc = PDDocument
+                .load(new File("src/test/resources/pdfa-with-annotations-square.pdf"));
         PreflightDocument preflightDocument = new PreflightDocument(doc.getDocument(), Format.PDF_A1B);
-        PreflightContext ctx = new PreflightContext(ds);
+        PreflightContext ctx = new PreflightContext();
         ctx.setDocument(preflightDocument);
         preflightDocument.setContext(ctx);
         return ctx;