You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2019/06/30 13:56:57 UTC
svn commit: r1862349 - in /pdfbox/branches/issue45: ./
preflight/src/main/java/org/apache/pdfbox/preflight/
preflight/src/main/java/org/apache/pdfbox/preflight/parser/
preflight/src/main/java/org/apache/pdfbox/preflight/process/
preflight/src/test/java...
Author: lehmi
Date: Sun Jun 30 13:56:57 2019
New Revision: 1862349
URL: http://svn.apache.org/viewvc?rev=1862349&view=rev
Log:
PDFBOX-4588: move stream length validation to PreflightParser
Modified:
pdfbox/branches/issue45/ (props changed)
pdfbox/branches/issue45/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightContext.java
pdfbox/branches/issue45/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java
pdfbox/branches/issue45/preflight/src/main/java/org/apache/pdfbox/preflight/process/StreamValidationProcess.java
pdfbox/branches/issue45/preflight/src/test/java/org/apache/pdfbox/preflight/action/pdfa1b/AbstractTestAction.java
Propchange: pdfbox/branches/issue45/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Jun 30 13:56:57 2019
@@ -2,4 +2,4 @@
/pdfbox/branches/issue4569:1861285,1861586
/pdfbox/branches/no-awt:1618517-1621410
/pdfbox/no-awt:1618514-1618516
-/pdfbox/trunk:1736223,1736227,1736615,1737043,1737130,1737599-1737600,1738755,1740160,1742437,1742442,1743248,1745595,1745606,1745772,1745774,1745776,1745779,1746032,1746151,1749162,1749165,1749432,1766088,1766213,1767585,1768061,1770985,1770988,1772528,1778172,1782679,1786586,1786603,1787546,1789414,1790745,1794073,1794090,1794620,1794753,1794859,1794891,1800566,1812426,1814226,1826836,1859501,1859510,1859664,1859686,1861927,1861933
+/pdfbox/trunk:1736223,1736227,1736615,1737043,1737130,1737599-1737600,1738755,1740160,1742437,1742442,1743248,1745595,1745606,1745772,1745774,1745776,1745779,1746032,1746151,1749162,1749165,1749432,1766088,1766213,1767585,1768061,1770985,1770988,1772528,1778172,1782679,1786586,1786603,1787546,1789414,1790745,1794073,1794090,1794620,1794753,1794859,1794891,1800566,1812426,1814226,1826836,1859501,1859510,1859664,1859686,1861927,1861933,1862347
Modified: pdfbox/branches/issue45/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightContext.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/issue45/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightContext.java?rev=1862349&r1=1862348&r2=1862349&view=diff
==============================================================================
--- pdfbox/branches/issue45/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightContext.java (original)
+++ pdfbox/branches/issue45/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightContext.java Sun Jun 30 13:56:57 2019
@@ -52,11 +52,6 @@ public class PreflightContext implements
private PreflightDocument document = null;
/**
- * The datasource to load the document from. Needed by StreamValidationProcess.
- */
- private DataSource dataSource = null;
-
- /**
* Contains all Xref/trailer objects and resolves them into single object using startxref reference.
*/
private XrefTrailerResolver xrefTrailerResolver;
@@ -91,14 +86,12 @@ public class PreflightContext implements
*
* @param dataSource
*/
- public PreflightContext(DataSource dataSource)
+ public PreflightContext()
{
- this.dataSource = dataSource;
}
- public PreflightContext(DataSource dataSource, PreflightConfiguration configuration)
+ public PreflightContext(PreflightConfiguration configuration)
{
- this.dataSource = dataSource;
this.config = configuration;
}
@@ -148,20 +141,6 @@ public class PreflightContext implements
}
/**
- *
- * @return The datasource of the pdf document
- */
- public DataSource getSource()
- {
- return dataSource;
- }
-
- public boolean isComplete()
- {
- return (document != null) && (dataSource != null);
- }
-
- /**
* Add a FontContainer to allow TextObject validation.
*
* @param cBase the COSBase for the font container.
Modified: pdfbox/branches/issue45/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/issue45/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java?rev=1862349&r1=1862348&r2=1862349&view=diff
==============================================================================
--- pdfbox/branches/issue45/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java (original)
+++ pdfbox/branches/issue45/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java Sun Jun 30 13:56:57 2019
@@ -72,6 +72,7 @@ import static org.apache.pdfbox.prefligh
import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_NUMERIC_RANGE;
import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_OBJ_DELIMITER;
import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_STREAM_DELIMITER;
+import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_STREAM_LENGTH_INVALID;
import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_TOO_MANY_ENTRIES;
import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_TRAILER_EOF;
import static org.apache.pdfbox.preflight.PreflightConstants.MAX_ARRAY_ELEMENTS;
@@ -277,7 +278,7 @@ public class PreflightParser extends PDF
*/
protected void createContext()
{
- this.ctx = new PreflightContext(this.dataSource);
+ ctx = new PreflightContext();
ctx.setDocument(preflightDocument);
preflightDocument.setContext(ctx);
ctx.setXrefTrailerResolver(xrefTrailerResolver);
@@ -510,9 +511,9 @@ public class PreflightParser extends PDF
@Override
protected COSStream parseCOSStream(COSDictionary dic) throws IOException
{
- checkStreamKeyWord();
+ long startOffset = checkStreamKeyWord();
COSStream result = super.parseCOSStream(dic);
- checkEndstreamKeyWord();
+ checkEndstreamKeyWord(dic, startOffset);
return result;
}
@@ -521,7 +522,7 @@ public class PreflightParser extends PDF
*
* @throws IOException
*/
- protected void checkStreamKeyWord() throws IOException
+ private long checkStreamKeyWord() throws IOException
{
String streamV = readString();
if (!streamV.equals("stream"))
@@ -529,14 +530,24 @@ public class PreflightParser extends PDF
addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER,
"Expected 'stream' keyword but found '" + streamV + "' at offset "+source.getPosition()));
}
+ long startOffset = source.getPosition();
int nextChar = source.read();
- if (!((nextChar == 13 && source.peek() == 10) || nextChar == 10))
+ if (nextChar == 13 && source.peek() == 10)
+ {
+ startOffset += 2;
+ }
+ else if (nextChar == 10)
+ {
+ startOffset++;
+ }
+ else
{
addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER,
"Expected 'EOL' after the stream keyword at offset "+source.getPosition()));
}
// set the offset before stream
source.seek(source.getPosition() - 7);
+ return startOffset;
}
/**
@@ -544,19 +555,52 @@ public class PreflightParser extends PDF
*
* @throws IOException
*/
- protected void checkEndstreamKeyWord() throws IOException
+ private void checkEndstreamKeyWord(COSDictionary dic, long startOffset)
+ throws IOException
{
source.seek(source.getPosition() - 10);
- if (!nextIsEOL())
+ long endOffset = source.getPosition();
+ int nextChar = source.read();
+ boolean eolFound = false;
+ boolean crlfFound = false;
+ // LF found
+ if (nextChar == '\n')
+ {
+ eolFound = true;
+ // check if the LF is part of a CRLF
+ source.rewind(2);
+ if (source.read() == '\r')
+ {
+ endOffset--;
+ crlfFound = true;
+ }
+ source.read();
+ }
+ boolean addStreamLengthErrorMessage = false;
+ long actualLength = endOffset - startOffset;
+ if (!eolFound)
{
addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER,
"Expected 'EOL' before the endstream keyword at offset "+source.getPosition()+" but found '"+source.peek()+"'"));
+ addStreamLengthErrorMessage = true;
}
String endstreamV = readString();
if (!endstreamV.equals("endstream"))
{
addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER,
"Expected 'endstream' keyword at offset "+source.getPosition()+" but found '" + endstreamV + "'"));
+ addStreamLengthErrorMessage = true;
+ }
+
+ int length = dic.getInt(COSName.LENGTH);
+ if (addStreamLengthErrorMessage || //
+ (length > -1 && ((!crlfFound && length - actualLength != 0)
+ || (crlfFound && length - actualLength > 1))))
+ {
+ addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_LENGTH_INVALID,
+ "Stream length is invalid [dic=" + dic + "; defined length=" + length
+ + "; actual length=" + actualLength + ", starting offset="
+ + startOffset));
}
}
Modified: pdfbox/branches/issue45/preflight/src/main/java/org/apache/pdfbox/preflight/process/StreamValidationProcess.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/issue45/preflight/src/main/java/org/apache/pdfbox/preflight/process/StreamValidationProcess.java?rev=1862349&r1=1862348&r2=1862349&view=diff
==============================================================================
--- pdfbox/branches/issue45/preflight/src/main/java/org/apache/pdfbox/preflight/process/StreamValidationProcess.java (original)
+++ pdfbox/branches/issue45/preflight/src/main/java/org/apache/pdfbox/preflight/process/StreamValidationProcess.java Sun Jun 30 13:56:57 2019
@@ -21,15 +21,10 @@
package org.apache.pdfbox.preflight.process;
-import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_STREAM_DAMAGED;
-import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_STREAM_DELIMITER;
import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_STREAM_FX_KEYS;
import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_STREAM_INVALID_FILTER;
-import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_STREAM_LENGTH_INVALID;
import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_STREAM_LENGTH_MISSING;
-import java.io.IOException;
-import java.io.InputStream;
import java.util.List;
import org.apache.pdfbox.cos.COSArray;
@@ -38,21 +33,16 @@ import org.apache.pdfbox.cos.COSDocument
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSObject;
import org.apache.pdfbox.cos.COSStream;
-import org.apache.pdfbox.io.IOUtils;
import org.apache.pdfbox.pdmodel.PDDocument;
-import org.apache.pdfbox.cos.COSObjectKey;
import org.apache.pdfbox.preflight.PreflightContext;
import org.apache.pdfbox.preflight.ValidationResult.ValidationError;
import org.apache.pdfbox.preflight.exception.ValidationException;
import org.apache.pdfbox.preflight.utils.COSUtils;
import org.apache.pdfbox.preflight.utils.FilterHelper;
-import org.apache.pdfbox.util.Charsets;
public class StreamValidationProcess extends AbstractProcess
{
- private static final String ENDSTREAM = "endstream";
-
@Override
public void validate(PreflightContext ctx) throws ValidationException
{
@@ -81,8 +71,6 @@ public class StreamValidationProcess ext
// ---- Only the Length entry is mandatory
// ---- In a PDF/A file, F, FFilter and FDecodeParms are forbidden
checkDictionaryEntries(context, streamObj);
- // ---- check stream length
- checkStreamLength(context, cObj);
// ---- Check the Filter value(s)
checkFilters(streamObj, context);
}
@@ -123,184 +111,6 @@ public class StreamValidationProcess ext
// else Filter entry is optional
}
- private boolean readUntilStream(InputStream ra) throws IOException
- {
- boolean search = true;
- boolean maybe = false;
- int lastChar = -1;
- do
- {
- int c = ra.read();
- switch (c)
- {
- case 's':
- maybe = true;
- lastChar = c;
- break;
- case 't':
- if (maybe && lastChar == 's')
- {
- lastChar = c;
- }
- else
- {
- maybe = false;
- lastChar = -1;
- }
- break;
- case 'r':
- if (maybe && lastChar == 't')
- {
- lastChar = c;
- }
- else
- {
- maybe = false;
- lastChar = -1;
- }
- break;
- case 'e':
- if (maybe && lastChar == 'r')
- {
- lastChar = c;
- }
- else
- {
- maybe = false;
- }
- break;
- case 'a':
- if (maybe && lastChar == 'e')
- {
- lastChar = c;
- }
- else
- {
- maybe = false;
- }
- break;
- case 'm':
- if (maybe && lastChar == 'a')
- {
- return true;
- }
- else
- {
- maybe = false;
- }
- break;
- case -1:
- search = false;
- break;
- default:
- maybe = false;
- break;
- }
- } while (search);
- return false;
- }
-
- protected void checkStreamLength(PreflightContext context, COSObject cObj) throws ValidationException
- {
- COSStream streamObj = (COSStream) cObj.getObject();
- int length = streamObj.getInt(COSName.LENGTH);
- InputStream ra = null;
- try
- {
- ra = context.getSource().getInputStream();
- Long offset = context.getDocument().getDocument().getXrefTable().get(new COSObjectKey(cObj));
-
- // ---- go to the beginning of the object
- long skipped = 0;
- if (offset != null)
- {
- while (skipped != offset)
- {
- long curSkip = ra.skip(offset - skipped);
- if (curSkip < 0)
- {
- addValidationError(context, new ValidationError(ERROR_SYNTAX_STREAM_DAMAGED, "Unable to skip bytes in the PDFFile to check stream length"));
- return;
- }
- skipped += curSkip;
- }
-
- // ---- go to the stream key word
- if (readUntilStream(ra))
- {
- int c = ra.read();
- // "stream" has to be followed by a LF or CRLF
- if ((c != '\r' && c != '\n') //
- || (c == '\r' && ra.read() != '\n'))
- {
- addValidationError(context,
- new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER,
- "Expected 'EOL' after the stream keyword not found"));
- return;
- }
- // ---- Here is the true beginning of the Stream Content.
- // ---- Read the given length of bytes and check the 10 next bytes
- // ---- to see if there are endstream.
- byte[] buffer = new byte[1024];
- int nbBytesToRead = length;
-
- do
- {
- int cr;
- if (nbBytesToRead > buffer.length)
- {
- cr = ra.read(buffer);
- }
- else
- {
- cr = ra.read(buffer, 0, nbBytesToRead);
- }
- if (cr == -1)
- {
- addStreamLengthValidationError(context, cObj, length, "");
- return;
- }
- else
- {
- nbBytesToRead -= cr;
- }
- }
- while (nbBytesToRead > 0);
-
- int len = ENDSTREAM.length() + 2;
- byte[] buffer2 = new byte[len];
- ra.read(buffer2);
-
- // ---- check the content of 10 last characters
- // there has to be an proceeding EOL (LF or CRLF)
- String endStream = new String(buffer2, Charsets.ISO_8859_1);
- if ((buffer2[0] != '\r' && buffer2[0] != '\n') //
- || (buffer2[0] == '\r' && buffer2[1] != '\n') //
- || (buffer2[0] == '\n' && buffer2[1] != 'e') //
- || !endStream.contains(ENDSTREAM))
- {
- // TODO in some cases it is hard to say if the reason for this issue is a missing EOL or a wrong
- // stream length, see isartor-6-1-7-t03-fail-a.pdf
- // the implementation has to be adjusted similar to PreflightParser#parseCOSStream
- addStreamLengthValidationError(context, cObj, length, endStream);
- }
- }
- else
- {
- addStreamLengthValidationError(context, cObj, length, "");
- }
- }
- }
- catch (IOException e)
- {
- throw new ValidationException("Unable to read a stream to validate: " + e.getMessage(), e);
- }
- finally
- {
- IOUtils.closeQuietly(ra);
- }
- }
-
/**
* Check dictionary entries. Only the Length entry is mandatory. In a PDF/A file, F, FFilter and FDecodeParms are
* forbidden
@@ -328,10 +138,4 @@ public class StreamValidationProcess ext
}
}
- private void addStreamLengthValidationError(PreflightContext context, COSObject cObj, int length, String endStream)
- {
- addValidationError(context, new ValidationError(ERROR_SYNTAX_STREAM_LENGTH_INVALID,
- "Stream length is invalid [cObj=" + cObj + "; defined length=" + length + "; buffer2=" + endStream + "]"));
- }
-
}
Modified: pdfbox/branches/issue45/preflight/src/test/java/org/apache/pdfbox/preflight/action/pdfa1b/AbstractTestAction.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/issue45/preflight/src/test/java/org/apache/pdfbox/preflight/action/pdfa1b/AbstractTestAction.java?rev=1862349&r1=1862348&r2=1862349&view=diff
==============================================================================
--- pdfbox/branches/issue45/preflight/src/test/java/org/apache/pdfbox/preflight/action/pdfa1b/AbstractTestAction.java (original)
+++ pdfbox/branches/issue45/preflight/src/test/java/org/apache/pdfbox/preflight/action/pdfa1b/AbstractTestAction.java Sun Jun 30 13:56:57 2019
@@ -21,6 +21,7 @@
package org.apache.pdfbox.preflight.action.pdfa1b;
+import java.io.File;
import java.util.List;
import javax.activation.DataSource;
import javax.activation.FileDataSource;
@@ -49,10 +50,10 @@ public abstract class AbstractTestAction
*/
protected PreflightContext createContext() throws Exception
{
- DataSource ds = new FileDataSource("src/test/resources/pdfa-with-annotations-square.pdf");
- PDDocument doc = PDDocument.load(ds.getInputStream());
+ PDDocument doc = PDDocument
+ .load(new File("src/test/resources/pdfa-with-annotations-square.pdf"));
PreflightDocument preflightDocument = new PreflightDocument(doc.getDocument(), Format.PDF_A1B);
- PreflightContext ctx = new PreflightContext(ds);
+ PreflightContext ctx = new PreflightContext();
ctx.setDocument(preflightDocument);
preflightDocument.setContext(ctx);
return ctx;