You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2019/06/30 13:43:51 UTC
svn commit: r1862347 - in /pdfbox/trunk/preflight/src:
main/java/org/apache/pdfbox/preflight/
main/java/org/apache/pdfbox/preflight/parser/
main/java/org/apache/pdfbox/preflight/process/
test/java/org/apache/pdfbox/preflight/action/pdfa1b/
Author: lehmi
Date: Sun Jun 30 13:43:51 2019
New Revision: 1862347
URL: http://svn.apache.org/viewvc?rev=1862347&view=rev
Log:
PDFBOX-4588: move stream length validation to PreflightParser
Modified:
pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightContext.java
pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java
pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/process/StreamValidationProcess.java
pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/action/pdfa1b/AbstractTestAction.java
Modified: pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightContext.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightContext.java?rev=1862347&r1=1862346&r2=1862347&view=diff
==============================================================================
--- pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightContext.java (original)
+++ pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightContext.java Sun Jun 30 13:43:51 2019
@@ -35,7 +35,6 @@ import org.apache.pdfbox.pdmodel.common.
import org.apache.pdfbox.preflight.ValidationResult.ValidationError;
import org.apache.pdfbox.preflight.font.container.FontContainer;
import org.apache.pdfbox.preflight.graphic.ICCProfileWrapper;
-import org.apache.pdfbox.preflight.utils.DataSource;
import org.apache.xmpbox.XMPMetadata;
public class PreflightContext implements Closeable
@@ -51,11 +50,6 @@ public class PreflightContext implements
private PreflightDocument document = null;
/**
- * The datasource to load the document from. Needed by StreamValidationProcess.
- */
- private DataSource dataSource = null;
-
- /**
* Contains all Xref/trailer objects and resolves them into single object using startxref reference.
*/
private XrefTrailerResolver xrefTrailerResolver;
@@ -90,14 +84,12 @@ public class PreflightContext implements
*
* @param dataSource
*/
- public PreflightContext(DataSource dataSource)
+ public PreflightContext()
{
- this.dataSource = dataSource;
}
- public PreflightContext(DataSource dataSource, PreflightConfiguration configuration)
+ public PreflightContext(PreflightConfiguration configuration)
{
- this.dataSource = dataSource;
this.config = configuration;
}
@@ -147,20 +139,6 @@ public class PreflightContext implements
}
/**
- *
- * @return The datasource of the pdf document
- */
- public DataSource getDataSource()
- {
- return dataSource;
- }
-
- public boolean isComplete()
- {
- return (document != null) && (dataSource != null);
- }
-
- /**
* Add a FontContainer to allow TextObject validation.
*
* @param cBase the COSBase for the font container.
Modified: pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java?rev=1862347&r1=1862346&r2=1862347&view=diff
==============================================================================
--- pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java (original)
+++ pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java Sun Jun 30 13:43:51 2019
@@ -70,6 +70,7 @@ import static org.apache.pdfbox.prefligh
import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_NUMERIC_RANGE;
import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_OBJ_DELIMITER;
import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_STREAM_DELIMITER;
+import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_STREAM_LENGTH_INVALID;
import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_TOO_MANY_ENTRIES;
import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_TRAILER_EOF;
import static org.apache.pdfbox.preflight.PreflightConstants.MAX_ARRAY_ELEMENTS;
@@ -277,7 +278,7 @@ public class PreflightParser extends PDF
*/
protected void createContext()
{
- this.ctx = new PreflightContext(this.dataSource);
+ ctx = new PreflightContext();
ctx.setDocument(preflightDocument);
preflightDocument.setContext(ctx);
ctx.setXrefTrailerResolver(xrefTrailerResolver);
@@ -510,9 +511,9 @@ public class PreflightParser extends PDF
@Override
protected COSStream parseCOSStream(COSDictionary dic) throws IOException
{
- checkStreamKeyWord();
+ long startOffset = checkStreamKeyWord();
COSStream result = super.parseCOSStream(dic);
- checkEndstreamKeyWord();
+ checkEndstreamKeyWord(dic, startOffset);
return result;
}
@@ -521,7 +522,7 @@ public class PreflightParser extends PDF
*
* @throws IOException
*/
- protected void checkStreamKeyWord() throws IOException
+ private long checkStreamKeyWord() throws IOException
{
String streamV = readString();
if (!streamV.equals("stream"))
@@ -529,14 +530,24 @@ public class PreflightParser extends PDF
addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER,
"Expected 'stream' keyword but found '" + streamV + "' at offset "+source.getPosition()));
}
+ long startOffset = source.getPosition();
int nextChar = source.read();
- if (!((nextChar == 13 && source.peek() == 10) || nextChar == 10))
+ if (nextChar == 13 && source.peek() == 10)
+ {
+ startOffset += 2;
+ }
+ else if (nextChar == 10)
+ {
+ startOffset++;
+ }
+ else
{
addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER,
"Expected 'EOL' after the stream keyword at offset "+source.getPosition()));
}
// set the offset before stream
source.seek(source.getPosition() - 7);
+ return startOffset;
}
/**
@@ -544,19 +555,52 @@ public class PreflightParser extends PDF
*
* @throws IOException
*/
- protected void checkEndstreamKeyWord() throws IOException
+ private void checkEndstreamKeyWord(COSDictionary dic, long startOffset)
+ throws IOException
{
source.seek(source.getPosition() - 10);
- if (!nextIsEOL())
+ long endOffset = source.getPosition();
+ int nextChar = source.read();
+ boolean eolFound = false;
+ boolean crlfFound = false;
+ // LF found
+ if (nextChar == '\n')
+ {
+ eolFound = true;
+ // check if the LF is part of a CRLF
+ source.rewind(2);
+ if (source.read() == '\r')
+ {
+ endOffset--;
+ crlfFound = true;
+ }
+ source.read();
+ }
+ boolean addStreamLengthErrorMessage = false;
+ long actualLength = endOffset - startOffset;
+ if (!eolFound)
{
addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER,
"Expected 'EOL' before the endstream keyword at offset "+source.getPosition()+" but found '"+source.peek()+"'"));
+ addStreamLengthErrorMessage = true;
}
String endstreamV = readString();
if (!endstreamV.equals("endstream"))
{
addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER,
"Expected 'endstream' keyword at offset "+source.getPosition()+" but found '" + endstreamV + "'"));
+ addStreamLengthErrorMessage = true;
+ }
+
+ int length = dic.getInt(COSName.LENGTH);
+ if (addStreamLengthErrorMessage || //
+ (length > -1 && ((!crlfFound && length - actualLength != 0)
+ || (crlfFound && length - actualLength > 1))))
+ {
+ addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_LENGTH_INVALID,
+ "Stream length is invalid [dic=" + dic + "; defined length=" + length
+ + "; actual length=" + actualLength + ", starting offset="
+ + startOffset));
}
}
Modified: pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/process/StreamValidationProcess.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/process/StreamValidationProcess.java?rev=1862347&r1=1862346&r2=1862347&view=diff
==============================================================================
--- pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/process/StreamValidationProcess.java (original)
+++ pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/process/StreamValidationProcess.java Sun Jun 30 13:43:51 2019
@@ -21,15 +21,10 @@
package org.apache.pdfbox.preflight.process;
-import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_STREAM_DAMAGED;
-import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_STREAM_DELIMITER;
import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_STREAM_FX_KEYS;
import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_STREAM_INVALID_FILTER;
-import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_STREAM_LENGTH_INVALID;
import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_STREAM_LENGTH_MISSING;
-import java.io.IOException;
-import java.io.InputStream;
import java.util.List;
import org.apache.pdfbox.cos.COSArray;
@@ -37,20 +32,15 @@ import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSObject;
import org.apache.pdfbox.cos.COSStream;
-import org.apache.pdfbox.io.IOUtils;
import org.apache.pdfbox.pdmodel.PDDocument;
-import org.apache.pdfbox.cos.COSObjectKey;
import org.apache.pdfbox.preflight.PreflightContext;
import org.apache.pdfbox.preflight.ValidationResult.ValidationError;
import org.apache.pdfbox.preflight.exception.ValidationException;
import org.apache.pdfbox.preflight.utils.FilterHelper;
-import org.apache.pdfbox.util.Charsets;
public class StreamValidationProcess extends AbstractProcess
{
- private static final String ENDSTREAM = "endstream";
-
@Override
public void validate(PreflightContext ctx) throws ValidationException
{
@@ -76,8 +66,6 @@ public class StreamValidationProcess ext
// ---- Only the Length entry is mandatory
// ---- In a PDF/A file, F, FFilter and FDecodeParms are forbidden
checkDictionaryEntries(context, streamObj);
- // ---- check stream length
- checkStreamLength(context, cObj);
// ---- Check the Filter value(s)
checkFilters(streamObj, context);
}
@@ -117,184 +105,6 @@ public class StreamValidationProcess ext
// else Filter entry is optional
}
- private boolean readUntilStream(InputStream ra) throws IOException
- {
- boolean search = true;
- boolean maybe = false;
- int lastChar = -1;
- do
- {
- int c = ra.read();
- switch (c)
- {
- case 's':
- maybe = true;
- lastChar = c;
- break;
- case 't':
- if (maybe && lastChar == 's')
- {
- lastChar = c;
- }
- else
- {
- maybe = false;
- lastChar = -1;
- }
- break;
- case 'r':
- if (maybe && lastChar == 't')
- {
- lastChar = c;
- }
- else
- {
- maybe = false;
- lastChar = -1;
- }
- break;
- case 'e':
- if (maybe && lastChar == 'r')
- {
- lastChar = c;
- }
- else
- {
- maybe = false;
- }
- break;
- case 'a':
- if (maybe && lastChar == 'e')
- {
- lastChar = c;
- }
- else
- {
- maybe = false;
- }
- break;
- case 'm':
- if (maybe && lastChar == 'a')
- {
- return true;
- }
- else
- {
- maybe = false;
- }
- break;
- case -1:
- search = false;
- break;
- default:
- maybe = false;
- break;
- }
- } while (search);
- return false;
- }
-
- protected void checkStreamLength(PreflightContext context, COSObject cObj) throws ValidationException
- {
- COSStream streamObj = (COSStream) cObj.getObject();
- int length = streamObj.getInt(COSName.LENGTH);
- InputStream ra = null;
- try
- {
- ra = context.getDataSource().getInputStream();
- Long offset = context.getDocument().getDocument().getXrefTable().get(new COSObjectKey(cObj));
-
- // ---- go to the beginning of the object
- long skipped = 0;
- if (offset != null)
- {
- while (skipped != offset)
- {
- long curSkip = ra.skip(offset - skipped);
- if (curSkip < 0)
- {
- addValidationError(context, new ValidationError(ERROR_SYNTAX_STREAM_DAMAGED, "Unable to skip bytes in the PDFFile to check stream length"));
- return;
- }
- skipped += curSkip;
- }
-
- // ---- go to the stream key word
- if (readUntilStream(ra))
- {
- int c = ra.read();
- // "stream" has to be followed by a LF or CRLF
- if ((c != '\r' && c != '\n') //
- || (c == '\r' && ra.read() != '\n'))
- {
- addValidationError(context,
- new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER,
- "Expected 'EOL' after the stream keyword not found"));
- return;
- }
- // ---- Here is the true beginning of the Stream Content.
- // ---- Read the given length of bytes and check the 10 next bytes
- // ---- to see if there are endstream.
- byte[] buffer = new byte[1024];
- int nbBytesToRead = length;
-
- do
- {
- int cr;
- if (nbBytesToRead > buffer.length)
- {
- cr = ra.read(buffer);
- }
- else
- {
- cr = ra.read(buffer, 0, nbBytesToRead);
- }
- if (cr == -1)
- {
- addStreamLengthValidationError(context, cObj, length, "");
- return;
- }
- else
- {
- nbBytesToRead -= cr;
- }
- }
- while (nbBytesToRead > 0);
-
- int len = ENDSTREAM.length() + 2;
- byte[] buffer2 = new byte[len];
- ra.read(buffer2);
-
- // ---- check the content of 10 last characters
- // there has to be an proceeding EOL (LF or CRLF)
- String endStream = new String(buffer2, Charsets.ISO_8859_1);
- if ((buffer2[0] != '\r' && buffer2[0] != '\n') //
- || (buffer2[0] == '\r' && buffer2[1] != '\n') //
- || (buffer2[0] == '\n' && buffer2[1] != 'e') //
- || !endStream.contains(ENDSTREAM))
- {
- // TODO in some cases it is hard to say if the reason for this issue is a missing EOL or a wrong
- // stream length, see isartor-6-1-7-t03-fail-a.pdf
- // the implementation has to be adjusted similar to PreflightParser#parseCOSStream
- addStreamLengthValidationError(context, cObj, length, endStream);
- }
- }
- else
- {
- addStreamLengthValidationError(context, cObj, length, "");
- }
- }
- }
- catch (IOException e)
- {
- throw new ValidationException("Unable to read a stream to validate: " + e.getMessage(), e);
- }
- finally
- {
- IOUtils.closeQuietly(ra);
- }
- }
-
/**
* Check dictionary entries. Only the Length entry is mandatory. In a PDF/A file, F, FFilter and FDecodeParms are
* forbidden
@@ -322,10 +132,4 @@ public class StreamValidationProcess ext
}
}
- private void addStreamLengthValidationError(PreflightContext context, COSObject cObj, int length, String endStream)
- {
- addValidationError(context, new ValidationError(ERROR_SYNTAX_STREAM_LENGTH_INVALID,
- "Stream length is invalid [cObj=" + cObj + "; defined length=" + length + "; buffer2=" + endStream + "]"));
- }
-
}
Modified: pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/action/pdfa1b/AbstractTestAction.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/action/pdfa1b/AbstractTestAction.java?rev=1862347&r1=1862346&r2=1862347&view=diff
==============================================================================
--- pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/action/pdfa1b/AbstractTestAction.java (original)
+++ pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/action/pdfa1b/AbstractTestAction.java Sun Jun 30 13:43:51 2019
@@ -34,8 +34,6 @@ import org.apache.pdfbox.preflight.Prefl
import org.apache.pdfbox.preflight.ValidationResult.ValidationError;
import org.apache.pdfbox.preflight.action.AbstractActionManager;
import org.apache.pdfbox.preflight.action.ActionManagerFactory;
-import org.apache.pdfbox.preflight.utils.DataSource;
-import org.apache.pdfbox.preflight.utils.FileDataSource;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
@@ -50,10 +48,10 @@ public abstract class AbstractTestAction
*/
protected PreflightContext createContext() throws Exception
{
- DataSource ds = new FileDataSource(new File("src/test/resources/pdfa-with-annotations-square.pdf"));
- PDDocument doc = PDDocument.load(ds.getInputStream());
+ PDDocument doc = PDDocument
+ .load(new File("src/test/resources/pdfa-with-annotations-square.pdf"));
PreflightDocument preflightDocument = new PreflightDocument(doc.getDocument(), Format.PDF_A1B);
- PreflightContext ctx = new PreflightContext(ds);
+ PreflightContext ctx = new PreflightContext();
ctx.setDocument(preflightDocument);
preflightDocument.setContext(ctx);
return ctx;