You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2012/09/24 23:28:35 UTC
svn commit: r1389604 - in /pdfbox/trunk/preflight: ./
src/main/java/org/apache/pdfbox/preflight/
src/main/java/org/apache/pdfbox/preflight/exception/
src/main/java/org/apache/pdfbox/preflight/parser/
src/main/java/org/apache/pdfbox/preflight/process/ s...
Author: leleueri
Date: Mon Sep 24 21:28:34 2012
New Revision: 1389604
URL: http://svn.apache.org/viewvc?rev=1389604&view=rev
Log:
[https://issues.apache.org/jira/browse/PDFBOX-1373] Syntax validation is done by the preflight parser
Added:
pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/exception/SyntaxValidationException.java (with props)
Modified:
pdfbox/trunk/preflight/pom.xml
pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightConstants.java
pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightContext.java
pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/Validator_A1b.java
pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java
pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/process/TrailerValidationProcess.java
pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/TestInvalidDirectory.java
pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/TestIsartorValidationFromClasspath.java
pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/TestValidDirectory.java
pdfbox/trunk/preflight/src/test/resources/expected_errors.txt
Modified: pdfbox/trunk/preflight/pom.xml
URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/pom.xml?rev=1389604&r1=1389603&r2=1389604&view=diff
==============================================================================
--- pdfbox/trunk/preflight/pom.xml (original)
+++ pdfbox/trunk/preflight/pom.xml Mon Sep 24 21:28:34 2012
@@ -235,6 +235,18 @@
<scope>test</scope>
</dependency>
-->
+ <dependency>
+ <groupId>org.bouncycastle</groupId>
+ <artifactId>bcmail-jdk15</artifactId>
+ <version>1.44</version>
+ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>org.bouncycastle</groupId>
+ <artifactId>bcprov-jdk15</artifactId>
+ <version>1.44</version>
+ <optional>true</optional>
+ </dependency>
</dependencies>
<reporting>
Modified: pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightConstants.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightConstants.java?rev=1389604&r1=1389603&r2=1389604&view=diff
==============================================================================
--- pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightConstants.java (original)
+++ pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightConstants.java Mon Sep 24 21:28:34 2012
@@ -339,6 +339,18 @@ public interface PreflightConstants {
* CID too long
*/
String ERROR_SYNTAX_CID_RANGE = "1.0.10";
+ /**
+ * Hexa string shall contain an even number of non-white-space characters
+ */
+ String ERROR_SYNTAX_HEXA_STRING_EVEN_NUMBER = "1.0.11";
+ /**
+ * Hexa string contains non-hexadecimal characters
+ */
+ String ERROR_SYNTAX_HEXA_STRING_INVALID= "1.0.12";
+ /**
+ * An object is missing from the document (offset is negative)
+ */
+ String ERROR_SYNTAX_NEGATIVE_OFFSET = "1.0.13";
String ERROR_SYNTAX_HEADER = "1.1";
@@ -444,6 +456,10 @@ public interface PreflightConstants {
* Errors in the Outlines dictionary
*/
String ERROR_SYNTAX_TRAILER_OUTLINES_INVALID = "1.4.9";
+ /**
+ * Last %%EOF sequence is followed by data
+ */
+ String ERROR_SYNTAX_TRAILER_EOF = "1.4.10";
// -----------------------------------------------------------
// ---- GRAPHIC ERRORS 2.x...
Modified: pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightContext.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightContext.java?rev=1389604&r1=1389603&r2=1389604&view=diff
==============================================================================
--- pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightContext.java (original)
+++ pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightContext.java Mon Sep 24 21:28:34 2012
@@ -24,15 +24,16 @@ package org.apache.pdfbox.preflight;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import java.util.SortedMap;
import javax.activation.DataSource;
import org.apache.padaf.xmpbox.XMPMetadata;
import org.apache.pdfbox.cos.COSBase;
+import org.apache.pdfbox.pdfparser.XrefTrailerResolver;
import org.apache.pdfbox.preflight.ValidationResult.ValidationError;
import org.apache.pdfbox.preflight.font.container.FontContainer;
import org.apache.pdfbox.preflight.graphic.ICCProfileWrapper;
-import org.apache.pdfbox.preflight.javacc.extractor.ExtractorTokenManager;
import org.apache.pdfbox.preflight.utils.COSUtils;
public class PreflightContext {
@@ -50,13 +51,18 @@ public class PreflightContext {
* The datasource to load the document from
*/
protected DataSource source = null;
-
- /**
- * JavaCC Token Manager used to get some content of the PDF file as string (ex
- * : Trailers)
- */
- protected ExtractorTokenManager pdfExtractor = null;
-
+//
+// /**
+// * JavaCC Token Manager used to get some content of the PDF file as string (ex
+// * : Trailers)
+// */
+// protected ExtractorTokenManager pdfExtractor = null;
+
+ /** Contains all Xref/trailer objects and resolves them into single
+ * object using startxref reference.
+ */
+ private XrefTrailerResolver xrefTableResolver;
+
/**
* This wrapper contains the ICCProfile used by the PDF file.
*/
@@ -103,21 +109,21 @@ public class PreflightContext {
this.metadata = metadata;
}
- /**
- * @return the value of the pdfExtractor attribute.
- */
- public ExtractorTokenManager getPdfExtractor() {
- return pdfExtractor;
- }
-
- /**
- * Initialize the pdfExtractor attribute.
- *
- * @param pdfExtractor
- */
- public void setPdfExtractor(ExtractorTokenManager pdfExtractor) {
- this.pdfExtractor = pdfExtractor;
- }
+// /**
+// * @return the value of the pdfExtractor attribute.
+// */
+// public ExtractorTokenManager getPdfExtractor() {
+// return pdfExtractor;
+// }
+//
+// /**
+// * Initialize the pdfExtractor attribute.
+// *
+// * @param pdfExtractor
+// */
+// public void setPdfExtractor(ExtractorTokenManager pdfExtractor) {
+// this.pdfExtractor = pdfExtractor;
+// }
/**
* @return the PDFBox object representation of the document
@@ -126,6 +132,14 @@ public class PreflightContext {
return document;
}
+ public XrefTrailerResolver getXrefTableResolver() {
+ return xrefTableResolver;
+ }
+
+ public void setXrefTableResolver(XrefTrailerResolver xrefTableResolver) {
+ this.xrefTableResolver = xrefTableResolver;
+ }
+
/**
* Initialize the PDFBox object which present the PDF File.
*
Modified: pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/Validator_A1b.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/Validator_A1b.java?rev=1389604&r1=1389603&r2=1389604&view=diff
==============================================================================
--- pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/Validator_A1b.java (original)
+++ pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/Validator_A1b.java Mon Sep 24 21:28:34 2012
@@ -25,6 +25,7 @@ import javax.activation.FileDataSource;
import org.apache.pdfbox.Version;
import org.apache.pdfbox.preflight.ValidationResult.ValidationError;
+import org.apache.pdfbox.preflight.exception.SyntaxValidationException;
import org.apache.pdfbox.preflight.parser.PreflightParser;
@@ -48,16 +49,20 @@ public class Validator_A1b {
System.exit(1);
}
+ ValidationResult result = null;
FileDataSource fd = new FileDataSource(args[0]);
-
PreflightParser parser = new PreflightParser(fd);
- parser.parse();
- PreflightDocument document = (PreflightDocument)parser.getPDDocument();
- document.validate();
+ try {
+ parser.parse();
+ PreflightDocument document = parser.getPreflightDocument();
+ document.validate();
+ result = document.getResult();
+ document.close();
+ } catch (SyntaxValidationException e) {
+ result = e.getResult();
+ }
- ValidationResult result = document.getResult();
if (result.isValid()) {
- document.close();
System.out.println("The file " + args[0] + " is a valid PDF/A-1b file");
System.exit(0);
} else {
@@ -66,7 +71,6 @@ public class Validator_A1b {
System.out.println(error.getErrorCode() + " : " + error.getDetails());
}
- document.close();
System.exit(-1);
}
}
Added: pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/exception/SyntaxValidationException.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/exception/SyntaxValidationException.java?rev=1389604&view=auto
==============================================================================
--- pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/exception/SyntaxValidationException.java (added)
+++ pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/exception/SyntaxValidationException.java Mon Sep 24 21:28:34 2012
@@ -0,0 +1,49 @@
+/*****************************************************************************
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ ****************************************************************************/
+
+package org.apache.pdfbox.preflight.exception;
+
+import org.apache.pdfbox.preflight.ValidationResult;
+
+public class SyntaxValidationException extends ValidationException {
+
+ private final ValidationResult result;
+
+ public SyntaxValidationException(String message, Throwable cause, ValidationResult result) {
+ super(message, cause);
+ this.result = result;
+ }
+
+ public SyntaxValidationException(String message, ValidationResult result) {
+ super(message);
+ this.result = result;
+ }
+
+ public SyntaxValidationException(Throwable cause, ValidationResult result) {
+ super(cause);
+ this.result = result;
+ }
+
+ public ValidationResult getResult() {
+ return result;
+ }
+
+}
Propchange: pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/exception/SyntaxValidationException.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java?rev=1389604&r1=1389603&r2=1389604&view=diff
==============================================================================
--- pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java (original)
+++ pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java Mon Sep 24 21:28:34 2012
@@ -21,17 +21,61 @@
package org.apache.pdfbox.preflight.parser;
+import static org.apache.pdfbox.preflight.PreflightConstants.*;
+import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_CROSS_REF;
+import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_HEXA_STRING_EVEN_NUMBER;
+import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_HEXA_STRING_INVALID;
+import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_HEXA_STRING_TOO_LONG;
+import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_NAME_TOO_LONG;
+import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_NUMERIC_RANGE;
+import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_OBJ_DELIMITER;
+import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_STREAM_DELIMITER;
+import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_TOO_MANY_ENTRIES;
+import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_TRAILER_EOF;
+import static org.apache.pdfbox.preflight.PreflightConstants.MAX_ARRAY_ELEMENTS;
+import static org.apache.pdfbox.preflight.PreflightConstants.MAX_DICT_ENTRIES;
+import static org.apache.pdfbox.preflight.PreflightConstants.MAX_NAME_SIZE;
+import static org.apache.pdfbox.preflight.PreflightConstants.MAX_NEGATIVE_FLOAT;
+import static org.apache.pdfbox.preflight.PreflightConstants.MAX_POSITIVE_FLOAT;
+import static org.apache.pdfbox.preflight.PreflightConstants.MAX_STRING_LENGTH;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
+import java.util.Arrays;
import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
import javax.activation.DataSource;
+import javax.activation.FileDataSource;
+import org.apache.pdfbox.cos.COSArray;
+import org.apache.pdfbox.cos.COSBase;
+import org.apache.pdfbox.cos.COSDictionary;
+import org.apache.pdfbox.cos.COSDocument;
+import org.apache.pdfbox.cos.COSFloat;
+import org.apache.pdfbox.cos.COSName;
+import org.apache.pdfbox.cos.COSNull;
+import org.apache.pdfbox.cos.COSNumber;
+import org.apache.pdfbox.cos.COSObject;
+import org.apache.pdfbox.cos.COSStream;
+import org.apache.pdfbox.cos.COSString;
+import org.apache.pdfbox.exceptions.CryptographyException;
import org.apache.pdfbox.io.IOUtils;
import org.apache.pdfbox.io.RandomAccess;
+import org.apache.pdfbox.pdfparser.BaseParser;
+import org.apache.pdfbox.pdfparser.NonSequentialPDFParser;
+import org.apache.pdfbox.pdfparser.PDFObjectStreamParser;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.persistence.util.COSObjectKey;
import org.apache.pdfbox.preflight.Format;
import org.apache.pdfbox.preflight.PreflightConfiguration;
import org.apache.pdfbox.preflight.PreflightConstants;
@@ -39,13 +83,9 @@ import org.apache.pdfbox.preflight.Prefl
import org.apache.pdfbox.preflight.PreflightDocument;
import org.apache.pdfbox.preflight.ValidationResult;
import org.apache.pdfbox.preflight.ValidationResult.ValidationError;
-import org.apache.pdfbox.preflight.exception.PdfParseException;
-import org.apache.pdfbox.preflight.exception.ValidationException;
-import org.apache.pdfbox.preflight.javacc.ParseException;
-import org.apache.pdfbox.preflight.javacc.extractor.ExtractorTokenManager;
-import org.apache.pdfbox.preflight.javacc.extractor.SimpleCharStream;
+import org.apache.pdfbox.preflight.exception.SyntaxValidationException;
-public class PreflightParser extends PDFParser {
+public class PreflightParser extends NonSequentialPDFParser {
/**
* Define a one byte encoding that hasn't specific encoding in UTF-8 charset.
* Avoid unexpected error when the encoding is Cp5816
@@ -56,18 +96,20 @@ public class PreflightParser extends PDF
protected ValidationResult validationResult;
- protected PreflightDocument document;
+ protected PreflightDocument preflightDocument;
protected PreflightContext ctx;
- public PreflightParser(DataSource input, RandomAccess rafi, boolean force) throws IOException {
- super(input.getInputStream(), rafi, force);
- this.originalDocument = input;
+ public PreflightParser(File file, RandomAccess rafi) throws IOException {
+ super(file, rafi);
+ this.originalDocument = new FileDataSource(file);
}
- public PreflightParser(DataSource input, RandomAccess rafi) throws IOException {
- super(input.getInputStream(), rafi);
- this.originalDocument = input;
+ public PreflightParser(File file) throws IOException {
+ this(file, null);
+ }
+ public PreflightParser(String filename) throws IOException {
+ this(new File(filename), null);
}
public PreflightParser(DataSource input) throws IOException {
@@ -75,29 +117,6 @@ public class PreflightParser extends PDF
this.originalDocument = input;
}
-
- /**
- * Create an instance of ValidationResult. This object contains an instance of
- * ValidationError. If the ParseException is an instance of PdfParseException,
- * the embedded validation error is initialized with the error code of the
- * exception, otherwise it is an UnknownError.
- *
- * @param e
- * @return
- */
- protected static ValidationResult createErrorResult(ParseException e) {
- if (e instanceof PdfParseException) {
- if (e.getCause()==null) {
- return new ValidationResult(new ValidationError(((PdfParseException)e).getErrorCode()));
- } else if (e.getCause().getMessage()==null) {
- return new ValidationResult(new ValidationError(((PdfParseException)e).getErrorCode()));
- } else {
- return new ValidationResult(new ValidationError(((PdfParseException)e).getErrorCode(),e.getCause().getMessage()));
- }
- }
- return createUnknownErrorResult();
- }
-
/**
* Create an instance of ValidationResult with a
* ValidationError(UNKNOWN_ERROR)
@@ -152,58 +171,578 @@ public class PreflightParser extends PDF
* @throws IOException
*/
public void parse(Format format, PreflightConfiguration config) throws IOException {
- checkFileSyntax();
- // run PDFBox Parser
- super.parse();
+ checkPdfHeader();
+ try {
+ super.parse();
+ } catch (IOException e) {
+ addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_COMMON, e.getMessage()));
+ throw new SyntaxValidationException(e, this.validationResult);
+ }
Format formatToUse = (format == null ? Format.PDF_A1B : format);
createPdfADocument(formatToUse, config);
createContext();
- extractTrailers();
}
-
+
+ protected void createPdfADocument(Format format, PreflightConfiguration config) throws IOException {
+ COSDocument cosDocument = getDocument();
+ this.preflightDocument = new PreflightDocument(cosDocument, format, config);
+ }
+
+ /**
+ * Create a validation context.
+ * This context is set to the PreflightDocument.
+ */
+ protected void createContext() {
+ this.ctx = new PreflightContext(this.originalDocument);
+ ctx.setDocument(preflightDocument);
+ preflightDocument.setContext(ctx);
+ ctx.setXrefTableResolver(xrefTrailerResolver);
+ }
+
+ @Override
+ public PDDocument getPDDocument() throws IOException {
+ preflightDocument.setResult(validationResult);
+ // Add XMP MetaData
+ return preflightDocument;
+ }
+
+ public PreflightDocument getPreflightDocument() throws IOException {
+ return (PreflightDocument)getPDDocument();
+ }
+
+
+ // --------------------------------------------------------
+ // - Below are all the methods that add checks on the PDF syntax
+ // --------------------------------------------------------
+
+ @Override
+ /**
+ * Fill the COSDocument with the objects that aren't set by the NonSequentialPDFParser
+ */
+ protected void initialParse() throws IOException {
+ super.initialParse();
+
+ // fill xref table
+ document.addXRefTable(xrefTrailerResolver.getXrefTable());
+
+ // Trailer entries are useful in the preflight document
+ for (COSBase trailerEntry : getDocument().getTrailer().getValues()) {
+ if ( trailerEntry instanceof COSObject )
+ {
+ COSObject tmpObj = (COSObject) trailerEntry;
+ parseObjectDynamically( tmpObj, true );
+ }
+ }
+
+ // For each ObjectKey, we check if the object has been loaded
+ Map<COSObjectKey, Long> xrefTable = document.getXrefTable();
+ for (Entry<COSObjectKey, Long> entry : xrefTable.entrySet()) {
+ COSObject co = document.getObjectFromPool(entry.getKey());
+ if ( co.getObject() == null) {
+ // object isn't loaded - parse the object to load its content
+ parseObjectDynamically( co, true );
+ }
+ }
+ }
+
/**
- * Run the JavaCC parser to check the PDF syntax.
- * @throws ValidationException
+ * Check that the PDF header matches the rules of the PDF/A specification.
+ * First line (offset 0) must be a comment with the PDF version (version 1.0 doesn't conform to the PDF/A specification)
+ * Second line is a comment with at least 4 bytes greater than 127 (i.e. each byte >= 0x80)
*/
- protected void checkFileSyntax() throws ValidationException {
- // syntax (javacc) validation
+ protected void checkPdfHeader() {
+ BufferedReader reader = null;
try {
- InputStreamReader reader = new InputStreamReader(this.originalDocument.getInputStream(), encoding);
- org.apache.pdfbox.preflight.javacc.PDFParser javaCCParser = new org.apache.pdfbox.preflight.javacc.PDFParser(reader);
- javaCCParser.PDF();
- IOUtils.closeQuietly(reader);
+ reader = new BufferedReader(new InputStreamReader(new FileInputStream(getPdfFile()), "ISO-8859-1"));
+ String firstLine = reader.readLine();
+ if (firstLine == null || (firstLine != null && !firstLine.matches("%PDF-1\\.[1-9]"))) {
+ addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_HEADER, "First line must match %PDF-1.\\d"));
+ }
+
+ String secondLine = reader.readLine();
+ if (secondLine != null && secondLine.getBytes().length >= 5) {
+ for (int i = 0; i < secondLine.getBytes().length; ++i ) {
+ byte b = secondLine.getBytes()[i];
+ if (i == 0 && ((char)b != '%')) {
+ addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_HEADER, "Second line must contains at least 4 bytes greater than 127"));
+ break;
+ } else if (i > 0 && ((b & 0xFF) < 0x80)) {
+ addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_HEADER, "Second line must contains at least 4 bytes greater than 127"));
+ break;
+ }
+ }
+ } else {
+ addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_HEADER ,"Second line must contains at least 4 bytes greater than 127"));
+ }
+
} catch (IOException e) {
- throw new ValidationException("Failed to parse datasource due to : " + e.getMessage(), e);
- } catch (ParseException e) {
- this.validationResult = createErrorResult(e);
+ addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_HEADER, "Unable to read the PDF file : " + e.getMessage()));
+ } finally {
+ IOUtils.closeQuietly(reader);
}
}
- protected void createPdfADocument(Format format, PreflightConfiguration config) throws IOException {
- this.document = new PreflightDocument(getDocument(), format, config);
+ /**
+ * Same method as {@linkplain PDFParser#parseXrefTable(long)}, with additional checks:
+ * - EOL mandatory after the 'xref' keyword
+ * - Cross reference subsection header uses single white space as separator
+ * - and so on
+ */
+ protected boolean parseXrefTable( long startByteOffset ) throws IOException
+ {
+ if(pdfSource.peek() != 'x')
+ {
+ return false;
+ }
+ String xref = readString();
+ if( !xref.equals( "xref" ) )
+ {
+ addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_CROSS_REF, "xref must be followed by a EOL character"));
+ return false;
+ }
+ if (!nextIsEOL()) {
+ addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_CROSS_REF, "xref must be followed by EOL"));
+ }
+
+ // signal start of new XRef
+ xrefTrailerResolver.nextXrefObj( startByteOffset );
+
+ /*
+ * Xref tables can have multiple sections.
+ * Each starts with a starting object id and a count.
+ */
+ while(true)
+ {
+ // just after the xref<EOL> there are an integer
+ int currObjID = 0; // first obj id
+ int count = 0; // the number of objects in the xref table
+
+ long offset = pdfSource.getOffset();
+ String line = readLine();
+ Pattern pattern = Pattern.compile("(\\d+)\\s(\\d+)(\\s*)");
+ Matcher matcher = pattern.matcher(line);
+ if (matcher.matches()) {
+ currObjID = Integer.parseInt(matcher.group(1));
+ count = Integer.parseInt(matcher.group(2));
+ } else {
+ addValidationError(new ValidationError(ERROR_SYNTAX_CROSS_REF, "Cross reference subsection header is invalid"));
+ // reset pdfSource cursor to read xref information
+ pdfSource.seek(offset);
+ currObjID = readInt(); // first obj id
+ count = readInt(); // the number of objects in the xref table
+ }
+
+ skipSpaces();
+ for(int i = 0; i < count; i++)
+ {
+ if(pdfSource.isEOF() || isEndOfName((char)pdfSource.peek()))
+ {
+ break;
+ }
+ if(pdfSource.peek() == 't')
+ {
+ addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_CROSS_REF, "Expected xref line but 't' found"));
+ break;
+ }
+ //Ignore table contents
+ String currentLine = readLine();
+ String[] splitString = currentLine.split(" ");
+ if (splitString.length < 3)
+ {
+ addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_CROSS_REF, "invalid xref line: " + currentLine));
+ break;
+ }
+ /* This supports the corrupt table as reported in
+ * PDFBOX-474 (XXXX XXX XX n) */
+ if(splitString[splitString.length-1].equals("n"))
+ {
+ try
+ {
+ long currOffset = Long.parseLong(splitString[0]);
+ int currGenID = Integer.parseInt(splitString[1]);
+ COSObjectKey objKey = new COSObjectKey(currObjID, currGenID);
+ xrefTrailerResolver.setXRef(objKey, currOffset);
+ }
+ catch(NumberFormatException e)
+ {
+ addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_CROSS_REF, "offset or genid can't be read as number " + e.getMessage()));
+ }
+ }
+ else if(!splitString[2].equals("f"))
+ {
+ addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_CROSS_REF, "Corrupt XRefTable Entry - ObjID:" + currObjID));
+ }
+ currObjID++;
+ skipSpaces();
+ }
+ skipSpaces();
+ char c = (char)pdfSource.peek();
+ if(c < '0' || c > '9')
+ {
+ break;
+ }
+ }
+ return true;
}
- protected void createContext() {
- this.ctx = new PreflightContext(this.originalDocument);
- ctx.setDocument(document);
- document.setContext(ctx);
+ /**
+ * Wraps the {@link NonSequentialPDFParser#parseCOSStream} to check rules on 'stream' and 'endstream' keywords.
+ * {@link #checkStreamKeyWord()} and {@link #checkEndstreamKeyWord()}
+ */
+ protected COSStream parseCOSStream( COSDictionary dic, RandomAccess file ) throws IOException {
+ checkStreamKeyWord();
+ COSStream result = super.parseCOSStream(dic, file);
+ checkEndstreamKeyWord();
+ return result;
+ }
+
+ /**
+ * 'stream' must be followed by <CR><LF> or only <LF>
+ * @throws IOException
+ */
+ protected void checkStreamKeyWord() throws IOException {
+ String streamV = readString();
+ if (!streamV.equals("stream")) {
+ addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER, "Expected 'stream' keyword but found '" + streamV +"'"));
+ }
+ int nextChar = pdfSource.read();
+ if ( !((nextChar == 13 && pdfSource.peek() == 10) || nextChar == 10)) {
+ addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER, "Expected 'EOL' after the stream keyword"));
+ }
+ // set the offset before stream
+ pdfSource.seek(pdfSource.getOffset()-7);
+ }
+
+ /**
+ * 'endstream' must be preceded by an EOL
+ * @throws IOException
+ */
+ protected void checkEndstreamKeyWord() throws IOException {
+ pdfSource.seek(pdfSource.getOffset()-10);
+ if (!nextIsEOL()) {
+ addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER, "Expected 'EOL' before the endstream keyword"));
+ }
+ String endstreamV = readString();
+ if (!endstreamV.equals("endstream")) {
+ addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER, "Expected 'endstream' keyword but found '" + endstreamV +"'"));
+ }
}
- protected void extractTrailers() throws IOException {
- SimpleCharStream scs = new SimpleCharStream(this.originalDocument.getInputStream());
- ExtractorTokenManager extractor = new ExtractorTokenManager(scs);
- extractor.parse();
- ctx.setPdfExtractor(extractor);
+ protected boolean nextIsEOL() throws IOException {
+ boolean succeed = false;
+ int nextChar = pdfSource.read();
+ if ( nextChar == 13 && pdfSource.peek() == 10 ) {
+ pdfSource.read();
+ succeed = true;
+ } else if ( nextChar == 13 || nextChar == 10 ) {
+ succeed = true;
+ }
+ return succeed;
+ }
+
+ /**
+ * @return true if the next character is a space. (The character is consumed)
+ * @throws IOException
+ */
+ protected boolean nextIsSpace() throws IOException {
+ return ' ' == pdfSource.read();
}
@Override
- public PDDocument getPDDocument() throws IOException {
- document.setResult(validationResult);
- // Add XMP MetaData
- return document;
+ /**
+ * Delegates to {@link BaseParser#parseCOSArray()} and records a validation error
+ * when the parsed array holds more than MAX_ARRAY_ELEMENTS elements.
+ */
+ protected COSArray parseCOSArray() throws IOException {
+ COSArray array = super.parseCOSArray();
+ if (array == null) {
+ return null;
+ }
+ int elementCount = array.size();
+ if (elementCount > MAX_ARRAY_ELEMENTS) {
+ addValidationError(new ValidationError(ERROR_SYNTAX_ARRAY_TOO_LONG, "Array too long : " + elementCount));
+ }
+ return array;
}
-
- public PreflightDocument getPreflightDocument() throws IOException {
- return (PreflightDocument)getPDDocument();
+
+ /**
+ * Delegates to {@link BaseParser#parseCOSName()} and records a validation error
+ * when the byte length of the parsed name is greater than MAX_NAME_SIZE.
+ */
+ @Override
+ protected COSName parseCOSName() throws IOException {
+ COSName name = super.parseCOSName();
+ if (name == null) {
+ return null;
+ }
+ int byteLength = name.getName().getBytes().length;
+ if (byteLength > MAX_NAME_SIZE) {
+ addValidationError(new ValidationError(ERROR_SYNTAX_NAME_TOO_LONG, "Name too long"));
+ }
+ return name;
+ }
+
+ /**
+ * Check that the hexa string contains only an even number of Hexadecimal characters,
+ * white space characters inside the string being ignored.
+ * Once it is done, reset the offset at the beginning of the string and call {@link BaseParser#parseCOSString()}.
+ * The length of the parsed string is then compared with MAX_STRING_LENGTH.
+ *
+ * @return the string parsed by the parent implementation
+ * @throws IOException if the source can't be read or repositioned
+ */
+ protected COSString parseCOSString() throws IOException
+ {
+ // offset reminder
+ long offset = pdfSource.getOffset();
+ char nextChar = (char)pdfSource.read();
+ int count = 0;
+ if (nextChar == '<') {
+ do {
+ nextChar = (char)pdfSource.read();
+ // PDF white spaces (NUL, HT, LF, FF, CR, SP) may split an hexa string,
+ // they are neither counted nor reported as invalid characters
+ boolean whiteSpace = nextChar == 0 || nextChar == 9 || nextChar == 10
+ || nextChar == 12 || nextChar == 13 || nextChar == 32;
+ if (nextChar != '>' && !whiteSpace) {
+ if (Character.digit(nextChar, 16) >= 0) {
+ count++;
+ } else {
+ addValidationError(new ValidationError(ERROR_SYNTAX_HEXA_STRING_INVALID, "Hexa String must have only Hexadecimal Characters (found '" + nextChar +"')" ));
+ break;
+ }
+ }
+ } while (nextChar != '>');
+ }
+
+ if (count % 2 != 0) {
+ addValidationError(new ValidationError(ERROR_SYNTAX_HEXA_STRING_EVEN_NUMBER, "Hexa string shall contain even number of non white space char"));
+ }
+
+ // reset the offset to parse the COSString
+ pdfSource.seek(offset);
+ COSString result = super.parseCOSString();
+
+ if ( result.getString().length() > MAX_STRING_LENGTH) {
+ addValidationError(new ValidationError(ERROR_SYNTAX_HEXA_STRING_TOO_LONG, "Hexa string is too long"));
+ }
+ return result;
+ }
+
+ /**
+ * Delegates to {@link BaseParser#parseDirObject()} and then validates the parsed object :
+ * a float must stay between MAX_NEGATIVE_FLOAT and MAX_POSITIVE_FLOAT, any other number
+ * must fit in the Integer range and a dictionary can't have more than MAX_DICT_ENTRIES entries.
+ */
+ protected COSBase parseDirObject() throws IOException
+ {
+ COSBase parsed = super.parseDirObject();
+
+ if (parsed instanceof COSNumber) {
+ COSNumber numeric = (COSNumber)parsed;
+ if (!(numeric instanceof COSFloat)) {
+ long numAsLong = numeric.longValue();
+ boolean fitsInInteger = numAsLong <= Integer.MAX_VALUE && numAsLong >= Integer.MIN_VALUE;
+ if (!fitsInInteger) {
+ addValidationError(new ValidationError(ERROR_SYNTAX_NUMERIC_RANGE, "Numeric is too long or too small: " + numAsLong));
+ }
+ } else {
+ double real = numeric.doubleValue();
+ if (real > MAX_POSITIVE_FLOAT || real < MAX_NEGATIVE_FLOAT) {
+ addValidationError(new ValidationError(ERROR_SYNTAX_NUMERIC_RANGE, "Float is too long or too small: " + real));
+ }
+ }
+ } else if (parsed instanceof COSDictionary) {
+ int entryCount = ((COSDictionary)parsed).size();
+ if (entryCount > MAX_DICT_ENTRIES) {
+ addValidationError(new ValidationError(ERROR_SYNTAX_TOO_MANY_ENTRIES, "Too Many Entries In Dictionary"));
+ }
+ }
+ return parsed;
+ }
+
+ /**
+ * Returns the object identified by the given object and generation numbers, parsing it
+ * the first time it is requested. The xref table gives either the offset of the object
+ * in the file (value &gt; 0) or, negated, the number of the object stream that contains it.
+ * While the object is parsed, validation errors are recorded when the 'N G obj' header
+ * isn't written on one line with single separators and when the 'endobj' keyword isn't
+ * surrounded by EOL.
+ *
+ * @param objNr object number
+ * @param objGenNr generation number
+ * @param requireExistingNotCompressedObj if true, an object without xref entry or with an
+ * entry &lt;= 0 records a validation error and raises a SyntaxValidationException
+ * @return the parsed object (COSNull.NULL if the xref table doesn't define it)
+ * @throws IOException if the object can't be read, doesn't match its xref entry,
+ * can't be decrypted or doesn't end with 'endobj'
+ */
+ protected COSBase parseObjectDynamically( int objNr, int objGenNr, boolean requireExistingNotCompressedObj ) throws IOException {
+ // ---- create object key and get object (container) from pool
+ final COSObjectKey objKey = new COSObjectKey( objNr, objGenNr );
+ final COSObject pdfObject = document.getObjectFromPool( objKey );
+
+ if ( pdfObject.getObject() == null )
+ {
+ // not previously parsed
+ // ---- read offset or object stream object number from xref table
+ Long offsetOrObjstmObNr = xrefTrailerResolver.getXrefTable().get( objKey );
+
+ // sanity test to circumvent loops with broken documents
+ if ( requireExistingNotCompressedObj && ( ( offsetOrObjstmObNr == null ) || ( offsetOrObjstmObNr <= 0 ) ) ) {
+ addValidationError(new ValidationError(ERROR_SYNTAX_NEGATIVE_OFFSET, "Object must be defined and must not be compressed object: " + objKey.getNumber() + ":" + objKey.getGeneration()));
+ throw new SyntaxValidationException( "Object must be defined and must not be compressed object: " + objKey.getNumber() + ":" + objKey.getGeneration(), validationResult);
+ }
+
+ if ( offsetOrObjstmObNr == null ) {
+ // not defined object -> NULL object (Spec. 1.7, chap. 3.2.9)
+ pdfObject.setObject( COSNull.NULL );
+ } else if ( offsetOrObjstmObNr > 0 ) {
+ // offset of indirect object in file
+ // ---- go to object start
+ setPdfSource( offsetOrObjstmObNr );
+ // ---- we must have an indirect object
+ int readObjNr = 0;
+ int readObjGen = 0;
+
+ long offset = pdfSource.getOffset();
+ String line = readLine();
+ // strict form : the whole line must be 'number generation obj' with exactly one
+ // white space character between the elements
+ Pattern pattern = Pattern.compile("(\\d+)\\s(\\d+)\\sobj");
+ Matcher matcher = pattern.matcher(line);
+ if (matcher.matches()) {
+ readObjNr = Integer.parseInt(matcher.group(1));
+ readObjGen = Integer.parseInt(matcher.group(2));
+ } else {
+
+ // the header isn't in the strict form : report it, then read it again token by token
+ addValidationError(new ValidationError(ERROR_SYNTAX_OBJ_DELIMITER, "Single space expected"));
+ // reset pdfSource cursor to read object information
+ pdfSource.seek(offset);
+ readObjNr = readInt();
+ readObjGen = readInt();
+ for ( char c : OBJ_MARKER )
+ {
+ if ( pdfSource.read() != c )
+ {
+ addValidationError(new ValidationError(ERROR_SYNTAX_OBJ_DELIMITER, "Expected pattern '" + new String( OBJ_MARKER ) + " but missed at character '" + c + "'" ));
+ throw new SyntaxValidationException( "Expected pattern '" + new String( OBJ_MARKER ) + " but missed at character '" + c + "'" , validationResult);
+ }
+ }
+ }
+
+ // ---- consistency check
+ if ( ( readObjNr != objKey.getNumber() ) || ( readObjGen != objKey.getGeneration() ) )
+ {
+ throw new IOException( "XREF for " + objKey.getNumber() + ":" + objKey.getGeneration() +" points to wrong object: " + readObjNr + ":" + readObjGen );
+ }
+
+ skipSpaces();
+ COSBase pb = parseDirObject();
+ skipSpaces();
+ // offset of the token that follows the object ('stream' or 'endobj' are expected)
+ long endObjectOffset = pdfSource.getOffset();
+ String endObjectKey = readString();
+
+ if ( endObjectKey.equals( "stream" ) )
+ {
+ // go back in front of the 'stream' keyword, parseCOSStream reads it again
+ pdfSource.seek(endObjectOffset);
+ if( pb instanceof COSDictionary )
+ {
+ COSStream stream = parseCOSStream( (COSDictionary)pb, getDocument().getScratchFile() );
+ if ( securityHandler != null )
+ {
+ try
+ {
+ securityHandler.decryptStream(stream, objNr, objGenNr );
+ }
+ catch ( CryptographyException ce )
+ {
+ throw new IOException( "Error decrypting stream object " + objNr + ": " + ce.getMessage()
+ /*, ce // TODO: remove remark with Java 1.6 */ );
+ }
+ }
+ pb = stream;
+ }
+ else
+ {
+ // this is not legal
+ // the combination of a dict and the stream/endstream forms a complete stream object
+ throw new IOException( "Stream not preceded by dictionary (offset: " + offsetOrObjstmObNr + ")." );
+ }
+ skipSpaces();
+ endObjectOffset = pdfSource.getOffset();
+ endObjectKey = readString();
+
+ // we have case with a second 'endstream' before endobj
+ if ( ! endObjectKey.startsWith( "endobj" ) )
+ {
+ if ( endObjectKey.startsWith( "endstream" ) )
+ {
+ endObjectKey = endObjectKey.substring( 9 ).trim();
+ if ( endObjectKey.length() == 0 )
+ {
+ // no other characters in extra endstream line
+ // NOTE(review): endObjectOffset isn't refreshed here, it still points to the extra 'endstream'
+ endObjectKey = readString(); // read next line
+ }
+ }
+ }
+ } else if ( securityHandler != null )
+ {
+ // decrypt
+ if ( pb instanceof COSString )
+ {
+ decrypt( (COSString) pb, objNr, objGenNr );
+ }
+ else if ( pb instanceof COSDictionary )
+ {
+ for( Entry<COSName,COSBase> entry : ((COSDictionary) pb).entrySet() )
+ {
+ // TODO: specially handle 'Contents' entry of signature dictionary like in SecurityHandler#decryptDictionary
+ if ( entry.getValue() instanceof COSString )
+ {
+ decrypt( (COSString) entry.getValue(), objNr, objGenNr );
+ }
+ }
+ }
+ else if ( pb instanceof COSArray )
+ {
+ final COSArray array = (COSArray) pb;
+ for( int aIdx = 0, len = array.size(); aIdx < len; aIdx++ )
+ {
+ if ( array.get( aIdx ) instanceof COSString )
+ {
+ decrypt( (COSString) array.get( aIdx ), objNr, objGenNr );
+ }
+ }
+ }
+ }
+
+ pdfObject.setObject( pb );
+
+ if ( ! endObjectKey.startsWith( "endobj" ) )
+ {
+ throw new IOException( "Object (" + readObjNr + ":" + readObjGen + ") at offset " + offsetOrObjstmObNr + " does not end with 'endobj'." );
+ } else {
+ // look at the character just before the recorded token offset, then come back
+ offset = pdfSource.getOffset();
+ pdfSource.seek(endObjectOffset-1);
+ if (!nextIsEOL()) {
+ addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_OBJ_DELIMITER, "EOL expected before the 'endobj' keyword"));
+ }
+ pdfSource.seek(offset);
+ }
+
+ if (!nextIsEOL()) {
+ addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_OBJ_DELIMITER, "EOL expected after the 'endobj' keyword"));
+ }
+
+ releasePdfSourceInputStream();
+ } else {
+ // xref value is object nr of object stream containing object to be parsed;
+ // since our object was not found it means object stream was not parsed so far
+ final int objstmObjNr = (int) ( - offsetOrObjstmObNr );
+ // the object stream itself must be a regular (not compressed) object
+ final COSBase objstmBaseObj = parseObjectDynamically( objstmObjNr, 0, true );
+ if ( objstmBaseObj instanceof COSStream )
+ {
+ // parse object stream
+ PDFObjectStreamParser parser = new PDFObjectStreamParser( (COSStream) objstmBaseObj, document, forceParsing );
+ parser.parse();
+
+ // get set of object numbers referenced for this object stream
+ final Set<Long> refObjNrs = xrefTrailerResolver.getContainedObjectNumbers( objstmObjNr );
+
+ // register all objects which are referenced to be contained in object stream
+ for( COSObject next : parser.getObjects() )
+ {
+ COSObjectKey stmObjKey = new COSObjectKey( next );
+ if ( refObjNrs.contains( stmObjKey.getNumber() ) )
+ {
+ COSObject stmObj = document.getObjectFromPool( stmObjKey );
+ stmObj.setObject( next.getObject() );
+ }
+ }
+ }
+ }
+ }
+ return pdfObject.getObject();
+ }
+
+ /**
+ * Calls the parent implementation and, when the searched pattern is the EOF marker,
+ * checks that nothing but a single optional EOL (CR, LF or CR LF) follows the
+ * marker in the buffer. A validation error is recorded otherwise.
+ *
+ * @param pattern the searched pattern
+ * @param buf the buffer in which the pattern is searched
+ * @param endOff the offset given to the parent implementation as end of the search
+ * @return the offset returned by the parent implementation
+ */
+ protected int lastIndexOf( final char[] pattern, final byte[] buf, final int endOff )
+ {
+ int offset = super.lastIndexOf(pattern, buf, endOff);
+ if (offset > 0 && Arrays.equals(pattern, EOF_MARKER)) {
+ // this is the offset of the last %%EOF sequence.
+ // nothing should be present after this sequence.
+ int tmpOffset = offset + pattern.length;
+ int trailing = buf.length - tmpOffset;
+ if (trailing > 0) {
+ // EOL is authorized : the second byte is only read when it exists
+ boolean singleEol = trailing == 1 && (buf[tmpOffset] == 10 || buf[tmpOffset] == 13);
+ boolean crLf = trailing == 2 && buf[tmpOffset] == 13 && buf[tmpOffset+1] == 10;
+ if (!singleEol && !crLf) {
+ addValidationError(new ValidationError(ERROR_SYNTAX_TRAILER_EOF,"File contains data after the last %%EOF sequence"));
+ }
+ }
+ }
+ return offset;
}
}
\ No newline at end of file
Modified: pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/process/TrailerValidationProcess.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/process/TrailerValidationProcess.java?rev=1389604&r1=1389603&r2=1389604&view=diff
==============================================================================
--- pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/process/TrailerValidationProcess.java (original)
+++ pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/process/TrailerValidationProcess.java Mon Sep 24 21:28:34 2012
@@ -53,7 +53,6 @@ import org.apache.pdfbox.preflight.Prefl
import org.apache.pdfbox.preflight.ValidationResult.ValidationError;
import org.apache.pdfbox.preflight.exception.ValidationException;
import org.apache.pdfbox.preflight.utils.COSUtils;
-import org.apache.pdfbox.preflight.utils.PdfElementParser;
public class TrailerValidationProcess extends AbstractProcess {
@@ -89,26 +88,14 @@ public class TrailerValidationProcess ex
* @param result
*/
protected void checkTrailersForLinearizedPDF14(PreflightContext ctx) {
- List<String> lTrailers = ctx.getPdfExtractor().getAllTrailers();
-
- if (lTrailers.isEmpty()) {
+ COSDictionary first = ctx.getXrefTableResolver().getFirstTrailer();
+ if (first == null) {
addValidationError(ctx, new ValidationError(ERROR_SYNTAX_TRAILER, "There are no trailer in the PDF file"));
- } else {
- String firstTrailer = lTrailers.get(0);
- String lastTrailer = lTrailers.get(lTrailers.size() - 1);
-
- COSDictionary first = null;
- COSDictionary last = null;
+ } else {
+ COSDictionary last = ctx.getXrefTableResolver().getLastTrailer();
COSDocument cosDoc = null;
try {
cosDoc = new COSDocument();
-
- PdfElementParser parser1 = new PdfElementParser(cosDoc, firstTrailer.getBytes());
- first = parser1.parseAsDictionary();
-
- PdfElementParser parser2 = new PdfElementParser(cosDoc, lastTrailer.getBytes());
- last = parser2.parseAsDictionary();
-
checkMainTrailer(ctx, first);
if (!compareIds(first, last, cosDoc)) {
addValidationError(ctx, new ValidationError(
@@ -194,7 +181,7 @@ public class TrailerValidationProcess ex
if (idFirst == null || idLast == null) {
return false;
}
-
+
// ---- cast two COSBase to COSArray.
COSArray af = COSUtils.getAsArray(idFirst, cosDocument);
COSArray al = COSUtils.getAsArray(idLast, cosDocument);
@@ -260,7 +247,7 @@ public class TrailerValidationProcess ex
id = true;
}
}
-
+
COSDocument cosDocument = ctx.getDocument().getDocument();
// PDF/A Trailer dictionary must contain the ID key
if (!id) {
Modified: pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/TestInvalidDirectory.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/TestInvalidDirectory.java?rev=1389604&r1=1389603&r2=1389604&view=diff
==============================================================================
--- pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/TestInvalidDirectory.java (original)
+++ pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/TestInvalidDirectory.java Mon Sep 24 21:28:34 2012
@@ -30,6 +30,7 @@ import javax.activation.FileDataSource;
import junit.framework.Assert;
+import org.apache.pdfbox.preflight.exception.SyntaxValidationException;
import org.apache.pdfbox.preflight.parser.PreflightParser;
import org.junit.Test;
import org.junit.runner.RunWith;
@@ -51,13 +52,25 @@ public class TestInvalidDirectory {
@Test
public void validate () throws Exception {
+ PreflightDocument document = null;
+
System.out.println(target);
- PreflightParser parser = new PreflightParser(new FileDataSource(target));
- parser.parse();
- PreflightDocument document = (PreflightDocument) parser.getPDDocument();
- document.validate();
- Assert.assertFalse(document.getResult().isValid());
- document.close();
+ ValidationResult result = null;
+ try {
+ PreflightParser parser = new PreflightParser(new FileDataSource(target));
+ parser.parse();
+ document = (PreflightDocument)parser.getPDDocument();
+ document.validate();
+ result = document.getResult();
+ } catch (SyntaxValidationException e) {
+ result = e.getResult();
+ } finally {
+ if (document != null) {
+ document.close();
+ }
+ }
+ Assert.assertFalse("Test of " + target, result.isValid());
+
}
@Parameters
Modified: pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/TestIsartorValidationFromClasspath.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/TestIsartorValidationFromClasspath.java?rev=1389604&r1=1389603&r2=1389604&view=diff
==============================================================================
--- pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/TestIsartorValidationFromClasspath.java (original)
+++ pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/TestIsartorValidationFromClasspath.java Mon Sep 24 21:28:34 2012
@@ -37,6 +37,7 @@ import junit.framework.Assert;
import org.apache.commons.io.IOUtils;
import org.apache.pdfbox.preflight.ValidationResult.ValidationError;
+import org.apache.pdfbox.preflight.exception.SyntaxValidationException;
import org.apache.pdfbox.preflight.exception.ValidationException;
import org.apache.pdfbox.preflight.parser.PreflightParser;
import org.junit.AfterClass;
@@ -90,13 +91,21 @@ public class TestIsartorValidationFromCl
try {
System.out.println(path);
InputStream input = this.getClass().getResourceAsStream(path);
- PreflightParser parser = new PreflightParser(new org.apache.pdfbox.preflight.utils.ByteArrayDataSource(input));
- parser.parse();
- document = (PreflightDocument)parser.getPDDocument();
- document.validate();
- ValidationResult result = document.getResult();
+
+ ValidationResult result = null;
+ try {
+ PreflightParser parser = new PreflightParser(new org.apache.pdfbox.preflight.utils.ByteArrayDataSource(input));
+ parser.parse();
+ document = (PreflightDocument)parser.getPDDocument();
+ document.validate();
+ result = document.getResult();
+ } catch (SyntaxValidationException e) {
+ result = e.getResult();
+ }
+
Assert.assertFalse(path + " : Isartor file should be invalid (" + path + ")", result.isValid());
Assert.assertTrue(path + " : Should find at least one error", result.getErrorsList().size() > 0);
+
// could contain more than one error
boolean found = false;
for (ValidationError error : result.getErrorsList()) {
@@ -104,8 +113,7 @@ public class TestIsartorValidationFromCl
found = true;
}
if (isartorResultFile != null) {
- String log = path.replace(".pdf", "") + "#"
- +error.getErrorCode()+"#"+error.getDetails()+"\n";
+ String log = path.replace(".pdf", "") + "#" + error.getErrorCode()+"#"+error.getDetails()+"\n";
isartorResultFile.write(log.getBytes());
}
}
@@ -145,7 +153,7 @@ public class TestIsartorValidationFromCl
IOUtils.closeQuietly(expected);
// prepare config
List<Object[]> data = new ArrayList<Object[]>();
- InputStream is = Class.class.getResourceAsStream("/Isartor testsuite.list");
+ InputStream is = Class.class.getResourceAsStream("/Isartor testsuite.list");
if (is != null)
{
BufferedReader reader = new BufferedReader(new InputStreamReader(is));
Modified: pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/TestValidDirectory.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/TestValidDirectory.java?rev=1389604&r1=1389603&r2=1389604&view=diff
==============================================================================
--- pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/TestValidDirectory.java (original)
+++ pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/TestValidDirectory.java Mon Sep 24 21:28:34 2012
@@ -30,6 +30,7 @@ import javax.activation.FileDataSource;
import junit.framework.Assert;
+import org.apache.pdfbox.preflight.exception.SyntaxValidationException;
import org.apache.pdfbox.preflight.parser.PreflightParser;
import org.junit.Test;
import org.junit.runner.RunWith;
@@ -38,58 +39,62 @@ import org.junit.runners.Parameterized.P
@RunWith(Parameterized.class)
public class TestValidDirectory {
-
- protected File target = null;
-
- public TestValidDirectory (File file) {
- this.target = file;
- }
-
- @Test
- public void validate () throws Exception {
- PreflightDocument document = null;
- try {
- System.out.println(target);
- PreflightParser parser = new PreflightParser(new FileDataSource(target));
- parser.parse();
- document = (PreflightDocument) parser.getPDDocument();
- document.validate();
- Assert.assertTrue("Validation of " + target ,document.getResult().isValid());
- } finally {
- if (document != null) {
- document.close();
- }
- }
- }
-
- @Parameters
- public static Collection<Object[]> initializeParameters() throws Exception {
- // check directory
- File directory = null;
- String pdfPath = System.getProperty("pdfa.valid", null);
- if ("${user.pdfa.valid}".equals(pdfPath)) {pdfPath=null;}
- if (pdfPath!=null) {
- directory = new File(pdfPath);
- if (!directory.exists()) throw new Exception ("directory does not exists : "+directory.getAbsolutePath());
- if (!directory.isDirectory()) throw new Exception ("not a directory : "+directory.getAbsolutePath());
- } else {
- System.err.println("System property 'pdfa.valid' not defined, will not run TestValidaDirectory");
- }
- // create list
- if (directory==null) {
- return new ArrayList<Object[]>(0);
- } else {
- File [] files = directory.listFiles();
- List<Object[]> data = new ArrayList<Object[]>(files.length);
- for (File file : files) {
- if (file.isFile()) {
- data.add(new Object [] {file});
- }
- }
- return data;
- }
- }
-
-
-
+
+ protected File target = null;
+
+ public TestValidDirectory (File file) {
+ this.target = file;
+ }
+
+ @Test
+ public void validate () throws Exception {
+ PreflightDocument document = null;
+ System.out.println(target);
+ ValidationResult result = null;
+ try {
+ PreflightParser parser = new PreflightParser(new FileDataSource(target));
+ parser.parse();
+ document = (PreflightDocument)parser.getPDDocument();
+ document.validate();
+ result = document.getResult();
+ } catch (SyntaxValidationException e) {
+ result = e.getResult();
+ } finally {
+ if (document != null) {
+ document.close();
+ }
+ }
+ Assert.assertTrue("Validation of " + target , result.isValid());
+ }
+
+ @Parameters
+ public static Collection<Object[]> initializeParameters() throws Exception {
+ // check directory
+ File directory = null;
+ String pdfPath = System.getProperty("pdfa.valid", null);
+ if ("${user.pdfa.valid}".equals(pdfPath)) {pdfPath=null;}
+ if (pdfPath!=null) {
+ directory = new File(pdfPath);
+ if (!directory.exists()) throw new Exception ("directory does not exists : "+directory.getAbsolutePath());
+ if (!directory.isDirectory()) throw new Exception ("not a directory : "+directory.getAbsolutePath());
+ } else {
+ System.err.println("System property 'pdfa.valid' not defined, will not run TestValidaDirectory");
+ }
+ // create list
+ if (directory==null) {
+ return new ArrayList<Object[]>(0);
+ } else {
+ File [] files = directory.listFiles();
+ List<Object[]> data = new ArrayList<Object[]>(files.length);
+ for (File file : files) {
+ if (file.isFile()) {
+ data.add(new Object [] {file});
+ }
+ }
+ return data;
+ }
+ }
+
+
+
}
Modified: pdfbox/trunk/preflight/src/test/resources/expected_errors.txt
URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/test/resources/expected_errors.txt?rev=1389604&r1=1389603&r2=1389604&view=diff
==============================================================================
--- pdfbox/trunk/preflight/src/test/resources/expected_errors.txt (original)
+++ pdfbox/trunk/preflight/src/test/resources/expected_errors.txt Mon Sep 24 21:28:34 2012
@@ -23,12 +23,12 @@ isartor-6-1-2-t01-fail-a.pdf=1.1
isartor-6-1-2-t02-fail-a.pdf=1.1
isartor-6-1-3-t01-fail-a.pdf=1.4.1
isartor-6-1-3-t02-fail-a.pdf=1.4.2
-isartor-6-1-3-t03-fail-a.pdf=1.4
+isartor-6-1-3-t03-fail-a.pdf=1.4.10 // 1.4 due to JavaCC
isartor-6-1-3-t04-fail-a.pdf=1.4.6 // Revoir le parser pour les linearized file (LIGNE VIDE qui encadre le body?????)
isartor-6-1-4-t01-fail-a.pdf=1.3
isartor-6-1-4-t02-fail-a.pdf=1.3
-isartor-6-1-6-t01-fail-a.pdf=1.2
-isartor-6-1-7-t01-fail-a.pdf=1.2
+isartor-6-1-6-t01-fail-a.pdf=1.0.11 // String Hex error - before was 1.2 due to JavaCC validation
+isartor-6-1-7-t01-fail-a.pdf=1.2.2 // Stream keyword must be followed by CR&LF or LF only- before was 1.2 due to JavaCC validation
isartor-6-1-7-t02-fail-a.pdf=1.2.2
isartor-6-1-7-t03-fail-a.pdf=1.2.5
isartor-6-1-7-t04-fail-a.pdf=1.2.6
@@ -36,7 +36,7 @@ isartor-6-1-7-t04-fail-b.pdf=1.2.6
isartor-6-1-7-t04-fail-c.pdf=1.2.6
isartor-6-1-8-t01-fail-a.pdf=1.2.1
isartor-6-1-8-t02-fail-a.pdf=1.2.1
-isartor-6-1-8-t03-fail-a.pdf=1.2
+isartor-6-1-8-t03-fail-a.pdf=1.2.1 // before was 1.2 due to JavaCC
isartor-6-1-8-t04-fail-a.pdf=1.2.1
isartor-6-1-8-t05-fail-a.pdf=1.2.1
isartor-6-1-8-t06-fail-a.pdf=1.2.1