You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2012/09/24 23:28:35 UTC

svn commit: r1389604 - in /pdfbox/trunk/preflight: ./ src/main/java/org/apache/pdfbox/preflight/ src/main/java/org/apache/pdfbox/preflight/exception/ src/main/java/org/apache/pdfbox/preflight/parser/ src/main/java/org/apache/pdfbox/preflight/process/ s...

Author: leleueri
Date: Mon Sep 24 21:28:34 2012
New Revision: 1389604

URL: http://svn.apache.org/viewvc?rev=1389604&view=rev
Log:
[https://issues.apache.org/jira/browse/PDFBOX-1373] Syntax validation is done by the preflight parser

Added:
    pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/exception/SyntaxValidationException.java   (with props)
Modified:
    pdfbox/trunk/preflight/pom.xml
    pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightConstants.java
    pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightContext.java
    pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/Validator_A1b.java
    pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java
    pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/process/TrailerValidationProcess.java
    pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/TestInvalidDirectory.java
    pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/TestIsartorValidationFromClasspath.java
    pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/TestValidDirectory.java
    pdfbox/trunk/preflight/src/test/resources/expected_errors.txt

Modified: pdfbox/trunk/preflight/pom.xml
URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/pom.xml?rev=1389604&r1=1389603&r2=1389604&view=diff
==============================================================================
--- pdfbox/trunk/preflight/pom.xml (original)
+++ pdfbox/trunk/preflight/pom.xml Mon Sep 24 21:28:34 2012
@@ -235,6 +235,18 @@
         <scope>test</scope>
     </dependency>
   -->
+      <dependency>
+      <groupId>org.bouncycastle</groupId>
+      <artifactId>bcmail-jdk15</artifactId>
+      <version>1.44</version>
+      <optional>true</optional>
+    </dependency>
+    <dependency>
+      <groupId>org.bouncycastle</groupId>
+      <artifactId>bcprov-jdk15</artifactId>
+      <version>1.44</version>
+      <optional>true</optional>
+    </dependency>
   </dependencies>
 
   <reporting>

Modified: pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightConstants.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightConstants.java?rev=1389604&r1=1389603&r2=1389604&view=diff
==============================================================================
--- pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightConstants.java (original)
+++ pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightConstants.java Mon Sep 24 21:28:34 2012
@@ -339,6 +339,18 @@ public interface PreflightConstants {
 	 * CID too long
 	 */
 	String ERROR_SYNTAX_CID_RANGE = "1.0.10";
+	/**
+	 * A hexadecimal string shall contain an even number of non-whitespace characters
+	 */
+	String ERROR_SYNTAX_HEXA_STRING_EVEN_NUMBER = "1.0.11";
+	/**
+	 * A hexadecimal string contains non-hexadecimal characters
+	 */
+	String ERROR_SYNTAX_HEXA_STRING_INVALID= "1.0.12";	
+	/**
+	 * An object is missing from the document (offset is negative)
+	 */
+	String ERROR_SYNTAX_NEGATIVE_OFFSET = "1.0.13";
 
 	String ERROR_SYNTAX_HEADER = "1.1";
 	
@@ -444,6 +456,10 @@ public interface PreflightConstants {
 	 * Errors in the Outlines dictionary
 	 */
 	String ERROR_SYNTAX_TRAILER_OUTLINES_INVALID = "1.4.9";
+	/**
+	 * Last %%EOF sequence is followed by data
+	 */
+	String ERROR_SYNTAX_TRAILER_EOF = "1.4.10";
 
 	// -----------------------------------------------------------
 	// ---- GRAPHIC ERRORS 2.x...

Modified: pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightContext.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightContext.java?rev=1389604&r1=1389603&r2=1389604&view=diff
==============================================================================
--- pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightContext.java (original)
+++ pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightContext.java Mon Sep 24 21:28:34 2012
@@ -24,15 +24,16 @@ package org.apache.pdfbox.preflight;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.SortedMap;
 
 import javax.activation.DataSource;
 
 import org.apache.padaf.xmpbox.XMPMetadata;
 import org.apache.pdfbox.cos.COSBase;
+import org.apache.pdfbox.pdfparser.XrefTrailerResolver;
 import org.apache.pdfbox.preflight.ValidationResult.ValidationError;
 import org.apache.pdfbox.preflight.font.container.FontContainer;
 import org.apache.pdfbox.preflight.graphic.ICCProfileWrapper;
-import org.apache.pdfbox.preflight.javacc.extractor.ExtractorTokenManager;
 import org.apache.pdfbox.preflight.utils.COSUtils;
 
 public class PreflightContext {
@@ -50,13 +51,18 @@ public class PreflightContext {
 	 * The datasource to load the document from
 	 */
 	protected DataSource source = null;
-
-	/**
-	 * JavaCC Token Manager used to get some content of the PDF file as string (ex
-	 * : Trailers)
-	 */
-	protected ExtractorTokenManager pdfExtractor = null;
-
+//
+//	/**
+//	 * JavaCC Token Manager used to get some content of the PDF file as string (ex
+//	 * : Trailers)
+//	 */
+//	protected ExtractorTokenManager pdfExtractor = null;
+
+  /** Contains all xref/trailer objects and resolves them into a single
+   *  object using the startxref reference.
+   */
+	private XrefTrailerResolver xrefTableResolver;
+	
 	/**
 	 * This wrapper contains the ICCProfile used by the PDF file.
 	 */
@@ -103,21 +109,21 @@ public class PreflightContext {
 		this.metadata = metadata;
 	}
 	
-	/**
-	 * @return the value of the pdfExtractor attribute.
-	 */
-	public ExtractorTokenManager getPdfExtractor() {
-		return pdfExtractor;
-	}
-
-	/**
-	 * Initialize the pdfExtractor attribute.
-	 * 
-	 * @param pdfExtractor
-	 */
-	public void setPdfExtractor(ExtractorTokenManager pdfExtractor) {
-		this.pdfExtractor = pdfExtractor;
-	}
+//	/**
+//	 * @return the value of the pdfExtractor attribute.
+//	 */
+//	public ExtractorTokenManager getPdfExtractor() {
+//		return pdfExtractor;
+//	}
+//
+//	/**
+//	 * Initialize the pdfExtractor attribute.
+//	 * 
+//	 * @param pdfExtractor
+//	 */
+//	public void setPdfExtractor(ExtractorTokenManager pdfExtractor) {
+//		this.pdfExtractor = pdfExtractor;
+//	}
 
 	/**
 	 * @return the PDFBox object representation of the document
@@ -126,6 +132,14 @@ public class PreflightContext {
 		return document;
 	}
 
+	public XrefTrailerResolver getXrefTableResolver() {
+  	return xrefTableResolver;
+  }
+
+	public void setXrefTableResolver(XrefTrailerResolver xrefTableResolver) {
+  	this.xrefTableResolver = xrefTableResolver;
+  }
+
 	/**
 	 * Initialize the PDFBox object which present the PDF File.
 	 * 

Modified: pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/Validator_A1b.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/Validator_A1b.java?rev=1389604&r1=1389603&r2=1389604&view=diff
==============================================================================
--- pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/Validator_A1b.java (original)
+++ pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/Validator_A1b.java Mon Sep 24 21:28:34 2012
@@ -25,6 +25,7 @@ import javax.activation.FileDataSource;
 
 import org.apache.pdfbox.Version;
 import org.apache.pdfbox.preflight.ValidationResult.ValidationError;
+import org.apache.pdfbox.preflight.exception.SyntaxValidationException;
 import org.apache.pdfbox.preflight.parser.PreflightParser;
 
 
@@ -48,16 +49,20 @@ public class Validator_A1b {
 			System.exit(1);
 		}
 
+		ValidationResult result = null;
 		FileDataSource fd = new FileDataSource(args[0]);
-		
 		PreflightParser parser = new PreflightParser(fd);
-		parser.parse();
-		PreflightDocument document = (PreflightDocument)parser.getPDDocument();
-		document.validate();
+		try {
+			parser.parse();
+			PreflightDocument document = parser.getPreflightDocument();
+			document.validate();
+			result = document.getResult();
+			document.close();
+		} catch (SyntaxValidationException e) {
+			result = e.getResult();
+		}
 
-		ValidationResult result = document.getResult();
 		if (result.isValid()) {
-			document.close();
 			System.out.println("The file " + args[0] + " is a valid PDF/A-1b file");
 			System.exit(0);
 		} else {
@@ -66,7 +71,6 @@ public class Validator_A1b {
 				System.out.println(error.getErrorCode() + " : " + error.getDetails());
 			}
 
-			document.close();
 			System.exit(-1);
 		}
 	}

Added: pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/exception/SyntaxValidationException.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/exception/SyntaxValidationException.java?rev=1389604&view=auto
==============================================================================
--- pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/exception/SyntaxValidationException.java (added)
+++ pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/exception/SyntaxValidationException.java Mon Sep 24 21:28:34 2012
@@ -0,0 +1,49 @@
+/*****************************************************************************
+ * 
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ * 
+ ****************************************************************************/
+
+package org.apache.pdfbox.preflight.exception;
+
+import org.apache.pdfbox.preflight.ValidationResult;
+
+public class SyntaxValidationException extends ValidationException {
+
+	private final ValidationResult result;
+	
+	public SyntaxValidationException(String message, Throwable cause, ValidationResult result) {
+	  super(message, cause);
+	  this.result = result;
+  }
+
+	public SyntaxValidationException(String message, ValidationResult result) {
+	  super(message);
+	  this.result = result;
+  }
+
+	public SyntaxValidationException(Throwable cause, ValidationResult result) {
+	  super(cause);
+	  this.result = result;
+  }
+
+	public ValidationResult getResult() {
+  	return result;
+  }
+
+}

Propchange: pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/exception/SyntaxValidationException.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java?rev=1389604&r1=1389603&r2=1389604&view=diff
==============================================================================
--- pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java (original)
+++ pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java Mon Sep 24 21:28:34 2012
@@ -21,17 +21,61 @@
 
 package org.apache.pdfbox.preflight.parser;
 
+import static org.apache.pdfbox.preflight.PreflightConstants.*;
+import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_CROSS_REF;
+import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_HEXA_STRING_EVEN_NUMBER;
+import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_HEXA_STRING_INVALID;
+import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_HEXA_STRING_TOO_LONG;
+import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_NAME_TOO_LONG;
+import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_NUMERIC_RANGE;
+import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_OBJ_DELIMITER;
+import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_STREAM_DELIMITER;
+import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_TOO_MANY_ENTRIES;
+import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_TRAILER_EOF;
+import static org.apache.pdfbox.preflight.PreflightConstants.MAX_ARRAY_ELEMENTS;
+import static org.apache.pdfbox.preflight.PreflightConstants.MAX_DICT_ENTRIES;
+import static org.apache.pdfbox.preflight.PreflightConstants.MAX_NAME_SIZE;
+import static org.apache.pdfbox.preflight.PreflightConstants.MAX_NEGATIVE_FLOAT;
+import static org.apache.pdfbox.preflight.PreflightConstants.MAX_POSITIVE_FLOAT;
+import static org.apache.pdfbox.preflight.PreflightConstants.MAX_STRING_LENGTH;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStreamReader;
 import java.nio.charset.Charset;
+import java.util.Arrays;
 import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 import javax.activation.DataSource;
+import javax.activation.FileDataSource;
 
+import org.apache.pdfbox.cos.COSArray;
+import org.apache.pdfbox.cos.COSBase;
+import org.apache.pdfbox.cos.COSDictionary;
+import org.apache.pdfbox.cos.COSDocument;
+import org.apache.pdfbox.cos.COSFloat;
+import org.apache.pdfbox.cos.COSName;
+import org.apache.pdfbox.cos.COSNull;
+import org.apache.pdfbox.cos.COSNumber;
+import org.apache.pdfbox.cos.COSObject;
+import org.apache.pdfbox.cos.COSStream;
+import org.apache.pdfbox.cos.COSString;
+import org.apache.pdfbox.exceptions.CryptographyException;
 import org.apache.pdfbox.io.IOUtils;
 import org.apache.pdfbox.io.RandomAccess;
+import org.apache.pdfbox.pdfparser.BaseParser;
+import org.apache.pdfbox.pdfparser.NonSequentialPDFParser;
+import org.apache.pdfbox.pdfparser.PDFObjectStreamParser;
 import org.apache.pdfbox.pdfparser.PDFParser;
 import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.persistence.util.COSObjectKey;
 import org.apache.pdfbox.preflight.Format;
 import org.apache.pdfbox.preflight.PreflightConfiguration;
 import org.apache.pdfbox.preflight.PreflightConstants;
@@ -39,13 +83,9 @@ import org.apache.pdfbox.preflight.Prefl
 import org.apache.pdfbox.preflight.PreflightDocument;
 import org.apache.pdfbox.preflight.ValidationResult;
 import org.apache.pdfbox.preflight.ValidationResult.ValidationError;
-import org.apache.pdfbox.preflight.exception.PdfParseException;
-import org.apache.pdfbox.preflight.exception.ValidationException;
-import org.apache.pdfbox.preflight.javacc.ParseException;
-import org.apache.pdfbox.preflight.javacc.extractor.ExtractorTokenManager;
-import org.apache.pdfbox.preflight.javacc.extractor.SimpleCharStream;
+import org.apache.pdfbox.preflight.exception.SyntaxValidationException;
 
-public class PreflightParser extends PDFParser {
+public class PreflightParser extends NonSequentialPDFParser {
 	/**
 	 * Define a one byte encoding that hasn't specific encoding in UTF-8 charset.
 	 * Avoid unexpected error when the encoding is Cp5816
@@ -56,18 +96,20 @@ public class PreflightParser extends PDF
 
 	protected ValidationResult validationResult;
 
-	protected PreflightDocument document;
+	protected PreflightDocument preflightDocument;
 
 	protected PreflightContext ctx;
 
-	public PreflightParser(DataSource input, RandomAccess rafi, boolean force)	throws IOException {
-		super(input.getInputStream(), rafi, force);
-		this.originalDocument = input;
+	public PreflightParser(File file, RandomAccess rafi) throws IOException {
+		super(file, rafi);
+		this.originalDocument = new FileDataSource(file);
 	}
 
-	public PreflightParser(DataSource input, RandomAccess rafi) throws IOException {
-		super(input.getInputStream(), rafi);
-		this.originalDocument = input;
+	public PreflightParser(File file) throws IOException {
+		this(file, null);
+	}
+	public PreflightParser(String filename) throws IOException {
+		this(new File(filename), null);
 	}
 
 	public PreflightParser(DataSource input) throws IOException {
@@ -75,29 +117,6 @@ public class PreflightParser extends PDF
 		this.originalDocument = input;
 	}
 
-
-	/**
-	 * Create an instance of ValidationResult. This object contains an instance of
-	 * ValidationError. If the ParseException is an instance of PdfParseException,
-	 * the embedded validation error is initialized with the error code of the
-	 * exception, otherwise it is an UnknownError.
-	 * 
-	 * @param e
-	 * @return
-	 */
-	protected static ValidationResult createErrorResult(ParseException e) {
-		if (e instanceof PdfParseException) {
-			if (e.getCause()==null) {
-				return new ValidationResult(new ValidationError(((PdfParseException)e).getErrorCode()));
-			} else if (e.getCause().getMessage()==null) {
-				return new ValidationResult(new ValidationError(((PdfParseException)e).getErrorCode()));
-			} else {
-				return new ValidationResult(new ValidationError(((PdfParseException)e).getErrorCode(),e.getCause().getMessage()));
-			}
-		}
-		return createUnknownErrorResult();
-	}
-
 	/**
 	 * Create an instance of ValidationResult with a
 	 * ValidationError(UNKNOWN_ERROR)
@@ -152,58 +171,578 @@ public class PreflightParser extends PDF
 	 * @throws IOException
 	 */
 	public void parse(Format format, PreflightConfiguration config) throws IOException {
-		checkFileSyntax();
-		// run PDFBox Parser
-		super.parse();
+		checkPdfHeader();
+		try {
+			super.parse();
+		} catch (IOException e) {
+			addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_COMMON, e.getMessage()));
+			throw new SyntaxValidationException(e, this.validationResult);
+		}
 		Format formatToUse = (format == null ? Format.PDF_A1B : format);
 		createPdfADocument(formatToUse, config);
 		createContext();
-		extractTrailers();
 	}
-	
+
+	protected void createPdfADocument(Format format, PreflightConfiguration config) throws IOException {
+		COSDocument cosDocument = getDocument();
+		this.preflightDocument = new PreflightDocument(cosDocument, format, config);
+	}
+
+	/**
+	 * Create a validation context.
+	 * This context is set to the PreflightDocument.
+	 */
+	protected void createContext() {
+		this.ctx = new PreflightContext(this.originalDocument);
+		ctx.setDocument(preflightDocument);
+		preflightDocument.setContext(ctx);
+		ctx.setXrefTableResolver(xrefTrailerResolver);
+	}
+
+	@Override
+	public PDDocument getPDDocument() throws IOException {
+		preflightDocument.setResult(validationResult);
+		// Add XMP MetaData
+		return preflightDocument;
+	}
+
+	public PreflightDocument getPreflightDocument() throws IOException {
+		return (PreflightDocument)getPDDocument();
+	}
+
+
+	// --------------------------------------------------------
+	// - Below are all the methods that add checks on the PDF syntax
+	// --------------------------------------------------------
+
+	@Override
+	/**
+	 * Fill the COSDocument with the objects that are not set by the NonSequentialPDFParser
+	 */
+	protected void initialParse() throws IOException {
+		super.initialParse();
+
+		// fill xref table
+		document.addXRefTable(xrefTrailerResolver.getXrefTable());
+
+		// Trailer entries are useful in the preflight document
+		for (COSBase trailerEntry : getDocument().getTrailer().getValues()) {
+			if ( trailerEntry instanceof COSObject )
+			{
+				COSObject tmpObj = (COSObject) trailerEntry;
+				parseObjectDynamically( tmpObj, true );
+			}
+		}
+
+		// For each ObjectKey, we check if the object has been loaded
+		Map<COSObjectKey, Long> xrefTable = document.getXrefTable();
+		for (Entry<COSObjectKey, Long> entry : xrefTable.entrySet()) {
+			COSObject co = document.getObjectFromPool(entry.getKey());
+			if ( co.getObject() == null) {
+				// object isn't loaded - parse the object to load its content
+				parseObjectDynamically( co, true );				
+			}
+		}
+	}
+
 	/**
-	 * Run the JavaCC parser to check the PDF syntax.
-	 * @throws ValidationException
+	 * Check that the PDF header matches the rules of the PDF/A specification.
+	 * The first line (offset 0) must be a comment with the PDF version (version 1.0 does not conform to the PDF/A specification).
+	 * The second line must be a comment with at least 4 bytes greater than 127 (i.e. >= 0x80).
 	 */
-	protected void checkFileSyntax() throws ValidationException {
-		// syntax (javacc) validation
+	protected void checkPdfHeader() {
+		BufferedReader reader = null;
 		try {
-			InputStreamReader reader = new InputStreamReader(this.originalDocument.getInputStream(), encoding); 
-			org.apache.pdfbox.preflight.javacc.PDFParser javaCCParser = new org.apache.pdfbox.preflight.javacc.PDFParser(reader);
-			javaCCParser.PDF();
-			IOUtils.closeQuietly(reader);
+			reader = new BufferedReader(new InputStreamReader(new FileInputStream(getPdfFile()), "ISO-8859-1"));
+			String firstLine = reader.readLine();
+			if (firstLine == null || (firstLine != null && !firstLine.matches("%PDF-1\\.[1-9]"))) {
+				addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_HEADER, "First line must match %PDF-1.\\d"));	
+			}
+
+			String secondLine = reader.readLine(); 
+			if (secondLine != null && secondLine.getBytes().length >= 5) {
+				for (int i = 0; i < secondLine.getBytes().length; ++i ) {
+					byte b = secondLine.getBytes()[i]; 
+					if (i == 0 && ((char)b != '%')) {
+						addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_HEADER, "Second line must contains at least 4 bytes greater than 127"));
+						break;
+					} else if (i > 0 && ((b & 0xFF) < 0x80)) {
+						addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_HEADER, "Second line must contains at least 4 bytes greater than 127"));
+						break;
+					}
+				}
+			} else {
+				addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_HEADER ,"Second line must contains at least 4 bytes greater than 127"));
+			}
+
 		} catch (IOException e) {
-			throw new ValidationException("Failed to parse datasource due to : " + e.getMessage(), e);
-		} catch (ParseException e) {
-			this.validationResult = createErrorResult(e);
+			addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_HEADER, "Unable to read the PDF file : " + e.getMessage()));
+		} finally {
+			IOUtils.closeQuietly(reader);
 		}
 	}
 
-	protected void createPdfADocument(Format format, PreflightConfiguration config) throws IOException {
-		this.document = new PreflightDocument(getDocument(), format, config);
+	/**
+	 * Same method as {@linkplain PDFParser#parseXrefTable(long)}, with additional checks:
+	 * - EOL mandatory after the 'xref' keyword
+	 * - Cross reference subsection header uses single white space as separator
+	 * - and so on
+	 */
+	protected boolean parseXrefTable( long startByteOffset ) throws IOException
+	{
+		if(pdfSource.peek() != 'x')
+		{
+			return false;
+		}
+		String xref = readString();
+		if( !xref.equals( "xref" ) )
+		{
+			addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_CROSS_REF, "xref must be followed by a EOL character"));
+			return false;
+		}
+		if (!nextIsEOL()) {
+			addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_CROSS_REF, "xref must be followed by EOL"));
+		}
+
+		// signal start of new XRef
+		xrefTrailerResolver.nextXrefObj( startByteOffset );
+
+		/*
+		 * Xref tables can have multiple sections.
+		 * Each starts with a starting object id and a count.
+		 */
+		while(true)
+		{
+			// just after the xref<EOL> come two integers: the first object id and the entry count
+			int currObjID = 0; // first obj id
+			int count = 0; // the number of objects in the xref table
+
+			long offset = pdfSource.getOffset();
+			String line = readLine();
+			Pattern pattern = Pattern.compile("(\\d+)\\s(\\d+)(\\s*)");
+			Matcher matcher = pattern.matcher(line);
+			if (matcher.matches()) {
+				currObjID = Integer.parseInt(matcher.group(1));
+				count = Integer.parseInt(matcher.group(2));
+			} else {
+				addValidationError(new ValidationError(ERROR_SYNTAX_CROSS_REF, "Cross reference subsection header is invalid"));
+				// reset pdfSource cursor to read xref information
+				pdfSource.seek(offset);
+				currObjID = readInt(); // first obj id
+				count = readInt(); // the number of objects in the xref table
+			}
+
+			skipSpaces();
+			for(int i = 0; i < count; i++)
+			{
+				if(pdfSource.isEOF() || isEndOfName((char)pdfSource.peek()))
+				{
+					break;
+				}
+				if(pdfSource.peek() == 't')
+				{
+					addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_CROSS_REF, "Expected xref line but 't' found"));
+					break;
+				}
+				//Ignore table contents
+				String currentLine = readLine();
+				String[] splitString = currentLine.split(" ");
+				if (splitString.length < 3)
+				{
+					addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_CROSS_REF, "invalid xref line: " + currentLine));
+					break;
+				}
+				/* This supports the corrupt table as reported in
+				 * PDFBOX-474 (XXXX XXX XX n) */
+				if(splitString[splitString.length-1].equals("n"))
+				{
+					try
+					{
+						long currOffset = Long.parseLong(splitString[0]);
+						int currGenID = Integer.parseInt(splitString[1]);
+						COSObjectKey objKey = new COSObjectKey(currObjID, currGenID);
+						xrefTrailerResolver.setXRef(objKey, currOffset);
+					}
+					catch(NumberFormatException e)
+					{
+						addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_CROSS_REF, "offset or genid can't be read as number " + e.getMessage()));
+					}
+				}
+				else if(!splitString[2].equals("f"))
+				{
+					addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_CROSS_REF, "Corrupt XRefTable Entry - ObjID:" + currObjID));
+				}
+				currObjID++;
+				skipSpaces();
+			}
+			skipSpaces();
+			char c = (char)pdfSource.peek();
+			if(c < '0' || c > '9')
+			{
+				break;
+			}
+		}
+		return true;
 	}
 
-	protected void createContext() {
-		this.ctx = new PreflightContext(this.originalDocument);
-		ctx.setDocument(document);
-		document.setContext(ctx);
+	/**
+	 * Wraps the {@link NonSequentialPDFParser#parseCOSStream} to check rules on 'stream' and 'endstream' keywords.
+	 * {@link #checkStreamKeyWord()} and {@link #checkEndstreamKeyWord()}
+	 */
+	protected COSStream parseCOSStream( COSDictionary dic, RandomAccess file ) throws IOException	{
+		checkStreamKeyWord();
+		COSStream result = super.parseCOSStream(dic, file);
+		checkEndstreamKeyWord();
+		return result;
+	}
+
+	/**
+	 * 'stream' must be followed by <CR><LF> or only <LF>
+	 * @throws IOException
+	 */
+	protected void checkStreamKeyWord() throws IOException {
+		String streamV = readString();
+		if (!streamV.equals("stream")) {
+			addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER, "Expected 'stream' keyword but found '" + streamV +"'"));
+		}
+		int nextChar = pdfSource.read();
+		if ( !((nextChar == 13 && pdfSource.peek() == 10) || nextChar == 10)) {
+			addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER, "Expected 'EOL' after the stream keyword"));
+		}
+		// set the offset before stream
+		pdfSource.seek(pdfSource.getOffset()-7);
+	}
+	
+	/**
+	 * 'endstream' must be preceded by an EOL
+	 * @throws IOException
+	 */
+	protected void checkEndstreamKeyWord() throws IOException {
+		pdfSource.seek(pdfSource.getOffset()-10);
+		if (!nextIsEOL()) {
+			addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER, "Expected 'EOL' before the endstream keyword"));
+		}
+		String endstreamV = readString();
+		if (!endstreamV.equals("endstream")) {
+			addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER, "Expected 'endstream' keyword but found '" + endstreamV +"'"));
+		}
 	}
 
-	protected void extractTrailers() throws IOException {
-		SimpleCharStream scs = new SimpleCharStream(this.originalDocument.getInputStream());
-		ExtractorTokenManager extractor = new ExtractorTokenManager(scs);
-		extractor.parse();
-		ctx.setPdfExtractor(extractor);
+	protected boolean nextIsEOL() throws IOException {
+		boolean succeed = false;
+		int nextChar = pdfSource.read();
+		if ( nextChar == 13 && pdfSource.peek() == 10 ) {
+			pdfSource.read();
+			succeed = true;
+		} else if ( nextChar == 13 || nextChar == 10 ) {
+			succeed = true;
+		}
+		return succeed;
+	}
+
+	/**
+	 * @return true if the next character is a space. (The character is consumed)
+	 * @throws IOException
+	 */
+	protected boolean nextIsSpace() throws IOException {
+		return ' ' == pdfSource.read();
 	}
 
 	@Override
-	public PDDocument getPDDocument() throws IOException {
-		document.setResult(validationResult);
-		// Add XMP MetaData
-		return document;
+	/**
+	 * Call {@link BaseParser#parseCOSArray()} and check the number of elements in the array
+	 */
+	protected COSArray parseCOSArray() throws IOException {
+		COSArray result = super.parseCOSArray();
+		if (result != null && result.size() > MAX_ARRAY_ELEMENTS) {
+			addValidationError(new ValidationError(ERROR_SYNTAX_ARRAY_TOO_LONG, "Array too long : " + result.size()));
+		}
+		return result;
 	}
-	
-	public PreflightDocument getPreflightDocument() throws IOException {
-		return (PreflightDocument)getPDDocument();
+
+	@Override
+	/**
+	 * Call {@link BaseParser#parseCOSName()} and check the length of the name
+	 */
+	protected COSName parseCOSName() throws IOException {
+		COSName result = super.parseCOSName();
+		if (result != null && result.getName().getBytes().length > MAX_NAME_SIZE) {
+			addValidationError(new ValidationError(ERROR_SYNTAX_NAME_TOO_LONG, "Name too long"));
+		}
+		return result;
+	}
+
+	/**
+	 * Check that the hexadecimal string contains only hexadecimal characters, and an even number of them.
+	 * Once it is done, reset the offset at the beginning of the string and call {@link BaseParser#parseCOSString()}
+	 */
+	protected COSString parseCOSString() throws IOException
+	{
+		// offset reminder
+		long offset = pdfSource.getOffset();
+		char nextChar = (char)pdfSource.read();
+		int count = 0;
+		if (nextChar == '<') {
+			do {
+				nextChar = (char)pdfSource.read();
+				if (nextChar != '>') {
+					if (Character.digit((char)nextChar, 16) >= 0) {
+						count++;
+					} else {
+						addValidationError(new ValidationError(ERROR_SYNTAX_HEXA_STRING_INVALID, "Hexa String must have only Hexadecimal Characters (found '" + nextChar +"')" ));
+						break;
+					}
+				}
+			} while (nextChar != '>');
+		}
+
+		if (count % 2 != 0) {
+			addValidationError(new ValidationError(ERROR_SYNTAX_HEXA_STRING_EVEN_NUMBER, "Hexa string shall contain even number of non white space char"));
+		}
+
+		// reset the offset to parse the COSString
+		pdfSource.seek(offset);
+		COSString result = super.parseCOSString();
+
+		if ( result.getString().length() > MAX_STRING_LENGTH) {
+			addValidationError(new ValidationError(ERROR_SYNTAX_HEXA_STRING_TOO_LONG, "Hexa string is too long"));
+		}
+		return result;
+	}
+
+	/**
+	 * Call {@link BaseParser#parseDirObject()} check limit range for Float, Integer and number of Dictionary entries.
+	 */
+	protected COSBase parseDirObject() throws IOException
+	{
+		COSBase result = super.parseDirObject();
+
+
+		if (result instanceof COSNumber) {
+			COSNumber number = (COSNumber)result;
+			if (number instanceof COSFloat) {
+				Double real = number.doubleValue();
+				if (real > MAX_POSITIVE_FLOAT || real < MAX_NEGATIVE_FLOAT) {
+					addValidationError(new ValidationError(ERROR_SYNTAX_NUMERIC_RANGE, "Float is too long or too small: " + real));
+				}
+			} else {
+				long numAsLong = number.longValue();
+				if (numAsLong > Integer.MAX_VALUE || numAsLong < Integer.MIN_VALUE) {
+					addValidationError(new ValidationError(ERROR_SYNTAX_NUMERIC_RANGE, "Numeric is too long or too small: " + numAsLong));	
+				}
+			}
+		}
+
+		if (result instanceof COSDictionary) {
+			COSDictionary dic = (COSDictionary)result;
+			if (dic.size() > MAX_DICT_ENTRIES) {
+				addValidationError(new ValidationError(ERROR_SYNTAX_TOO_MANY_ENTRIES, "Too Many Entries In Dictionary"));
+			}
+		}
+		return result;
+	}
+
+	/**
+	 * Return the content of the object identified by the given number and generation,
+	 * parsing it from the source if its pool entry is still empty, and record the syntax
+	 * errors found on the way.
+	 *
+	 * The xref table entry of the object decides what is done:
+	 * <ul>
+	 * <li>no entry : {@link COSNull#NULL} is stored in the pooled object</li>
+	 * <li>positive value : the value is used as a file offset, the object is parsed there
+	 * and its header and 'endobj' delimiters are checked</li>
+	 * <li>other value : the negated value is used as the number of an object stream,
+	 * which is parsed; its objects listed by the xref resolver are stored in the pool</li>
+	 * </ul>
+	 *
+	 * @param objNr object number
+	 * @param objGenNr object generation number
+	 * @param requireExistingNotCompressedObj if true, a missing or non positive xref entry
+	 *        is recorded as an error and a SyntaxValidationException is thrown
+	 * @return the content of the pooled object once the resolution is done
+	 * @throws IOException if the object found at the offset has another number or generation,
+	 *         if a stream is not preceded by a dictionary, if a stream can not be decrypted
+	 *         or if the object does not end with 'endobj'
+	 */
+	protected COSBase parseObjectDynamically( int objNr, int objGenNr, boolean requireExistingNotCompressedObj ) throws IOException {
+		// ---- create object key and get object (container) from pool
+		final COSObjectKey objKey    = new COSObjectKey( objNr, objGenNr );
+		final COSObject    pdfObject = document.getObjectFromPool( objKey );
+
+		if ( pdfObject.getObject() == null )
+		{
+			// not previously parsed
+			// ---- read offset or object stream object number from xref table
+			Long offsetOrObjstmObNr = xrefTrailerResolver.getXrefTable().get( objKey );
+
+			// sanity test to circumvent loops with broken documents
+			if ( requireExistingNotCompressedObj &&	( ( offsetOrObjstmObNr == null ) || ( offsetOrObjstmObNr <= 0 ) ) )	{   
+				addValidationError(new ValidationError(ERROR_SYNTAX_NEGATIVE_OFFSET, "Object must be defined and must not be compressed object: " +	objKey.getNumber() + ":" + objKey.getGeneration()));
+				throw new SyntaxValidationException( "Object must be defined and must not be compressed object: " +	objKey.getNumber() + ":" + objKey.getGeneration(), validationResult);
+			}
+
+			if ( offsetOrObjstmObNr == null )	{
+				// not defined object -> NULL object (Spec. 1.7, chap. 3.2.9)
+				pdfObject.setObject( COSNull.NULL );
+			}	else if ( offsetOrObjstmObNr > 0 )	{
+				// offset of indirect object in file
+				// ---- go to object start
+				setPdfSource( offsetOrObjstmObNr );
+				// ---- we must have an indirect object
+				int readObjNr  = 0;
+				int readObjGen = 0;
+
+				// keep the start of the header to be able to read it a second time
+				long offset = pdfSource.getOffset();
+				String line = readLine();
+				// strict form of the header : the whole line must be "<number> <generation> obj"
+				// with exactly one white space character between the tokens
+				// NOTE(review): the pattern is compiled on every call, it could be a constant
+				Pattern pattern = Pattern.compile("(\\d+)\\s(\\d+)\\sobj");
+				Matcher matcher = pattern.matcher(line);
+				if (matcher.matches()) {
+					readObjNr = Integer.parseInt(matcher.group(1));
+					readObjGen = Integer.parseInt(matcher.group(2));
+				} else {
+
+					// the strict form is not respected : record the error, then read the
+					// header again token by token to be able to go on with the parsing
+					addValidationError(new ValidationError(ERROR_SYNTAX_OBJ_DELIMITER, "Single space expected"));
+					// reset pdfSource cursor to read object information
+					pdfSource.seek(offset);
+					readObjNr  = readInt();
+					readObjGen = readInt();
+					for ( char c : OBJ_MARKER )
+					{
+						if ( pdfSource.read() != c )
+						{
+							addValidationError(new ValidationError(ERROR_SYNTAX_OBJ_DELIMITER, "Expected pattern '" + new String( OBJ_MARKER )  +	" but missed at character '" + c + "'" ));
+							throw new SyntaxValidationException( "Expected pattern '" + new String( OBJ_MARKER )  +	" but missed at character '" + c + "'" , validationResult);
+						}
+					}
+				}
+
+				// ---- consistency check
+				if ( ( readObjNr != objKey.getNumber() ) ||	( readObjGen != objKey.getGeneration() ) ) 
+				{
+					throw new IOException( "XREF for " + objKey.getNumber() + ":" + objKey.getGeneration() +" points to wrong object: " + readObjNr + ":" + readObjGen );
+				}
+
+				skipSpaces();
+				COSBase pb           = parseDirObject();
+				skipSpaces();				
+				// start of the keyword which follows the object, used below to read the
+				// 'stream' keyword again and to check the EOL before 'endobj'
+				long endObjectOffset = pdfSource.getOffset();
+				String  endObjectKey = readString();
+
+				if ( endObjectKey.equals( "stream" ) ) 
+				{
+					// go back to the start of the keyword before calling parseCOSStream
+					pdfSource.seek(endObjectOffset);
+					if( pb instanceof COSDictionary )
+					{
+						COSStream stream = parseCOSStream( (COSDictionary)pb, getDocument().getScratchFile() );
+						if ( securityHandler != null )
+						{
+							try 
+							{
+								securityHandler.decryptStream(stream, objNr, objGenNr );
+							} 
+							catch ( CryptographyException ce ) 
+							{
+								throw new IOException( "Error decrypting stream object " + objNr + ": " + ce.getMessage()
+										/*, ce // TODO: remove remark with Java 1.6 */ );
+							}
+						}
+						pb = stream;
+					}
+					else
+					{
+						// this is not legal
+						// the combination of a dict and the stream/endstream forms a complete stream object
+						throw new IOException( "Stream not preceded by dictionary (offset: " + offsetOrObjstmObNr + ")." );
+					}
+					skipSpaces();
+					endObjectOffset = pdfSource.getOffset();
+					endObjectKey = readString();
+
+					// we have case with a second 'endstream' before endobj
+					if ( ! endObjectKey.startsWith( "endobj" ) )
+					{
+						if ( endObjectKey.startsWith( "endstream" ) ) 
+						{
+							endObjectKey = endObjectKey.substring( 9 ).trim();
+							if ( endObjectKey.length() == 0 )
+							{
+								// no other characters in extra endstream line
+								// NOTE(review): endObjectOffset is not updated here, so the EOL
+								// check below looks before the extra 'endstream' - confirm
+								endObjectKey = readString();    // read next line 
+							}
+						}
+					}
+				} else if ( securityHandler != null )
+				{
+					// decrypt
+					if ( pb instanceof COSString )
+					{
+						decrypt( (COSString) pb, objNr, objGenNr );
+					}
+					else if ( pb instanceof COSDictionary )
+					{
+						for( Entry<COSName,COSBase> entry : ((COSDictionary) pb).entrySet() )
+						{
+							// TODO: specially handle 'Contents' entry of signature dictionary like in SecurityHandler#decryptDictionary
+							if ( entry.getValue() instanceof COSString )
+							{
+								decrypt( (COSString) entry.getValue(), objNr, objGenNr );
+							}
+						}
+					}
+					else if ( pb instanceof COSArray )
+					{
+						final COSArray array = (COSArray) pb;
+						for( int aIdx = 0, len = array.size(); aIdx < len; aIdx++ )
+						{
+							if ( array.get( aIdx ) instanceof COSString )
+							{
+								decrypt( (COSString) array.get( aIdx ), objNr, objGenNr );
+							}
+						}
+					}
+				}
+
+				// the content is stored before the 'endobj' check, so it stays in the pool
+				// even if the IOException below is thrown
+				pdfObject.setObject( pb );
+
+				if ( ! endObjectKey.startsWith( "endobj" ) )
+				{
+					throw new IOException( "Object (" + readObjNr + ":" + readObjGen + ") at offset " + offsetOrObjstmObNr + " does not end with 'endobj'." );
+				} else {
+					// move one byte before the keyword to look for an EOL, then come back
+					// to the current position
+					offset = pdfSource.getOffset();
+					pdfSource.seek(endObjectOffset-1);
+					if (!nextIsEOL()) {
+						addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_OBJ_DELIMITER, "EOL expected before the 'endobj' keyword"));
+					}
+					pdfSource.seek(offset);
+				}
+
+				if (!nextIsEOL()) {
+					addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_OBJ_DELIMITER, "EOL expected after the 'endobj' keyword"));
+				}
+
+				releasePdfSourceInputStream();
+			}	else {
+				// xref value is object nr of object stream containing object to be parsed;
+				// since our object was not found it means object stream was not parsed so far
+				// the xref value is negated to get the number of the object stream
+				final int     objstmObjNr   = (int) ( - offsetOrObjstmObNr );
+				final COSBase objstmBaseObj = parseObjectDynamically( objstmObjNr, 0, true );
+				if ( objstmBaseObj instanceof COSStream )
+				{
+					// parse object stream
+					PDFObjectStreamParser parser =	new PDFObjectStreamParser( (COSStream) objstmBaseObj, document, forceParsing );
+					parser.parse();
+
+					// get set of object numbers referenced for this object stream
+					final Set<Long> refObjNrs = xrefTrailerResolver.getContainedObjectNumbers( objstmObjNr );
+
+					// register all objects which are referenced to be contained in object stream
+					for( COSObject next : parser.getObjects() )
+					{
+						COSObjectKey stmObjKey = new COSObjectKey( next );
+						if ( refObjNrs.contains( stmObjKey.getNumber() ) )
+						{
+							COSObject stmObj = document.getObjectFromPool( stmObjKey );
+							stmObj.setObject( next.getObject() );
+						}
+					}
+				}
+			}
+		}   
+		return pdfObject.getObject();
+	}
+
+	/**
+	 * Call the lastIndexOf method of the super class and, if the searched pattern is the
+	 * EOF marker, check that at most an end of line follows the marker in the buffer.
+	 * An error with the code ERROR_SYNTAX_TRAILER_EOF is recorded otherwise.
+	 *
+	 * @param pattern the searched pattern
+	 * @param buf the buffer in which the pattern is searched
+	 * @param endOff passed as is to the super class
+	 * @return the offset computed by the super class
+	 */
+	protected int lastIndexOf( final char[] pattern, final byte[] buf, final int endOff )
+	{
+		int offset = super.lastIndexOf(pattern, buf, endOff);
+		if (offset > 0 && Arrays.equals(pattern, EOF_MARKER)) {
+			// this is the offset of the last %%EOF sequence.
+			// nothing should be present after this sequence.
+			int tmpOffset = offset + pattern.length;
+			int trailing = buf.length - tmpOffset;
+			if (trailing > 0) {
+				// EOL is authorized
+				boolean firstIsEol = buf[tmpOffset] == 10 || buf[tmpOffset] == 13;
+				// the second byte is read only if it exists : with a single trailing byte
+				// the unguarded access threw an ArrayIndexOutOfBoundsException
+				boolean secondIsLf = trailing > 1 && buf[tmpOffset+1] == 10;
+				// NOTE(review): two trailing bytes are accepted as soon as one of these
+				// tests succeeds (e.g. LF LF), as before - confirm that this is intended
+				if (trailing > 2 || !(firstIsEol || secondIsLf)) {
+					addValidationError(new ValidationError(ERROR_SYNTAX_TRAILER_EOF,"File contains data after the last %%EOF sequence"));
+				}
+			}
+		}
+		return offset;
+	}
 }
\ No newline at end of file

Modified: pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/process/TrailerValidationProcess.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/process/TrailerValidationProcess.java?rev=1389604&r1=1389603&r2=1389604&view=diff
==============================================================================
--- pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/process/TrailerValidationProcess.java (original)
+++ pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/process/TrailerValidationProcess.java Mon Sep 24 21:28:34 2012
@@ -53,7 +53,6 @@ import org.apache.pdfbox.preflight.Prefl
 import org.apache.pdfbox.preflight.ValidationResult.ValidationError;
 import org.apache.pdfbox.preflight.exception.ValidationException;
 import org.apache.pdfbox.preflight.utils.COSUtils;
-import org.apache.pdfbox.preflight.utils.PdfElementParser;
 
 public class TrailerValidationProcess extends AbstractProcess {
 
@@ -89,26 +88,14 @@ public class TrailerValidationProcess ex
 	 * @param result
 	 */
 	protected void checkTrailersForLinearizedPDF14(PreflightContext ctx) {
-		List<String> lTrailers = ctx.getPdfExtractor().getAllTrailers();
-
-		if (lTrailers.isEmpty()) {
+		COSDictionary first = ctx.getXrefTableResolver().getFirstTrailer();
+		if (first == null) {
 			addValidationError(ctx, new ValidationError(ERROR_SYNTAX_TRAILER, "There are no trailer in the PDF file"));
-		} else {
-			String firstTrailer = lTrailers.get(0);
-			String lastTrailer = lTrailers.get(lTrailers.size() - 1);
-
-			COSDictionary first = null;
-			COSDictionary last = null;
+		} else {	
+			COSDictionary last = ctx.getXrefTableResolver().getLastTrailer();
 			COSDocument cosDoc = null;
 			try {
 				cosDoc = new COSDocument();
-
-				PdfElementParser parser1 = new PdfElementParser(cosDoc, firstTrailer.getBytes());
-				first = parser1.parseAsDictionary();
-
-				PdfElementParser parser2 = new PdfElementParser(cosDoc, lastTrailer.getBytes());
-				last = parser2.parseAsDictionary();
-
 				checkMainTrailer(ctx, first);
 				if (!compareIds(first, last, cosDoc)) {
 					addValidationError(ctx, new ValidationError(
@@ -194,7 +181,7 @@ public class TrailerValidationProcess ex
 		if (idFirst == null || idLast == null) {
 			return false;
 		}
-		
+
 		// ---- cast two COSBase to COSArray.
 		COSArray af = COSUtils.getAsArray(idFirst, cosDocument);
 		COSArray al = COSUtils.getAsArray(idLast, cosDocument);
@@ -260,7 +247,7 @@ public class TrailerValidationProcess ex
 				id = true;
 			}
 		}
-		
+
 		COSDocument cosDocument = ctx.getDocument().getDocument();
 		// PDF/A Trailer dictionary must contain the ID key
 		if (!id) {

Modified: pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/TestInvalidDirectory.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/TestInvalidDirectory.java?rev=1389604&r1=1389603&r2=1389604&view=diff
==============================================================================
--- pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/TestInvalidDirectory.java (original)
+++ pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/TestInvalidDirectory.java Mon Sep 24 21:28:34 2012
@@ -30,6 +30,7 @@ import javax.activation.FileDataSource;
 
 import junit.framework.Assert;
 
+import org.apache.pdfbox.preflight.exception.SyntaxValidationException;
 import org.apache.pdfbox.preflight.parser.PreflightParser;
 import org.junit.Test;
 import org.junit.runner.RunWith;
@@ -51,13 +52,25 @@ public class TestInvalidDirectory {
 
 	@Test
 	public void validate () throws Exception {
+		PreflightDocument document = null;
+
 		System.out.println(target);
-		PreflightParser parser = new PreflightParser(new FileDataSource(target));
-		parser.parse();
-		PreflightDocument document = (PreflightDocument) parser.getPDDocument();
-		document.validate();
-		Assert.assertFalse(document.getResult().isValid());
-		document.close();
+		ValidationResult result = null;
+		try {
+			PreflightParser parser = new PreflightParser(new FileDataSource(target));
+			parser.parse();
+			document = (PreflightDocument)parser.getPDDocument();
+			document.validate();
+			result = document.getResult();
+		} catch (SyntaxValidationException e) {
+			result = e.getResult();
+		} finally {
+			if (document != null) {
+				document.close();
+			}
+		}
+		Assert.assertFalse("Test of " + target, result.isValid());
+
 	}
 
 	@Parameters

Modified: pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/TestIsartorValidationFromClasspath.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/TestIsartorValidationFromClasspath.java?rev=1389604&r1=1389603&r2=1389604&view=diff
==============================================================================
--- pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/TestIsartorValidationFromClasspath.java (original)
+++ pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/TestIsartorValidationFromClasspath.java Mon Sep 24 21:28:34 2012
@@ -37,6 +37,7 @@ import junit.framework.Assert;
 
 import org.apache.commons.io.IOUtils;
 import org.apache.pdfbox.preflight.ValidationResult.ValidationError;
+import org.apache.pdfbox.preflight.exception.SyntaxValidationException;
 import org.apache.pdfbox.preflight.exception.ValidationException;
 import org.apache.pdfbox.preflight.parser.PreflightParser;
 import org.junit.AfterClass;
@@ -90,13 +91,21 @@ public class TestIsartorValidationFromCl
 		try {
 			System.out.println(path);
 			InputStream input = this.getClass().getResourceAsStream(path);
-			PreflightParser parser = new PreflightParser(new org.apache.pdfbox.preflight.utils.ByteArrayDataSource(input));
-			parser.parse();
-			document = (PreflightDocument)parser.getPDDocument();
-			document.validate();
-			ValidationResult result = document.getResult();
+
+			ValidationResult result = null;
+			try {
+				PreflightParser parser = new PreflightParser(new org.apache.pdfbox.preflight.utils.ByteArrayDataSource(input));
+				parser.parse();
+				document = (PreflightDocument)parser.getPDDocument();
+				document.validate();
+				result = document.getResult();
+			} catch (SyntaxValidationException e) {
+				result = e.getResult();
+			}
+
 			Assert.assertFalse(path + " : Isartor file should be invalid ("	+ path + ")", result.isValid());
 			Assert.assertTrue(path + " : Should find at least one error", result.getErrorsList().size() > 0);
+
 			// could contain more than one error
 			boolean found = false;
 			for (ValidationError error : result.getErrorsList()) {
@@ -104,8 +113,7 @@ public class TestIsartorValidationFromCl
 					found = true;
 				}
 				if (isartorResultFile != null) {
-					String log = path.replace(".pdf", "") + "#" 
-							+error.getErrorCode()+"#"+error.getDetails()+"\n";
+					String log = path.replace(".pdf", "") + "#" + error.getErrorCode()+"#"+error.getDetails()+"\n";
 					isartorResultFile.write(log.getBytes());
 				}
 			}
@@ -145,7 +153,7 @@ public class TestIsartorValidationFromCl
 		IOUtils.closeQuietly(expected);
 		// prepare config
 		List<Object[]> data = new ArrayList<Object[]>();
-        InputStream is = Class.class.getResourceAsStream("/Isartor testsuite.list");
+		InputStream is = Class.class.getResourceAsStream("/Isartor testsuite.list");
 		if (is != null)
 		{
 			BufferedReader reader = new BufferedReader(new InputStreamReader(is));

Modified: pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/TestValidDirectory.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/TestValidDirectory.java?rev=1389604&r1=1389603&r2=1389604&view=diff
==============================================================================
--- pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/TestValidDirectory.java (original)
+++ pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/TestValidDirectory.java Mon Sep 24 21:28:34 2012
@@ -30,6 +30,7 @@ import javax.activation.FileDataSource;
 
 import junit.framework.Assert;
 
+import org.apache.pdfbox.preflight.exception.SyntaxValidationException;
 import org.apache.pdfbox.preflight.parser.PreflightParser;
 import org.junit.Test;
 import org.junit.runner.RunWith;
@@ -38,58 +39,62 @@ import org.junit.runners.Parameterized.P
 
 @RunWith(Parameterized.class)
 public class TestValidDirectory {
-  
-  protected File target = null;
-  
-  public TestValidDirectory (File file) {
-    this.target = file;
-  }
-  
-  @Test
-  public void validate () throws Exception {
-  	PreflightDocument document = null;
-  	try {
-  		System.out.println(target);
-  		PreflightParser parser = new PreflightParser(new FileDataSource(target));
-  		parser.parse();
-  		document = (PreflightDocument) parser.getPDDocument();
-  		document.validate();
-  		Assert.assertTrue("Validation of " + target ,document.getResult().isValid());
-  	} finally {
-  		if (document != null) {
-  			document.close();
-  		}
-  	}
-  }
-  
-  @Parameters
-  public static Collection<Object[]> initializeParameters() throws Exception {
-    // check directory
-    File directory = null;
-    String pdfPath = System.getProperty("pdfa.valid", null);
-    if ("${user.pdfa.valid}".equals(pdfPath)) {pdfPath=null;}
-    if (pdfPath!=null) {
-      directory = new File(pdfPath);
-      if (!directory.exists()) throw new Exception ("directory does not exists : "+directory.getAbsolutePath());
-      if (!directory.isDirectory()) throw new Exception ("not a directory : "+directory.getAbsolutePath());
-    } else {
-      System.err.println("System property 'pdfa.valid' not defined, will not run TestValidaDirectory");
-    }
-    // create list
-    if (directory==null) {
-      return new ArrayList<Object[]>(0);
-    } else {
-      File [] files = directory.listFiles();
-      List<Object[]> data = new ArrayList<Object[]>(files.length);
-      for (File file : files) {
-        if (file.isFile()) {
-          data.add(new Object [] {file});
-        }
-      }
-      return data;
-    }
-  }
-  
-  
-  
+
+	protected File target = null;
+
+	public TestValidDirectory (File file) {
+		this.target = file;
+	}
+
+	/**
+	 * Parse and validate the target file and expect a valid result. If the parser throws
+	 * a SyntaxValidationException, the result carried by the exception is checked instead.
+	 */
+	@Test
+	public void validate () throws Exception {
+		System.out.println(target);
+
+		PreflightDocument pdf = null;
+		ValidationResult outcome = null;
+		try {
+			final PreflightParser preflightParser = new PreflightParser(new FileDataSource(target));
+			preflightParser.parse();
+			pdf = (PreflightDocument)preflightParser.getPDDocument();
+			pdf.validate();
+			outcome = pdf.getResult();
+		} catch (SyntaxValidationException syntaxError) {
+			// the parser gave up, the errors are carried by the exception
+			outcome = syntaxError.getResult();
+		} finally {
+			// the document exists only if the parsing went to its end
+			if (pdf != null) {
+				pdf.close();
+			}
+		}
+
+		Assert.assertTrue("Validation of " + target , outcome.isValid());
+	}
+
+	/**
+	 * Build the parameters of the test : one entry per regular file of the directory given
+	 * by the system property 'pdfa.valid'. The list is empty if the property is not defined.
+	 *
+	 * @return one Object[] containing the File for each regular file of the directory
+	 * @throws Exception if the path does not exist, is not a directory or can not be listed
+	 */
+	@Parameters
+	public static Collection<Object[]> initializeParameters() throws Exception {
+		// check directory
+		String pdfPath = System.getProperty("pdfa.valid", null);
+		// presumably the placeholder left by the build when the user property is not set
+		if ("${user.pdfa.valid}".equals(pdfPath)) {pdfPath=null;}
+		if (pdfPath==null) {
+			System.err.println("System property 'pdfa.valid' not defined, will not run TestValidaDirectory");
+			return new ArrayList<Object[]>(0);
+		}
+		File directory = new File(pdfPath);
+		if (!directory.exists()) throw new Exception ("directory does not exists : "+directory.getAbsolutePath());
+		if (!directory.isDirectory()) throw new Exception ("not a directory : "+directory.getAbsolutePath());
+
+		// create list
+		File [] files = directory.listFiles();
+		if (files==null) {
+			// listFiles() returns null if the directory can not be read
+			throw new Exception ("unable to list directory : "+directory.getAbsolutePath());
+		}
+		List<Object[]> data = new ArrayList<Object[]>(files.length);
+		for (File file : files) {
+			if (file.isFile()) {
+				data.add(new Object [] {file});
+			}
+		}
+		return data;
+	}
+
+
+
 }

Modified: pdfbox/trunk/preflight/src/test/resources/expected_errors.txt
URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/test/resources/expected_errors.txt?rev=1389604&r1=1389603&r2=1389604&view=diff
==============================================================================
--- pdfbox/trunk/preflight/src/test/resources/expected_errors.txt (original)
+++ pdfbox/trunk/preflight/src/test/resources/expected_errors.txt Mon Sep 24 21:28:34 2012
@@ -23,12 +23,12 @@ isartor-6-1-2-t01-fail-a.pdf=1.1
 isartor-6-1-2-t02-fail-a.pdf=1.1
 isartor-6-1-3-t01-fail-a.pdf=1.4.1
 isartor-6-1-3-t02-fail-a.pdf=1.4.2
-isartor-6-1-3-t03-fail-a.pdf=1.4
+isartor-6-1-3-t03-fail-a.pdf=1.4.10 // 1.4 due to JavaCC
 isartor-6-1-3-t04-fail-a.pdf=1.4.6 // Revoir le parser pour les linearized file (LIGNE VIDE qui encadre le body?????)
 isartor-6-1-4-t01-fail-a.pdf=1.3
 isartor-6-1-4-t02-fail-a.pdf=1.3
-isartor-6-1-6-t01-fail-a.pdf=1.2
-isartor-6-1-7-t01-fail-a.pdf=1.2
+isartor-6-1-6-t01-fail-a.pdf=1.0.11 // String Hex error - before was 1.2 due to JavaCC validation
+isartor-6-1-7-t01-fail-a.pdf=1.2.2 // Stream keyword must be followed by CR&LF or LF only- before was 1.2 due to JavaCC validation
 isartor-6-1-7-t02-fail-a.pdf=1.2.2
 isartor-6-1-7-t03-fail-a.pdf=1.2.5
 isartor-6-1-7-t04-fail-a.pdf=1.2.6
@@ -36,7 +36,7 @@ isartor-6-1-7-t04-fail-b.pdf=1.2.6
 isartor-6-1-7-t04-fail-c.pdf=1.2.6
 isartor-6-1-8-t01-fail-a.pdf=1.2.1
 isartor-6-1-8-t02-fail-a.pdf=1.2.1
-isartor-6-1-8-t03-fail-a.pdf=1.2
+isartor-6-1-8-t03-fail-a.pdf=1.2.1 // before was 1.2 due to JavaCC
 isartor-6-1-8-t04-fail-a.pdf=1.2.1
 isartor-6-1-8-t05-fail-a.pdf=1.2.1
 isartor-6-1-8-t06-fail-a.pdf=1.2.1