You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by gb...@apache.org on 2013/03/06 17:46:37 UTC
svn commit: r1453416 [11/16] - in /pdfbox/trunk/preflight: ./
src/main/java/org/apache/pdfbox/preflight/
src/main/java/org/apache/pdfbox/preflight/action/
src/main/java/org/apache/pdfbox/preflight/annotation/
src/main/java/org/apache/pdfbox/preflight/a...
Modified: pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java?rev=1453416&r1=1453415&r2=1453416&view=diff
==============================================================================
--- pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java (original)
+++ pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java Wed Mar 6 16:46:35 2013
@@ -87,667 +87,797 @@ import org.apache.pdfbox.preflight.Valid
import org.apache.pdfbox.preflight.ValidationResult.ValidationError;
import org.apache.pdfbox.preflight.exception.SyntaxValidationException;
-public class PreflightParser extends NonSequentialPDFParser {
- /**
- * Define a one byte encoding that hasn't specific encoding in UTF-8 charset.
- * Avoid unexpected error when the encoding is Cp5816
- */
- public static final Charset encoding = Charset.forName("ISO-8859-1");
-
- protected DataSource originalDocument;
-
- protected ValidationResult validationResult;
-
- protected PreflightDocument preflightDocument;
-
- protected PreflightContext ctx;
-
- public PreflightParser(File file, RandomAccess rafi) throws IOException {
- super(file, rafi);
- this.originalDocument = new FileDataSource(file);
- }
-
- public PreflightParser(File file) throws IOException {
- this(file, null);
- }
- public PreflightParser(String filename) throws IOException {
- this(new File(filename), null);
- }
-
- public PreflightParser(DataSource input) throws IOException {
- super(input.getInputStream());
- this.originalDocument = input;
- }
-
- /**
- * Create an instance of ValidationResult with a
- * ValidationError(UNKNOWN_ERROR)
- *
- * @return
- */
- protected static ValidationResult createUnknownErrorResult() {
- ValidationError error = new ValidationError(PreflightConstants.ERROR_UNKOWN_ERROR);
- ValidationResult result = new ValidationResult(error);
- return result;
- }
-
- /**
- * Add the error to the ValidationResult.
- * If the validationResult is null, an instance is created using the isWarning boolean of the
- * ValidationError to know if the ValidationResult must be flagged as Valid.
- * @param error
- */
- protected void addValidationError(ValidationError error) {
- if (this.validationResult == null) {
- this.validationResult = new ValidationResult(error.isWarning());
- }
- this.validationResult.addError(error);
- }
- protected void addValidationErrors(List<ValidationError> errors) {
- for (ValidationError error : errors) {
- addValidationError(error);
- }
- }
-
-
- public void parse() throws IOException {
- parse(Format.PDF_A1B);
- }
-
- /**
- * Parse the given file and check if it is a confirming file according to the given format.
- *
- * @param format format that the document should follow (default {@link Format#PDF_A1B})
- * @throws IOException
- */
- public void parse(Format format) throws IOException {
- parse(format, null);
- }
-
- /**
- * Parse the given file and check if it is a confirming file according to the given format.
- *
- * @param format format that the document should follow (default {@link Format#PDF_A1B})
- * @param config Configuration bean that will be used by the PreflightDocument.
- * If null the format is used to determine the default configuration.
- * @throws IOException
- */
- public void parse(Format format, PreflightConfiguration config) throws IOException {
- checkPdfHeader();
- try {
- super.parse();
- } catch (IOException e) {
- addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_COMMON, e.getMessage()));
- throw new SyntaxValidationException(e, this.validationResult);
- }
- Format formatToUse = (format == null ? Format.PDF_A1B : format);
- createPdfADocument(formatToUse, config);
- createContext();
- }
-
- protected void createPdfADocument(Format format, PreflightConfiguration config) throws IOException {
- COSDocument cosDocument = getDocument();
- this.preflightDocument = new PreflightDocument(cosDocument, format, config);
- }
-
- /**
- * Create a validation context.
- * This context is set to the PreflightDocument.
- */
- protected void createContext() {
- this.ctx = new PreflightContext(this.originalDocument);
- ctx.setDocument(preflightDocument);
- preflightDocument.setContext(ctx);
- ctx.setXrefTableResolver(xrefTrailerResolver);
- }
-
- @Override
- public PDDocument getPDDocument() throws IOException {
- preflightDocument.setResult(validationResult);
- // Add XMP MetaData
- return preflightDocument;
- }
-
- public PreflightDocument getPreflightDocument() throws IOException {
- return (PreflightDocument)getPDDocument();
- }
-
-
- // --------------------------------------------------------
- // - Below All methods that adds controls on the PDF syntax
- // --------------------------------------------------------
-
- @Override
- /**
- * Fill the CosDocument with some object that isn't set by the NonSequentialParser
- */
- protected void initialParse() throws IOException {
- super.initialParse();
-
- // fill xref table
- document.addXRefTable(xrefTrailerResolver.getXrefTable());
-
- // Trailer entries are useful in the preflight document
- for (COSBase trailerEntry : getDocument().getTrailer().getValues()) {
- if ( trailerEntry instanceof COSObject )
- {
- COSObject tmpObj = (COSObject) trailerEntry;
- parseObjectDynamically( tmpObj, true );
- }
- }
-
- // For each ObjectKey, we check if the object has been loaded
- Map<COSObjectKey, Long> xrefTable = document.getXrefTable();
- for (Entry<COSObjectKey, Long> entry : xrefTable.entrySet()) {
- COSObject co = document.getObjectFromPool(entry.getKey());
- if ( co.getObject() == null) {
- // object isn't loaded - parse the object to load its content
- parseObjectDynamically( co, true );
- }
- }
- }
-
- /**
- * Check that the PDF header match rules of the PDF/A specification.
- * First line (offset 0) must be a comment with the PDF version (version 1.0 isn't conform to the PDF/A specification)
- * Second line is a comment with at least 4 bytes greater than 0x80
- */
- protected void checkPdfHeader() {
- BufferedReader reader = null;
- try {
- reader = new BufferedReader(new InputStreamReader(new FileInputStream(getPdfFile()), encoding));
- String firstLine = reader.readLine();
- if (firstLine == null || (firstLine != null && !firstLine.matches("%PDF-1\\.[1-9]"))) {
- addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_HEADER, "First line must match %PDF-1.\\d"));
- }
-
- String secondLine = reader.readLine();
- byte[] secondLineAsBytes = secondLine.getBytes(encoding.name());
- if (secondLine != null && secondLineAsBytes.length >= 5) {
- for (int i = 0; i < secondLineAsBytes.length; ++i ) {
- byte b = secondLineAsBytes[i];
- if (i == 0 && ((char)b != '%')) {
- addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_HEADER, "Second line must contains at least 4 bytes greater than 127"));
- break;
- } else if (i > 0 && ((b & 0xFF) < 0x80)) {
- addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_HEADER, "Second line must contains at least 4 bytes greater than 127"));
- break;
- }
- }
- } else {
- addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_HEADER ,"Second line must contains at least 4 bytes greater than 127"));
- }
-
- } catch (IOException e) {
- addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_HEADER, "Unable to read the PDF file : " + e.getMessage()));
- } finally {
- IOUtils.closeQuietly(reader);
- }
- }
-
- /**
- * Same method than the {@linkplain PDFParser#parseXrefTable(long)} with additional controls :
- * - EOL mandatory after the 'xref' keyword
- * - Cross reference subsection header uses single white space as separator
- * - and so on
- */
- protected boolean parseXrefTable( long startByteOffset ) throws IOException
- {
- if(pdfSource.peek() != 'x')
- {
- return false;
- }
- String xref = readString();
- if( !xref.equals( "xref" ) )
- {
- addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_CROSS_REF, "xref must be followed by a EOL character"));
- return false;
- }
- if (!nextIsEOL()) {
- addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_CROSS_REF, "xref must be followed by EOL"));
- }
-
- // signal start of new XRef
- xrefTrailerResolver.nextXrefObj( startByteOffset );
-
- /*
- * Xref tables can have multiple sections.
- * Each starts with a starting object id and a count.
- */
- while(true)
- {
- // just after the xref<EOL> there are an integer
- int currObjID = 0; // first obj id
- int count = 0; // the number of objects in the xref table
-
- long offset = pdfSource.getOffset();
- String line = readLine();
- Pattern pattern = Pattern.compile("(\\d+)\\s(\\d+)(\\s*)");
- Matcher matcher = pattern.matcher(line);
- if (matcher.matches()) {
- currObjID = Integer.parseInt(matcher.group(1));
- count = Integer.parseInt(matcher.group(2));
- } else {
- addValidationError(new ValidationError(ERROR_SYNTAX_CROSS_REF, "Cross reference subsection header is invalid"));
- // reset pdfSource cursor to read xref information
- pdfSource.seek(offset);
- currObjID = readInt(); // first obj id
- count = readInt(); // the number of objects in the xref table
- }
-
- skipSpaces();
- for(int i = 0; i < count; i++)
- {
- if(pdfSource.isEOF() || isEndOfName((char)pdfSource.peek()))
- {
- break;
- }
- if(pdfSource.peek() == 't')
- {
- addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_CROSS_REF, "Expected xref line but 't' found"));
- break;
- }
- //Ignore table contents
- String currentLine = readLine();
- String[] splitString = currentLine.split(" ");
- if (splitString.length < 3)
- {
- addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_CROSS_REF, "invalid xref line: " + currentLine));
- break;
- }
- /* This supports the corrupt table as reported in
- * PDFBOX-474 (XXXX XXX XX n) */
- if(splitString[splitString.length-1].equals("n"))
- {
- try
- {
- long currOffset = Long.parseLong(splitString[0]);
- int currGenID = Integer.parseInt(splitString[1]);
- COSObjectKey objKey = new COSObjectKey(currObjID, currGenID);
- xrefTrailerResolver.setXRef(objKey, currOffset);
- }
- catch(NumberFormatException e)
- {
- addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_CROSS_REF, "offset or genid can't be read as number " + e.getMessage()));
- }
- }
- else if(!splitString[2].equals("f"))
- {
- addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_CROSS_REF, "Corrupt XRefTable Entry - ObjID:" + currObjID));
- }
- currObjID++;
- skipSpaces();
- }
- skipSpaces();
- char c = (char)pdfSource.peek();
- if(c < '0' || c > '9')
- {
- break;
- }
- }
- return true;
- }
-
- /**
- * Wraps the {@link NonSequentialPDFParser#parseCOSStream} to check rules on 'stream' and 'endstream' keywords.
- * {@link #checkStreamKeyWord()} and {@link #checkEndstreamKeyWord()}
- */
- protected COSStream parseCOSStream( COSDictionary dic, RandomAccess file ) throws IOException {
- checkStreamKeyWord();
- COSStream result = super.parseCOSStream(dic, file);
- checkEndstreamKeyWord();
- return result;
- }
-
- /**
- * 'stream' must be followed by <CR><LF> or only <LF>
- * @throws IOException
- */
- protected void checkStreamKeyWord() throws IOException {
- String streamV = readString();
- if (!streamV.equals("stream")) {
- addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER, "Expected 'stream' keyword but found '" + streamV +"'"));
- }
- int nextChar = pdfSource.read();
- if ( !((nextChar == 13 && pdfSource.peek() == 10) || nextChar == 10)) {
- addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER, "Expected 'EOL' after the stream keyword"));
- }
- // set the offset before stream
- pdfSource.seek(pdfSource.getOffset()-7);
- }
-
- /**
- * 'endstream' must be preceded by an EOL
- * @throws IOException
- */
- protected void checkEndstreamKeyWord() throws IOException {
- pdfSource.seek(pdfSource.getOffset()-10);
- if (!nextIsEOL()) {
- addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER, "Expected 'EOL' before the endstream keyword"));
- }
- String endstreamV = readString();
- if (!endstreamV.equals("endstream")) {
- addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER, "Expected 'endstream' keyword but found '" + endstreamV +"'"));
- }
- }
-
- protected boolean nextIsEOL() throws IOException {
- boolean succeed = false;
- int nextChar = pdfSource.read();
- if ( nextChar == 13 && pdfSource.peek() == 10 ) {
- pdfSource.read();
- succeed = true;
- } else if ( nextChar == 13 || nextChar == 10 ) {
- succeed = true;
- }
- return succeed;
- }
-
- /**
- * @return true if the next character is a space. (The character is consumed)
- * @throws IOException
- */
- protected boolean nextIsSpace() throws IOException {
- return ' ' == pdfSource.read();
- }
-
- @Override
- /**
- * Call {@link BaseParser#parseCOSArray()} and check the number of element in the array
- */
- protected COSArray parseCOSArray() throws IOException {
- COSArray result = super.parseCOSArray();
- if (result != null && result.size() > MAX_ARRAY_ELEMENTS) {
- addValidationError(new ValidationError(ERROR_SYNTAX_ARRAY_TOO_LONG, "Array too long : " + result.size()));
- }
- return result;
- }
-
- @Override
- /**
- * Call {@link BaseParser#parseCOSName()} and check the length of the name
- */
- protected COSName parseCOSName() throws IOException {
- COSName result = super.parseCOSName();
- if (result != null && result.getName().getBytes().length > MAX_NAME_SIZE) {
- addValidationError(new ValidationError(ERROR_SYNTAX_NAME_TOO_LONG, "Name too long"));
- }
- return result;
- }
-
- /**
- * Check that the hexa string contains only an even number of Hexadecimal characters.
- * Once it is done, reset the offset at the beginning of the string and call {@link BaseParser#parseCOSString()}
- */
- protected COSString parseCOSString(boolean isDictionary) throws IOException
- {
- // offset reminder
- long offset = pdfSource.getOffset();
- char nextChar = (char)pdfSource.read();
- int count = 0;
- if (nextChar == '<') {
- do {
- nextChar = (char)pdfSource.read();
- if (nextChar != '>') {
- if (Character.digit((char)nextChar, 16) >= 0) {
- count++;
- } else {
- addValidationError(new ValidationError(ERROR_SYNTAX_HEXA_STRING_INVALID, "Hexa String must have only Hexadecimal Characters (found '" + nextChar +"')" ));
- break;
- }
- }
- } while (nextChar != '>');
- }
-
- if (count % 2 != 0) {
- addValidationError(new ValidationError(ERROR_SYNTAX_HEXA_STRING_EVEN_NUMBER, "Hexa string shall contain even number of non white space char"));
- }
-
- // reset the offset to parse the COSString
- pdfSource.seek(offset);
- COSString result = super.parseCOSString(isDictionary);
-
- if ( result.getString().length() > MAX_STRING_LENGTH) {
- addValidationError(new ValidationError(ERROR_SYNTAX_HEXA_STRING_TOO_LONG, "Hexa string is too long"));
- }
- return result;
- }
-
- /**
- * Call {@link BaseParser#parseDirObject()} check limit range for Float, Integer and number of Dictionary entries.
- */
- protected COSBase parseDirObject() throws IOException
- {
- COSBase result = super.parseDirObject();
-
-
- if (result instanceof COSNumber) {
- COSNumber number = (COSNumber)result;
- if (number instanceof COSFloat) {
- Double real = number.doubleValue();
- if (real > MAX_POSITIVE_FLOAT || real < MAX_NEGATIVE_FLOAT) {
- addValidationError(new ValidationError(ERROR_SYNTAX_NUMERIC_RANGE, "Float is too long or too small: " + real));
- }
- } else {
- long numAsLong = number.longValue();
- if (numAsLong > Integer.MAX_VALUE || numAsLong < Integer.MIN_VALUE) {
- addValidationError(new ValidationError(ERROR_SYNTAX_NUMERIC_RANGE, "Numeric is too long or too small: " + numAsLong));
- }
- }
- }
-
- if (result instanceof COSDictionary) {
- COSDictionary dic = (COSDictionary)result;
- if (dic.size() > MAX_DICT_ENTRIES) {
- addValidationError(new ValidationError(ERROR_SYNTAX_TOO_MANY_ENTRIES, "Too Many Entries In Dictionary"));
- }
- }
- return result;
- }
-
- protected COSBase parseObjectDynamically( int objNr, int objGenNr, boolean requireExistingNotCompressedObj ) throws IOException {
- // ---- create object key and get object (container) from pool
- final COSObjectKey objKey = new COSObjectKey( objNr, objGenNr );
- final COSObject pdfObject = document.getObjectFromPool( objKey );
-
- if ( pdfObject.getObject() == null )
- {
- // not previously parsed
- // ---- read offset or object stream object number from xref table
- Long offsetOrObjstmObNr = xrefTrailerResolver.getXrefTable().get( objKey );
-
- // sanity test to circumvent loops with broken documents
- if ( requireExistingNotCompressedObj && ( ( offsetOrObjstmObNr == null ) ) ) {
- addValidationError(new ValidationError(ERROR_SYNTAX_MISSING_OFFSET, "Object must be defined and must not be compressed object: " + objKey.getNumber() + ":" + objKey.getGeneration()));
- throw new SyntaxValidationException( "Object must be defined and must not be compressed object: " + objKey.getNumber() + ":" + objKey.getGeneration(), validationResult);
- }
-
- if ( offsetOrObjstmObNr == null ) {
- // not defined object -> NULL object (Spec. 1.7, chap. 3.2.9)
- pdfObject.setObject( COSNull.NULL );
- } else if ( offsetOrObjstmObNr == 0 ) {
- addValidationError(new ValidationError(ERROR_SYNTAX_INVALID_OFFSET, "Object {" + objKey.getNumber() + ":" + objKey.getGeneration()+"} has an offset of 0"));
- } else if ( offsetOrObjstmObNr > 0 ) {
- // offset of indirect object in file
- // ---- go to object start
- setPdfSource( offsetOrObjstmObNr );
- // ---- we must have an indirect object
- int readObjNr = 0;
- int readObjGen = 0;
-
- long offset = pdfSource.getOffset();
- String line = readLine();
- Pattern pattern = Pattern.compile("(\\d+)\\s(\\d+)\\sobj");
- Matcher matcher = pattern.matcher(line);
- if (matcher.matches()) {
- readObjNr = Integer.parseInt(matcher.group(1));
- readObjGen = Integer.parseInt(matcher.group(2));
- } else {
-
- addValidationError(new ValidationError(ERROR_SYNTAX_OBJ_DELIMITER, "Single space expected"));
- // reset pdfSource cursor to read object information
- pdfSource.seek(offset);
- readObjNr = readInt();
- readObjGen = readInt();
- for ( char c : OBJ_MARKER )
- {
- if ( pdfSource.read() != c )
- {
- addValidationError(new ValidationError(ERROR_SYNTAX_OBJ_DELIMITER, "Expected pattern '" + new String( OBJ_MARKER ) + " but missed at character '" + c + "'" ));
- throw new SyntaxValidationException( "Expected pattern '" + new String( OBJ_MARKER ) + " but missed at character '" + c + "'" , validationResult);
- }
- }
- }
-
- // ---- consistency check
- if ( ( readObjNr != objKey.getNumber() ) || ( readObjGen != objKey.getGeneration() ) )
- {
- throw new IOException( "XREF for " + objKey.getNumber() + ":" + objKey.getGeneration() +" points to wrong object: " + readObjNr + ":" + readObjGen );
- }
-
- skipSpaces();
- COSBase pb = parseDirObject();
- skipSpaces();
- long endObjectOffset = pdfSource.getOffset();
- String endObjectKey = readString();
-
- if ( endObjectKey.equals( "stream" ) )
- {
- pdfSource.seek(endObjectOffset);
- if( pb instanceof COSDictionary )
- {
- COSStream stream = parseCOSStream( (COSDictionary)pb, getDocument().getScratchFile() );
- if ( securityHandler != null )
- {
- try
- {
- securityHandler.decryptStream(stream, objNr, objGenNr );
- }
- catch ( CryptographyException ce )
- {
- throw new IOException( "Error decrypting stream object " + objNr + ": " + ce.getMessage()
- /*, ce // TODO: remove remark with Java 1.6 */ );
- }
- }
- pb = stream;
- }
- else
- {
- // this is not legal
- // the combination of a dict and the stream/endstream forms a complete stream object
- throw new IOException( "Stream not preceded by dictionary (offset: " + offsetOrObjstmObNr + ")." );
- }
- skipSpaces();
- endObjectOffset = pdfSource.getOffset();
- endObjectKey = readString();
-
- // we have case with a second 'endstream' before endobj
- if ( ! endObjectKey.startsWith( "endobj" ) )
- {
- if ( endObjectKey.startsWith( "endstream" ) )
- {
- endObjectKey = endObjectKey.substring( 9 ).trim();
- if ( endObjectKey.length() == 0 )
- {
- // no other characters in extra endstream line
- endObjectKey = readString(); // read next line
- }
- }
- }
- } else if ( securityHandler != null )
- {
- // decrypt
- if ( pb instanceof COSString )
- {
- decrypt( (COSString) pb, objNr, objGenNr );
- }
- else if ( pb instanceof COSDictionary )
- {
- for( Entry<COSName,COSBase> entry : ((COSDictionary) pb).entrySet() )
- {
- // TODO: specially handle 'Contents' entry of signature dictionary like in SecurityHandler#decryptDictionary
- if ( entry.getValue() instanceof COSString )
- {
- decrypt( (COSString) entry.getValue(), objNr, objGenNr );
- }
- }
- }
- else if ( pb instanceof COSArray )
- {
- final COSArray array = (COSArray) pb;
- for( int aIdx = 0, len = array.size(); aIdx < len; aIdx++ )
- {
- if ( array.get( aIdx ) instanceof COSString )
- {
- decrypt( (COSString) array.get( aIdx ), objNr, objGenNr );
- }
- }
- }
- }
-
- pdfObject.setObject( pb );
-
- if ( ! endObjectKey.startsWith( "endobj" ) )
- {
- throw new IOException( "Object (" + readObjNr + ":" + readObjGen + ") at offset " + offsetOrObjstmObNr + " does not end with 'endobj'." );
- } else {
- offset = pdfSource.getOffset();
- pdfSource.seek(endObjectOffset-1);
- if (!nextIsEOL()) {
- addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_OBJ_DELIMITER, "EOL expected before the 'endobj' keyword"));
- }
- pdfSource.seek(offset);
- }
-
- if (!nextIsEOL()) {
- addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_OBJ_DELIMITER, "EOL expected after the 'endobj' keyword"));
- }
-
- releasePdfSourceInputStream();
- } else {
- // xref value is object nr of object stream containing object to be parsed;
- // since our object was not found it means object stream was not parsed so far
- final int objstmObjNr = (int) ( - offsetOrObjstmObNr );
- final COSBase objstmBaseObj = parseObjectDynamically( objstmObjNr, 0, true );
- if ( objstmBaseObj instanceof COSStream )
- {
- // parse object stream
- PDFObjectStreamParser parser = new PDFObjectStreamParser( (COSStream) objstmBaseObj, document, forceParsing );
- parser.parse();
-
- // get set of object numbers referenced for this object stream
- final Set<Long> refObjNrs = xrefTrailerResolver.getContainedObjectNumbers( objstmObjNr );
-
- // register all objects which are referenced to be contained in object stream
- for( COSObject next : parser.getObjects() )
- {
- COSObjectKey stmObjKey = new COSObjectKey( next );
- if ( refObjNrs.contains( stmObjKey.getNumber() ) )
- {
- COSObject stmObj = document.getObjectFromPool( stmObjKey );
- stmObj.setObject( next.getObject() );
- }
- }
- }
- }
- }
- return pdfObject.getObject();
- }
-
- protected int lastIndexOf( final char[] pattern, final byte[] buf, final int endOff )
- {
- int offset = super.lastIndexOf(pattern, buf, endOff);
- if (offset > 0 && Arrays.equals(pattern, EOF_MARKER)) {
- // this is the offset of the last %%EOF sequence.
- // nothing should be present after this sequence.
- int tmpOffset = offset + pattern.length;
- if (tmpOffset != buf.length) {
- // EOL is authorized
- if ((buf.length - tmpOffset) > 2 || !(buf[tmpOffset] == 10 || buf[tmpOffset] == 13 || buf[tmpOffset+1] == 10)) {
- addValidationError(new ValidationError(ERROR_SYNTAX_TRAILER_EOF,"File contains data after the last %%EOF sequence"));
- }
- }
- }
- return offset;
- }
-}
\ No newline at end of file
+public class PreflightParser extends NonSequentialPDFParser
+{
+ /**
+ * A one-byte charset (ISO-8859-1) in which every byte maps to exactly one character, unlike UTF-8. Using it
+ * explicitly avoids unexpected errors when the encoding is Cp5816.
+ */
+ public static final Charset encoding = Charset.forName("ISO-8859-1");
+
+ protected DataSource originalDocument;
+
+ protected ValidationResult validationResult;
+
+ protected PreflightDocument preflightDocument;
+
+ protected PreflightContext ctx;
+
+ /**
+ * Create a parser on the given file and keep that file as the original document.
+ *
+ * @param file
+ * PDF file to parse and validate
+ * @param rafi
+ * random access object forwarded as is to the parent parser
+ * @throws IOException
+ * if the parent parser cannot be initialized
+ */
+ public PreflightParser(File file, RandomAccess rafi) throws IOException
+ {
+ super(file, rafi);
+ originalDocument = new FileDataSource(file);
+ }
+
+ /**
+ * Create a parser on the given file without providing a random access object.
+ *
+ * @param file
+ * PDF file to parse and validate
+ * @throws IOException
+ * if the parser cannot be initialized
+ */
+ public PreflightParser(File file) throws IOException
+ {
+ this(file, null);
+ }
+
+ /**
+ * Create a parser on the file designated by the given path, without providing a random access object.
+ *
+ * @param filename
+ * path of the PDF file to parse and validate
+ * @throws IOException
+ * if the parser cannot be initialized
+ */
+ public PreflightParser(String filename) throws IOException
+ {
+ this(new File(filename), null);
+ }
+
+ /**
+ * Create a parser reading from the input stream of the given data source. The data source itself is kept as the
+ * original document.
+ *
+ * @param input
+ * data source giving access to the PDF content
+ * @throws IOException
+ * if the input stream cannot be obtained or the parent parser cannot be initialized
+ */
+ public PreflightParser(DataSource input) throws IOException
+ {
+ super(input.getInputStream());
+ originalDocument = input;
+ }
+
+ /**
+ * Build a ValidationResult that holds a single ValidationError carrying the "unknown error" code.
+ *
+ * @return a new ValidationResult created from that single error
+ */
+ protected static ValidationResult createUnknownErrorResult()
+ {
+ return new ValidationResult(new ValidationError(PreflightConstants.ERROR_UNKOWN_ERROR));
+ }
+
+ /**
+ * Record an error in the ValidationResult of this parser. The result is created lazily on the first call, using
+ * the isWarning flag of that first error as constructor argument (i.e. to know whether the result must be
+ * flagged as valid).
+ *
+ * @param error
+ * the error to record
+ */
+ protected void addValidationError(ValidationError error)
+ {
+ ValidationResult current = this.validationResult;
+ if (current == null)
+ {
+ // first reported error: the result does not exist yet
+ current = new ValidationResult(error.isWarning());
+ this.validationResult = current;
+ }
+ current.addError(error);
+ }
+
+ /**
+ * Record every error of the list, in iteration order, through {@link #addValidationError(ValidationError)}.
+ *
+ * @param errors
+ * the errors to record
+ */
+ protected void addValidationErrors(List<ValidationError> errors)
+ {
+ for (ValidationError current : errors)
+ {
+ this.addValidationError(current);
+ }
+ }
+
+ /**
+ * Parse the file and check it against the {@link Format#PDF_A1B} format.
+ *
+ * @throws IOException
+ * if the parsing fails
+ */
+ public void parse() throws IOException
+ {
+ this.parse(Format.PDF_A1B);
+ }
+
+ /**
+ * Parse the given file and check whether it conforms to the given format. No configuration is provided, see
+ * {@link #parse(Format, PreflightConfiguration)}.
+ *
+ * @param format
+ * format that the document should follow (default {@link Format#PDF_A1B})
+ * @throws IOException
+ * if the parsing fails
+ */
+ public void parse(Format format) throws IOException
+ {
+ this.parse(format, null);
+ }
+
+ /**
+ * Check the PDF header, parse the file, then create the PreflightDocument and its validation context.
+ *
+ * @param format
+ * format that the document should follow; {@link Format#PDF_A1B} is used if null
+ * @param config
+ * Configuration bean that will be used by the PreflightDocument. If null the format is used to determine
+ * the default configuration.
+ * @throws IOException
+ * if the parent parser fails; the failure is recorded as a validation error and rethrown as a
+ * SyntaxValidationException carrying the current validation result
+ */
+ public void parse(Format format, PreflightConfiguration config) throws IOException
+ {
+ checkPdfHeader();
+ try
+ {
+ super.parse();
+ }
+ catch (IOException parseFailure)
+ {
+ // keep the parser message as a validation error before wrapping the exception
+ ValidationError syntaxError = new ValidationError(PreflightConstants.ERROR_SYNTAX_COMMON,
+ parseFailure.getMessage());
+ addValidationError(syntaxError);
+ throw new SyntaxValidationException(parseFailure, this.validationResult);
+ }
+
+ Format formatToUse = format;
+ if (formatToUse == null)
+ {
+ formatToUse = Format.PDF_A1B;
+ }
+ createPdfADocument(formatToUse, config);
+ createContext();
+ }
+
+ /**
+ * Create the PreflightDocument from the parsed COSDocument and store it in this parser.
+ *
+ * @param format
+ * format handed to the PreflightDocument
+ * @param config
+ * configuration handed to the PreflightDocument
+ * @throws IOException
+ * if the document cannot be obtained or the PreflightDocument cannot be created
+ */
+ protected void createPdfADocument(Format format, PreflightConfiguration config) throws IOException
+ {
+ this.preflightDocument = new PreflightDocument(getDocument(), format, config);
+ }
+
+ /**
+ * Create the validation context from the original document and link it with the PreflightDocument in both
+ * directions. The cross reference resolver of this parser is also given to the context.
+ */
+ protected void createContext()
+ {
+ this.ctx = new PreflightContext(this.originalDocument);
+ // context -> document, then document -> context
+ this.ctx.setDocument(this.preflightDocument);
+ this.preflightDocument.setContext(this.ctx);
+ this.ctx.setXrefTableResolver(this.xrefTrailerResolver);
+ }
+
+ /**
+ * Give the validation result collected so far to the PreflightDocument and return that document.
+ *
+ * @return the PreflightDocument created by this parser
+ * @throws IOException
+ * declared by the overridden method; not thrown by the statements visible here
+ */
+ @Override
+ public PDDocument getPDDocument() throws IOException
+ {
+ // NOTE(review): an "Add XMP MetaData" step was announced here but nothing is done - confirm whether needed
+ this.preflightDocument.setResult(this.validationResult);
+ return this.preflightDocument;
+ }
+
+ /**
+ * Same as {@link #getPDDocument()} with the result cast to PreflightDocument.
+ *
+ * @return the PreflightDocument created by this parser
+ * @throws IOException
+ * if {@link #getPDDocument()} fails
+ */
+ public PreflightDocument getPreflightDocument() throws IOException
+ {
+ PDDocument parsed = getPDDocument();
+ return (PreflightDocument) parsed;
+ }
+
+ // --------------------------------------------------------
+ // - Below: all the methods that add controls on the PDF syntax
+ // --------------------------------------------------------
+
+ /**
+ * Run the initial parse of the parent class, then fill the COSDocument with what the parent parser does not set:
+ * the cross reference table, the indirect objects referenced by the trailer and every object of the cross
+ * reference table whose content has not been loaded yet.
+ *
+ * @throws IOException
+ * if the parent initial parse or the parsing of an object fails
+ */
+ @Override
+ protected void initialParse() throws IOException
+ {
+ super.initialParse();
+
+ // fill xref table
+ document.addXRefTable(xrefTrailerResolver.getXrefTable());
+
+ // Trailer entries are useful in the preflight document: parse each indirect one
+ for (COSBase trailerValue : getDocument().getTrailer().getValues())
+ {
+ if (!(trailerValue instanceof COSObject))
+ {
+ continue;
+ }
+ parseObjectDynamically((COSObject) trailerValue, true);
+ }
+
+ // Walk the cross reference table and parse each object that is still without content
+ for (COSObjectKey key : document.getXrefTable().keySet())
+ {
+ COSObject pooled = document.getObjectFromPool(key);
+ if (pooled.getObject() == null)
+ {
+ parseObjectDynamically(pooled, true);
+ }
+ }
+ }
+
+ /**
+ * Check that the PDF header match rules of the PDF/A specification. First line (offset 0) must be a comment with
+ * the PDF version (version 1.0 isn't conform to the PDF/A specification) Second line is a comment with at least 4
+ * bytes greater than 0x80
+ */
+ protected void checkPdfHeader()
+ {
+ BufferedReader reader = null;
+ try
+ {
+ reader = new BufferedReader(new InputStreamReader(new FileInputStream(getPdfFile()), encoding));
+ String firstLine = reader.readLine();
+ if (firstLine == null || (firstLine != null && !firstLine.matches("%PDF-1\\.[1-9]")))
+ {
+ addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_HEADER,
+ "First line must match %PDF-1.\\d"));
+ }
+
+ String secondLine = reader.readLine();
+ byte[] secondLineAsBytes = secondLine.getBytes(encoding.name());
+ if (secondLine != null && secondLineAsBytes.length >= 5)
+ {
+ for (int i = 0; i < secondLineAsBytes.length; ++i)
+ {
+ byte b = secondLineAsBytes[i];
+ if (i == 0 && ((char) b != '%'))
+ {
+ addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_HEADER,
+ "Second line must contains at least 4 bytes greater than 127"));
+ break;
+ }
+ else if (i > 0 && ((b & 0xFF) < 0x80))
+ {
+ addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_HEADER,
+ "Second line must contains at least 4 bytes greater than 127"));
+ break;
+ }
+ }
+ }
+ else
+ {
+ addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_HEADER,
+ "Second line must contains at least 4 bytes greater than 127"));
+ }
+
+ }
+ catch (IOException e)
+ {
+ addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_HEADER,
+ "Unable to read the PDF file : " + e.getMessage()));
+ }
+ finally
+ {
+ IOUtils.closeQuietly(reader);
+ }
+ }
+
+    /**
+     * Same method as {@linkplain PDFParser#parseXrefTable(long)} with additional controls:
+     * <ul>
+     * <li>an EOL is mandatory after the 'xref' keyword</li>
+     * <li>a cross reference subsection header is two integers separated by one white space character</li>
+     * <li>each entry line must contain at least three space separated fields</li>
+     * </ul>
+     * Violations are recorded as validation errors; parsing goes on whenever possible.
+     *
+     * @param startByteOffset
+     *            offset handed to the xref/trailer resolver to signal the start of this table
+     * @return false if the 'xref' keyword is not found at the current position, true otherwise
+     * @throws IOException
+     *             if the PDF source can't be read
+     */
+    protected boolean parseXrefTable(long startByteOffset) throws IOException
+    {
+        if (pdfSource.peek() != 'x')
+        {
+            return false;
+        }
+        String xref = readString();
+        if (!xref.equals("xref"))
+        {
+            // the keyword itself is wrong: report what was found instead of a misleading EOL message
+            addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_CROSS_REF,
+                    "Expected 'xref' keyword but found '" + xref + "'"));
+            return false;
+        }
+        if (!nextIsEOL())
+        {
+            addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_CROSS_REF,
+                    "xref must be followed by EOL"));
+        }
+
+        // signal start of new XRef
+        xrefTrailerResolver.nextXrefObj(startByteOffset);
+
+        // compiled once: the same expression is applied to every subsection header
+        final Pattern subsectionHeader = Pattern.compile("(\\d+)\\s(\\d+)(\\s*)");
+
+        /*
+         * Xref tables can have multiple sections. Each starts with a starting object id and a count.
+         */
+        while (true)
+        {
+            // just after the xref<EOL> there are two integers
+            int currObjID = 0; // first obj id
+            int count = 0; // the number of objects in the xref table
+
+            long offset = pdfSource.getOffset();
+            String line = readLine();
+            Matcher matcher = subsectionHeader.matcher(line);
+            if (matcher.matches())
+            {
+                currObjID = Integer.parseInt(matcher.group(1));
+                count = Integer.parseInt(matcher.group(2));
+            }
+            else
+            {
+                addValidationError(new ValidationError(ERROR_SYNTAX_CROSS_REF,
+                        "Cross reference subsection header is invalid"));
+                // reset pdfSource cursor to read xref information with the lenient reader
+                pdfSource.seek(offset);
+                currObjID = readInt(); // first obj id
+                count = readInt(); // the number of objects in the xref table
+            }
+
+            skipSpaces();
+            for (int i = 0; i < count; i++)
+            {
+                if (pdfSource.isEOF() || isEndOfName((char) pdfSource.peek()))
+                {
+                    break;
+                }
+                if (pdfSource.peek() == 't')
+                {
+                    // fewer entries than announced by the subsection header
+                    addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_CROSS_REF,
+                            "Expected xref line but 't' found"));
+                    break;
+                }
+                // one entry per line: offset, generation number, 'n' or 'f'
+                String currentLine = readLine();
+                String[] splitString = currentLine.split(" ");
+                if (splitString.length < 3)
+                {
+                    addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_CROSS_REF,
+                            "invalid xref line: " + currentLine));
+                    break;
+                }
+                /*
+                 * This supports the corrupt table as reported in PDFBOX-474 (XXXX XXX XX n)
+                 */
+                if (splitString[splitString.length - 1].equals("n"))
+                {
+                    try
+                    {
+                        long currOffset = Long.parseLong(splitString[0]);
+                        int currGenID = Integer.parseInt(splitString[1]);
+                        COSObjectKey objKey = new COSObjectKey(currObjID, currGenID);
+                        xrefTrailerResolver.setXRef(objKey, currOffset);
+                    }
+                    catch (NumberFormatException e)
+                    {
+                        addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_CROSS_REF,
+                                "offset or genid can't be read as number " + e.getMessage()));
+                    }
+                }
+                else if (!splitString[2].equals("f"))
+                {
+                    addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_CROSS_REF,
+                            "Corrupt XRefTable Entry - ObjID:" + currObjID));
+                }
+                currObjID++;
+                skipSpaces();
+            }
+            skipSpaces();
+            // another subsection starts with a digit; anything else ends the table
+            char c = (char) pdfSource.peek();
+            if (c < '0' || c > '9')
+            {
+                break;
+            }
+        }
+        return true;
+    }
+
+    /**
+     * Delegates to {@link NonSequentialPDFParser#parseCOSStream} and surrounds the call with the controls on the
+     * 'stream' and 'endstream' keywords ({@link #checkStreamKeyWord()} before, {@link #checkEndstreamKeyWord()}
+     * after).
+     */
+    protected COSStream parseCOSStream(COSDictionary dic, RandomAccess file) throws IOException
+    {
+        checkStreamKeyWord();
+        final COSStream parsedStream = super.parseCOSStream(dic, file);
+        checkEndstreamKeyWord();
+        return parsedStream;
+    }
+
+    /**
+     * 'stream' must be followed by &lt;CR&gt;&lt;LF&gt; or only &lt;LF&gt;.
+     *
+     * Reads the keyword and the character that follows it, records a validation error for each rule that is
+     * broken, then puts the cursor back where it was when the method was called.
+     *
+     * @throws IOException
+     *             if the PDF source can't be read
+     */
+    protected void checkStreamKeyWord() throws IOException
+    {
+        // remember the entry position: going back by a fixed number of bytes is only right when the token is
+        // exactly 'stream' and one more byte could actually be read
+        long entryOffset = pdfSource.getOffset();
+        String streamV = readString();
+        if (!streamV.equals("stream"))
+        {
+            addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER,
+                    "Expected 'stream' keyword but found '" + streamV + "'"));
+        }
+        int nextChar = pdfSource.read();
+        if (!((nextChar == 13 && pdfSource.peek() == 10) || nextChar == 10))
+        {
+            addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER,
+                    "Expected 'EOL' after the stream keyword"));
+        }
+        // set the offset before stream
+        pdfSource.seek(entryOffset);
+    }
+
+    /**
+     * 'endstream' must be preceded by an EOL.
+     *
+     * Moves the cursor 10 bytes back, checks that an EOL is found there, then reads the next token and checks that
+     * it is 'endstream'. Each broken rule is recorded as a validation error.
+     *
+     * @throws IOException
+     *             if the PDF source can't be read
+     */
+    protected void checkEndstreamKeyWord() throws IOException
+    {
+        // presumably the 9 bytes of 'endstream' plus the byte that precedes the keyword
+        final long beforeKeyword = pdfSource.getOffset() - 10;
+        pdfSource.seek(beforeKeyword);
+
+        final boolean precededByEol = nextIsEOL();
+        if (!precededByEol)
+        {
+            addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER,
+                    "Expected 'EOL' before the endstream keyword"));
+        }
+
+        final String keyword = readString();
+        final boolean isEndstream = keyword.equals("endstream");
+        if (!isEndstream)
+        {
+            addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER,
+                    "Expected 'endstream' keyword but found '" + keyword + "'"));
+        }
+    }
+
+    /**
+     * Consumes one character and tells whether it starts an EOL. When that character is a CR immediately followed
+     * by a LF, the LF is consumed too.
+     *
+     * @return true if the consumed character is CR (13) or LF (10)
+     * @throws IOException
+     *             if the PDF source can't be read
+     */
+    protected boolean nextIsEOL() throws IOException
+    {
+        final int first = pdfSource.read();
+        if (first == 13)
+        {
+            // CR alone is an EOL; swallow the LF of a CR LF pair
+            if (pdfSource.peek() == 10)
+            {
+                pdfSource.read();
+            }
+            return true;
+        }
+        return first == 10;
+    }
+
+    /**
+     * Consumes the next character and compares it with the space character.
+     *
+     * @return true if the consumed character is a space
+     * @throws IOException
+     *             if the PDF source can't be read
+     */
+    protected boolean nextIsSpace() throws IOException
+    {
+        final int consumed = pdfSource.read();
+        return consumed == ' ';
+    }
+
+    /**
+     * Calls {@link BaseParser#parseCOSArray()} and records a validation error when the array holds more than
+     * MAX_ARRAY_ELEMENTS elements.
+     *
+     * @return the array returned by the parent parser
+     * @throws IOException
+     *             if the parent parser fails
+     */
+    @Override
+    protected COSArray parseCOSArray() throws IOException
+    {
+        final COSArray array = super.parseCOSArray();
+        if (array == null)
+        {
+            return array;
+        }
+        if (array.size() > MAX_ARRAY_ELEMENTS)
+        {
+            addValidationError(new ValidationError(ERROR_SYNTAX_ARRAY_TOO_LONG, "Array too long : " + array.size()));
+        }
+        return array;
+    }
+
+    /**
+     * Calls {@link BaseParser#parseCOSName()} and records a validation error when the name, converted to bytes, is
+     * longer than MAX_NAME_SIZE.
+     *
+     * @return the name returned by the parent parser
+     * @throws IOException
+     *             if the parent parser fails
+     */
+    @Override
+    protected COSName parseCOSName() throws IOException
+    {
+        final COSName name = super.parseCOSName();
+        if (name != null)
+        {
+            // NOTE(review): getBytes() uses the platform default charset, so the measured length may vary
+            // between platforms for non ASCII names - confirm this is intended
+            final int byteLength = name.getName().getBytes().length;
+            if (byteLength > MAX_NAME_SIZE)
+            {
+                addValidationError(new ValidationError(ERROR_SYNTAX_NAME_TOO_LONG, "Name too long"));
+            }
+        }
+        return name;
+    }
+
+    /**
+     * Check that the hexa string contains only Hexadecimal characters and white spaces, and that the number of
+     * Hexadecimal characters is even. Once it is done, reset the offset at the beginning of the string and call
+     * {@link BaseParser#parseCOSString()}. The length of the parsed string is then compared with
+     * MAX_STRING_LENGTH.
+     *
+     * @param isDictionary
+     *            forwarded unchanged to the parent parser
+     * @return the string returned by the parent parser
+     * @throws IOException
+     *             if the PDF source can't be read or if the parent parser fails
+     */
+    protected COSString parseCOSString(boolean isDictionary) throws IOException
+    {
+        // offset reminder
+        long offset = pdfSource.getOffset();
+        char nextChar = (char) pdfSource.read();
+        int count = 0;
+        if (nextChar == '<')
+        {
+            do
+            {
+                nextChar = (char) pdfSource.read();
+                // white space (NUL, HT, LF, FF, CR, SP) is allowed between the digits of a hexadecimal string;
+                // it is neither an invalid character nor counted as a digit
+                boolean isWhiteSpace = nextChar == 0 || nextChar == 9 || nextChar == 10 || nextChar == 12
+                        || nextChar == 13 || nextChar == 32;
+                if (nextChar != '>' && !isWhiteSpace)
+                {
+                    if (Character.digit(nextChar, 16) >= 0)
+                    {
+                        count++;
+                    }
+                    else
+                    {
+                        addValidationError(new ValidationError(ERROR_SYNTAX_HEXA_STRING_INVALID,
+                                "Hexa String must have only Hexadecimal Characters (found '" + nextChar + "')"));
+                        break;
+                    }
+                }
+            } while (nextChar != '>');
+        }
+
+        if (count % 2 != 0)
+        {
+            addValidationError(new ValidationError(ERROR_SYNTAX_HEXA_STRING_EVEN_NUMBER,
+                    "Hexa string shall contain even number of non white space char"));
+        }
+
+        // reset the offset to parse the COSString
+        pdfSource.seek(offset);
+        COSString result = super.parseCOSString(isDictionary);
+
+        if (result.getString().length() > MAX_STRING_LENGTH)
+        {
+            addValidationError(new ValidationError(ERROR_SYNTAX_HEXA_STRING_TOO_LONG, "Hexa string is too long"));
+        }
+        return result;
+    }
+
+    /**
+     * Calls {@link BaseParser#parseDirObject()} then controls the result: a float must stay between
+     * MAX_NEGATIVE_FLOAT and MAX_POSITIVE_FLOAT, any other number must fit in an int, and a dictionary must not
+     * have more than MAX_DICT_ENTRIES entries. Each broken limit is recorded as a validation error.
+     *
+     * @return the object returned by the parent parser
+     * @throws IOException
+     *             if the parent parser fails
+     */
+    protected COSBase parseDirObject() throws IOException
+    {
+        final COSBase parsed = super.parseDirObject();
+
+        if (parsed instanceof COSNumber)
+        {
+            final COSNumber numeric = (COSNumber) parsed;
+            if (!(numeric instanceof COSFloat))
+            {
+                final long asLong = numeric.longValue();
+                final boolean fitsInInt = asLong <= Integer.MAX_VALUE && asLong >= Integer.MIN_VALUE;
+                if (!fitsInInt)
+                {
+                    addValidationError(new ValidationError(ERROR_SYNTAX_NUMERIC_RANGE,
+                            "Numeric is too long or too small: " + asLong));
+                }
+            }
+            else
+            {
+                final double asDouble = numeric.doubleValue();
+                if (asDouble > MAX_POSITIVE_FLOAT || asDouble < MAX_NEGATIVE_FLOAT)
+                {
+                    addValidationError(new ValidationError(ERROR_SYNTAX_NUMERIC_RANGE,
+                            "Float is too long or too small: " + asDouble));
+                }
+            }
+        }
+        else if (parsed instanceof COSDictionary)
+        {
+            final int entryCount = ((COSDictionary) parsed).size();
+            if (entryCount > MAX_DICT_ENTRIES)
+            {
+                addValidationError(new ValidationError(ERROR_SYNTAX_TOO_MANY_ENTRIES, "Too Many Entries In Dictionary"));
+            }
+        }
+        return parsed;
+    }
+
+ /**
+ * Returns the object identified by the given number and generation, parsing it first if the document pool
+ * doesn't hold its content yet.
+ *
+ * The xref entry of the object drives the parsing: no entry gives the NULL object, an offset of 0 is recorded as
+ * a validation error, a positive value is used as the file offset of an indirect object, and any other value is
+ * negated and used as the number of the object stream to parse. While an indirect object is read, the separators
+ * of the 'obj' header and the EOL around 'endobj' are controlled and recorded as validation errors when wrong.
+ *
+ * @param objNr
+ * object number
+ * @param objGenNr
+ * object generation number
+ * @param requireExistingNotCompressedObj
+ * if true, an object without xref entry raises a SyntaxValidationException
+ * @return the content of the pooled object once parsed
+ * @throws IOException
+ * if the object read at the offset isn't the expected one, if a stream isn't preceded by a dictionary, if
+ * decryption fails or if 'endobj' is missing
+ */
+ protected COSBase parseObjectDynamically(int objNr, int objGenNr, boolean requireExistingNotCompressedObj)
+ throws IOException
+ {
+ // ---- create object key and get object (container) from pool
+ final COSObjectKey objKey = new COSObjectKey(objNr, objGenNr);
+ final COSObject pdfObject = document.getObjectFromPool(objKey);
+
+ if (pdfObject.getObject() == null)
+ {
+ // not previously parsed
+ // ---- read offset or object stream object number from xref table
+ Long offsetOrObjstmObNr = xrefTrailerResolver.getXrefTable().get(objKey);
+
+ // sanity test to circumvent loops with broken documents
+ // NOTE(review): only a missing xref entry is rejected here although the message also mentions compressed
+ // objects - confirm whether object stream entries should be rejected too
+ if (requireExistingNotCompressedObj && ((offsetOrObjstmObNr == null)))
+ {
+ addValidationError(new ValidationError(ERROR_SYNTAX_MISSING_OFFSET,
+ "Object must be defined and must not be compressed object: " + objKey.getNumber() + ":"
+ + objKey.getGeneration()));
+ throw new SyntaxValidationException("Object must be defined and must not be compressed object: "
+ + objKey.getNumber() + ":" + objKey.getGeneration(), validationResult);
+ }
+
+ if (offsetOrObjstmObNr == null)
+ {
+ // not defined object -> NULL object (Spec. 1.7, chap. 3.2.9)
+ pdfObject.setObject(COSNull.NULL);
+ }
+ else if (offsetOrObjstmObNr == 0)
+ {
+ // the error is recorded and the pooled object is left without content
+ addValidationError(new ValidationError(ERROR_SYNTAX_INVALID_OFFSET, "Object {" + objKey.getNumber()
+ + ":" + objKey.getGeneration() + "} has an offset of 0"));
+ }
+ else if (offsetOrObjstmObNr > 0)
+ {
+ // offset of indirect object in file
+ // ---- go to object start
+ setPdfSource(offsetOrObjstmObNr);
+ // ---- we must have an indirect object
+ int readObjNr = 0;
+ int readObjGen = 0;
+
+ // strict reading first: the whole line must be "<number> <generation> obj" with single separators
+ long offset = pdfSource.getOffset();
+ String line = readLine();
+ Pattern pattern = Pattern.compile("(\\d+)\\s(\\d+)\\sobj");
+ Matcher matcher = pattern.matcher(line);
+ if (matcher.matches())
+ {
+ readObjNr = Integer.parseInt(matcher.group(1));
+ readObjGen = Integer.parseInt(matcher.group(2));
+ }
+ else
+ {
+
+ // lenient reading: record the error then read the header again from the same offset
+ addValidationError(new ValidationError(ERROR_SYNTAX_OBJ_DELIMITER, "Single space expected"));
+ // reset pdfSource cursor to read object information
+ pdfSource.seek(offset);
+ readObjNr = readInt();
+ readObjGen = readInt();
+ for (char c : OBJ_MARKER)
+ {
+ if (pdfSource.read() != c)
+ {
+ addValidationError(new ValidationError(ERROR_SYNTAX_OBJ_DELIMITER, "Expected pattern '"
+ + new String(OBJ_MARKER) + " but missed at character '" + c + "'"));
+ throw new SyntaxValidationException("Expected pattern '" + new String(OBJ_MARKER)
+ + " but missed at character '" + c + "'", validationResult);
+ }
+ }
+ }
+
+ // ---- consistency check
+ if ((readObjNr != objKey.getNumber()) || (readObjGen != objKey.getGeneration()))
+ {
+ throw new IOException("XREF for " + objKey.getNumber() + ":" + objKey.getGeneration()
+ + " points to wrong object: " + readObjNr + ":" + readObjGen);
+ }
+
+ skipSpaces();
+ COSBase pb = parseDirObject();
+ skipSpaces();
+ // kept to come back to the keyword: start of the stream, or EOL control before 'endobj'
+ long endObjectOffset = pdfSource.getOffset();
+ String endObjectKey = readString();
+
+ if (endObjectKey.equals("stream"))
+ {
+ // go back before the keyword, parseCOSStream reads it again
+ pdfSource.seek(endObjectOffset);
+ if (pb instanceof COSDictionary)
+ {
+ COSStream stream = parseCOSStream((COSDictionary) pb, getDocument().getScratchFile());
+ if (securityHandler != null)
+ {
+ try
+ {
+ securityHandler.decryptStream(stream, objNr, objGenNr);
+ }
+ catch (CryptographyException ce)
+ {
+ throw new IOException("Error decrypting stream object " + objNr + ": "
+ + ce.getMessage()
+ /* , ce // TODO: remove remark with Java 1.6 */);
+ }
+ }
+ pb = stream;
+ }
+ else
+ {
+ // this is not legal
+ // the combination of a dict and the stream/endstream forms a complete stream object
+ throw new IOException("Stream not preceded by dictionary (offset: " + offsetOrObjstmObNr + ").");
+ }
+ skipSpaces();
+ endObjectOffset = pdfSource.getOffset();
+ endObjectKey = readString();
+
+ // we have case with a second 'endstream' before endobj
+ if (!endObjectKey.startsWith("endobj"))
+ {
+ if (endObjectKey.startsWith("endstream"))
+ {
+ endObjectKey = endObjectKey.substring(9).trim();
+ if (endObjectKey.length() == 0)
+ {
+ // no other characters in extra endstream line
+ endObjectKey = readString(); // read next line
+ }
+ }
+ }
+ }
+ else if (securityHandler != null)
+ {
+ // decrypt
+ // only the strings found directly in the object, in a dictionary value or in an array element
+ if (pb instanceof COSString)
+ {
+ decrypt((COSString) pb, objNr, objGenNr);
+ }
+ else if (pb instanceof COSDictionary)
+ {
+ for (Entry<COSName, COSBase> entry : ((COSDictionary) pb).entrySet())
+ {
+ // TODO: specially handle 'Contents' entry of signature dictionary like in
+ // SecurityHandler#decryptDictionary
+ if (entry.getValue() instanceof COSString)
+ {
+ decrypt((COSString) entry.getValue(), objNr, objGenNr);
+ }
+ }
+ }
+ else if (pb instanceof COSArray)
+ {
+ final COSArray array = (COSArray) pb;
+ for (int aIdx = 0, len = array.size(); aIdx < len; aIdx++)
+ {
+ if (array.get(aIdx) instanceof COSString)
+ {
+ decrypt((COSString) array.get(aIdx), objNr, objGenNr);
+ }
+ }
+ }
+ }
+
+ pdfObject.setObject(pb);
+
+ if (!endObjectKey.startsWith("endobj"))
+ {
+ throw new IOException("Object (" + readObjNr + ":" + readObjGen + ") at offset "
+ + offsetOrObjstmObNr + " does not end with 'endobj'.");
+ }
+ else
+ {
+ // look at the character just before the keyword, then restore the cursor after the keyword
+ offset = pdfSource.getOffset();
+ pdfSource.seek(endObjectOffset - 1);
+ if (!nextIsEOL())
+ {
+ addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_OBJ_DELIMITER,
+ "EOL expected before the 'endobj' keyword"));
+ }
+ pdfSource.seek(offset);
+ }
+
+ if (!nextIsEOL())
+ {
+ addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_OBJ_DELIMITER,
+ "EOL expected after the 'endobj' keyword"));
+ }
+
+ releasePdfSourceInputStream();
+ }
+ else
+ {
+ // xref value is object nr of object stream containing object to be parsed;
+ // since our object was not found it means object stream was not parsed so far
+ final int objstmObjNr = (int) (-offsetOrObjstmObNr);
+ final COSBase objstmBaseObj = parseObjectDynamically(objstmObjNr, 0, true);
+ if (objstmBaseObj instanceof COSStream)
+ {
+ // parse object stream
+ PDFObjectStreamParser parser = new PDFObjectStreamParser((COSStream) objstmBaseObj, document,
+ forceParsing);
+ parser.parse();
+
+ // get set of object numbers referenced for this object stream
+ final Set<Long> refObjNrs = xrefTrailerResolver.getContainedObjectNumbers(objstmObjNr);
+
+ // register all objects which are referenced to be contained in object stream
+ for (COSObject next : parser.getObjects())
+ {
+ COSObjectKey stmObjKey = new COSObjectKey(next);
+ if (refObjNrs.contains(stmObjKey.getNumber()))
+ {
+ COSObject stmObj = document.getObjectFromPool(stmObjKey);
+ stmObj.setObject(next.getObject());
+ }
+ }
+ }
+ }
+ }
+ return pdfObject.getObject();
+ }
+
+    /**
+     * Calls the parent implementation and, when the searched pattern is the EOF marker, controls what follows the
+     * marker found: nothing, or one single EOL (CR, LF or CR LF). Anything else is recorded as a validation error.
+     *
+     * @param pattern
+     *            the characters to look for
+     * @param buf
+     *            the bytes in which the pattern is searched
+     * @param endOff
+     *            forwarded unchanged to the parent implementation
+     * @return the offset returned by the parent implementation
+     */
+    protected int lastIndexOf(final char[] pattern, final byte[] buf, final int endOff)
+    {
+        int offset = super.lastIndexOf(pattern, buf, endOff);
+        if (offset > 0 && Arrays.equals(pattern, EOF_MARKER))
+        {
+            // this is the offset of the last %%EOF sequence.
+            // nothing should be present after this sequence.
+            int tmpOffset = offset + pattern.length;
+            int trailing = buf.length - tmpOffset;
+
+            // EOL is authorized: one byte (CR or LF) or the two bytes CR LF.
+            // The second byte is read only when it exists, a lone non EOL byte must not overrun the buffer.
+            boolean singleEol = (trailing == 1 && (buf[tmpOffset] == 10 || buf[tmpOffset] == 13))
+                    || (trailing == 2 && buf[tmpOffset] == 13 && buf[tmpOffset + 1] == 10);
+
+            if (trailing > 0 && !singleEol)
+            {
+                addValidationError(new ValidationError(ERROR_SYNTAX_TRAILER_EOF,
+                        "File contains data after the last %%EOF sequence"));
+            }
+        }
+        return offset;
+    }
+}
Modified: pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/process/AbstractProcess.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/process/AbstractProcess.java?rev=1453416&r1=1453415&r2=1453416&view=diff
==============================================================================
--- pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/process/AbstractProcess.java (original)
+++ pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/process/AbstractProcess.java Wed Mar 6 16:46:35 2013
@@ -26,15 +26,19 @@ import java.util.List;
import org.apache.pdfbox.preflight.PreflightContext;
import org.apache.pdfbox.preflight.ValidationResult.ValidationError;
-public abstract class AbstractProcess implements ValidationProcess {
+/**
+ * Base class of the validation processes: gives helpers to record validation errors in a PreflightContext.
+ */
+public abstract class AbstractProcess implements ValidationProcess
+{
- protected void addValidationError(PreflightContext ctx, ValidationError error) {
- ctx.addValidationError(error);
- }
-
- protected void addValidationErrors(PreflightContext ctx, List<ValidationError> errors) {
- for (ValidationError error : errors) {
- addValidationError(ctx, error);
- }
- }
+ /**
+ * Records one validation error in the given context.
+ *
+ * @param ctx
+ * the context which receives the error
+ * @param error
+ * the error to record
+ */
+ protected void addValidationError(PreflightContext ctx, ValidationError error)
+ {
+ ctx.addValidationError(error);
+ }
+
+ /**
+ * Records every validation error of the list in the given context, in the order of the list.
+ *
+ * @param ctx
+ * the context which receives the errors
+ * @param errors
+ * the errors to record
+ */
+ protected void addValidationErrors(PreflightContext ctx, List<ValidationError> errors)
+ {
+ for (ValidationError error : errors)
+ {
+ addValidationError(ctx, error);
+ }
+ }
}
Modified: pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/process/AcroFormValidationProcess.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/process/AcroFormValidationProcess.java?rev=1453416&r1=1453415&r2=1453416&view=diff
==============================================================================
--- pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/process/AcroFormValidationProcess.java (original)
+++ pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/process/AcroFormValidationProcess.java Wed Mar 6 16:46:35 2013
@@ -42,102 +42,116 @@ import org.apache.pdfbox.preflight.Valid
import org.apache.pdfbox.preflight.exception.ValidationException;
import org.apache.pdfbox.preflight.utils.ContextHelper;
-public class AcroFormValidationProcess extends AbstractProcess {
+public class AcroFormValidationProcess extends AbstractProcess
+{
- public void validate(PreflightContext ctx) throws ValidationException {
- PDDocumentCatalog catalog = ctx.getDocument().getDocumentCatalog();
- if (catalog != null) {
- PDAcroForm acroForm = catalog.getAcroForm();
- if (acroForm != null) {
- checkNeedAppearences(ctx, acroForm);
- try {
- exploreFields(ctx, acroForm.getFields());
- } catch (IOException e) {
- throw new ValidationException("Unable to get the list of fields : " + e.getMessage(), e);
- }
- }
- } else {
- throw new ValidationException("There are no Catalog entry in the Document.");
- }
- }
-
- /**
- * This method checks if the NeedAppearances entry is present. If it is, the
- * value must be false.
- *
- * If the entry is invalid, the ERROR_SYNTAX_DICT_INVALID (1.2.3) error is
- * return.
- *
- * @param ctx
- * @param acroForm
- * @param result
- */
- protected void checkNeedAppearences(PreflightContext ctx, PDAcroForm acroForm) {
- if (acroForm.getDictionary().getBoolean(ACROFORM_DICTIONARY_KEY_NEED_APPEARANCES, false)) {
- addValidationError(ctx, new ValidationError(ERROR_SYNTAX_DICT_INVALID, "NeedAppearance is present with the value \"true\""));
- }
- }
-
- /**
- * This function explores all fields and their children to check if the A or
- * AA entry is present.
- *
- * @param ctx
- * @param acroForm
- * @param result
- * @throws IOException
- */
- protected boolean exploreFields(PreflightContext ctx, List<?> lFields)
- throws IOException {
- if (lFields != null) { // the list can be null is the Field doesn't have child
- for (Object obj : lFields) {
- if (!valideField(ctx, (PDField) obj)) {
- return false;
- }
- }
- }
- return true;
- }
-
- /**
- * A and AA field are forbidden, this method checks if they are present and
- * checks all child of this field. If the an Additional Action is present the
- * error code ERROR_ACTION_FORBIDDEN_ADDITIONAL_ACTIONS_FIELD (6.2.3) is added
- * to the error list If the an Action is present (in the Widget Annotation)
- * the error ERROR_ACTION_FORBIDDEN_WIDGET_ACTION_FIELD (6.2.4) is added to
- * the error list. (Remark : The widget validation will be done by the
- * AnnotationValidationHelper, but some actions are authorized in a standard
- * Widget)
- *
- * @param ctx
- * @param aField
- * @return
- * @throws IOException
- */
- protected boolean valideField(PreflightContext ctx, PDField aField) throws IOException {
- boolean res = true;
- PDFormFieldAdditionalActions aa = aField.getActions();
- if (aa != null) {
- addValidationError(ctx, new ValidationError(ERROR_ACTION_FORBIDDEN_ADDITIONAL_ACTIONS_FIELD, "\"AA\" must not be used in a Field dictionary"));
- res = false;
- }
-
- /*
- * The widget validation will be done by the widget annotation,
- * a widget contained in a Field can't have action.
- */
- PDAnnotationWidget widget = aField.getWidget();
- if (res && widget != null) {
- ContextHelper.validateElement(ctx, widget.getDictionary(), ANNOTATIONS_PROCESS);
- COSBase act = widget.getDictionary().getDictionaryObject(COSName.A);
- if (act != null) {
- addValidationError(ctx, new ValidationError(ERROR_ACTION_FORBIDDEN_WIDGET_ACTION_FIELD, "\"A\" must not be used in a Field dictionary"));
- res = false;
- }
- }
-
- res = res && exploreFields(ctx, aField.getKids());
- return res;
- }
+    /**
+     * Validates the AcroForm of the document when there is one: controls the NeedAppearances entry then explores
+     * all the fields.
+     *
+     * @param ctx
+     *            the context which gives access to the document and receives the errors
+     * @throws ValidationException
+     *             if the document has no catalog or if the fields can't be read
+     */
+    public void validate(PreflightContext ctx) throws ValidationException
+    {
+        PDDocumentCatalog catalog = ctx.getDocument().getDocumentCatalog();
+        if (catalog == null)
+        {
+            throw new ValidationException("There are no Catalog entry in the Document.");
+        }
+
+        PDAcroForm form = catalog.getAcroForm();
+        if (form == null)
+        {
+            // no form, nothing to validate
+            return;
+        }
+
+        checkNeedAppearences(ctx, form);
+        try
+        {
+            exploreFields(ctx, form.getFields());
+        }
+        catch (IOException e)
+        {
+            throw new ValidationException("Unable to get the list of fields : " + e.getMessage(), e);
+        }
+    }
+
+    /**
+     * Controls the NeedAppearances entry of the form dictionary: when the entry is read as true, the
+     * ERROR_SYNTAX_DICT_INVALID error is added to the context. A missing entry is read as false.
+     *
+     * @param ctx
+     *            the context which receives the error
+     * @param acroForm
+     *            the form to control
+     */
+    protected void checkNeedAppearences(PreflightContext ctx, PDAcroForm acroForm)
+    {
+        final boolean needAppearances = acroForm.getDictionary().getBoolean(ACROFORM_DICTIONARY_KEY_NEED_APPEARANCES,
+                false);
+        if (!needAppearances)
+        {
+            return;
+        }
+        addValidationError(ctx, new ValidationError(ERROR_SYNTAX_DICT_INVALID,
+                "NeedAppearance is present with the value \"true\""));
+    }
+
+    /**
+     * Validates the fields of the list one after the other with {@link #valideField} and stops at the first
+     * invalid one.
+     *
+     * @param ctx
+     *            the context which receives the errors
+     * @param lFields
+     *            the fields to validate; can be null when a field has no child
+     * @return false as soon as one field is invalid, true otherwise (null list included)
+     * @throws IOException
+     *             if a field can't be read
+     */
+    protected boolean exploreFields(PreflightContext ctx, List<?> lFields) throws IOException
+    {
+        if (lFields == null)
+        {
+            // a field without child gives a null list
+            return true;
+        }
+        for (Object candidate : lFields)
+        {
+            final boolean fieldIsValid = valideField(ctx, (PDField) candidate);
+            if (!fieldIsValid)
+            {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /**
+     * The A and AA entries are forbidden in a field. If the field has Additional Actions, the error
+     * ERROR_ACTION_FORBIDDEN_ADDITIONAL_ACTIONS_FIELD is added to the context. Otherwise, if the field has a
+     * widget, the widget dictionary is validated by the annotations process and the error
+     * ERROR_ACTION_FORBIDDEN_WIDGET_ACTION_FIELD is added when it has an A entry. The children of the field are
+     * explored only when no error was found on the field itself.
+     *
+     * @param ctx
+     *            the context which receives the errors
+     * @param aField
+     *            the field to validate
+     * @return true if the field and all its children are valid
+     * @throws IOException
+     *             if the field can't be read
+     */
+    protected boolean valideField(PreflightContext ctx, PDField aField) throws IOException
+    {
+        boolean valid = true;
+
+        PDFormFieldAdditionalActions additionalActions = aField.getActions();
+        if (additionalActions != null)
+        {
+            addValidationError(ctx, new ValidationError(ERROR_ACTION_FORBIDDEN_ADDITIONAL_ACTIONS_FIELD,
+                    "\"AA\" must not be used in a Field dictionary"));
+            valid = false;
+        }
+
+        /*
+         * The widget validation is delegated to the annotations process; in addition, a widget contained in a
+         * Field can't have action.
+         */
+        PDAnnotationWidget widget = aField.getWidget();
+        if (valid && widget != null)
+        {
+            ContextHelper.validateElement(ctx, widget.getDictionary(), ANNOTATIONS_PROCESS);
+            if (widget.getDictionary().getDictionaryObject(COSName.A) != null)
+            {
+                addValidationError(ctx, new ValidationError(ERROR_ACTION_FORBIDDEN_WIDGET_ACTION_FIELD,
+                        "\"A\" must not be used in a Field dictionary"));
+                valid = false;
+            }
+        }
+
+        if (!valid)
+        {
+            return false;
+        }
+        return exploreFields(ctx, aField.getKids());
+    }
}
Modified: pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/process/BookmarkValidationProcess.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/process/BookmarkValidationProcess.java?rev=1453416&r1=1453415&r2=1453416&view=diff
==============================================================================
--- pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/process/BookmarkValidationProcess.java (original)
+++ pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/process/BookmarkValidationProcess.java Wed Mar 6 16:46:35 2013
@@ -37,121 +37,145 @@ import org.apache.pdfbox.preflight.excep
import org.apache.pdfbox.preflight.utils.COSUtils;
import org.apache.pdfbox.preflight.utils.ContextHelper;
-public class BookmarkValidationProcess extends AbstractProcess {
+public class BookmarkValidationProcess extends AbstractProcess
+{
- public void validate(PreflightContext ctx) throws ValidationException {
- PDDocumentCatalog catalog = ctx.getDocument().getDocumentCatalog();
- if (catalog != null) {
- PDDocumentOutline outlineHierarchy = catalog.getDocumentOutline();
- if (outlineHierarchy != null) {
- // Count entry is mandatory if there are childrens
- if (!isCountEntryPresent(outlineHierarchy.getCOSDictionary()) && (outlineHierarchy.getFirstChild() != null || outlineHierarchy.getLastChild() != null) ) {
- addValidationError(ctx, new ValidationError(
- ERROR_SYNTAX_TRAILER_OUTLINES_INVALID,
- "Outline Hierarchy doesn't have Count entry"));
- } else if ( isCountEntryPositive(ctx, outlineHierarchy.getCOSDictionary())
- && (outlineHierarchy.getFirstChild() == null || outlineHierarchy.getLastChild() == null)) {
- addValidationError(ctx, new ValidationError(ERROR_SYNTAX_TRAILER_OUTLINES_INVALID,
- "Outline Hierarchy doesn't have First and/or Last entry(ies)"));
- } else {
- exploreOutlineLevel(ctx, outlineHierarchy.getFirstChild());
- }
- }
- } else {
- throw new ValidationException("There are no Catalog entry in the Document.");
- }
- }
-
- /**
- * Return true if the Count entry is present in the given dictionary.
- *
- * @param outline
- * @return
- */
- private boolean isCountEntryPresent(COSDictionary outline) {
- return outline.getItem(COSName.getPDFName("Count")) != null;
- }
- /**
- * return true if Count entry > 0
- * @param outline
- * @param doc
- * @return
- */
- private boolean isCountEntryPositive(PreflightContext ctx, COSDictionary outline) {
- COSBase countBase = outline.getItem(COSName.getPDFName("Count"));
- COSDocument cosDocument = ctx.getDocument().getDocument();
- return COSUtils.isInteger(countBase, cosDocument) && (COSUtils.getAsInteger(countBase, cosDocument)>0);
- }
- /**
- * This method explores the Outline Item Level and call a validation method on
- * each Outline Item. If an invalid outline item is found, the result list is
- * updated.
- *
- * @param inputItem
- * The first outline item of the level
- * @param ctx
- * The document handler which provides useful data for the level
- * exploration (ex : access to the PDDocument)
- * @return true if all items are valid in this level.
- * @throws ValidationException
- */
- protected boolean exploreOutlineLevel(PreflightContext ctx, PDOutlineItem inputItem)
- throws ValidationException {
- PDOutlineItem currentItem = inputItem;
- while (currentItem != null) {
- if (!validateItem(ctx, currentItem)) {
- return false;
- }
- currentItem = currentItem.getNextSibling();
- }
- return true;
- }
-
- /**
- * This method checks the inputItem dictionary and call the
- * exploreOutlineLevel method on the first child if it is not null.
- *
- * @param inputItem
- * outline item to validate
- * @param ctx
- * The document handler which provides useful data for the level
- * exploration (ex : access to the PDDocument)
- * @param result
- * @return
- * @throws ValidationException
- */
- protected boolean validateItem(PreflightContext ctx, PDOutlineItem inputItem)
- throws ValidationException {
- boolean isValid = true;
- // Dest entry isn't permitted if the A entry is present
- // A entry isn't permitted if the Dest entry is present
- // If the A enntry is present, the referenced actions is validated
- COSDictionary dictionary = inputItem.getCOSDictionary();
- COSBase dest = dictionary.getItem(COSName.DEST);
- COSBase action = dictionary.getItem(COSName.A);
-
- if (action != null && dest != null) {
- addValidationError(ctx, new ValidationError(ERROR_SYNTAX_TRAILER_OUTLINES_INVALID,
- "Dest entry isn't permitted if the A entry is present"));
- return false;
- } else if (action != null) {
- ContextHelper.validateElement(ctx, dictionary, ACTIONS_PROCESS);
- } // else no specific validation
-
- // check children
- PDOutlineItem fChild = inputItem.getFirstChild();
- if (fChild != null) {
- if (!isCountEntryPresent(inputItem.getCOSDictionary())) {
- addValidationError(ctx, new ValidationError(ERROR_SYNTAX_TRAILER_OUTLINES_INVALID,
- "Outline item doesn't have Count entry but has at least one descendant."));
- isValid = false;
- } else {
- // there are some descendants, so dictionary must have a Count entry
- isValid = isValid && exploreOutlineLevel(ctx, fChild);
- }
- }
+    /**
+     * Validates the outline hierarchy of the document when there is one: the Count entry must be present if the
+     * hierarchy has a first or a last child, the First and Last entries must be present if Count is positive, and
+     * in the other cases the first level of items is explored.
+     *
+     * @param ctx
+     *            the context which gives access to the document and receives the errors
+     * @throws ValidationException
+     *             if the document has no catalog or if the exploration fails
+     */
+    public void validate(PreflightContext ctx) throws ValidationException
+    {
+        PDDocumentCatalog catalog = ctx.getDocument().getDocumentCatalog();
+        if (catalog == null)
+        {
+            throw new ValidationException("There are no Catalog entry in the Document.");
+        }
+
+        PDDocumentOutline outlineHierarchy = catalog.getDocumentOutline();
+        if (outlineHierarchy == null)
+        {
+            // no outline, nothing to validate
+            return;
+        }
+
+        // Count entry is mandatory if there are children
+        if (!isCountEntryPresent(outlineHierarchy.getCOSDictionary())
+                && (outlineHierarchy.getFirstChild() != null || outlineHierarchy.getLastChild() != null))
+        {
+            addValidationError(ctx, new ValidationError(ERROR_SYNTAX_TRAILER_OUTLINES_INVALID,
+                    "Outline Hierarchy doesn't have Count entry"));
+            return;
+        }
+
+        if (isCountEntryPositive(ctx, outlineHierarchy.getCOSDictionary())
+                && (outlineHierarchy.getFirstChild() == null || outlineHierarchy.getLastChild() == null))
+        {
+            addValidationError(ctx, new ValidationError(ERROR_SYNTAX_TRAILER_OUTLINES_INVALID,
+                    "Outline Hierarchy doesn't have First and/or Last entry(ies)"));
+            return;
+        }
+
+        exploreOutlineLevel(ctx, outlineHierarchy.getFirstChild());
+    }
+
+    /**
+     * Tells whether the given dictionary has a Count entry.
+     *
+     * @param outline
+     *            the outline dictionary to look into
+     * @return true if the Count entry is present
+     */
+    private boolean isCountEntryPresent(COSDictionary outline)
+    {
+        final COSBase countEntry = outline.getItem(COSName.getPDFName("Count"));
+        return countEntry != null;
+    }
+
+    /**
+     * Tells whether the Count entry of the given dictionary is an integer greater than 0.
+     *
+     * @param ctx
+     *            the context which gives access to the COS document used to read the entry
+     * @param outline
+     *            the outline dictionary to look into
+     * @return true if Count is an integer and Count > 0
+     */
+    private boolean isCountEntryPositive(PreflightContext ctx, COSDictionary outline)
+    {
+        COSBase countEntry = outline.getItem(COSName.getPDFName("Count"));
+        COSDocument cosDocument = ctx.getDocument().getDocument();
+        if (!COSUtils.isInteger(countEntry, cosDocument))
+        {
+            return false;
+        }
+        return COSUtils.getAsInteger(countEntry, cosDocument) > 0;
+    }
+
+    /**
+     * Walks through one level of outline items, from the given item to its last sibling, and validates each item
+     * with {@link #validateItem}. The walk stops at the first invalid item.
+     *
+     * @param ctx
+     *            the context which gives access to the document and receives the errors
+     * @param inputItem
+     *            the first outline item of the level; can be null
+     * @return true if all items are valid in this level.
+     * @throws ValidationException
+     *             if the validation of an item fails
+     */
+    protected boolean exploreOutlineLevel(PreflightContext ctx, PDOutlineItem inputItem) throws ValidationException
+    {
+        for (PDOutlineItem item = inputItem; item != null; item = item.getNextSibling())
+        {
+            final boolean itemIsValid = validateItem(ctx, item);
+            if (!itemIsValid)
+            {
+                return false;
+            }
+        }
+        return true;
+    }
+
+ /**
+ * This method checks the inputItem dictionary and calls the exploreOutlineLevel method on the first child if it
+ * is not null.
+ *
+ * An item which has both the A and the Dest entries is invalid. An item which has a first child but no Count
+ * entry is invalid too.
+ *
+ * @param inputItem
+ * outline item to validate
+ * @param ctx
+ * The document handler which provides useful data for the level exploration (ex : access to the
+ * PDDocument)
+ * @return true if the item and all its descendants are valid
+ * @throws ValidationException
+ */
+ protected boolean validateItem(PreflightContext ctx, PDOutlineItem inputItem) throws ValidationException
+ {
+ boolean isValid = true;
+ // Dest entry isn't permitted if the A entry is present
+ // A entry isn't permitted if the Dest entry is present
+ // If the A entry is present, the item dictionary is validated by the actions process
+ COSDictionary dictionary = inputItem.getCOSDictionary();
+ COSBase dest = dictionary.getItem(COSName.DEST);
+ COSBase action = dictionary.getItem(COSName.A);
+
+ if (action != null && dest != null)
+ {
+ addValidationError(ctx, new ValidationError(ERROR_SYNTAX_TRAILER_OUTLINES_INVALID,
+ "Dest entry isn't permitted if the A entry is present"));
+ return false;
+ }
+ else if (action != null)
+ {
+ ContextHelper.validateElement(ctx, dictionary, ACTIONS_PROCESS);
+ } // else no specific validation
+
+ // check children
+ PDOutlineItem fChild = inputItem.getFirstChild();
+ if (fChild != null)
+ {
+ if (!isCountEntryPresent(inputItem.getCOSDictionary()))
+ {
+ addValidationError(ctx, new ValidationError(ERROR_SYNTAX_TRAILER_OUTLINES_INVALID,
+ "Outline item doesn't have Count entry but has at least one descendant."));
+ isValid = false;
+ }
+ else
+ {
+ // there are some descendants, so dictionary must have a Count entry
+ isValid = isValid && exploreOutlineLevel(ctx, fChild);
+ }
+ }
- return isValid;
- }
+ return isValid;
+ }
}