You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2011/07/24 16:02:33 UTC
svn commit: r1150373 [10/12] - in /pdfbox/trunk/preflight: ./ src/ src/main/
src/main/java/ src/main/java/org/ src/main/java/org/apache/
src/main/java/org/apache/padaf/ src/main/java/org/apache/padaf/preflight/
src/main/java/org/apache/padaf/preflight/...
Added: pdfbox/trunk/preflight/src/main/java/org/apache/padaf/preflight/utils/ContentStreamEngine.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/main/java/org/apache/padaf/preflight/utils/ContentStreamEngine.java?rev=1150373&view=auto
==============================================================================
--- pdfbox/trunk/preflight/src/main/java/org/apache/padaf/preflight/utils/ContentStreamEngine.java (added)
+++ pdfbox/trunk/preflight/src/main/java/org/apache/padaf/preflight/utils/ContentStreamEngine.java Sun Jul 24 14:02:12 2011
@@ -0,0 +1,456 @@
+/*****************************************************************************
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ ****************************************************************************/
+
+package org.apache.padaf.preflight.utils;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+
+import org.apache.padaf.preflight.DocumentHandler;
+import org.apache.padaf.preflight.ValidationException;
+import org.apache.padaf.preflight.ValidationResult.ValidationError;
+import org.apache.padaf.preflight.contentstream.ContentStreamException;
+import org.apache.padaf.preflight.contentstream.StubOperator;
+import org.apache.padaf.preflight.graphics.ICCProfileWrapper;
+import org.apache.padaf.preflight.graphics.color.ColorSpaceHelper;
+import org.apache.padaf.preflight.graphics.color.ColorSpaceHelperFactory;
+import org.apache.padaf.preflight.graphics.color.ColorSpaces;
+import org.apache.padaf.preflight.graphics.color.ColorSpaceHelperFactory.ColorSpaceRestriction;
+import org.apache.pdfbox.cos.COSBase;
+import org.apache.pdfbox.cos.COSDictionary;
+import org.apache.pdfbox.cos.COSDocument;
+import org.apache.pdfbox.cos.COSName;
+import org.apache.pdfbox.cos.COSString;
+import org.apache.pdfbox.pdmodel.graphics.color.PDColorSpace;
+import org.apache.pdfbox.util.PDFOperator;
+import org.apache.pdfbox.util.PDFStreamEngine;
+import org.apache.pdfbox.util.operator.BeginText;
+import org.apache.pdfbox.util.operator.Concatenate;
+import org.apache.pdfbox.util.operator.EndText;
+import org.apache.pdfbox.util.operator.GRestore;
+import org.apache.pdfbox.util.operator.GSave;
+import org.apache.pdfbox.util.operator.Invoke;
+import org.apache.pdfbox.util.operator.MoveText;
+import org.apache.pdfbox.util.operator.MoveTextSetLeading;
+import org.apache.pdfbox.util.operator.NextLine;
+import org.apache.pdfbox.util.operator.OperatorProcessor;
+import org.apache.pdfbox.util.operator.SetCharSpacing;
+import org.apache.pdfbox.util.operator.SetHorizontalTextScaling;
+import org.apache.pdfbox.util.operator.SetLineCapStyle;
+import org.apache.pdfbox.util.operator.SetLineDashPattern;
+import org.apache.pdfbox.util.operator.SetLineJoinStyle;
+import org.apache.pdfbox.util.operator.SetLineWidth;
+import org.apache.pdfbox.util.operator.SetMatrix;
+import org.apache.pdfbox.util.operator.SetNonStrokingCMYKColor;
+import org.apache.pdfbox.util.operator.SetNonStrokingColor;
+import org.apache.pdfbox.util.operator.SetNonStrokingColorSpace;
+import org.apache.pdfbox.util.operator.SetNonStrokingRGBColor;
+import org.apache.pdfbox.util.operator.SetStrokingCMYKColor;
+import org.apache.pdfbox.util.operator.SetStrokingColor;
+import org.apache.pdfbox.util.operator.SetStrokingColorSpace;
+import org.apache.pdfbox.util.operator.SetStrokingRGBColor;
+import org.apache.pdfbox.util.operator.SetTextFont;
+import org.apache.pdfbox.util.operator.SetTextLeading;
+import org.apache.pdfbox.util.operator.SetTextRenderingMode;
+import org.apache.pdfbox.util.operator.SetTextRise;
+import org.apache.pdfbox.util.operator.SetWordSpacing;
+
+import static org.apache.padaf.preflight.ValidationConstants.*;
+/**
+ * This class inherits from org.apache.pdfbox.util.PDFStreamEngine to allow the
+ * validation of specific rules in ContentStream.
+ */
+public abstract class ContentStreamEngine extends PDFStreamEngine {
+
+ protected DocumentHandler documentHandler = null;
+
+ protected Map<String,OperatorProcessor> contentStreamEngineOperators = new HashMap<String,OperatorProcessor>();
+
+ /**
+  * Create the engine and register every content stream operator it accepts.
+  * Operators whose PDFBox implementation is usable are bound to that
+  * implementation; all the others are bound to one shared StubOperator.
+  *
+  * @param _handler
+  *          the document handler, stored in the documentHandler field
+  */
+ public ContentStreamEngine(DocumentHandler _handler) {
+   this.documentHandler = _handler;
+
+   // ---- Graphics operators
+   registerOperatorProcessor("w", new SetLineWidth());
+   registerOperatorProcessor("cm", new Concatenate());
+
+   registerOperatorProcessor("CS", new SetStrokingColorSpace());
+   registerOperatorProcessor("cs", new SetNonStrokingColorSpace());
+   registerOperatorProcessor("d", new SetLineDashPattern());
+   registerOperatorProcessor("Do", new Invoke());
+
+   registerOperatorProcessor("j", new SetLineJoinStyle());
+   registerOperatorProcessor("J", new SetLineCapStyle());
+   registerOperatorProcessor("K", new SetStrokingCMYKColor());
+   registerOperatorProcessor("k", new SetNonStrokingCMYKColor());
+
+   registerOperatorProcessor("rg", new SetNonStrokingRGBColor());
+   registerOperatorProcessor("RG", new SetStrokingRGBColor());
+
+   registerOperatorProcessor("SC", new SetStrokingColor());
+   registerOperatorProcessor("SCN", new SetStrokingColor());
+   registerOperatorProcessor("sc", new SetNonStrokingColor());
+   registerOperatorProcessor("scn", new SetNonStrokingColor());
+
+   // ---- Graphics state
+   registerOperatorProcessor("Q", new GRestore());
+   registerOperatorProcessor("q", new GSave());
+
+   // ---- Text operators
+   registerOperatorProcessor("BT", new BeginText());
+   registerOperatorProcessor("ET", new EndText());
+   registerOperatorProcessor("Tf", new SetTextFont());
+   registerOperatorProcessor("Tr", new SetTextRenderingMode());
+   registerOperatorProcessor("Tm", new SetMatrix());
+   registerOperatorProcessor("Td", new MoveText());
+   registerOperatorProcessor("T*", new NextLine());
+   registerOperatorProcessor("TD", new MoveTextSetLeading());
+   registerOperatorProcessor("Tc", new SetCharSpacing());
+   registerOperatorProcessor("TL", new SetTextLeading());
+   registerOperatorProcessor("Ts", new SetTextRise());
+   registerOperatorProcessor("Tw", new SetWordSpacing());
+   registerOperatorProcessor("Tz", new SetHorizontalTextScaling());
+
+   // ---- Do not use the PDFBox Operator, because of the PageDrawer class cast
+   // Or because the Operator doesn't exist
+   // ---- Each operator is listed once; all of them share the same stub instance.
+   StubOperator so = new StubOperator();
+   String[] stubbedOperators = {
+       // path construction, clipping and inline images
+       "l", "re", "c", "y", "v", "n", "BI", "EI", "m", "W*", "W", "h",
+       // text showing
+       "Tj", "TJ", "'", "\"",
+       // close, fill and stroke
+       "b", "B", "b*", "B*",
+       // marked content
+       "BDC", "BMC", "DP", "EMC",
+       // Type 3 font glyph description
+       "d0", "d1",
+       // fill
+       "f", "F", "f*",
+       // gray color
+       "g", "G",
+       // miter limit and marked content point
+       "M", "MP",
+       // graphics state parameters
+       "gs", "i",
+       // rendering intent, stroke and shading
+       "ri", "s", "S", "sh"
+   };
+   for (String stubbedOperator : stubbedOperators) {
+     registerOperatorProcessor(stubbedOperator, so);
+   }
+ }
+
+ /**
+ * Register the operator processor in the parent PDFStreamEngine and keep
+ * track of it in the contentStreamEngineOperators map of this class.
+ *
+ * @param operator
+ * the operator name used as key of the registration
+ * @param op
+ * the processor bound to this operator
+ */
+ public final void registerOperatorProcessor( String operator, OperatorProcessor op )
+ {
+ super.registerOperatorProcessor(operator, op);
+ contentStreamEngineOperators.put( operator, op );
+ }
+
+
+ /**
+  * Check the operand of the "ri" operator. The operand must exist in the
+  * RenderingIntent list (org.apache.padaf.preflight.utils.RenderingIntents).
+  * Operators other than "ri" are ignored.
+  *
+  * @param operator
+  *          the operator being processed, only "ri" is validated
+  * @param arguments
+  *          the "ri" operands, the first one is the rendering intent given as
+  *          a COSName or as a String
+  * @throws ContentStreamException
+  *           ERROR_GRAPHIC_UNEXPECTED_VALUE_FOR_KEY if the operand is missing
+  *           or invalid
+  */
+ protected void validRenderingIntent(PDFOperator operator, List arguments)
+     throws ContentStreamException {
+   if (!"ri".equals(operator.getOperation())) {
+     return;
+   }
+
+   // ---- A missing operand is handled as a null value, which is never a
+   // valid rendering intent.
+   Object operand = (arguments == null || arguments.isEmpty()) ? null : arguments.get(0);
+
+   // ---- RenderingIntents stores String values: a COSName operand has to be
+   // unwrapped, otherwise the lookup can never succeed.
+   Object renderingIntent = operand;
+   if (operand instanceof COSName) {
+     renderingIntent = ((COSName) operand).getName();
+   }
+
+   if (!RenderingIntents.contains(renderingIntent)) {
+     throwContentStreamException("Unexpected value '" + operand
+         + "' for ri operand. ", ERROR_GRAPHIC_UNEXPECTED_VALUE_FOR_KEY);
+   }
+ }
+
+ /**
+  * Validate the size of the graphics state stack when the operator is the
+  * Save Graphic state operator ("q"). Any other operator is ignored.
+  *
+  * @param operator
+  *          the operator being processed
+  * @throws ContentStreamException
+  *           ERROR_GRAPHIC_TOO_MANY_GRAPHIC_STATES if the stack holds more
+  *           than MAX_GRAPHIC_STATES entries
+  */
+ protected void validNumberOfGraphicStates(PDFOperator operator) throws ContentStreamException {
+   boolean isSaveGraphicState = "q".equals(operator.getOperation());
+   if (!isSaveGraphicState) {
+     return;
+   }
+
+   if (this.getGraphicsStack().size() > MAX_GRAPHIC_STATES) {
+     throwContentStreamException("Too many graphic states", ERROR_GRAPHIC_TOO_MANY_GRAPHIC_STATES);
+   }
+ }
+
+ /**
+  * Validate the filter declared by an InlinedImage. A ContentStreamException
+  * is thrown if FilterHelper rejects the filter, which covers the forbidden
+  * LZW filter and the filters FilterHelper doesn't know.
+  *
+  * @param operator
+  *          the InlinedImage object (BI to EI)
+  * @throws ContentStreamException
+  *           ERROR_SYNTAX_STREAM_INVALID_FILTER for the LZW filter,
+  *           ERROR_SYNTAX_STREAM_UNDEFINED_FILTER for any other rejected filter
+  */
+ protected void validImageFilter(PDFOperator operator)
+     throws ContentStreamException {
+   COSDictionary imageDictionary = operator.getImageParameters().getDictionary();
+
+   // ---- The filter is read from the STREAM_DICTIONARY_KEY_F entry first,
+   // then from the STREAM_DICTIONARY_KEY_FILTER entry.
+   String filterName = imageDictionary.getNameAsString(STREAM_DICTIONARY_KEY_F);
+   if (filterName == null) {
+     filterName = imageDictionary.getNameAsString(STREAM_DICTIONARY_KEY_FILTER);
+   }
+
+   String errorCode = FilterHelper.isAuthorizedFilter(filterName);
+   if (errorCode == null) {
+     // ---- the filter is authorized (or there is no filter at all)
+     return;
+   }
+
+   // --- LZW is forbidden, everything else rejected is an undefined filter.
+   boolean forbiddenLzw = ERROR_SYNTAX_STREAM_INVALID_FILTER.equals(errorCode);
+   String details = forbiddenLzw
+       ? "LZW filter can't be used in a PDF/A File"
+       : "This filter isn't defined in the PDF Reference Third Edition.";
+   String reportedCode = forbiddenLzw
+       ? ERROR_SYNTAX_STREAM_INVALID_FILTER
+       : ERROR_SYNTAX_STREAM_UNDEFINED_FILTER;
+   throwContentStreamException(details, reportedCode);
+ }
+
+ /**
+  * This method validates if the ColorSpace used by the InlinedImage is
+  * consistent with the color space defined in OutputIntent dictionaries.
+  * Nothing is checked when the InlinedImage dictionary has no color space
+  * entry.
+  *
+  * @param operator
+  *          the InlinedImage object (BI to EI)
+  * @throws ContentStreamException
+  *           if the color space is unknown or if its validation reports an
+  *           error
+  * @throws IOException
+  *           if the color space helper raises a ValidationException, which
+  *           is attached as the cause
+  */
+ protected void validImageColorSpace(PDFOperator operator)
+     throws ContentStreamException, IOException {
+   COSDictionary dict = operator.getImageParameters().getDictionary();
+
+   COSDocument doc = this.documentHandler.getDocument().getDocument();
+   COSBase csInlinedBase = dict.getItem(COSName
+       .getPDFName(STREAM_DICTIONARY_KEY_COLOR_SPACE));
+
+   ColorSpaceHelper csHelper = null;
+   if (csInlinedBase != null) {
+
+     if (COSUtils.isString(csInlinedBase, doc)) {
+       // ---- In InlinedImage only DeviceGray/RGB/CMYK and restricted Indexed
+       // color spaces are allowed.
+       String colorSpace = COSUtils.getAsString(csInlinedBase, doc);
+       ColorSpaces cs = null;
+
+       try {
+         cs = ColorSpaces.valueOf(colorSpace);
+       } catch (IllegalArgumentException e) {
+         // ---- The color space is unknown.
+         // ---- Try to access the resources dictionary, the color space can be
+         // a reference.
+         PDColorSpace pdCS = (PDColorSpace) this.getColorSpaces().get(
+             colorSpace);
+         if (pdCS != null) {
+           cs = ColorSpaces.valueOf(pdCS.getName());
+           csHelper = ColorSpaceHelperFactory.getColorSpaceHelper(pdCS,
+               documentHandler, ColorSpaceRestriction.ONLY_DEVICE);
+         }
+       }
+
+       if (cs == null) {
+         throwContentStreamException("The ColorSpace is unknown",
+             ERROR_GRAPHIC_UNEXPECTED_VALUE_FOR_KEY);
+       }
+     }
+
+     if (csHelper == null) {
+       csHelper = ColorSpaceHelperFactory.getColorSpaceHelper(csInlinedBase,
+           documentHandler, ColorSpaceRestriction.ONLY_DEVICE);
+     }
+     List<ValidationError> errors = new ArrayList<ValidationError>();
+     try {
+       if (!csHelper.validate(errors)) {
+         // ---- only the first reported error is propagated
+         ValidationError ve = errors.get(0);
+         throwContentStreamException(ve.getDetails(), ve.getErrorCode());
+       }
+     } catch (ValidationException e) {
+       // ---- IOException(String, Throwable) requires java 6, the cause is
+       // chained by hand to keep the original stack trace.
+       IOException ioe = new IOException(e.getMessage());
+       ioe.initCause(e);
+       throw ioe;
+     }
+   }
+ }
+
+ /**
+  * This method validates if the color operator can be used with the ICC
+  * profile exposed by the document handler.
+  * <UL>
+  * <li>"rg" and "RG" need a RGB profile
+  * <li>"k" and "K" need a CMYK profile
+  * <li>"g", "G" and the fill operators ("f", "F", "f*", "B", "B*", "b", "b*")
+  * need a profile, whatever its color space
+  * </UL>
+  * Any other operation is accepted without check.
+  *
+  * @param operation
+  *          the name of the operator to check
+  * @throws ContentStreamException
+  *           if the operator can't be used with the current profile
+  */
+ protected void checkColorOperators(String operation)
+     throws ContentStreamException {
+   boolean rgbOperator = "rg".equals(operation) || "RG".equals(operation);
+   boolean cmykOperator = "k".equals(operation) || "K".equals(operation);
+   boolean grayOperator = "g".equals(operation) || "G".equals(operation);
+   boolean fillOperator = "f".equals(operation) || "F".equals(operation)
+       || "f*".equals(operation) || "B".equals(operation)
+       || "B*".equals(operation) || "b".equals(operation)
+       || "b*".equals(operation);
+
+   if (rgbOperator) {
+     ICCProfileWrapper profile = documentHandler.getIccProfileWrapper();
+     if (profile == null || !profile.isRGBColorSpace()) {
+       throwContentStreamException("The operator \"" + operation
+           + "\" can't be used with CMYK Profile",
+           ERROR_GRAPHIC_INVALID_COLOR_SPACE_RGB);
+     }
+   } else if (cmykOperator) {
+     ICCProfileWrapper profile = documentHandler.getIccProfileWrapper();
+     if (profile == null || !profile.isCMYKColorSpace()) {
+       throwContentStreamException("The operator \"" + operation
+           + "\" can't be used with RGB Profile",
+           ERROR_GRAPHIC_INVALID_COLOR_SPACE_CMYK);
+     }
+   } else if (grayOperator || fillOperator) {
+     // ---- Gray is possible with RGB and CMYK color space, and the default
+     // fill color needs an OutputIntent: only the profile presence matters.
+     if (documentHandler.getIccProfileWrapper() == null) {
+       throwContentStreamException("The operator \"" + operation
+           + "\" can't be used without Color Profile",
+           ERROR_GRAPHIC_INVALID_COLOR_SPACE_MISSING);
+     }
+   }
+ }
+
+ /**
+  * This method validates if the ColorSpace used as operand is consistent with
+  * the color space defined in OutputIntent dictionaries. Only the "CS" and
+  * "cs" operators are checked, any other operator is ignored.
+  *
+  * @param operator
+  *          the operator being processed
+  * @param arguments
+  *          the operands, the first one is the color space name given as a
+  *          String, a COSString or a COSName
+  * @throws IOException
+  *           a ContentStreamException if the operand is missing, has an
+  *           unexpected type, names an unknown color space or fails the
+  *           validation; an IOException caused by the ValidationException if
+  *           the color space helper raises one
+  */
+ protected void checkSetColorSpaceOperators(PDFOperator operator,
+     List<?> arguments) throws IOException {
+   if (!("CS".equals(operator.getOperation()) || "cs".equals(operator
+       .getOperation()))) {
+     return;
+   }
+
+   // ---- A missing operand is handled as a null value and rejected below as
+   // an operand of unexpected type.
+   Object operand = (arguments == null || arguments.isEmpty()) ? null : arguments.get(0);
+
+   String colorSpaceName = null;
+   if (operand instanceof String) {
+     colorSpaceName = (String) operand;
+   } else if (operand instanceof COSString) {
+     // NOTE(review): relies on COSString.toString() returning the bare
+     // string value - confirm against the PDFBox version in use.
+     colorSpaceName = ((COSString) operand).toString();
+   } else if (operand instanceof COSName) {
+     colorSpaceName = ((COSName) operand).getName();
+   } else {
+     throwContentStreamException("The operand doesn't have the expected type",
+         ERROR_GRAPHIC_UNEXPECTED_VALUE_FOR_KEY);
+   }
+
+   ColorSpaceHelper csHelper = null;
+   ColorSpaces cs = null;
+   try {
+     cs = ColorSpaces.valueOf(colorSpaceName);
+   } catch (IllegalArgumentException e) {
+     // ---- The color space is unknown.
+     // ---- Try to access the resources dictionary, the color space can be a
+     // reference.
+     PDColorSpace pdCS = (PDColorSpace) this.getColorSpaces().get(
+         colorSpaceName);
+     if (pdCS != null) {
+       cs = ColorSpaces.valueOf(pdCS.getName());
+       csHelper = ColorSpaceHelperFactory.getColorSpaceHelper(pdCS,
+           documentHandler, ColorSpaceRestriction.NO_RESTRICTION);
+     }
+   }
+
+   if (cs == null) {
+     throwContentStreamException("The ColorSpace is unknown",
+         ERROR_GRAPHIC_UNEXPECTED_VALUE_FOR_KEY);
+   }
+
+   if (csHelper == null) {
+     csHelper = ColorSpaceHelperFactory.getColorSpaceHelper(COSName
+         .getPDFName(colorSpaceName), documentHandler,
+         ColorSpaceRestriction.NO_RESTRICTION);
+   }
+
+   List<ValidationError> errors = new ArrayList<ValidationError>();
+   try {
+     if (!csHelper.validate(errors)) {
+       // ---- only the first reported error is propagated
+       ValidationError ve = errors.get(0);
+       throwContentStreamException(ve.getDetails(), ve.getErrorCode());
+     }
+   } catch (ValidationException e) {
+     // ---- IOException(String, Throwable) requires java 6, the cause is
+     // chained by hand to keep the original stack trace.
+     IOException ioe = new IOException(e.getMessage());
+     ioe.initCause(e);
+     throw ioe;
+   }
+ }
+
+ /**
+ * Build a ContentStreamException using the given parameters
+ *
+ * @param msg
+ * exception details
+ * @param errorCode
+ * the error code.
+ * @throws ContentStreamException
+ */
+ protected void throwContentStreamException(String msg, String errorCode)
+ throws ContentStreamException {
+ ContentStreamException cex = new ContentStreamException(msg);
+ cex.setValidationError(errorCode);
+ throw cex;
+ }
+}
Propchange: pdfbox/trunk/preflight/src/main/java/org/apache/padaf/preflight/utils/ContentStreamEngine.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: pdfbox/trunk/preflight/src/main/java/org/apache/padaf/preflight/utils/FilterHelper.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/main/java/org/apache/padaf/preflight/utils/FilterHelper.java?rev=1150373&view=auto
==============================================================================
--- pdfbox/trunk/preflight/src/main/java/org/apache/padaf/preflight/utils/FilterHelper.java (added)
+++ pdfbox/trunk/preflight/src/main/java/org/apache/padaf/preflight/utils/FilterHelper.java Sun Jul 24 14:02:12 2011
@@ -0,0 +1,113 @@
+/*****************************************************************************
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ ****************************************************************************/
+
+package org.apache.padaf.preflight.utils;
+
+import static org.apache.padaf.preflight.ValidationConstants.ERROR_SYNTAX_STREAM_INVALID_FILTER;
+import static org.apache.padaf.preflight.ValidationConstants.ERROR_SYNTAX_STREAM_UNDEFINED_FILTER;
+import static org.apache.padaf.preflight.ValidationConstants.INLINE_DICTIONARY_VALUE_FILTER_ASCII_85;
+import static org.apache.padaf.preflight.ValidationConstants.INLINE_DICTIONARY_VALUE_FILTER_ASCII_HEX;
+import static org.apache.padaf.preflight.ValidationConstants.INLINE_DICTIONARY_VALUE_FILTER_CCITTFF;
+import static org.apache.padaf.preflight.ValidationConstants.INLINE_DICTIONARY_VALUE_FILTER_DCT;
+import static org.apache.padaf.preflight.ValidationConstants.INLINE_DICTIONARY_VALUE_FILTER_FLATE_DECODE;
+import static org.apache.padaf.preflight.ValidationConstants.INLINE_DICTIONARY_VALUE_FILTER_LZW;
+import static org.apache.padaf.preflight.ValidationConstants.INLINE_DICTIONARY_VALUE_FILTER_RUN;
+import static org.apache.padaf.preflight.ValidationConstants.STREAM_DICTIONARY_VALUE_FILTER_ASCII_85;
+import static org.apache.padaf.preflight.ValidationConstants.STREAM_DICTIONARY_VALUE_FILTER_ASCII_HEX;
+import static org.apache.padaf.preflight.ValidationConstants.STREAM_DICTIONARY_VALUE_FILTER_CCITTFF;
+import static org.apache.padaf.preflight.ValidationConstants.STREAM_DICTIONARY_VALUE_FILTER_DCT;
+import static org.apache.padaf.preflight.ValidationConstants.STREAM_DICTIONARY_VALUE_FILTER_FLATE_DECODE;
+import static org.apache.padaf.preflight.ValidationConstants.STREAM_DICTIONARY_VALUE_FILTER_JBIG;
+import static org.apache.padaf.preflight.ValidationConstants.STREAM_DICTIONARY_VALUE_FILTER_LZW;
+import static org.apache.padaf.preflight.ValidationConstants.STREAM_DICTIONARY_VALUE_FILTER_RUN;
+
+import java.util.List;
+
+import org.apache.padaf.preflight.ValidationResult.ValidationError;
+
+
+/**
+ * Helper used to check whether a stream filter is authorized in a PDF/A file.
+ */
+public class FilterHelper {
+
+ /**
+  * Filters accepted by {@link #isAuthorizedFilter(String)}: the stream
+  * dictionary values followed by the inline image dictionary values.
+  */
+ private static final String[] DEFINED_FILTERS = {
+     STREAM_DICTIONARY_VALUE_FILTER_FLATE_DECODE,
+     STREAM_DICTIONARY_VALUE_FILTER_ASCII_HEX,
+     STREAM_DICTIONARY_VALUE_FILTER_ASCII_85,
+     STREAM_DICTIONARY_VALUE_FILTER_CCITTFF,
+     STREAM_DICTIONARY_VALUE_FILTER_DCT,
+     STREAM_DICTIONARY_VALUE_FILTER_JBIG,
+     STREAM_DICTIONARY_VALUE_FILTER_RUN,
+     INLINE_DICTIONARY_VALUE_FILTER_FLATE_DECODE,
+     INLINE_DICTIONARY_VALUE_FILTER_ASCII_HEX,
+     INLINE_DICTIONARY_VALUE_FILTER_ASCII_85,
+     INLINE_DICTIONARY_VALUE_FILTER_CCITTFF,
+     INLINE_DICTIONARY_VALUE_FILTER_DCT,
+     INLINE_DICTIONARY_VALUE_FILTER_RUN
+ };
+
+ /**
+  * This method checks if the filter is authorized for a PDF/A file.
+  * According to the PDF/A-1 specification, only the LZW filter is forbidden due to
+  * Copyright compatibility. Because PDF/A is based on the PDF 1.4 specification,
+  * all filters that aren't declared in the PDF Reference Third Edition are rejected.
+  * When the filter is rejected, a ValidationError is appended to the given list.
+  *
+  * @param filter the filter to check
+  * @param errors the list of validation errors, updated if the filter is rejected
+  * @return true if the filter is authorized, false otherwise.
+  */
+ public static boolean isAuthorizedFilter(String filter, List<ValidationError> errors) {
+   String errorCode = isAuthorizedFilter(filter);
+   if (errorCode == null) {
+     return true;
+   }
+
+   if ( ERROR_SYNTAX_STREAM_INVALID_FILTER.equals(errorCode) ) {
+     // --- LZW is forbidden.
+     errors.add(new ValidationError(ERROR_SYNTAX_STREAM_INVALID_FILTER, "LZWDecode is forbidden"));
+   } else {
+     errors.add(new ValidationError(ERROR_SYNTAX_STREAM_UNDEFINED_FILTER, "This filter isn't defined in the PDF Reference Third Edition : "+filter));
+   }
+   return false;
+ }
+
+ /**
+  * This method checks if the filter is authorized for a PDF/A file.
+  * According to the PDF/A-1 specification, only the LZW filter is forbidden due to
+  * Copyright compatibility. Because PDF/A is based on the PDF 1.4 specification,
+  * all filters that aren't declared in the PDF Reference Third Edition are rejected.
+  * A null filter is accepted.
+  *
+  * @param filter the filter to check, may be null
+  * @return null if the validation succeeds, the error code if the validation fails
+  */
+ public static String isAuthorizedFilter(String filter) {
+   if (filter == null) {
+     return null;
+   }
+
+   // --- LZW is forbidden.
+   if (STREAM_DICTIONARY_VALUE_FILTER_LZW.equals(filter) || INLINE_DICTIONARY_VALUE_FILTER_LZW.equals(filter) ) {
+     return ERROR_SYNTAX_STREAM_INVALID_FILTER;
+   }
+
+   // --- Filters declared in the PDF Reference for PDF 1.4
+   // --- Other Filters are considered as invalid to avoid inconsistent behaviour
+   for (String definedFilter : DEFINED_FILTERS) {
+     if (definedFilter.equals(filter)) {
+       return null;
+     }
+   }
+   return ERROR_SYNTAX_STREAM_UNDEFINED_FILTER;
+ }
+}
Propchange: pdfbox/trunk/preflight/src/main/java/org/apache/padaf/preflight/utils/FilterHelper.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: pdfbox/trunk/preflight/src/main/java/org/apache/padaf/preflight/utils/PdfElementParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/main/java/org/apache/padaf/preflight/utils/PdfElementParser.java?rev=1150373&view=auto
==============================================================================
--- pdfbox/trunk/preflight/src/main/java/org/apache/padaf/preflight/utils/PdfElementParser.java (added)
+++ pdfbox/trunk/preflight/src/main/java/org/apache/padaf/preflight/utils/PdfElementParser.java Sun Jul 24 14:02:12 2011
@@ -0,0 +1,70 @@
+/*****************************************************************************
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ ****************************************************************************/
+
+package org.apache.padaf.preflight.utils;
+
+import java.io.IOException;
+
+import org.apache.pdfbox.cos.COSDictionary;
+import org.apache.pdfbox.cos.COSDocument;
+import org.apache.pdfbox.pdfparser.BaseParser;
+
+/**
+ * This class is a tool to parse a byte array as a COS object (COSDictionary).
+ */
+public class PdfElementParser extends BaseParser {
+
+ /**
+ * Create the PdfElementParser object.
+ *
+ * @param cd
+ * a COSDocument which will be used to parse the byte array
+ * @param input
+ * the byte array to parse
+ * @throws IOException
+ * declared by the BaseParser constructor this one delegates to
+ */
+ public PdfElementParser(COSDocument cd, byte[] input) throws IOException {
+ super(input);
+ this.document = cd;
+ }
+
+ /**
+ * Parse the input byte array of the constructor call as a COSDictionary.
+ *
+ * @return a COSDictionary if the parsing succeeds.
+ * @throws IOException
+ * If the byte array isn't a COSDictionary or if there is an error
+ * while parsing the stream
+ */
+ public COSDictionary parseAsDictionary() throws IOException {
+ return parseCOSDictionary();
+ }
+
+ /**
+ * Return the COSDocument used to create this object.
+ *
+ * @return the COSDocument given to the constructor
+ */
+ public COSDocument getDocument() {
+ return this.document;
+ }
+
+}
Propchange: pdfbox/trunk/preflight/src/main/java/org/apache/padaf/preflight/utils/PdfElementParser.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: pdfbox/trunk/preflight/src/main/java/org/apache/padaf/preflight/utils/RenderingIntents.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/main/java/org/apache/padaf/preflight/utils/RenderingIntents.java?rev=1150373&view=auto
==============================================================================
--- pdfbox/trunk/preflight/src/main/java/org/apache/padaf/preflight/utils/RenderingIntents.java (added)
+++ pdfbox/trunk/preflight/src/main/java/org/apache/padaf/preflight/utils/RenderingIntents.java Sun Jul 24 14:02:12 2011
@@ -0,0 +1,59 @@
+/*****************************************************************************
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ ****************************************************************************/
+
+package org.apache.padaf.preflight.utils;
+
+import static org.apache.padaf.preflight.ValidationConstants.RENDERING_INTENT_ABS_COLOR;
+import static org.apache.padaf.preflight.ValidationConstants.RENDERING_INTENT_PERCEPTUAL;
+import static org.apache.padaf.preflight.ValidationConstants.RENDERING_INTENT_REL_COLOR;
+import static org.apache.padaf.preflight.ValidationConstants.RENDERING_INTENT_SATURATION;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * This class contains a static list of RenderingIntent values to allow an easy
+ * RenderingIntent value validation. Here is the content of the RenderingIntent
+ * list :
+ * <UL>
+ * <li>Perceptual
+ * <li>Saturation
+ * <li>AbsoluteColorimetric
+ * <li>RelativeColorimetric
+ * </UL>
+ */
+public class RenderingIntents {
+ /** Read-only list of the accepted rendering intent values, assigned once by the static initializer. */
+ private static final List<String> RENDERING_INTENTS;
+
+ static {
+   List<String> al = new ArrayList<String>(4);
+   al.add(RENDERING_INTENT_REL_COLOR);
+   al.add(RENDERING_INTENT_ABS_COLOR);
+   al.add(RENDERING_INTENT_PERCEPTUAL);
+   al.add(RENDERING_INTENT_SATURATION);
+   RENDERING_INTENTS = Collections.unmodifiableList(al);
+ }
+
+ /**
+  * Check if the given value is a known rendering intent.
+  * The list holds String values, so an argument of any other type never
+  * matches.
+  *
+  * @param riArg
+  *          the rendering intent value to look for
+  * @return true if the value belongs to the rendering intent list, false
+  *         otherwise
+  */
+ public static boolean contains(Object riArg) {
+   return RENDERING_INTENTS.contains(riArg);
+ }
+}
Propchange: pdfbox/trunk/preflight/src/main/java/org/apache/padaf/preflight/utils/RenderingIntents.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: pdfbox/trunk/preflight/src/main/java/org/apache/padaf/preflight/xmp/PDFAIdentificationValidation.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/main/java/org/apache/padaf/preflight/xmp/PDFAIdentificationValidation.java?rev=1150373&view=auto
==============================================================================
--- pdfbox/trunk/preflight/src/main/java/org/apache/padaf/preflight/xmp/PDFAIdentificationValidation.java (added)
+++ pdfbox/trunk/preflight/src/main/java/org/apache/padaf/preflight/xmp/PDFAIdentificationValidation.java Sun Jul 24 14:02:12 2011
@@ -0,0 +1,109 @@
+/*****************************************************************************
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ ****************************************************************************/
+
+package org.apache.padaf.preflight.xmp;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.padaf.preflight.ValidationConstants;
+import org.apache.padaf.preflight.ValidationException;
+import org.apache.padaf.preflight.ValidationResult.ValidationError;
+import org.apache.padaf.xmpbox.XMPMetadata;
+import org.apache.padaf.xmpbox.schema.PDFAIdentificationSchema;
+import org.apache.padaf.xmpbox.schema.XMPBasicSchema;
+
+
+/**
+ * Class which check if PDF/A Identification Schema contains good information
+ *
+ * @author Germain Costenobel
+ *
+ */
+public class PDFAIdentificationValidation {
+
+ /**
+ * Check if PDFAIdentification is valid
+ *
+ * @param document
+ * the PDF Document
+ * @param metadata
+ * the XMP MetaData
+ * @return List of validation errors
+ * @throws ValidationException
+ */
+ public List<ValidationError> validatePDFAIdentifer(XMPMetadata metadata)
+ throws ValidationException {
+ List<ValidationError> ve = new ArrayList<ValidationError>();
+ PDFAIdentificationSchema id = metadata.getPDFIdentificationSchema();
+ if (id == null) {
+ ve.add(new ValidationError(ValidationConstants.ERROR_METADATA_PDFA_ID_MISSING));
+ return ve;
+ }
+
+ // According to the PDF/A specification, the prefix must be pdfaid for this schema.
+ if (!id.getPrefix().equals(PDFAIdentificationSchema.IDPREFIX)) {
+ if (metadata.getSchema(PDFAIdentificationSchema.IDPREFIX, XMPBasicSchema.XMPBASICURI) == null) {
+ ve.add(UnexpectedPrefixFoundError(id.getPrefix(),
+ PDFAIdentificationSchema.IDPREFIX, PDFAIdentificationSchema.class.getName()));
+ } else {
+ id = (PDFAIdentificationSchema) metadata.getSchema(
+ PDFAIdentificationSchema.IDPREFIX, PDFAIdentificationSchema.IDURI);
+ }
+ }
+ checkConformanceLevel(ve, id.getConformanceValue());
+ checkPartNumber(ve, id.getPartValue());
+ return ve;
+ }
+
+ /**
+ * Return a validationError formatted when a schema has not the expected
+ * prefix
+ *
+ * @param prefFound
+ * @param prefExpected
+ * @param schema
+ * @return
+ */
+ protected ValidationError UnexpectedPrefixFoundError(String prefFound,
+ String prefExpected, String schema) {
+ StringBuilder sb = new StringBuilder(80);
+ sb.append(schema).append(" found but prefix used is '").append(prefFound)
+ .append("', prefix '").append(prefExpected).append("' is expected.");
+
+ return new ValidationError(
+ ValidationConstants.ERROR_METADATA_WRONG_NS_PREFIX, sb.toString());
+ }
+
+ protected void checkConformanceLevel(List<ValidationError> ve, String value) {
+ if (!(value.equals("A") || value.equals("B"))) {
+ ve.add(new ValidationError(
+ ValidationConstants.ERROR_METADATA_INVALID_PDFA_CONFORMANCE));
+ }
+ }
+
+ protected void checkPartNumber(List<ValidationError> ve, int value) {
+ if (value != 1) {
+ ve.add(new ValidationError(
+ ValidationConstants.ERROR_METADATA_INVALID_PDFA_VERSION_ID));
+ }
+ }
+}
Propchange: pdfbox/trunk/preflight/src/main/java/org/apache/padaf/preflight/xmp/PDFAIdentificationValidation.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: pdfbox/trunk/preflight/src/main/java/org/apache/padaf/preflight/xmp/RDFAboutAttributeConcordanceValidation.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/main/java/org/apache/padaf/preflight/xmp/RDFAboutAttributeConcordanceValidation.java?rev=1150373&view=auto
==============================================================================
--- pdfbox/trunk/preflight/src/main/java/org/apache/padaf/preflight/xmp/RDFAboutAttributeConcordanceValidation.java (added)
+++ pdfbox/trunk/preflight/src/main/java/org/apache/padaf/preflight/xmp/RDFAboutAttributeConcordanceValidation.java Sun Jul 24 14:02:12 2011
@@ -0,0 +1,97 @@
+/*****************************************************************************
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ ****************************************************************************/
+
+package org.apache.padaf.preflight.xmp;
+
+import java.util.List;
+
+
+import org.apache.padaf.preflight.ValidationException;
+import org.apache.padaf.xmpbox.XMPMetadata;
+import org.apache.padaf.xmpbox.schema.XMPSchema;
+import org.w3c.dom.Element;
+import org.w3c.dom.NodeList;
+
+/**
+ * Verifies that the elements of the XMP schemas all carry the same value for
+ * their rdf:about attribute.
+ *
+ * @author Germain Costenobel
+ *
+ */
+public class RDFAboutAttributeConcordanceValidation {
+
+  /**
+   * Compare the about value of the first schema with the attributes of every
+   * schema element (and of their descendants).
+   *
+   * @param metadata
+   *          the XMP metadata to inspect
+   * @throws DifferentRDFAboutException
+   *           if an element does not carry the reference value
+   * @throws ValidationException
+   *           if the metadata does not contain any schema
+   */
+  public void validateRDFAboutAttributes(XMPMetadata metadata)
+      throws ValidationException, DifferentRDFAboutException {
+
+    List<XMPSchema> allSchemas = metadata.getAllSchemas();
+    if (allSchemas.isEmpty()) {
+      throw new ValidationException(
+          "Schemas not found in the given metadata representation");
+    }
+
+    // The first schema gives the reference value. The presence of the about
+    // attribute on rdf:description has been checked during parsing.
+    String reference = allSchemas.get(0).getAboutValue();
+    for (XMPSchema current : allSchemas) {
+      checkRdfAbout(reference, current.getElement());
+    }
+  }
+
+  /**
+   * Check the given element, then each of its child elements recursively.
+   *
+   * @param about
+   *          the reference value
+   * @param e
+   *          the element to check
+   * @throws DifferentRDFAboutException
+   *           if "rdf:about" or "about" differs from the reference value
+   */
+  private void checkRdfAbout(String about, Element e)
+      throws DifferentRDFAboutException {
+    // TODO check if it need to test the 2 possibilities
+    // NOTE(review): both attribute names are required to match, and the check
+    // is also applied to every descendant element - confirm this is intended.
+    boolean sameValue = e.getAttribute("rdf:about").equals(about)
+        && e.getAttribute("about").equals(about);
+    if (!sameValue) {
+      throw new DifferentRDFAboutException();
+    }
+
+    NodeList childNodes = e.getChildNodes();
+    for (int idx = 0, count = childNodes.getLength(); idx < count; idx++) {
+      if (childNodes.item(idx) instanceof Element) {
+        checkRdfAbout(about, (Element) childNodes.item(idx));
+      }
+    }
+  }
+
+  /**
+   * Exception raised when two elements do not share the same rdf:about value.
+   */
+  public static class DifferentRDFAboutException extends Exception {
+
+    private static final long serialVersionUID = 1L;
+
+    /**
+     * Create the exception with its fixed message.
+     */
+    public DifferentRDFAboutException() {
+      super("all rdf:about in RDF:rdf must have the same value");
+    }
+  }
+}
Propchange: pdfbox/trunk/preflight/src/main/java/org/apache/padaf/preflight/xmp/RDFAboutAttributeConcordanceValidation.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: pdfbox/trunk/preflight/src/main/java/org/apache/padaf/preflight/xmp/SynchronizedMetaDataValidation.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/main/java/org/apache/padaf/preflight/xmp/SynchronizedMetaDataValidation.java?rev=1150373&view=auto
==============================================================================
--- pdfbox/trunk/preflight/src/main/java/org/apache/padaf/preflight/xmp/SynchronizedMetaDataValidation.java (added)
+++ pdfbox/trunk/preflight/src/main/java/org/apache/padaf/preflight/xmp/SynchronizedMetaDataValidation.java Sun Jul 24 14:02:12 2011
@@ -0,0 +1,533 @@
+/*****************************************************************************
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ ****************************************************************************/
+
+package org.apache.padaf.preflight.xmp;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Calendar;
+import java.util.Iterator;
+import java.util.List;
+
+
+import org.apache.padaf.preflight.ValidationConstants;
+import org.apache.padaf.preflight.ValidationException;
+import org.apache.padaf.preflight.ValidationResult.ValidationError;
+import org.apache.padaf.xmpbox.XMPMetadata;
+import org.apache.padaf.xmpbox.parser.DateConverter;
+import org.apache.padaf.xmpbox.schema.AdobePDFSchema;
+import org.apache.padaf.xmpbox.schema.DublinCoreSchema;
+import org.apache.padaf.xmpbox.schema.XMPBasicSchema;
+import org.apache.padaf.xmpbox.type.AbstractField;
+import org.apache.padaf.xmpbox.type.TextType;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.PDDocumentInformation;
+
+/**
+ * Class which checks whether the document information available in a document
+ * is synchronized with the XMP metadata
+ *
+ * @author Germain Costenobel
+ *
+ */
+public class SynchronizedMetaDataValidation {
+
+ /**
+  * Compare the Title of the Document Information dictionary with the title
+  * stored in the Dublin Core schema. Nothing is checked when the dictionary
+  * has no Title.
+  *
+  * @param dico
+  *          Document Information Dictionary
+  * @param dc
+  *          Dublin Core Schema
+  * @param ve
+  *          The list of validation errors
+  */
+ protected void analyzeTitleProperty(PDDocumentInformation dico,
+     DublinCoreSchema dc, List<ValidationError> ve) {
+   String title = dico.getTitle();
+   if (title == null) {
+     return;
+   }
+   if (dc == null) {
+     ve.add(AbsentSchemaMetaDataError("Title", "Dublin Core"));
+     return;
+   }
+   if (dc.getTitle() == null) {
+     ve.add(AbsentXMPPropertyError("Title", "Property is not defined"));
+     return;
+   }
+
+   // The x-default value is the reference when it exists
+   if (dc.getTitleValue("x-default") != null) {
+     if (!dc.getTitleValue("x-default").equals(title)) {
+       ve.add(unsynchronizedMetaDataError("Title"));
+     }
+     return;
+   }
+
+   // No x-default : use the first value found. This is only done to stay
+   // compatible with the many PDF documents which define a title without
+   // lang definition.
+   // REM : MAY we have to delete this option in the future
+   Iterator<AbstractField> values = dc.getTitle().getContainer()
+       .getAllProperties().iterator();
+   if (!values.hasNext()) {
+     ve.add(AbsentXMPPropertyError("Title", "Property is not defined"));
+     return;
+   }
+   AbstractField first = values.next();
+   if (!(first instanceof TextType)) {
+     ve.add(AbsentXMPPropertyError("Title", "Property is badly defined"));
+     return;
+   }
+   if (!((TextType) first).getStringValue().equals(title)) {
+     ve.add(unsynchronizedMetaDataError("Title"));
+   }
+ }
+
+ /**
+  * Compare the Author of the Document Information dictionary with the
+  * dc:creator property of the Dublin Core schema. Nothing is checked when the
+  * dictionary has no Author.
+  *
+  * @param dico
+  *          Document Information Dictionary
+  * @param dc
+  *          Dublin Core Schema
+  * @param ve
+  *          The list of validation errors
+  */
+ protected void analyzeAuthorProperty(PDDocumentInformation dico,
+     DublinCoreSchema dc, List<ValidationError> ve) {
+   String author = dico.getAuthor();
+   if (author == null) {
+     return;
+   }
+   if (dc == null) {
+     ve.add(AbsentSchemaMetaDataError("Author", "Dublin Core"));
+     return;
+   }
+   if (dc.getCreator() == null) {
+     ve.add(AbsentXMPPropertyError("Author",
+         "Property is not defined in XMP Metadata"));
+     return;
+   }
+   // exactly one creator entry is expected
+   if (dc.getCreatorValue().size() != 1) {
+     ve.add(AbsentXMPPropertyError("Author",
+         "In XMP metadata, Author(s) must be represented by a single entry in a text array (dc:creator) "));
+     return;
+   }
+   if (dc.getCreatorValue().get(0) == null) {
+     ve.add(AbsentXMPPropertyError("Author", "Property is defined as null"));
+     return;
+   }
+   if (!dc.getCreatorValue().get(0).equals(author)) {
+     ve.add(unsynchronizedMetaDataError("Author"));
+   }
+ }
+
+ /**
+  * Compare the Subject of the Document Information dictionary with the
+  * x-default dc:description of the Dublin Core schema. Nothing is checked
+  * when the dictionary has no Subject.
+  *
+  * @param dico
+  *          Document Information Dictionary
+  * @param dc
+  *          Dublin Core Schema
+  * @param ve
+  *          The list of validation errors
+  */
+ protected void analyzeSubjectProperty(PDDocumentInformation dico,
+     DublinCoreSchema dc, List<ValidationError> ve) {
+   String subject = dico.getSubject();
+   if (subject == null) {
+     return;
+   }
+   if (dc == null) {
+     ve.add(AbsentSchemaMetaDataError("Subject", "Dublin Core"));
+     return;
+   }
+   // PDF/A Conformance Erratum (2007) specifies XMP Subject
+   // as a Text type embedded in the dc:description["x-default"].
+   if (dc.getDescription() == null) {
+     ve.add(AbsentXMPPropertyError("Subject", "Property is defined as null"));
+     return;
+   }
+   if (dc.getDescriptionValue("x-default") == null) {
+     ve.add(AbsentXMPPropertyError("Subject",
+         "Subject not found in XMP (dc:description[\"x-default\"] not found)"));
+     return;
+   }
+   if (!dc.getDescriptionValue("x-default").equals(subject)) {
+     ve.add(unsynchronizedMetaDataError("Subject"));
+   }
+ }
+
+ /**
+  * Compare the Keywords of the Document Information dictionary with the
+  * keywords of the Adobe PDF schema. Nothing is checked when the dictionary
+  * has no Keywords.
+  *
+  * @param dico
+  *          Document Information Dictionary
+  * @param pdf
+  *          PDF Schema
+  * @param ve
+  *          The list of validation errors
+  */
+ protected void analyzeKeywordsProperty(PDDocumentInformation dico,
+     AdobePDFSchema pdf, List<ValidationError> ve) {
+   String keyword = dico.getKeywords();
+   if (keyword == null) {
+     return;
+   }
+   if (pdf == null) {
+     ve.add(AbsentSchemaMetaDataError("Keywords", "PDF"));
+     return;
+   }
+   if (pdf.getKeywords() == null) {
+     ve.add(AbsentXMPPropertyError("Keywords", "Property is not defined"));
+     return;
+   }
+   if (!pdf.getKeywordsValue().equals(keyword)) {
+     ve.add(unsynchronizedMetaDataError("Keywords"));
+   }
+ }
+
+ /**
+  * Compare the Producer of the Document Information dictionary with the
+  * producer of the Adobe PDF schema. Nothing is checked when the dictionary
+  * has no Producer.
+  *
+  * @param dico
+  *          Document Information Dictionary
+  * @param pdf
+  *          PDF Schema
+  * @param ve
+  *          The list of validation errors
+  */
+ protected void analyzeProducerProperty(PDDocumentInformation dico,
+     AdobePDFSchema pdf, List<ValidationError> ve) {
+   String producer = dico.getProducer();
+   if (producer == null) {
+     return;
+   }
+   if (pdf == null) {
+     ve.add(AbsentSchemaMetaDataError("Producer", "PDF"));
+     return;
+   }
+   if (pdf.getProducer() == null) {
+     ve.add(AbsentXMPPropertyError("Producer", "Property is not defined"));
+     return;
+   }
+   if (!pdf.getProducerValue().equals(producer)) {
+     ve.add(unsynchronizedMetaDataError("Producer"));
+   }
+ }
+
+ /**
+  * Analyze if the creator tool embedded in Document Information dictionary and
+  * in XMP properties are synchronized. Nothing is checked when the dictionary
+  * has no Creator entry.
+  *
+  * @param dico
+  *          Document Information Dictionary
+  * @param xmp
+  *          XMP Basic Schema
+  * @param ve
+  *          The list of validation errors
+  *
+  */
+ protected void analyzeCreatorToolProperty(PDDocumentInformation dico,
+     XMPBasicSchema xmp, List<ValidationError> ve) {
+   String creatorTool = dico.getCreator();
+   if (creatorTool != null) {
+     if (xmp != null) {
+       if (xmp.getCreatorTool() == null) {
+         ve.add(AbsentXMPPropertyError("CreatorTool",
+             "Property is not defined"));
+       } else {
+         if (!xmp.getCreatorToolValue().equals(creatorTool)) {
+           ve.add(unsynchronizedMetaDataError("CreatorTool"));
+         }
+       }
+     } else {
+       // The missing schema is the XMP Basic one (not the Adobe PDF schema),
+       // named the same way as in the creation and modification date checks.
+       ve.add(AbsentSchemaMetaDataError("CreatorTool", "Basic XMP"));
+     }
+   }
+
+ }
+
+ /**
+  * Compare the CreationDate of the Document Information dictionary with the
+  * create date of the XMP Basic schema, using their ISO 8601 representation.
+  * Nothing is checked when the dictionary has no CreationDate.
+  *
+  * @param dico
+  *          Document Information Dictionary
+  * @param xmp
+  *          XMP Basic Schema
+  * @param ve
+  *          The list of validation errors
+  * @throws ValidationException
+  *           if the CreationDate of the dictionary can't be read as a date
+  */
+ protected void analyzeCreationDateProperty(PDDocumentInformation dico,
+     XMPBasicSchema xmp, List<ValidationError> ve) throws ValidationException {
+   Calendar infoDate;
+   try {
+     infoDate = dico.getCreationDate();
+   } catch (IOException e) {
+     // If there is an error while converting this property to a date
+     throw formatAccessException("Document Information", "CreationDate", e);
+   }
+   if (infoDate == null) {
+     return;
+   }
+   if (xmp == null) {
+     ve.add(AbsentSchemaMetaDataError("CreationDate", "Basic XMP"));
+     return;
+   }
+
+   Calendar xmpDate = xmp.getCreateDateValue();
+   if (xmpDate == null) {
+     ve.add(AbsentXMPPropertyError("CreationDate", "Property is not defined"));
+     return;
+   }
+   if (!DateConverter.toISO8601(xmpDate).equals(
+       DateConverter.toISO8601(infoDate))) {
+     ve.add(unsynchronizedMetaDataError("CreationDate"));
+   }
+ }
+
+ /**
+  * Compare the ModDate of the Document Information dictionary with the modify
+  * date of the XMP Basic schema, using their ISO 8601 representation. Nothing
+  * is checked when the dictionary has no modification date.
+  *
+  * @param dico
+  *          Document Information Dictionary
+  * @param xmp
+  *          XMP Basic Schema
+  * @param ve
+  *          The list of validation errors
+  * @throws ValidationException
+  *           if an IOException occurs while the dates are processed
+  */
+ protected void analyzeModifyDateProperty(PDDocumentInformation dico,
+     XMPBasicSchema xmp, List<ValidationError> ve) throws ValidationException {
+   try {
+     Calendar infoDate = dico.getModificationDate();
+     if (infoDate == null) {
+       return;
+     }
+     if (xmp == null) {
+       ve.add(AbsentSchemaMetaDataError("ModifyDate", "Basic XMP"));
+       return;
+     }
+
+     Calendar xmpDate = xmp.getModifyDateValue();
+     if (xmpDate == null) {
+       ve.add(AbsentXMPPropertyError("ModifyDate", "Property is not defined"));
+       return;
+     }
+     if (!DateConverter.toISO8601(xmpDate).equals(
+         DateConverter.toISO8601(infoDate))) {
+       ve.add(unsynchronizedMetaDataError("ModificationDate"));
+     }
+   } catch (IOException e) {
+     // If there is an error while converting this property to a date
+     throw formatAccessException("Document Information", "ModifyDate", e);
+   }
+ }
+
+ /**
+  * Run every synchronization check between the Document Information
+  * dictionary of the document and the XMP metadata, and collect the errors.
+  *
+  * @param document
+  *          the PDF Document
+  * @param metadata
+  *          the XMP MetaData
+  * @return List of validation errors
+  * @throws ValidationException
+  *           if the document or the metadata is null, or if a date of the
+  *           dictionary can't be read
+  */
+ public List<ValidationError> validateMetadataSynchronization(PDDocument document, XMPMetadata metadata)
+     throws ValidationException {
+   List<ValidationError> errors = new ArrayList<ValidationError>();
+
+   if (document == null) {
+     throw new ValidationException("Document provided is null");
+   }
+   PDDocumentInformation info = document.getDocumentInformation();
+   if (metadata == null) {
+     throw new ValidationException("Metadata provided are null");
+   }
+
+   // Dublin Core : TITLE, AUTHOR, SUBJECT
+   DublinCoreSchema dublinCore = metadata.getDublinCoreSchema();
+   analyzeTitleProperty(info, dublinCore, errors);
+   analyzeAuthorProperty(info, dublinCore, errors);
+   analyzeSubjectProperty(info, dublinCore, errors);
+
+   // Adobe PDF : KEYWORDS, PRODUCER
+   AdobePDFSchema adobePdf = metadata.getAdobePDFSchema();
+   analyzeKeywordsProperty(info, adobePdf, errors);
+   analyzeProducerProperty(info, adobePdf, errors);
+
+   // XMP Basic : CREATOR TOOL, CREATION DATE, MODIFY DATE
+   XMPBasicSchema xmpBasic = metadata.getXMPBasicSchema();
+   analyzeCreatorToolProperty(info, xmpBasic, errors);
+   analyzeCreationDateProperty(info, xmpBasic, errors);
+   analyzeModifyDateProperty(info, xmpBasic, errors);
+
+   return errors;
+ }
+
+ /**
+  * Build the validation error used when a schema is declared with another
+  * prefix than the expected one.
+  *
+  * @param prefFound
+  *          the prefix used by the schema
+  * @param prefExpected
+  *          the prefix which should have been used
+  * @param schema
+  *          the name of the concerned schema
+  * @return the generated validation error
+  */
+ protected ValidationError UnexpectedPrefixFoundError(String prefFound,
+     String prefExpected, String schema) {
+   String message = schema + " found but prefix used is '" + prefFound
+       + "', prefix '" + prefExpected + "' is expected.";
+   return new ValidationError(
+       ValidationConstants.ERROR_METADATA_WRONG_NS_PREFIX, message);
+ }
+
+ /**
+  * Build the exception thrown when a metadata property can't be accessed.
+  *
+  * @param type
+  *          type of property (Document Info or XMP)
+  * @param target
+  *          the name of the metadata
+  * @param cause
+  *          the raised exception, kept as the cause
+  * @return the generated exception
+  */
+ protected ValidationException formatAccessException(String type,
+     String target, Throwable cause) {
+   String message = "Cannot treat " + type + " " + target + " property";
+   return new ValidationException(message, cause);
+ }
+
+ /**
+  * Build the exception thrown when a metadata schema can't be accessed.
+  *
+  * @param target
+  *          the name of the schema
+  * @param cause
+  *          the raised exception, kept as the cause
+  * @return the generated exception
+  */
+ protected ValidationException SchemaAccessException(String target,
+     Throwable cause) {
+   String message = "Cannot access to the " + target + " schema";
+   return new ValidationException(message, cause);
+ }
+
+ /**
+  * Build the validation error used when a value of the document and the
+  * matching XMP value are different.
+  *
+  * @param target
+  *          the concerned property
+  * @return the generated validation error
+  */
+ protected ValidationError unsynchronizedMetaDataError(String target) {
+   String message = target
+       + " present in the document catalog dictionary doesn't match with XMP information";
+   return new ValidationError(ValidationConstants.ERROR_METADATA_MISMATCH,
+       message);
+ }
+
+ /**
+  * Build the validation error used when the XMP schema which should hold a
+  * property isn't declared.
+  *
+  * @param target
+  *          the concerned property
+  * @param schema
+  *          the XMP schema which can't be found
+  * @return the generated validation error
+  */
+ protected ValidationError AbsentSchemaMetaDataError(String target,
+     String schema) {
+   String message = target
+       + " present in the document catalog dictionary can't be found in XMP information ("
+       + schema + " schema not declared)";
+   return new ValidationError(ValidationConstants.ERROR_METADATA_MISMATCH,
+       message);
+ }
+
+ /**
+  * Build the validation error used when a property of the document has no
+  * usable counterpart in the XMP metadata.
+  *
+  * @param target
+  *          the concerned property
+  * @param details
+  *          comments about the XMP property
+  * @return the generated validation error
+  */
+ protected ValidationError AbsentXMPPropertyError(String target, String details) {
+   String message = target
+       + " present in the document catalog dictionary can't be found in XMP information ("
+       + details + ")";
+   return new ValidationError(ValidationConstants.ERROR_METADATA_MISMATCH,
+       message);
+ }
+}
Propchange: pdfbox/trunk/preflight/src/main/java/org/apache/padaf/preflight/xmp/SynchronizedMetaDataValidation.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: pdfbox/trunk/preflight/src/main/java/org/apache/padaf/preflight/xmp/XpacketParsingException.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/main/java/org/apache/padaf/preflight/xmp/XpacketParsingException.java?rev=1150373&view=auto
==============================================================================
--- pdfbox/trunk/preflight/src/main/java/org/apache/padaf/preflight/xmp/XpacketParsingException.java (added)
+++ pdfbox/trunk/preflight/src/main/java/org/apache/padaf/preflight/xmp/XpacketParsingException.java Sun Jul 24 14:02:12 2011
@@ -0,0 +1,70 @@
+/*****************************************************************************
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ ****************************************************************************/
+
+package org.apache.padaf.preflight.xmp;
+
+import org.apache.padaf.preflight.ValidationResult.ValidationError;
+
+/**
+ * This exception is raised when the parsing of the xpacket fails
+ *
+ */
+public class XpacketParsingException extends Exception {
+
+ /**
+ *
+ */
+ private static final long serialVersionUID = 1L;
+
+ protected ValidationError error;
+
+ /**
+ * Constructor
+ *
+ * @param message
+ * the message
+ * @param cause
+ * the cause
+ */
+ public XpacketParsingException(String message, Throwable cause) {
+ super(message, cause);
+ }
+
+ /**
+ * Constructor
+ *
+ * @param message
+ * the message
+ */
+ public XpacketParsingException(String message) {
+ super(message);
+ }
+
+ public XpacketParsingException(String message, ValidationError error) {
+ super(message);
+ this.error = error;
+ }
+
+ public ValidationError getError() {
+ return error;
+ }
+
+}
Propchange: pdfbox/trunk/preflight/src/main/java/org/apache/padaf/preflight/xmp/XpacketParsingException.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: pdfbox/trunk/preflight/src/main/javacc/pdf_extractor.jj
URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/main/javacc/pdf_extractor.jj?rev=1150373&view=auto
==============================================================================
--- pdfbox/trunk/preflight/src/main/javacc/pdf_extractor.jj (added)
+++ pdfbox/trunk/preflight/src/main/javacc/pdf_extractor.jj Sun Jul 24 14:02:12 2011
@@ -0,0 +1,119 @@
+//-------------------------------------------------------------------------------
+//
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+//
+//-------------------------------------------------------------------------------
+
+options {
+ JAVA_UNICODE_ESCAPE=false;
+ STATIC=false;
+ DEBUG_PARSER=false;
+ DEBUG_TOKEN_MANAGER=false;
+ BUILD_PARSER=false;
+}
+
+// Parser is useless.
+// Only the TokenManager is used to get useful objects (as Trailers) in the
+// order of appearance
+PARSER_BEGIN(Extractor)
+
+package org.apache.padaf.preflight.javacc.extractor;
+
+import java.io.*;
+import java.util.ArrayList;
+import java.util.List;
+
+ public class Extractor {}
+ PARSER_END(Extractor)
+
+ TOKEN_MGR_DECLS: {
+   // Text of each trailer, stored by the END_TRAILER action in order of appearance
+   private List<String> lTrailers = new ArrayList<String>(1);
+   // Buffer of the trailer being read, created by the START_TRAILER action
+   private StringBuilder aTrailer = null;
+   // Set once parse() has consumed the whole input
+   private boolean alreadyParsed = false;
+
+   /**
+    * Debug entry point : print each token image of the file given as first
+    * argument on stdout, then each extracted trailer on stderr.
+    */
+   public static void main (String [] args) throws FileNotFoundException {
+     FileInputStream sr = new FileInputStream(args[0]);
+     try {
+       SimpleCharStream scs = new SimpleCharStream(sr);
+       ExtractorTokenManager extractor = new ExtractorTokenManager(scs);
+
+       for (Token t = extractor.getNextToken();t.kind != EOF;t = extractor.getNextToken()) {
+         System.out.println(t.image);
+       }
+
+       for (String s : extractor.lTrailers){
+         System.err.println(s);
+       }
+     } finally {
+       // always release the file handle, even if the tokenizing fails
+       try {
+         sr.close();
+       } catch (IOException ignored) {
+         // best effort : nothing useful can be done if the close fails
+       }
+     }
+   }
+
+   /**
+    * Consume every token up to EOF so that the lexical actions collect the
+    * trailers. Does nothing if the input has already been parsed.
+    */
+   public void parse() {
+     if (alreadyParsed) { return; }
+     // tokens are discarded, only the side effects of the lexical actions matter
+     while (getNextToken().kind != EOF) {
+     }
+     alreadyParsed = true;
+   }
+
+   /**
+    * @return the list of trailers collected so far (the internal list itself,
+    *         not a copy)
+    */
+   public List<String> getAllTrailers() {
+     return this.lTrailers;
+   }
+}
+
+// --------------------------------------------------
+// ---- COMMON TOKENS
+// --------------------------------------------------
+
+<DEFAULT, WithinTrailer> TOKEN :
+{
+ < PDF_EOF : "%%EOF" > |
+ < EOL : (<CR>|<LF>|(<CR><LF>)) > |
+ <#CR : "\r"> |
+ <#LF : "\n">
+}
+
+// ---- Even if the "ISO 19005 App Notes" says that only PDF-1.[1-4] should be allowed,
+// the "ISO 19005-1:2005" says : "The version number in the header of a PDF file nor
+// the value of the Version key in the document catalog dictionary shall be used in
+// determining whether a file is in accordance with this part of ISO 19005 "
+TOKEN :
+{
+ < PERCENT: "%" > |
+ < PDFA_HEADER: "PDF-1."["1"-"6"] >
+}
+
+MORE :
+{
+ < ~[] >
+}
+
+// --------------------------------------------------
+// ---- TRAILER TOKENS
+// --------------------------------------------------
+// DEFAULT state : the "trailer" keyword between two EOL starts a new trailer
+// buffer and switches the token manager to the WithinTrailer state.
+TOKEN :
+{
+ < START_TRAILER : (<EOL>)"trailer"(<EOL>) > { aTrailer = new StringBuilder(50);} : WithinTrailer
+}
+
+// WithinTrailer state : any single character is accumulated; the action
+// appends the last character of the current image to the trailer buffer.
+<WithinTrailer> MORE :
+{
+ <TRAILER_DICT : ~[] > { aTrailer.append(image.charAt(image.length()-1));}
+}
+
+// WithinTrailer state : "startxref" followed by an EOL ends the trailer. The
+// buffer is stored in lTrailers, reset to null, and the state is DEFAULT again.
+<WithinTrailer> TOKEN :
+{
+ < END_TRAILER : "startxref"(<EOL>) > { lTrailers.add(aTrailer.toString()); aTrailer = null; } : DEFAULT
+}
\ No newline at end of file
Added: pdfbox/trunk/preflight/src/main/javacc/pdf_full_grammar.jj
URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/main/javacc/pdf_full_grammar.jj?rev=1150373&view=auto
==============================================================================
--- pdfbox/trunk/preflight/src/main/javacc/pdf_full_grammar.jj (added)
+++ pdfbox/trunk/preflight/src/main/javacc/pdf_full_grammar.jj Sun Jul 24 14:02:12 2011
@@ -0,0 +1,491 @@
+//-------------------------------------------------------------------------------
+//
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+//
+//-------------------------------------------------------------------------------
+
+options {
+ JAVA_UNICODE_ESCAPE=false;
+ STATIC=false;
+ DEBUG_PARSER=false;
+ DEBUG_TOKEN_MANAGER=false;
+}
+
+PARSER_BEGIN(PDFParser)
+
+package org.apache.padaf.preflight.javacc;
+
+import java.io.IOException;
+import java.io.InputStream;
+import org.apache.padaf.preflight.HeaderParseException;
+import org.apache.padaf.preflight.BodyParseException;
+import org.apache.padaf.preflight.CrossRefParseException;
+import org.apache.padaf.preflight.TrailerParseException;
+import org.apache.padaf.preflight.PdfParseException;
+
+import static org.apache.padaf.preflight.ValidationConstants.*;
+
+public class PDFParser
+{
+
+ public static boolean parse (InputStream is) throws IOException,ParseException {
+ PDFParser parser = new PDFParser (is);
+ parser.PDF();
+ return true;
+ }
+
+ public static void main (String [] args) {
+ PDFParser parser;
+ String filename = null;
+ long initTime = 0;
+ long parseTime = 0;
+ long startTime = 0;
+ long stopTime = 0;
+ if (args.length == 0)
+ {
+ System.out.println("PDF Parser . . .");
+ parser = new PDFParser(System.in);
+ } else if (args.length == 1)
+ {
+ filename = args[0];
+ System.out.println("PDF Parser : Reading from file " + filename + " . . .");
+ try
+ {
+ startTime = System.currentTimeMillis();
+ parser = new PDFParser(new java.io.FileInputStream(filename));
+ stopTime = System.currentTimeMillis();
+ initTime = stopTime - startTime;
+ } catch (java.io.FileNotFoundException e)
+ {
+ System.out.println("PDF Parser : File " + filename + " not found.");
+ return;
+ }
+ } else
+ {
+ System.out.println("PDF Parser : Usage is one of:");
+ System.out.println(" java PDFParser < inputfile");
+ System.out.println("OR");
+ System.out.println(" java PDFParser inputfile");
+ return;
+ }
+ try
+ {
+ startTime = System.currentTimeMillis();
+
+ parser.PDF();
+
+ stopTime = System.currentTimeMillis();
+ parseTime = stopTime - startTime;
+ System.out.println("PDF Parser ");
+ System.out.print(" PDF Parser parsed " + filename + " successfully in " + (initTime + parseTime) + " ms.");
+ System.out.println(" Init. : " + initTime + " ms / parse time : " + parseTime + " ms");
+ } catch (ParseException e)
+ {
+ e.printStackTrace(System.out);
+ System.out.println("PDF Parser : Encountered errors during parse.");
+ }
+ }
+}
+PARSER_END(PDFParser)
+
+// --------------------------------------------------
+// ---- COMMON TOKENS
+// ---- OTHER_WHITE_SPACE : "\u0000"|"\u0009"|"\u000C" == NULL, HORIZONTAL TAB, FORM FEED
+// --------------------------------------------------
+
+// Whitespace tokens shared by the DEFAULT, WithinTrailer and CrossRefTable states.
+<DEFAULT, WithinTrailer, CrossRefTable> TOKEN :
+{
+ < SPACE : " " > |
+ < OTHER_WHITE_SPACE : "\u0000"|"\u0009"|"\u000C" > |
+ // LF, CR, or the CR LF pair
+ < EOL : "\n"|"\r"|"\r\n">
+}
+
+// --------------------------------------------------
+// ---- HEADER TOKENS
+// ---- Even if the "ISO 19005 App Notes" say that only PDF-1.[1-4] should be allowed,
+// ISO 19005-1:2005 says that neither the version number in the header of a PDF file nor
+// the value of the Version key in the document catalog dictionary shall be used in
+// determining whether a file is in accordance with this part of ISO 19005.
+// --------------------------------------------------
+
+// Header tokens, recognised in the DEFAULT state.
+TOKEN :
+{
+ < PERCENT: "%" > |
+ // Version marker: "PDF-1." followed by a single digit from 1 to 6
+ < PDFA_HEADER: "PDF-1."["1"-"6"] > |
+ // At least four consecutive characters in the range \u0080-\uFFFF
+ < BINARY_TAG : (["\u0080"-"\uFFFF"]){4,} >
+}
+
+
+// --------------------------------------------------
+// ---- BODY / OBJECT TOKENS
+// --------------------------------------------------
+
+// Body-level tokens, recognised in the DEFAULT state.
+TOKEN :
+{
+ // "<letters>" and "</letters>" shapes (ASCII letters only)
+ <HTML_OPEN: "<"(<UPPERLETTER>|<LOWERLETTER>)+">"> |
+ <HTML_CLOSE: "</"(<UPPERLETTER>|<LOWERLETTER>)+">"> |
+ // One or more EOLs, the "endobj" keyword, then exactly one EOL
+ <END_OBJECT: ( < EOL > )+"endobj"( < EOL > )> |
+ // "stream" followed by LF or CR LF (a lone CR is not accepted);
+ // switches the lexer to the WithinStream state
+ <STREAM: "stream"("\n"|"\r\n") > : WithinStream
+}
+
+// Object-level tokens, recognised in the DEFAULT state.
+TOKEN :
+{
+ <OBJ_BOOLEAN : ("true"|"false") > |
+ // Optional sign, then "digits[.digits]" or ".digits"
+ <OBJ_NUMERIC : ("+"|"-")? ( ((<DIGITS>)+ ("."(<DIGITS>)*)? ) | ("."(<DIGITS>)+)) > |
+ // One or more pairs of hexadecimal digits between "<" and ">"
+ <OBJ_STRING_HEX : "<"((<DIGITS>|["a"-"f"]|["A"-"F"]){2})+">"> |
+ // "(" plus every following non-parenthesis character; the rest of the
+ // literal is tokenised in the WithinLIT state
+ <OBJ_STRING_LIT : "("(~[")","("])*> : WithinLIT |
+ <OBJ_ARRAY_START : "[" > |
+ <OBJ_ARRAY_END : "]" > |
+ <OBJ_NAME : "/"(~[" " , "(" , ")" , "[" , "]" , "{" , "}" , "/" , "<" , ">" , "%" , "\t" , "\n" , "\r"])+ > |
+ <OBJ_NULL: "null" > |
+ // Indirect reference "<object number> <generation> R". The generation is "0" or
+ // any number without a leading zero; it previously required at least two
+ // digits, so generations 1 to 9 were rejected.
+ <OBJ_REF : ( ["1"-"9"](<DIGITS>)*(<SPACE>|<OTHER_WHITE_SPACE>)("0"|["1"-"9"](<DIGITS>)*)(<SPACE>|<OTHER_WHITE_SPACE>)"R" ) > |
+ // Object header "<object number> <generation> obj" followed by one EOL;
+ // same generation rule as OBJ_REF.
+ <START_OBJECT: ["1"-"9"](<DIGITS>)*(<SPACE>|<OTHER_WHITE_SPACE>)("0"|["1"-"9"](<DIGITS>)*)((<SPACE>|<OTHER_WHITE_SPACE>)"obj")( < EOL > )> |
+ // Private (#) helpers: only usable inside other token definitions
+ <#DIGITS : ["0"-"9"] > |
+ <#LOWERLETTER : ["a"-"z"] > |
+ <#UPPERLETTER : ["A"-"Z"] >
+}
+
+// Inside a literal string, characters other than parentheses are accumulated
+// with MORE and become part of the image of the next WithinLIT token.
+<WithinLIT> MORE :
+{
+ <~["(", ")"]>
+}
+
+// -- The content of a stream isn't checked by the JavaCC parser;
+// -- that will be done by the PDFBox API
+
+// Every character of the stream body is accumulated with MORE, so it ends up
+// in the image of the END_STREAM token that follows.
+<WithinStream> MORE :
+{
+ <~[]>
+}
+
+// -- End of Stream, return to the OBJECT Lexical State
+// (the state switched to is DEFAULT)
+<WithinStream> TOKEN :
+{
+ <END_STREAM: "endstream" > : DEFAULT
+}
+
+// --------------------------------------------------
+// ---- CROSS REFERENCE TABLE TOKENS
+// --------------------------------------------------
+
+// The "xref" keyword switches the lexer to the CrossRefTable state.
+TOKEN :
+{
+ < XREF_TAG : "xref" > : CrossRefTable
+}
+
+// Tokens of the cross reference table.
+<CrossRefTable> TOKEN :
+{
+ // One entry: 10-digit offset, space, 5-digit generation, space, entry type.
+ // The type is "f" (free) or "n" (in use); the former range ["f"-"u"] also
+ // accepted every letter in between.
+ < FULL_LINE : (<DIGITS>){10} " " (<DIGITS>){5} " " ["f","n"] > |
+ // Subsection header: first object number, space, number of entries
+ < SUBSECTION_START : (<FIRST_OBJECT_NUMBER> <SPACE> <SUBSECTION_ENTRIES>) > |
+ < #SUBSECTION_ENTRIES : ["1"-"9"](<DIGITS>)* > |
+ < #FIRST_OBJECT_NUMBER : (<DIGITS>)+ > |
+ // The "trailer" keyword ends the table and returns to the DEFAULT state
+ < TRAILER_TAG: "trailer" > : DEFAULT
+}
+
+// --------------------------------------------------
+// ---- TRAILER / Dictionary TOKENS
+// --------------------------------------------------
+
+// Dictionary delimiters (DEFAULT state).
+TOKEN :
+{
+ < START_DICTONNARY : "<<" > |
+ < END_DICTONNARY : ">>" >
+}
+
+// Parentheses met while inside a literal string; the nesting itself is
+// tracked by the literal() JAVACODE production.
+<WithinLIT> TOKEN :
+{
+ < END_LITERAL : ")" > |
+ // "(" plus every following non-parenthesis character
+ < INNER_START_LIT : "("(~[")","("])*>
+}
+
+// The "startxref" keyword switches the lexer to the WithinTrailer state.
+TOKEN :
+{
+ < STARTXREF_TAG : "startxref" > : WithinTrailer
+}
+
+// After "startxref": a run of digits, then "%%EOF" which returns to DEFAULT.
+<WithinTrailer> TOKEN :
+{
+ < OBJ_NUMBER: (<DIGITS>)+ > |
+ < EOF_TRAILER_TAG : "%%EOF" > : DEFAULT
+}
+
+// An indirect object: START_OBJECT, the object content, END_OBJECT and an
+// optional extra EOL.
+void indirect_object() :
+{}
+{
+ <START_OBJECT>
+ object_content()
+ <END_OBJECT> ( < EOL > )?
+}
+
+// Content of an indirect object: optional leading whitespace, then either a
+// simple value (boolean, number, hex string, literal string, array, name,
+// null) or a dictionary optionally followed by a stream. Raises a
+// PdfParseException when "endstream" is not preceded by CR or LF.
+void object_content() :
+{}
+{
+ (<SPACE>|<OTHER_WHITE_SPACE>|<EOL>)*
+ (
+   ( <OBJ_BOOLEAN> | <OBJ_NUMERIC> {checkNumericLength();} |
+     <OBJ_STRING_HEX> {checkStringHexLength();} |
+     start_literal() | array_of_object() |
+     <OBJ_NAME> {checkNameLength();} |
+     <OBJ_NULL>
+   )(<SPACE>|<OTHER_WHITE_SPACE>)*
+   |
+   (
+     dictionary_object()
+     (<SPACE>|<OTHER_WHITE_SPACE>)*
+     ( (<EOL>) *
+       <STREAM> <END_STREAM>
+       {
+         // "token" is the END_STREAM token; its image starts with the stream
+         // bytes accumulated through MORE in the WithinStream state.
+         String endStreamKeyword = tokenImage[END_STREAM].substring(1,tokenImage[END_STREAM].length()-1);
+         int i = token.image.indexOf(endStreamKeyword);
+         // i < 1 means nothing precedes the keyword (empty stream body): report
+         // it as a missing EOL instead of failing in charAt(-1).
+         if (i < 1 || !(token.image.charAt(i-1) == 0x0a || token.image.charAt(i-1) == 0x0d)) {
+           throw new PdfParseException("Expected EOL before \"endstream\"", ERROR_SYNTAX_STREAM_DELIMITER);
+         }
+       }
+       (<SPACE>|<OTHER_WHITE_SPACE>)*
+     )?
+   )
+ )
+}
+
+// An array: "[" followed by any mix of elements and whitespace, then "]".
+// Every element (including a nested array or dictionary) increments the
+// counter; whitespace does not. A PdfParseException is raised after the
+// closing bracket when the counter exceeds MAX_ARRAY_ELEMENTS.
+void array_of_object () :
+{int counter = 0;}
+{
+ <OBJ_ARRAY_START>
+ (
+ <OBJ_BOOLEAN> {++counter;} |
+ <OBJ_NUMERIC> {++counter; checkNumericLength();} |
+ <OBJ_STRING_HEX> {++counter;checkStringHexLength();} |
+ array_of_object() {++counter;} |
+ dictionary_object () {++counter;} |
+ <OBJ_NAME> {++counter; checkNameLength();} |
+ <OBJ_NULL> {++counter;} |
+ <OBJ_REF> {++counter;} |
+ start_literal () {++counter;} |
+ <SPACE>|<OTHER_WHITE_SPACE>|<EOL> {/* space isn't an element */ } ) *
+ <OBJ_ARRAY_END>
+ {if(counter > MAX_ARRAY_ELEMENTS) throw new PdfParseException("Array too long : " + counter, ERROR_SYNTAX_ARRAY_TOO_LONG);}
+}
+
+// A literal string: the opening OBJ_STRING_LIT token, then the hand-written
+// literal() production consumes everything up to the matching ")".
+void start_literal () :
+{}
+{
+ <OBJ_STRING_LIT> literal()
+}
+
+// Consumes the remainder of a literal string whose opening OBJ_STRING_LIT
+// token has just been consumed by start_literal(). Nested parentheses are
+// tracked with a counter; when the matching ")" is reached the lexer is put
+// back in the DEFAULT state. Throws a ParseException on premature EOF and a
+// PdfParseException when the string content is longer than MAX_STRING_LENGTH.
+// NOTE(review): escaped parentheses ("\(" and "\)") are not treated specially here.
+JAVACODE
+void literal() {
+  int nesting = 1;
+  int literalLength = 0;
+  // The opening token holds "(" plus the leading characters of the string.
+  // It was never counted before, so an over-long literal without inner
+  // parentheses escaped the length check. The "(" delimiter is not content.
+  if (token != null && token.kind == OBJ_STRING_LIT) {
+    literalLength = token.image.getBytes().length - 1;
+  }
+  while(true) {
+    Token currentToken = getToken(1);
+    if (currentToken.kind == 0 ){
+      throw new ParseException("EOF reach before the end of the literal string.");
+    }
+    literalLength += currentToken.image.getBytes().length;
+    if ( currentToken.kind == OBJ_STRING_LIT ) {
+      jj_consume_token(OBJ_STRING_LIT);
+      ++nesting;
+    } else if ( currentToken.kind == INNER_START_LIT ) {
+      jj_consume_token(INNER_START_LIT);
+      ++nesting;
+    } else if ( currentToken.kind == END_LITERAL ) {
+      --nesting;
+      jj_consume_token(END_LITERAL);
+      if (nesting == 0) {
+        // the closing delimiter of the outermost level is not content either
+        --literalLength;
+        this.token_source.curLexState = PDFParserConstants.DEFAULT;
+        break;
+      }
+    } else {
+      currentToken = getNextToken();
+    }
+  }
+  if (literalLength > MAX_STRING_LENGTH) {
+    throw new PdfParseException("Literal String too long", ERROR_SYNTAX_LITERAL_TOO_LONG);
+  }
+}
+
+// Rejects the current token when the byte length of its image is greater
+// than MAX_NAME_SIZE. Does nothing when there is no current token.
+JAVACODE
+void checkNameLength() throws ParseException {
+  if (token == null) {
+    return;
+  }
+  int nameBytes = token.image.getBytes().length;
+  if (nameBytes > MAX_NAME_SIZE) {
+    throw new PdfParseException("Object Name is toot long : " + nameBytes, ERROR_SYNTAX_NAME_TOO_LONG);
+  }
+}
+
+// Rejects the current hexadecimal string token when it encodes more than
+// MAX_STRING_LENGTH bytes: the two delimiters are removed from the image
+// length and the remainder is halved (two hex digits per byte).
+JAVACODE
+void checkStringHexLength() throws ParseException {
+  if (token == null) {
+    return;
+  }
+  int decodedBytes = (token.image.length() - 2) / 2;
+  if (decodedBytes > MAX_STRING_LENGTH) {
+    throw new PdfParseException("Object String Hexa is toot long", ERROR_SYNTAX_HEXA_STRING_TOO_LONG);
+  }
+}
+
+// Checks the range of the current numeric token. The image is first read as
+// an integer, which must fit in the int range; when it is not an integer it
+// is read as a real, which must lie between MAX_NEGATIVE_FLOAT and
+// MAX_POSITIVE_FLOAT. Does nothing when there is no current token.
+JAVACODE
+void checkNumericLength() throws ParseException {
+  if (token == null) {
+    return;
+  }
+  final String num = token.image;
+  long integral;
+  try {
+    integral = Long.parseLong(num);
+  } catch (NumberFormatException notAnInteger) {
+    // may be a real, go to the next check
+    double real;
+    try {
+      real = Double.parseDouble(num);
+    } catch (NumberFormatException invalid) {
+      // should never happen
+      throw new PdfParseException("Numeric has invalid format " + num, ERROR_SYNTAX_NUMERIC_RANGE);
+    }
+    if (real > MAX_POSITIVE_FLOAT || real < MAX_NEGATIVE_FLOAT) {
+      throw new PdfParseException("Float is too long or too small: " + num, ERROR_SYNTAX_NUMERIC_RANGE);
+    }
+    return;
+  }
+  if (integral > Integer.MAX_VALUE || integral < Integer.MIN_VALUE) {
+    throw new PdfParseException("Numeric is too long or too small: " + num, ERROR_SYNTAX_NUMERIC_RANGE);
+  }
+}
+
+
+// A dictionary: "<<", any number of "name value" pairs separated by optional
+// whitespace, then ">>". tokenNumber is incremented once for each key and
+// once for each value, so the number of entries is tokenNumber / 2; a
+// PdfParseException is raised when it exceeds MAX_DICT_ENTRIES.
+void dictionary_object () :
+{int tokenNumber = 0;}
+{
+ <START_DICTONNARY>
+ ( <SPACE>|<OTHER_WHITE_SPACE>|<EOL> )*
+ (
+ ( <OBJ_NAME> {++tokenNumber; checkNameLength();}
+ ( <SPACE>|<OTHER_WHITE_SPACE>|<EOL> )*
+ (
+ <OBJ_BOOLEAN> | <OBJ_NAME> {checkNameLength();} |
+ <OBJ_NUMERIC> {checkNumericLength();} |
+ <OBJ_STRING_HEX> {checkStringHexLength();} |
+ start_literal() | array_of_object() |
+ dictionary_object () | <OBJ_NULL> |
+ <OBJ_REF>
+ )
+ {++tokenNumber;}
+ )
+ ( <SPACE>|<OTHER_WHITE_SPACE>|<EOL> )*
+ )*
+ <END_DICTONNARY>
+ {
+ int entries = (int)(tokenNumber / 2);
+ if (entries > MAX_DICT_ENTRIES) {
+ throw new PdfParseException("Too Many Entries In Dictionary : " + entries, ERROR_SYNTAX_TOO_MANY_ENTRIES);
+ }
+ }
+}
+
+
+// File header: "%" PDFA_HEADER EOL, then "%" BINARY_TAG EOL.
+// A ParseException is wrapped in a HeaderParseException; a TokenMgrError is
+// converted to a HeaderParseException carrying only its message.
+void PDF_header() throws HeaderParseException :
+{}
+{
+ try {
+ <PERCENT> <PDFA_HEADER> ( < EOL > )
+ <PERCENT> <BINARY_TAG> ( < EOL > )
+ } catch (ParseException e) {
+ throw new HeaderParseException (e);
+ } catch (TokenMgrError e) {
+ throw new HeaderParseException (e.getMessage());
+ }
+
+}
+
+// File body: one or more indirect objects, optionally preceded and followed
+// by a whitespace-only line (whitespace characters then one EOL).
+// A ParseException is wrapped in a BodyParseException; a TokenMgrError is
+// converted to a BodyParseException carrying only its message.
+void PDF_body() throws BodyParseException :
+{}
+{
+ try {
+ ( (<SPACE>|<OTHER_WHITE_SPACE>)+ (<EOL>))?
+ ( indirect_object() ) +
+ ( (<SPACE>|<OTHER_WHITE_SPACE>)+ (<EOL>))?
+ } catch (ParseException e) {
+ throw new BodyParseException (e);
+ } catch (TokenMgrError e) {
+ throw new BodyParseException (e.getMessage());
+ }
+}
+
+// Cross reference table: XREF_TAG EOL, then one or more subsections. Each
+// subsection is a SUBSECTION_START line followed by one or more FULL_LINE
+// entries; an entry may be followed by a single SPACE before its EOL.
+// A ParseException is wrapped in a CrossRefParseException; a TokenMgrError is
+// converted to a CrossRefParseException carrying only its message.
+void PDF_cross_ref_table() throws CrossRefParseException :
+{}
+{
+ try {
+ <XREF_TAG> ( < EOL > )
+ (
+ <SUBSECTION_START>
+ ( < EOL > )
+ ( <FULL_LINE> ( <SPACE> ) ? ( < EOL > ) ) +
+ )+
+ } catch (ParseException e) {
+ throw new CrossRefParseException (e);
+ } catch (TokenMgrError e) {
+ throw new CrossRefParseException (e.getMessage());
+ }
+}
+
+// Trailer: TRAILER_TAG EOL, a dictionary followed by optional spaces and an
+// EOL, STARTXREF_TAG EOL, OBJ_NUMBER EOL, then EOF_TRAILER_TAG with an
+// optional final EOL.
+// A ParseException is wrapped in a TrailerParseException; a TokenMgrError is
+// converted to a TrailerParseException carrying only its message.
+void PDF_trailer() throws TrailerParseException :
+{}
+{
+ try {
+ <TRAILER_TAG>
+ ( <EOL> )
+ dictionary_object() (<SPACE>)*<EOL>
+ <STARTXREF_TAG> ( <EOL> )
+ <OBJ_NUMBER> ( <EOL> )
+ <EOF_TRAILER_TAG> ( <EOL> ) ?
+ } catch (ParseException e) {
+ throw new TrailerParseException (e);
+ } catch (TokenMgrError e) {
+ throw new TrailerParseException (e.getMessage());
+ }
+
+}
+
+// What may follow the first body / cross reference table / trailer group:
+// either the end of the input, or one or more further groups of the same
+// three parts followed by the end of the input.
+// NOTE(review): judging by the name, the extra groups are meant to cover
+// linearized or incrementally updated files - confirm.
+// A PdfParseException is rethrown unchanged; any other ParseException or a
+// TokenMgrError is reported as a TrailerParseException.
+void
+PDF_linearized_modified() throws PdfParseException :
+{}
+{
+ try {
+ ( <EOF> |
+ (PDF_body()
+ PDF_cross_ref_table()
+ PDF_trailer())+
+ <EOF> )
+ } catch (PdfParseException e) {
+ throw e;
+ } catch (ParseException e) {
+ throw new TrailerParseException (e);
+ } catch (TokenMgrError e) {
+ throw new TrailerParseException (e.getMessage());
+ }
+}
+
+// -------------------------------------------
+// ---- The PDF grammar productions start here
+// -------------------------------------------
+// Entry production: header, body, cross reference table and trailer, then
+// whatever PDF_linearized_modified() accepts up to the end of the input.
+void
+PDF() throws PdfParseException :
+{}
+{
+ PDF_header()
+ PDF_body()
+ PDF_cross_ref_table()
+ PDF_trailer()
+ PDF_linearized_modified()
+}
\ No newline at end of file
Added: pdfbox/trunk/preflight/src/main/resources/project.version
URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/main/resources/project.version?rev=1150373&view=auto
==============================================================================
--- pdfbox/trunk/preflight/src/main/resources/project.version (added)
+++ pdfbox/trunk/preflight/src/main/resources/project.version Sun Jul 24 14:02:12 2011
@@ -0,0 +1 @@
+${project.version}
\ No newline at end of file
Added: pdfbox/trunk/preflight/src/test/java/org/apache/padaf/preflight/Benchmark.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/test/java/org/apache/padaf/preflight/Benchmark.java?rev=1150373&view=auto
==============================================================================
--- pdfbox/trunk/preflight/src/test/java/org/apache/padaf/preflight/Benchmark.java (added)
+++ pdfbox/trunk/preflight/src/test/java/org/apache/padaf/preflight/Benchmark.java Sun Jul 24 14:02:12 2011
@@ -0,0 +1,104 @@
+/*****************************************************************************
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ ****************************************************************************/
+
+package org.apache.padaf.preflight;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.FileWriter;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Date;
+import java.util.List;
+
+import javax.activation.FileDataSource;
+
+
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.io.IOUtils;
+import org.apache.padaf.preflight.PdfAValidator;
+import org.apache.padaf.preflight.PdfAValidatorFactory;
+import org.apache.padaf.preflight.ValidationResult;
+import org.apache.padaf.preflight.ValidationResult.ValidationError;
+
+/**
+ * Command line benchmark: validates a set of files against PDF/A-1b a given
+ * number of times and writes per-file timings, validation errors and overall
+ * statistics to a result file.
+ */
+public class Benchmark {
+
+  /**
+   * Runs the benchmark.
+   *
+   * @param args loop count, result file path, then one or more files or
+   *             directories (directories are scanned recursively)
+   * @throws Exception if validation or writing the result file fails
+   */
+  public static void main(String[] args) throws Exception {
+    if ( args.length < 3 ) {
+      System.err.println("Usage : Benchmark loop resultFile <file1 ... filen|dir>");
+      System.exit(255);
+    }
+
+    int loop = Integer.parseInt(args[0]);
+    if (loop <= 0) {
+      // the average below divides by the loop count
+      System.err.println("loop must be a positive integer : " + args[0]);
+      System.exit(255);
+    }
+
+    List<File> lfd = new ArrayList<File>();
+    for (int i = 2; i < args.length ; ++i) {
+      File fi = new File(args[i]);
+      if (fi.isDirectory()) {
+        Collection<File> cf = FileUtils.listFiles(fi, null, true); // Get All files contained by the dir
+        lfd.addAll(cf);
+      } else {
+        lfd.add(fi);
+      }
+    }
+
+    int size = lfd.size();
+    if (size == 0) {
+      // files are picked with i % size, which would divide by zero
+      System.err.println("No file to validate");
+      System.exit(255);
+    }
+
+    // HH = hour of day (0-23); "hh" is a 12-hour clock and is ambiguous without an AM/PM marker
+    SimpleDateFormat sdf = new SimpleDateFormat("dd/MM/yyyy HH:mm:ss.Z");
+
+    FileWriter resFile = new FileWriter(new File(args[1]));
+    try {
+      PdfAValidator validator = new PdfAValidatorFactory().createValidatorInstance(PdfAValidatorFactory.PDF_A_1_b);
+      long startGTime = System.currentTimeMillis();
+
+      for (int i = 0 ; i < loop ; i++) {
+        File file = lfd.get(i%size);
+        long startLTime = System.currentTimeMillis();
+        ValidationResult result = validator.validate(new FileDataSource(file));
+        if (!result.isValid()) {
+          resFile.write(file.getAbsolutePath() + " isn't PDF/A\n");
+          for (ValidationError error : result.getErrorsList()) {
+            resFile.write(error.getErrorCode() + " : " + error.getDetails() +"\n");
+          }
+        }
+        result.closePdf();
+        long endLTime = System.currentTimeMillis();
+        resFile.write(file.getName() + " (ms) : " + (endLTime - startLTime) + "\n");
+        resFile.flush();
+      }
+
+      long endGTime = System.currentTimeMillis();
+      long duration = endGTime - startGTime;
+      int average = (int)(duration/loop);
+      String start = sdf.format(new Date(startGTime));
+      String end = sdf.format(new Date(endGTime));
+
+      resFile.write("Start : " + start +"\n");
+      resFile.write("End : " + end +"\n");
+      resFile.write("Duration (ms) : " + duration +"\n");
+      resFile.write("Average (ms) : " + average +"\n");
+
+      System.out.println("Start : " + start);
+      System.out.println("End : " + end);
+      System.out.println("Duration (ms) : " + duration);
+      System.out.println("Average (ms) : " + average);
+      resFile.flush();
+    } finally {
+      // release the result file even when validation or writing fails
+      IOUtils.closeQuietly(resFile);
+    }
+  }
+}
\ No newline at end of file
Propchange: pdfbox/trunk/preflight/src/test/java/org/apache/padaf/preflight/Benchmark.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: pdfbox/trunk/preflight/src/test/java/org/apache/padaf/preflight/ExtractStream.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/test/java/org/apache/padaf/preflight/ExtractStream.java?rev=1150373&view=auto
==============================================================================
--- pdfbox/trunk/preflight/src/test/java/org/apache/padaf/preflight/ExtractStream.java (added)
+++ pdfbox/trunk/preflight/src/test/java/org/apache/padaf/preflight/ExtractStream.java Sun Jul 24 14:02:12 2011
@@ -0,0 +1,49 @@
+/*****************************************************************************
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ ****************************************************************************/
+
+package org.apache.padaf.preflight;
+
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.InputStream;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.pdfbox.cos.COSObject;
+import org.apache.pdfbox.cos.COSStream;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.persistence.util.COSObjectKey;
+
+/**
+ * Command line helper that writes the unfiltered content of one stream
+ * object of a PDF file to the file "stream.out" in the working directory.
+ */
+public class ExtractStream {
+  /**
+   * @param args PDF file path, object number, generation number
+   * @throws Exception if the document cannot be loaded or the stream cannot be copied
+   */
+  public static void main(String[] args) throws Exception {
+    if (args.length != 3) {
+      System.err.println("usage : ExtractStream file objNum objGen");
+      // stop here: going on would fail on the missing arguments
+      System.exit(255);
+    }
+    InputStream pdf = new FileInputStream(args[0]);
+    PDDocument document = null;
+    try {
+      document = PDDocument.load(pdf);
+      COSObjectKey key = new COSObjectKey(Integer.parseInt(args[1]),Integer.parseInt(args[2]));
+      COSObject obj = document.getDocument().getObjectFromPool(key);
+      if (obj.getObject() instanceof COSStream) {
+        COSStream stream = (COSStream)obj.getObject();
+        InputStream is = null;
+        FileOutputStream out = null;
+        try {
+          is = stream.getUnfilteredStream();
+          out = new FileOutputStream("stream.out");
+          IOUtils.copyLarge(is, out);
+        } finally {
+          IOUtils.closeQuietly(out);
+          IOUtils.closeQuietly(is);
+        }
+      }
+    } finally {
+      // release the document and the underlying file handle in every case
+      if (document != null) {
+        document.close();
+      }
+      IOUtils.closeQuietly(pdf);
+    }
+  }
+}
Propchange: pdfbox/trunk/preflight/src/test/java/org/apache/padaf/preflight/ExtractStream.java
------------------------------------------------------------------------------
svn:eol-style = native