You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ti...@apache.org on 2020/06/05 05:04:53 UTC

svn commit: r1878494 - in /pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight: metadata/UniquePropertiesValidation.java process/MetadataValidationProcess.java

Author: tilman
Date: Fri Jun  5 05:04:52 2020
New Revision: 1878494

URL: http://svn.apache.org/viewvc?rev=1878494&view=rev
Log:
PDFBOX-4860: check uniqueness of certain properties

Added:
    pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/metadata/UniquePropertiesValidation.java   (with props)
Modified:
    pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/process/MetadataValidationProcess.java

Added: pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/metadata/UniquePropertiesValidation.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/metadata/UniquePropertiesValidation.java?rev=1878494&view=auto
==============================================================================
--- pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/metadata/UniquePropertiesValidation.java (added)
+++ pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/metadata/UniquePropertiesValidation.java Fri Jun  5 05:04:52 2020
@@ -0,0 +1,92 @@
+/** ***************************************************************************
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ *************************************************************************** */
+package org.apache.pdfbox.preflight.metadata;
+
+import java.util.ArrayList;
+import java.util.List;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.preflight.PreflightConstants;
+import org.apache.pdfbox.preflight.ValidationResult;
+import org.apache.pdfbox.preflight.ValidationResult.ValidationError;
+import org.apache.pdfbox.preflight.exception.ValidationException;
+import org.apache.xmpbox.XMPMetadata;
+import org.apache.xmpbox.schema.AdobePDFSchema;
+import org.apache.xmpbox.schema.DublinCoreSchema;
+import org.apache.xmpbox.schema.XMPBasicSchema;
+import org.apache.xmpbox.schema.XMPSchema;
+
+/**
+ * Checks that certain XMP metadata properties occur at most once in their schema, see PDFBOX-4860.
+ *
+ * @author Tilman Hausherr
+ *
+ */
+public class UniquePropertiesValidation
+{
+
+    /**
+     * Checks that selected Dublin Core, Adobe PDF and XMP Basic properties are unique.
+     *
+     * @param document the PDF document; only checked for being non-null
+     * @param metadata the XMP metadata to inspect; it is dereferenced without a null check
+     * @return list of validation errors, empty if none of the checked properties is duplicated
+     * @throws ValidationException if the document is null
+     */
+    public List<ValidationResult.ValidationError> validatePropertiesUniqueness(PDDocument document, XMPMetadata metadata)
+            throws ValidationException
+    {
+        List<ValidationResult.ValidationError> ve = new ArrayList<>();
+
+        if (document == null)
+        {
+            throw new ValidationException("Document provided is null");
+        }
+        analyzePropertyUniqueness(metadata.getDublinCoreSchema(), DublinCoreSchema.CREATOR, ve);
+        analyzePropertyUniqueness(metadata.getDublinCoreSchema(), DublinCoreSchema.TITLE, ve);
+        analyzePropertyUniqueness(metadata.getDublinCoreSchema(), DublinCoreSchema.DESCRIPTION, ve);
+
+        analyzePropertyUniqueness(metadata.getAdobePDFSchema(), AdobePDFSchema.PRODUCER, ve);
+        analyzePropertyUniqueness(metadata.getAdobePDFSchema(), AdobePDFSchema.KEYWORDS, ve);
+
+        analyzePropertyUniqueness(metadata.getXMPBasicSchema(), XMPBasicSchema.CREATORTOOL, ve);
+        analyzePropertyUniqueness(metadata.getXMPBasicSchema(), XMPBasicSchema.CREATEDATE, ve);
+        analyzePropertyUniqueness(metadata.getXMPBasicSchema(), XMPBasicSchema.MODIFYDATE, ve);
+
+        // should any other properties be checked for uniqueness? Let us know.
+
+        return ve;
+    }
+
+    private static void analyzePropertyUniqueness(XMPSchema schema, String propertyName,
+            List<ValidationResult.ValidationError> ve)
+    {
+        if (schema == null)
+        {
+            return;
+        }
+        if (schema.getAllProperties().stream().
+                filter(field -> propertyName.equals(field.getPropertyName())).count() > 1)
+        {
+            ve.add(new ValidationError(PreflightConstants.ERROR_METADATA_PROPERTY_FORMAT,
+                    propertyName + " property is not unique in schema " + schema.getNamespace()));
+        }
+    }
+}

Propchange: pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/metadata/UniquePropertiesValidation.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/process/MetadataValidationProcess.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/process/MetadataValidationProcess.java?rev=1878494&r1=1878493&r2=1878494&view=diff
==============================================================================
--- pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/process/MetadataValidationProcess.java (original)
+++ pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/process/MetadataValidationProcess.java Fri Jun  5 05:04:52 2020
@@ -42,6 +42,7 @@ import org.apache.pdfbox.preflight.metad
 import org.apache.pdfbox.preflight.metadata.RDFAboutAttributeConcordanceValidation;
 import org.apache.pdfbox.preflight.metadata.RDFAboutAttributeConcordanceValidation.DifferentRDFAboutException;
 import org.apache.pdfbox.preflight.metadata.SynchronizedMetaDataValidation;
+import org.apache.pdfbox.preflight.metadata.UniquePropertiesValidation;
 import org.apache.pdfbox.preflight.metadata.XpacketParsingException;
 import org.apache.pdfbox.util.Hex;
 import org.apache.xmpbox.XMPMetadata;
@@ -88,6 +89,10 @@ public class MetadataValidationProcess e
             addValidationErrors(ctx,
                     new SynchronizedMetaDataValidation().validateMetadataSynchronization(document, metadata));
 
+            // Call metadata uniqueness checking
+            addValidationErrors(ctx,
+                    new UniquePropertiesValidation().validatePropertiesUniqueness(document, metadata));
+
             // Call PDF/A Identifier checking
             addValidationErrors(ctx, new PDFAIdentificationValidation().validatePDFAIdentifer(metadata));