You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ms...@apache.org on 2021/03/21 18:41:18 UTC

svn commit: r1887907 - in /pdfbox/trunk/xmpbox/src: main/java/org/apache/xmpbox/xml/DomXmpParser.java test/java/org/apache/xmpbox/TestXMPWithUndefinedSchemas.java test/resources/undefinedxmp/ test/resources/undefinedxmp/prism.xmp

Author: msahyoun
Date: Sun Mar 21 18:41:18 2021
New Revision: 1887907

URL: http://svn.apache.org/viewvc?rev=1887907&view=rev
Log:
PDFBOX-5128: initial support for parsing arbitrary XMPs

Added:
    pdfbox/trunk/xmpbox/src/test/java/org/apache/xmpbox/TestXMPWithUndefinedSchemas.java
    pdfbox/trunk/xmpbox/src/test/resources/undefinedxmp/
    pdfbox/trunk/xmpbox/src/test/resources/undefinedxmp/prism.xmp
Modified:
    pdfbox/trunk/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java

Modified: pdfbox/trunk/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java?rev=1887907&r1=1887906&r2=1887907&view=diff
==============================================================================
--- pdfbox/trunk/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java (original)
+++ pdfbox/trunk/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java Sun Mar 21 18:41:18 2021
@@ -216,11 +216,7 @@ public class DomXmpParser
             for (int i = 0; i < nnm.getLength(); i++)
             {
                 Attr attr = (Attr) nnm.item(i);
-                if (XMLConstants.XMLNS_ATTRIBUTE.equals(attr.getPrefix()))
-                {
-                    // do nothing
-                }
-                else if (XmpConstants.DEFAULT_RDF_PREFIX.equals(attr.getPrefix())
+                if (XmpConstants.DEFAULT_RDF_PREFIX.equals(attr.getPrefix())
                         && XmpConstants.ABOUT_NAME.equals(attr.getLocalName()))
                 {
                     // do nothing
@@ -229,6 +225,21 @@ public class DomXmpParser
                 {
                     // do nothing
                 }
+                else if (XMLConstants.XMLNS_ATTRIBUTE.equals(attr.getPrefix()))
+                {
+                    if (!strictParsing)
+                    {
+                        // Add the schema on the fly if it can't be found
+                        String prefix = attr.getLocalName();
+                        String namespace = attr.getValue();
+                        
+                        XMPSchema schema = xmp.getSchema(namespace);
+                        if (schema == null && tm.getSchemaFactory(namespace) == null)
+                        {
+                            tm.addNewNameSpace(namespace, prefix);
+                        }
+                    }
+                }
                 else
                 {
                     parseDescriptionRootAttr(xmp, description, attr, tm);

Added: pdfbox/trunk/xmpbox/src/test/java/org/apache/xmpbox/TestXMPWithUndefinedSchemas.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/xmpbox/src/test/java/org/apache/xmpbox/TestXMPWithUndefinedSchemas.java?rev=1887907&view=auto
==============================================================================
--- pdfbox/trunk/xmpbox/src/test/java/org/apache/xmpbox/TestXMPWithUndefinedSchemas.java (added)
+++ pdfbox/trunk/xmpbox/src/test/java/org/apache/xmpbox/TestXMPWithUndefinedSchemas.java Sun Mar 21 18:41:18 2021
@@ -0,0 +1,88 @@
+/*****************************************************************************
+ * 
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ * 
+ ****************************************************************************/
+
+package org.apache.xmpbox;
+
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+
+import java.io.InputStream;
+import java.util.stream.Stream;
+
+
+import org.apache.xmpbox.xml.DomXmpParser;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
+
+/**
+ * Verifies that XMP metadata declaring a namespace without a predefined schema can still be
+ * parsed when strict parsing is disabled (PDFBOX-5128).
+ */
+class TestXMPWithUndefinedSchemas
+{
+    /**
+     * Supplies the test cases.
+     *
+     * @return one argument set per case: resource path, namespace URI, property name and the
+     *         expected property value
+     */
+    static Stream<Arguments> initializeParameters() throws Exception
+    {
+        return Stream.of(
+            Arguments.of("/undefinedxmp/prism.xmp", "http://prismstandard.org/namespaces/basic/2.0/", "aggregationType", "journal")
+        );
+    }
+
+    /**
+     * Parses the given resource with strict parsing disabled and checks that the schema for the
+     * namespace exists and exposes the expected property.
+     *
+     * @param path classpath location of the XMP resource
+     * @param namespace namespace URI whose schema must be available after parsing
+     * @param propertyName local name of the property expected in that schema
+     * @param propertyValue expected text value of the property
+     * @throws Exception if the resource cannot be read or parsed
+     */
+    @ParameterizedTest
+    @MethodSource("initializeParameters")
+    void main(String path, String namespace, String propertyName, String propertyValue) throws Exception
+    {
+        // try-with-resources so the stream is closed even when parsing or an assertion fails
+        try (InputStream is = this.getClass().getResourceAsStream(path))
+        {
+            assertNotNull(is, "The test resource {" + path + "} should be available");
+
+            DomXmpParser builder = new DomXmpParser();
+            builder.setStrictParsing(false);
+            XMPMetadata rxmp = builder.parse(is);
+            // ensure basic parsing was OK
+            assertTrue(rxmp.getAllSchemas().size()>0, "There should be at least one schema");
+            assertNotNull(rxmp.getSchema(namespace), "The schema for {" + namespace + "} should be available");
+            assertNotNull(rxmp.getSchema(namespace).getProperty(propertyName), "The schema for {" + namespace + "} should have a property {" + propertyName + "} ");
+            // JUnit expects (expected, actual, message); the wrong order yields misleading failure reports
+            assertEquals(propertyName, rxmp.getSchema(namespace).getProperty(propertyName).getPropertyName(), "The schema for {" + namespace + "} should have a property {" + propertyName + "} ");
+            assertEquals(propertyValue, rxmp.getSchema(namespace).getUnqualifiedTextPropertyValue(propertyName), "The property {" + propertyName + "} should have a value of {" + propertyValue + "}");
+        }
+    }
+}

Added: pdfbox/trunk/xmpbox/src/test/resources/undefinedxmp/prism.xmp
URL: http://svn.apache.org/viewvc/pdfbox/trunk/xmpbox/src/test/resources/undefinedxmp/prism.xmp?rev=1887907&view=auto
==============================================================================
--- pdfbox/trunk/xmpbox/src/test/resources/undefinedxmp/prism.xmp (added)
+++ pdfbox/trunk/xmpbox/src/test/resources/undefinedxmp/prism.xmp Sun Mar 21 18:41:18 2021
@@ -0,0 +1,22 @@
+<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?>
+<x:xmpmeta xmlns:x="adobe:ns:meta/" x:xmptk="Adobe XMP Core 4.2.1-c043 52.372728, 2009/01/18-15:56:37        ">
+   <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
+      <rdf:Description rdf:about=""
+            xmlns:prism="http://prismstandard.org/namespaces/basic/2.0/">
+         <prism:aggregationType>journal</prism:aggregationType>
+         <prism:publicationName>Analytica Chimica Acta</prism:publicationName>
+         <prism:copyright>© 2010 Elsevier B.V. All rights reserved.</prism:copyright>
+         <prism:issn>0003-2670</prism:issn>
+         <prism:volume>675</prism:volume>
+         <prism:number>1</prism:number>
+         <prism:coverDisplayDate>18 August 2010</prism:coverDisplayDate>
+         <prism:coverDate>2010-08-18</prism:coverDate>
+         <prism:pageRange>91-96</prism:pageRange>
+         <prism:startingPage>91</prism:startingPage>
+         <prism:endingPage>96</prism:endingPage>
+         <prism:doi>10.1016/j.aca.2010.07.015</prism:doi>
+         <prism:url>http://dx.doi.org/10.1016/j.aca.2010.07.015</prism:url>
+      </rdf:Description>
+   </rdf:RDF>
+</x:xmpmeta>       
+<?xpacket end="w"?>
\ No newline at end of file