You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by gb...@apache.org on 2014/06/20 22:58:41 UTC
svn commit: r1604276 - in /pdfbox/trunk/xmpbox/src: main/java/org/apache/xmpbox/xml/DomXmpParser.java test/java/org/apache/xmpbox/parser/DeserializationTest.java test/resources/validxmp/only_space_fields.xmp

Author: gbailleul
Date: Fri Jun 20 20:58:41 2014
New Revision: 1604276

URL: http://svn.apache.org/r1604276
Log:
PDFBOX-1995: Do not trim the text in a node if this text is the only child of the node. Reworked removeComments. Added a test.

Added:
    pdfbox/trunk/xmpbox/src/test/resources/validxmp/only_space_fields.xmp
Modified:
    pdfbox/trunk/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java
    pdfbox/trunk/xmpbox/src/test/java/org/apache/xmpbox/parser/DeserializationTest.java

Modified: pdfbox/trunk/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java?rev=1604276&r1=1604275&r2=1604276&view=diff
==============================================================================
--- pdfbox/trunk/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java (original)
+++ pdfbox/trunk/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java Fri Jun 20 20:58:41 2014
@@ -125,7 +125,7 @@ public class DomXmpParser
         XMPMetadata xmp = null;
 
         // Start reading
-        removeComments(document.getFirstChild());
+        removeComments(document);
         Node node = document.getFirstChild();
 
         // expect xpacket processing instruction
@@ -716,37 +716,36 @@ public class DomXmpParser
     /**
      * Remove all the comments node in the parent element of the parameter
      * 
-     * @param node
+     * @param root
      *            the first node of an element or document to clear
      */
     private void removeComments(Node root)
     {
-        Node node = root;
-        while (node != null)
-        {
-            Node next = node.getNextSibling();
+        if (root.getChildNodes().getLength()<=1) {
+            // There is only one node so we do not remove it
+            return;
+        }
+        NodeList nl = root.getChildNodes();
+        for (int i=0; i < nl.getLength() ; i ++) {
+            Node node = nl.item(i);
             if (node instanceof Comment)
             {
                 // remove the comment
-                node.getParentNode().removeChild(node);
+                root.removeChild(node);
             }
             else if (node instanceof Text)
             {
-                Text t = (Text) node;
-                if (t.getTextContent().trim().length() == 0)
+                if (((Text)node).getTextContent().trim().length() == 0)
                 {
-                    // XXX is there a better way to remove useless Text ?
-                    node.getParentNode().removeChild(node);
+                        root.removeChild(node);
                 }
             }
             else if (node instanceof Element)
             {
                 // clean child
-                removeComments(node.getFirstChild());
+                removeComments(node);
             } // else do nothing
-            node = next;
         }
-        // end of document
     }
 
     private AbstractStructuredType instanciateStructured(TypeMapping tm, Types type, String name,

Modified: pdfbox/trunk/xmpbox/src/test/java/org/apache/xmpbox/parser/DeserializationTest.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/xmpbox/src/test/java/org/apache/xmpbox/parser/DeserializationTest.java?rev=1604276&r1=1604275&r2=1604276&view=diff
==============================================================================
--- pdfbox/trunk/xmpbox/src/test/java/org/apache/xmpbox/parser/DeserializationTest.java (original)
+++ pdfbox/trunk/xmpbox/src/test/java/org/apache/xmpbox/parser/DeserializationTest.java Fri Jun 20 20:58:41 2014
@@ -340,4 +340,17 @@ public class DeserializationTest
 
     }
 
+    @Test
+    public void testSpaceTextValues () throws Exception {
+        // check values with spaces at start or end
+        // in this case, the value should not be trimmed
+        InputStream is = DomXmpParser.class.getResourceAsStream("/validxmp/only_space_fields.xmp");
+        DomXmpParser xdb = new DomXmpParser();
+        XMPMetadata meta = xdb.parse(is);
+        // check producer
+        Assert.assertEquals(" ", meta.getAdobePDFSchema().getProducer());
+        // check creator tool
+        Assert.assertEquals("Canon ",meta.getXMPBasicSchema().getCreatorTool());
+
+    }
 }

Added: pdfbox/trunk/xmpbox/src/test/resources/validxmp/only_space_fields.xmp
URL: http://svn.apache.org/viewvc/pdfbox/trunk/xmpbox/src/test/resources/validxmp/only_space_fields.xmp?rev=1604276&view=auto
==============================================================================
--- pdfbox/trunk/xmpbox/src/test/resources/validxmp/only_space_fields.xmp (added)
+++ pdfbox/trunk/xmpbox/src/test/resources/validxmp/only_space_fields.xmp Fri Jun 20 20:58:41 2014
@@ -0,0 +1,28 @@
+<!-- ! Licensed to the Apache Software Foundation (ASF) under one or more
+	! contributor license agreements. See the NOTICE file distributed with !
+	this work for additional information regarding copyright ownership. ! The
+	ASF licenses this file to You under the Apache License, Version 2.0 ! (the
+	"License"); you may not use this file except in compliance with ! the License.
+	You may obtain a copy of the License at ! ! http://www.apache.org/licenses/LICENSE-2.0
+	! ! Unless required by applicable law or agreed to in writing, software !
+	distributed under the License is distributed on an "AS IS" BASIS, ! WITHOUT
+	WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ! See the
+	License for the specific language governing permissions and ! limitations
+	under the License. ! -->
+<?xpacket begin="﻿" id="W5M0MpCehiHzreSzNTczkc9d"?>
+<x:xmpmeta xmlns:x="adobe:ns:meta/">
+    <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
+        <rdf:Description rdf:about="" xmlns:xap="http://ns.adobe.com/xap/1.0/">
+            <xap:CreatorTool>Canon </xap:CreatorTool>
+            <xap:CreateDate>2014-01-23T20:09:45+01:00</xap:CreateDate>
+        </rdf:Description>
+        <rdf:Description rdf:about="" xmlns:pdf="http://ns.adobe.com/pdf/1.3/">
+            <pdf:Producer> </pdf:Producer>
+        </rdf:Description>
+        <rdf:Description rdf:about="" xmlns:pdfaid="http://www.aiim.org/pdfa/ns/id/">
+            <pdfaid:part>1</pdfaid:part>
+            <pdfaid:conformance>B</pdfaid:conformance>
+        </rdf:Description>
+    </rdf:RDF>
+</x:xmpmeta>
+<?xpacket end="w"?>
\ No newline at end of file