You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ti...@apache.org on 2024/01/29 16:17:19 UTC

svn commit: r1915455 - in /pdfbox/branches/2.0/pdfbox/src: main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/ test/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/

Author: tilman
Date: Mon Jan 29 16:17:19 2024
New Revision: 1915455

URL: http://svn.apache.org/viewvc?rev=1915455&view=rev
Log:
PDFBOX-2725: add ClassMap setter and getter, improve StructureElement test

Modified:
    pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDStructureTreeRoot.java
    pdfbox/branches/2.0/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDStructureElementTest.java

Modified: pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDStructureTreeRoot.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDStructureTreeRoot.java?rev=1915455&r1=1915454&r2=1915455&view=diff
==============================================================================
--- pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDStructureTreeRoot.java (original)
+++ pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDStructureTreeRoot.java Mon Jan 29 16:17:19 2024
@@ -17,7 +17,9 @@
 package org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure;
 
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -25,6 +27,7 @@ import org.apache.pdfbox.cos.COSArray;
 import org.apache.pdfbox.cos.COSBase;
 import org.apache.pdfbox.cos.COSDictionary;
 import org.apache.pdfbox.cos.COSName;
+import org.apache.pdfbox.cos.COSObject;
 import org.apache.pdfbox.pdmodel.PDStructureElementNameTreeNode;
 import org.apache.pdfbox.pdmodel.common.COSDictionaryMap;
 import org.apache.pdfbox.pdmodel.common.PDNameTreeNode;
@@ -224,4 +227,83 @@ public class PDStructureTreeRoot extends
         this.getCOSObject().setItem(COSName.ROLE_MAP, rmDic);
     }
 
+    /**
+     * Gets the ClassMap, built from the {@code COSName.CLASS_MAP} dictionary of this tree root.
+     * 
+     * @return the ClassMap, never null (empty if there is no such dictionary). The elements are
+     * either {@link PDAttributeObject} or lists of it; entries of any other type are skipped.
+     */
+    public Map<String, Object> getClassMap()
+    {
+        Map<String, Object> classMap = new HashMap<String, Object>();
+        COSDictionary classMapDictionary = this.getCOSObject().getCOSDictionary(COSName.CLASS_MAP);
+        if (classMapDictionary == null)
+        {
+            return classMap;
+        }
+        for (Map.Entry<COSName,COSBase> entry : classMapDictionary.entrySet())
+        {
+            COSName name = entry.getKey();
+            COSBase base = entry.getValue();
+            if (base instanceof COSObject)
+            {
+                base = ((COSObject) base).getObject();
+            }
+            if (base instanceof COSDictionary)
+            {
+                classMap.put(name.getName(), PDAttributeObject.create((COSDictionary) base));
+            }
+            else if (base instanceof COSArray)
+            {
+                COSArray array = (COSArray) base;
+                List<PDAttributeObject> list = new ArrayList<PDAttributeObject>();
+                for (int i = 0; i < array.size(); ++i)
+                {
+                    COSBase base2 = array.getObject(i);
+                    if (base2 instanceof COSDictionary)
+                    {
+                        list.add(PDAttributeObject.create((COSDictionary) base2));
+                    }
+                }
+                classMap.put(name.getName(), list);
+            }
+        }
+        return classMap;
+    }
+
+    /**
+     * Sets the ClassMap, or removes the ClassMap entry if the given map is null or empty.
+     * 
+     * @param classMap null, or a map whose elements are either {@link PDAttributeObject} or lists
+     * of it. Values of any other type are ignored; list elements must be {@link PDAttributeObject}.
+     */
+    public void setClassMap(Map<String, Object> classMap)
+    {
+        if (classMap == null || classMap.isEmpty())
+        {
+            this.getCOSObject().removeItem(COSName.CLASS_MAP);
+            return;
+        }
+        COSDictionary classMapDictionary = new COSDictionary();
+        for (Map.Entry<String,Object> entry : classMap.entrySet())
+        {
+            String name = entry.getKey();
+            Object object = entry.getValue();
+            if (object instanceof PDAttributeObject)
+            {
+                classMapDictionary.setItem(name, ((PDAttributeObject) object).getCOSObject());
+            }
+            else if (object instanceof List)
+            {
+                List<?> list = (List<?>) object;
+                COSArray array = new COSArray();
+                for (Object attributeObject : list)
+                {
+                    array.add(((PDAttributeObject) attributeObject).getCOSObject());
+                }
+                classMapDictionary.setItem(name, array);
+            }
+        }
+        this.getCOSObject().setItem(COSName.CLASS_MAP, classMapDictionary);
+    }
 }

Modified: pdfbox/branches/2.0/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDStructureElementTest.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDStructureElementTest.java?rev=1915455&r1=1915454&r2=1915455&view=diff
==============================================================================
--- pdfbox/branches/2.0/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDStructureElementTest.java (original)
+++ pdfbox/branches/2.0/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDStructureElementTest.java Mon Jan 29 16:17:19 2024
@@ -19,6 +19,7 @@ package org.apache.pdfbox.pdmodel.docume
 import java.io.File;
 import java.io.IOException;
 import java.util.HashSet;
+import java.util.Map;
 import java.util.Set;
 import org.apache.pdfbox.cos.COSArray;
 import org.apache.pdfbox.cos.COSBase;
@@ -48,7 +49,7 @@ public class PDStructureElementTest
         PDDocument doc = PDDocument.load(new File(TARGETPDFDIR, "PDFBOX-4197.pdf"));
         PDStructureTreeRoot structureTreeRoot = doc.getDocumentCatalog().getStructureTreeRoot();
         Set<Revisions<PDAttributeObject>> attributeSet = new HashSet<Revisions<PDAttributeObject>>();
-        checkElement(structureTreeRoot.getK(), attributeSet);
+        checkElement(structureTreeRoot.getK(), attributeSet, structureTreeRoot.getClassMap());
         doc.close();
 
         // collect attributes and check their count.
@@ -63,7 +64,8 @@ public class PDStructureElementTest
 
     // Each element can be an array, a dictionary or a number.
     // See PDF specification Table 323 - Entries in a structure element dictionary
-    private void checkElement(COSBase base, Set<Revisions<PDAttributeObject>>attributeSet)
+    private void checkElement(COSBase base, Set<Revisions<PDAttributeObject>>attributeSet,
+            Map<String, Object> classMap)
     {
         if (base instanceof COSArray)
         {
@@ -73,7 +75,7 @@ public class PDStructureElementTest
                 {
                     base2 = ((COSObject) base2).getObject();
                 }
-                checkElement(base2, attributeSet);
+                checkElement(base2, attributeSet, classMap);
             }
         }
         else if (base instanceof COSDictionary)
@@ -86,10 +88,22 @@ public class PDStructureElementTest
                 attributeSet.add(attributes);
                 Revisions<String> classNames = structureElement.getClassNames();
                 //TODO: modify the test to also check for class names, if we ever have a file.
+                
+                // "If both the A and C entries are present and a given attribute is specified by both, 
+                // the one specified by the A entry shall take precedence."
+                if (kdict.containsKey(COSName.C) && !kdict.containsKey(COSName.A))
+                {
+                    for (int i = 0; i < classNames.size(); ++i)
+                    {
+                        String className = classNames.getObject(i);
+                        // not sure if this is to be done this way
+                        Assert.assertTrue("'" + className + "' not in ClassMap " + classMap, classMap.containsKey(className));
+                    }
+                }
             }
             if (kdict.containsKey(COSName.K))
             {
-                checkElement(kdict.getDictionaryObject(COSName.K), attributeSet);
+                checkElement(kdict.getDictionaryObject(COSName.K), attributeSet, classMap);
             }
         }
     }