You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ti...@apache.org on 2024/01/29 16:17:19 UTC
svn commit: r1915455 - in /pdfbox/branches/2.0/pdfbox/src: main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/ test/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/
Author: tilman
Date: Mon Jan 29 16:17:19 2024
New Revision: 1915455
URL: http://svn.apache.org/viewvc?rev=1915455&view=rev
Log:
PDFBOX-2725: add ClassMap setter and getter, improve StructureElement test
Modified:
pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDStructureTreeRoot.java
pdfbox/branches/2.0/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDStructureElementTest.java
Modified: pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDStructureTreeRoot.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDStructureTreeRoot.java?rev=1915455&r1=1915454&r2=1915455&view=diff
==============================================================================
--- pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDStructureTreeRoot.java (original)
+++ pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDStructureTreeRoot.java Mon Jan 29 16:17:19 2024
@@ -17,7 +17,9 @@
package org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.HashMap;
+import java.util.List;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -25,6 +27,7 @@ import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSName;
+import org.apache.pdfbox.cos.COSObject;
import org.apache.pdfbox.pdmodel.PDStructureElementNameTreeNode;
import org.apache.pdfbox.pdmodel.common.COSDictionaryMap;
import org.apache.pdfbox.pdmodel.common.PDNameTreeNode;
@@ -224,4 +227,83 @@ public class PDStructureTreeRoot extends
this.getCOSObject().setItem(COSName.ROLE_MAP, rmDic);
}
+ /**
+  * Returns the ClassMap.
+  * <p>
+  * The result is a newly built map; adding or removing entries in it does not change the
+  * underlying dictionary. Use {@link #setClassMap(Map)} to store a modified map. Entries whose
+  * value is neither a dictionary nor an array are left out, and so are array elements that are
+  * not dictionaries.
+  *
+  * @return the ClassMap, never null. The elements are either {@link PDAttributeObject} or lists
+  * of it.
+  */
+ public Map<String, Object> getClassMap()
+ {
+     Map<String, Object> classMap = new HashMap<String, Object>();
+     COSDictionary classMapDictionary = this.getCOSObject().getCOSDictionary(COSName.CLASS_MAP);
+     if (classMapDictionary == null)
+     {
+         // no ClassMap entry: return an empty map rather than null
+         return classMap;
+     }
+     for (Map.Entry<COSName,COSBase> entry : classMapDictionary.entrySet())
+     {
+         COSName name = entry.getKey();
+         COSBase base = entry.getValue();
+         if (base instanceof COSObject)
+         {
+             // the entry is wrapped in a COSObject: look at the object it holds
+             base = ((COSObject) base).getObject();
+         }
+         if (base instanceof COSDictionary)
+         {
+             // single attribute object
+             classMap.put(name.getName(), PDAttributeObject.create((COSDictionary) base));
+         }
+         else if (base instanceof COSArray)
+         {
+             // array of attribute objects
+             COSArray array = (COSArray) base;
+             // explicit type argument, consistent with the HashMap creation above
+             List<PDAttributeObject> list = new ArrayList<PDAttributeObject>();
+             for (int i = 0; i < array.size(); ++i)
+             {
+                 COSBase base2 = array.getObject(i);
+                 if (base2 instanceof COSDictionary)
+                 {
+                     list.add(PDAttributeObject.create((COSDictionary) base2));
+                 }
+             }
+             classMap.put(name.getName(), list);
+         }
+     }
+     return classMap;
+ }
+
+ /**
+  * Sets the ClassMap.
+  * <p>
+  * A new dictionary is built from the given map and stored under the ClassMap key, replacing
+  * any previous one.
+  *
+  * @param classMap null, or a map whose elements are either {@link PDAttributeObject} or lists
+  * of it. A null or empty map removes the ClassMap entry. Values of any other type, and list
+  * elements that are not a {@link PDAttributeObject} (including null), are ignored.
+  */
+ public void setClassMap(Map<String, Object> classMap)
+ {
+     if (classMap == null || classMap.isEmpty())
+     {
+         this.getCOSObject().removeItem(COSName.CLASS_MAP);
+         return;
+     }
+     COSDictionary classMapDictionary = new COSDictionary();
+     for (Map.Entry<String,Object> entry : classMap.entrySet())
+     {
+         String name = entry.getKey();
+         Object object = entry.getValue();
+         if (object instanceof PDAttributeObject)
+         {
+             classMapDictionary.setItem(name, ((PDAttributeObject) object).getCOSObject());
+         }
+         else if (object instanceof List)
+         {
+             // The value is only known to be an Object, so the element type cannot be
+             // trusted: iterate as List<?> and check each element instead of using an
+             // unchecked cast that would fail halfway through on a foreign or null element.
+             COSArray array = new COSArray();
+             for (Object element : (List<?>) object)
+             {
+                 if (element instanceof PDAttributeObject)
+                 {
+                     array.add(((PDAttributeObject) element).getCOSObject());
+                 }
+             }
+             classMapDictionary.setItem(name, array);
+         }
+     }
+     this.getCOSObject().setItem(COSName.CLASS_MAP, classMapDictionary);
+ }
}
Modified: pdfbox/branches/2.0/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDStructureElementTest.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDStructureElementTest.java?rev=1915455&r1=1915454&r2=1915455&view=diff
==============================================================================
--- pdfbox/branches/2.0/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDStructureElementTest.java (original)
+++ pdfbox/branches/2.0/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDStructureElementTest.java Mon Jan 29 16:17:19 2024
@@ -19,6 +19,7 @@ package org.apache.pdfbox.pdmodel.docume
import java.io.File;
import java.io.IOException;
import java.util.HashSet;
+import java.util.Map;
import java.util.Set;
import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase;
@@ -48,7 +49,7 @@ public class PDStructureElementTest
PDDocument doc = PDDocument.load(new File(TARGETPDFDIR, "PDFBOX-4197.pdf"));
PDStructureTreeRoot structureTreeRoot = doc.getDocumentCatalog().getStructureTreeRoot();
Set<Revisions<PDAttributeObject>> attributeSet = new HashSet<Revisions<PDAttributeObject>>();
- checkElement(structureTreeRoot.getK(), attributeSet);
+ checkElement(structureTreeRoot.getK(), attributeSet, structureTreeRoot.getClassMap());
doc.close();
// collect attributes and check their count.
@@ -63,7 +64,8 @@ public class PDStructureElementTest
// Each element can be an array, a dictionary or a number.
// See PDF specification Table 323 - Entries in a structure element dictionary
- private void checkElement(COSBase base, Set<Revisions<PDAttributeObject>>attributeSet)
+ private void checkElement(COSBase base, Set<Revisions<PDAttributeObject>>attributeSet,
+ Map<String, Object> classMap)
{
if (base instanceof COSArray)
{
@@ -73,7 +75,7 @@ public class PDStructureElementTest
{
base2 = ((COSObject) base2).getObject();
}
- checkElement(base2, attributeSet);
+ checkElement(base2, attributeSet, classMap);
}
}
else if (base instanceof COSDictionary)
@@ -86,10 +88,22 @@ public class PDStructureElementTest
attributeSet.add(attributes);
Revisions<String> classNames = structureElement.getClassNames();
//TODO: modify the test to also check for class names, if we ever have a file.
+
+ // "If both the A and C entries are present and a given attribute is specified by both,
+ // the one specified by the A entry shall take precedence."
+ if (kdict.containsKey(COSName.C) && !kdict.containsKey(COSName.A))
+ {
+ for (int i = 0; i < classNames.size(); ++i)
+ {
+ String className = classNames.getObject(i);
+ // not sure if this is to be done this way
+ Assert.assertTrue("'" + className + "' not in ClassMap " + classMap, classMap.containsKey(className));
+ }
+ }
}
if (kdict.containsKey(COSName.K))
{
- checkElement(kdict.getDictionaryObject(COSName.K), attributeSet);
+ checkElement(kdict.getDictionaryObject(COSName.K), attributeSet, classMap);
}
}
}