You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ti...@apache.org on 2024/01/30 09:22:03 UTC
svn commit: r1915473 - in /pdfbox/branches/3.0/pdfbox/src/test: java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/ resources/org/apache/pdfbox/pdmodel/documentinterchange/ resources/org/apache/pdfbox/pdmodel/documentinterchange/logica...
Author: tilman
Date: Tue Jan 30 09:22:02 2024
New Revision: 1915473
URL: http://svn.apache.org/viewvc?rev=1915473&view=rev
Log:
PDFBOX-2725: add test for /ClassMap
Added:
pdfbox/branches/3.0/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/documentinterchange/
pdfbox/branches/3.0/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/
pdfbox/branches/3.0/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDFBOX-2725-878725.pdf (with props)
Modified:
pdfbox/branches/3.0/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDStructureElementTest.java
Modified: pdfbox/branches/3.0/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDStructureElementTest.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/3.0/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDStructureElementTest.java?rev=1915473&r1=1915472&r2=1915473&view=diff
==============================================================================
--- pdfbox/branches/3.0/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDStructureElementTest.java (original)
+++ pdfbox/branches/3.0/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDStructureElementTest.java Tue Jan 30 09:22:02 2024
@@ -16,7 +16,6 @@
*/
package org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure;
-import static org.junit.jupiter.api.Assertions.assertEquals;
import java.io.File;
import java.io.IOException;
@@ -30,8 +29,10 @@ import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSObject;
+import org.apache.pdfbox.io.RandomAccessReadBuffer;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.junit.jupiter.api.Assertions;
+import static org.junit.jupiter.api.Assertions.assertEquals;
import org.junit.jupiter.api.Test;
/**
@@ -51,22 +52,49 @@ class PDStructureElementTest
void testPDFBox4197() throws IOException
{
Set<Revisions<PDAttributeObject>> attributeSet = new HashSet<>();
+ Set<String> classSet = new HashSet<>();
try (PDDocument doc = Loader.loadPDF(new File(TARGETPDFDIR, "PDFBOX-4197.pdf")))
{
PDStructureTreeRoot structureTreeRoot = doc.getDocumentCatalog().getStructureTreeRoot();
- checkElement(structureTreeRoot.getK(), attributeSet, structureTreeRoot.getClassMap());
+ checkElement(structureTreeRoot.getK(), attributeSet, structureTreeRoot.getClassMap(), classSet);
}
// collect attributes and check their count.
assertEquals(117, attributeSet.size());
int cnt = attributeSet.stream().map(attributes -> attributes.size()).reduce(0, Integer::sum);
assertEquals(111, cnt); // this one was 105 before PDFBOX-4197 was fixed
+ assertEquals(0, classSet.size());
+ }
+
+ /**
+ * Check that all classes are caught and are in the /ClassMap
+ *
+ * @throws IOException
+ */
+ @Test
+ void testClassMap() throws IOException
+ {
+ Set<Revisions<PDAttributeObject>> attributeSet = new HashSet<>();
+ Set<String> classSet = new HashSet<>();
+ try (PDDocument doc = Loader.loadPDF(
+ RandomAccessReadBuffer.createBufferFromStream(PDStructureElementTest.class
+ .getResourceAsStream("PDFBOX-2725-878725.pdf"))))
+ {
+ PDStructureTreeRoot structureTreeRoot = doc.getDocumentCatalog().getStructureTreeRoot();
+ checkElement(structureTreeRoot.getK(), attributeSet, structureTreeRoot.getClassMap(), classSet);
+ }
+
+ // collect attributes and check their count.
+ assertEquals(72, attributeSet.size());
+ int cnt = attributeSet.stream().map(attributes -> attributes.size()).reduce(0, Integer::sum);
+ assertEquals(45, cnt);
+ assertEquals(10, classSet.size());
}
// Each element can be an array, a dictionary or a number.
// See PDF specification Table 323 - Entries in a structure element dictionary
private void checkElement(COSBase base, Set<Revisions<PDAttributeObject>>attributeSet,
- Map<String, Object> classMap)
+ Map<String, Object> classMap, Set<String> classSet)
{
if (base instanceof COSArray)
{
@@ -76,7 +104,7 @@ class PDStructureElementTest
{
base2 = ((COSObject) base2).getObject();
}
- checkElement(base2, attributeSet, classMap);
+ checkElement(base2, attributeSet, classMap, classSet);
}
}
else if (base instanceof COSDictionary)
@@ -88,7 +116,6 @@ class PDStructureElementTest
Revisions<PDAttributeObject> attributes = structureElement.getAttributes();
attributeSet.add(attributes);
Revisions<String> classNames = structureElement.getClassNames();
- //TODO: modify the test to also check for class names, if we ever have a file.
// "If both the A and C entries are present and a given attribute is specified by both,
// the one specified by the A entry shall take precedence."
@@ -97,14 +124,14 @@ class PDStructureElementTest
for (int i = 0; i < classNames.size(); ++i)
{
String className = classNames.getObject(i);
- // not sure if this is to be done this way
+ classSet.add(className);
Assertions.assertTrue(classMap.containsKey(className), "'" + className + "' not in ClassMap " + classMap);
}
}
}
if (kdict.containsKey(COSName.K))
{
- checkElement(kdict.getDictionaryObject(COSName.K), attributeSet, classMap);
+ checkElement(kdict.getDictionaryObject(COSName.K), attributeSet, classMap, classSet);
}
}
}
Added: pdfbox/branches/3.0/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDFBOX-2725-878725.pdf
URL: http://svn.apache.org/viewvc/pdfbox/branches/3.0/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDFBOX-2725-878725.pdf?rev=1915473&view=auto
==============================================================================
Binary file - no diff available.
Propchange: pdfbox/branches/3.0/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDFBOX-2725-878725.pdf
------------------------------------------------------------------------------
svn:mime-type = application/pdf