You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2022/02/22 17:00:12 UTC

[tika] 01/03: improve 3d detection comment

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 1338262da11a5b1183b939b7812ce436d4e26348
Author: tallison <ta...@apache.org>
AuthorDate: Fri Feb 11 14:33:26 2022 -0500

    improve 3d detection comment
---
 .../java/org/apache/tika/parser/pdf/AbstractPDF2XHTML.java |  2 ++
 .../java/org/apache/tika/parser/pdf/PDFParserTest.java     | 14 ++++++++++++++
 2 files changed, 16 insertions(+)

diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/AbstractPDF2XHTML.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/AbstractPDF2XHTML.java
index 0be6122..a35cb48 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/AbstractPDF2XHTML.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/AbstractPDF2XHTML.java
@@ -555,6 +555,8 @@ class AbstractPDF2XHTML extends PDFTextStripper {
                         annotationSubtype = "unknown";
                     } else if (annotationSubtype.equals(THREE_D) ||
                             annotation.getCOSObject().containsKey(THREE_DD)) {
+                        //To make this stricter, we could get the 3DD stream object and see if the
+                        //subtype is U3D or PRC or model/ (prefix for model mime type)
                         metadata.set(PDF.HAS_3D, true);
                     }
                     for (COSDictionary fileSpec :
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
index a20111f..b5a792d 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
@@ -23,6 +23,7 @@ import static org.junit.jupiter.api.Assertions.assertNotNull;
 import static org.junit.jupiter.api.Assertions.assertNull;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 
+import java.io.File;
 import java.io.InputStream;
 import java.util.Arrays;
 import java.util.HashMap;
@@ -1359,6 +1360,19 @@ public class PDFParserTest extends TikaTest {
         assertEquals("RM1", metadata.get(0).getValues(PDF.ANNOTATION_TYPES)[0]);
     }
 
+    @Test
+    public void test3d() throws Exception {
+        //Ad hoc check over a developer-local directory of 3d PDFs. File#listFiles() returns
+        //null when the directory is missing, so skip instead of failing with an NPE elsewhere.
+        File[] files = new File("/home/tallison/Downloads/3d_pdfs").listFiles();
+        if (files == null) {
+            return;
+        }
+        for (File f : files) {
+            String[] vlas = getRecursiveMetadata(f.toPath()).get(0).getValues(PDF.HAS_3D);
+            System.out.println(vlas != null && vlas.length > 0 ? "vlas: " + vlas[0] : "no: " + f);
+        }
+    }
     /**
     @Test
     public void testWriteLimit() throws Exception {