You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2022/02/22 17:00:12 UTC
[tika] 01/03: improve 3d detection comment
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
commit 1338262da11a5b1183b939b7812ce436d4e26348
Author: tallison <ta...@apache.org>
AuthorDate: Fri Feb 11 14:33:26 2022 -0500
improve 3d detection comment
---
.../java/org/apache/tika/parser/pdf/AbstractPDF2XHTML.java | 2 ++
.../java/org/apache/tika/parser/pdf/PDFParserTest.java | 14 ++++++++++++++
2 files changed, 16 insertions(+)
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/AbstractPDF2XHTML.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/AbstractPDF2XHTML.java
index 0be6122..a35cb48 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/AbstractPDF2XHTML.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/AbstractPDF2XHTML.java
@@ -555,6 +555,8 @@ class AbstractPDF2XHTML extends PDFTextStripper {
annotationSubtype = "unknown";
} else if (annotationSubtype.equals(THREE_D) ||
annotation.getCOSObject().containsKey(THREE_DD)) {
+ //To make this stricter, we could get the 3DD stream object and see if the
+ //subtype is U3D or PRC or model/ (prefix for model mime type)
metadata.set(PDF.HAS_3D, true);
}
for (COSDictionary fileSpec :
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
index a20111f..b5a792d 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
@@ -23,6 +23,7 @@ import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
+import java.io.File;
import java.io.InputStream;
import java.util.Arrays;
import java.util.HashMap;
@@ -1359,6 +1360,19 @@ public class PDFParserTest extends TikaTest {
assertEquals("RM1", metadata.get(0).getValues(PDF.ANNOTATION_TYPES)[0]);
}
+ @Test
+ public void test3d() throws Exception {
+ //Manual check over a developer-local directory of sample PDFs. listFiles() returns
+ //null when that directory is missing, so iterate nothing instead of hitting an NPE.
+ File[] files = new File("/home/tallison/Downloads/3d_pdfs").listFiles();
+ for (File f : files == null ? new File[0] : files) {
+ List<Metadata> metadataList = getRecursiveMetadata(f.toPath());
+ String[] values = metadataList.get(0).getValues(PDF.HAS_3D);
+ //print the first HAS_3D value, or the file itself when no value was recorded
+ System.out.println(
+ values != null && values.length > 0 ? "vlas: " + values[0] : "no: " + f);
+ }
+ }
/**
@Test
public void testWriteLimit() throws Exception {