You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2021/11/18 15:37:47 UTC

[tika] 02/02: TIKA-3595 -- avoid importing and embedding the same dependencies

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git

commit d8fc8d4c1c026967c6c6571a4cd6bc387a7afc51
Author: tallison <ta...@apache.org>
AuthorDate: Thu Nov 18 10:36:21 2021 -0500

    TIKA-3595 -- avoid importing and embedding the same dependencies
---
 tika-bundles/tika-bundle-standard/pom.xml          | 36 ----------------------
 .../test/java/org/apache/tika/bundle/BundleIT.java | 17 +++++-----
 2 files changed, 7 insertions(+), 46 deletions(-)

diff --git a/tika-bundles/tika-bundle-standard/pom.xml b/tika-bundles/tika-bundle-standard/pom.xml
index 5bbf024..c734691 100644
--- a/tika-bundles/tika-bundle-standard/pom.xml
+++ b/tika-bundles/tika-bundle-standard/pom.xml
@@ -216,16 +216,9 @@
               !org.junit,
               !org.junit.*,
               !junit.*,
-              org.apache.tika.detect,
-              org.apache.tika.mime,
               org.apache.tika.fork,
               android.util;resolution:=optional,
-              com.adobe.xmp;resolution:=optional,
-              com.adobe.xmp.impl;resolution:=optional,
-              com.adobe.xmp.options;resolution:=optional,
-              com.adobe.xmp.properties;resolution:=optional,
               com.apple.eawt;resolution:=optional,
-              com.dd.plist;resolution:=optional,
               com.github.luben.zstd;resolution:=optional,
               com.github.jaiimageio.*;resolution:=optional,
               com.google.common.util.concurrent.internal;resolution:=optional,
@@ -233,7 +226,6 @@
               com.google.errorprone.annotations.concurrent;resolution:=optional,
               com.google.protobuf;resolution:=optional,
               com.ibm.icu.text;resolution:=optional,
-              com.parso;resolution:=optional,
               com.sleepycat.je;resolution:=optional,
               com.sun.javadoc;resolution:=optional,
               com.sun.xml.bind.marshaller;resolution:=optional,
@@ -268,9 +260,7 @@
               org.apache.batik.ext.awt.image.renderable;resolution:=optional,
               org.apache.batik.gvt;resolution:=optional,
               org.apache.batik.util;resolution:=optional,
-              org.apache.commons.exec;resolution:=optional,
               org.apache.jcp.xml.dsig.internal.dom;resolution:=optional,
-              org.apache.pdfbox.debugger;resolution:=optional,
               org.apache.tools.ant;resolution:=optional,
               org.apache.tools.ant.taskdefs;resolution:=optional,
               org.apache.tools.ant.types;resolution:=optional,
@@ -290,27 +280,6 @@
               org.apache.xml.security.utils;resolution:=optional,
               org.apache.xmlbeans.impl.xpath.saxon;resolution:=optional,
               org.apache.xmlbeans.impl.xquery.saxon;resolution:=optional,
-              org.bouncycastle.asn1.bsi;resolution:=optional,
-              org.bouncycastle.asn1.cmp;resolution:=optional,
-              org.bouncycastle.asn1.cms;resolution:=optional,
-              org.bouncycastle.asn1.cms.ecc;resolution:=optional,
-              org.bouncycastle.asn1.crmf;resolution:=optional,
-              org.bouncycastle.asn1.cryptlib;resolution:=optional,
-              org.bouncycastle.asn1.cryptopro;resolution:=optional,
-              org.bouncycastle.asn1.dvcs;resolution:=optional,
-              org.bouncycastle.asn1.eac;resolution:=optional,
-              org.bouncycastle.asn1.ess;resolution:=optional,
-              org.bouncycastle.asn1.est;resolution:=optional,
-              org.bouncycastle.asn1.sec;resolution:=optional,
-              org.bouncycastle.asn1.smime;resolution:=optional,
-              org.bouncycastle.asn1.tsp;resolution:=optional,
-              org.bouncycastle.cert;resolution:=optional,
-              org.bouncycastle.cert.jcajce;resolution:=optional,
-              org.bouncycastle.cert.ocsp;resolution:=optional,
-              org.bouncycastle.cms.bc;resolution:=optional,
-              org.bouncycastle.operator;resolution:=optional,
-              org.bouncycastle.operator.bc;resolution:=optional,
-              org.bouncycastle.tsp;resolution:=optional,
               org.brotli.dec;resolution:=optional,
               org.cyberneko.html.xercesbridge;resolution:=optional,
               org.etsi.uri.x01903.v14;resolution:=optional,
@@ -322,8 +291,6 @@
               org.openxmlformats.schemas.officeDocument.x2006.math;resolution:=optional,
               org.openxmlformats.schemas.schemaLibrary.x2006.main;resolution:=optional,
               org.osgi.framework;resolution:=optional,
-              org.quartz;resolution:=optional,
-              org.quartz.impl;resolution:=optional,
               org.slf4j;resolution:=optional,
               org.slf4j.helpers;resolution:=optional,
               org.w3c.dom;resolution:=optional,
@@ -335,7 +302,6 @@
               sun.java2d.cmm.kcms;resolution:=optional,
               sun.misc;resolution:=optional,
               sun.nio.ch;resolution:=optional,
-              com.jmatio.io;resolution:=optional,
               colorspace;resolution:=optional,
               com.sun.jna;resolution:=optional,
               com.sun.jna.ptr;resolution:=optional,
@@ -359,8 +325,6 @@
               com.google.common.base;resolution:=optional,
               com.google.common.math;resolution:=optional,
               sun.reflect.generics.reflectiveObjects;resolution:=optional,
-              org.apache.commons.logging;resolution:=optional,
-              org.apache.log4j;resolution:=optional,
               *
             </Import-Package>
           </instructions>
diff --git a/tika-bundles/tika-bundle-standard/src/test/java/org/apache/tika/bundle/BundleIT.java b/tika-bundles/tika-bundle-standard/src/test/java/org/apache/tika/bundle/BundleIT.java
index a0e3f26..1635476 100644
--- a/tika-bundles/tika-bundle-standard/src/test/java/org/apache/tika/bundle/BundleIT.java
+++ b/tika-bundles/tika-bundle-standard/src/test/java/org/apache/tika/bundle/BundleIT.java
@@ -35,6 +35,7 @@ import java.io.StringWriter;
 import java.io.Writer;
 import java.net.URISyntaxException;
 import java.nio.file.Paths;
+import java.util.Arrays;
 import java.util.HashSet;
 import java.util.Set;
 import java.util.jar.Attributes;
@@ -53,6 +54,7 @@ import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AutoDetectParser;
 import org.apache.tika.parser.CompositeParser;
 import org.apache.tika.parser.DefaultParser;
 import org.apache.tika.parser.ParseContext;
@@ -182,7 +184,6 @@ public class BundleIT {
     }
 
     @Test
-    //@Ignore("until we can figure out why OverrideDetector is not loaded by osgi")
     public void testBundleDetectors() throws Exception {
         //For some reason, the detector created by OSGi has a flat
         //list of detectors, whereas the detector created by the traditional
@@ -259,17 +260,15 @@ public class BundleIT {
         try (InputStream stream = new FileInputStream("src/test/resources/testOCR.jpg")) {
             tesseractParser.parse(stream, handler, new Metadata(), context);
         }
-
     }
 
     @Test
     public void testTikaBundle() throws Exception {
-        Tika tika = new Tika();
 
         // Package extraction
         ContentHandler handler = new BodyContentHandler();
 
-        Parser parser = tika.getParser();
+        Parser parser = new AutoDetectParser(defaultParser);
         ParseContext context = new ParseContext();
         context.set(Parser.class, parser);
 
@@ -300,12 +299,11 @@ public class BundleIT {
 
     @Test
     public void testPoiTikaBundle() throws Exception {
-        Tika tika = new Tika();
 
         // Package extraction
         ContentHandler handler = new BodyContentHandler();
 
-        Parser parser = tika.getParser();
+        Parser parser = new AutoDetectParser(defaultParser);
         ParseContext context = new ParseContext();
         context.set(Parser.class, parser);
 
@@ -320,12 +318,10 @@ public class BundleIT {
     @Test
     @Ignore
     public void testAll() throws Exception {
-        Tika tika = new Tika();
-
         // Package extraction
         ContentHandler handler = new BodyContentHandler();
 
-        Parser parser = tika.getParser();
+        Parser parser = new AutoDetectParser(defaultParser);
         ParseContext context = new ParseContext();
         context.set(Parser.class, parser);
         Set<String> needToFix = new HashSet<>();
@@ -345,11 +341,12 @@ public class BundleIT {
             } catch (EncryptedDocumentException e) {
                 //swallow
             } catch (SAXException e) {
-                //
+                //swallow
             } catch (TikaException e) {
                 System.err.println("tika Exception " + f.getName());
                 e.printStackTrace();
             }
+            System.out.println(Arrays.asList(metadata.getValues(TikaCoreProperties.TIKA_PARSED_BY)));
         }
     }