You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2021/11/18 15:37:47 UTC
[tika] 02/02: TIKA-3595 -- avoid importing and embedding the same dependencies
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
commit d8fc8d4c1c026967c6c6571a4cd6bc387a7afc51
Author: tallison <ta...@apache.org>
AuthorDate: Thu Nov 18 10:36:21 2021 -0500
TIKA-3595 -- avoid importing and embedding the same dependencies
---
tika-bundles/tika-bundle-standard/pom.xml | 36 ----------------------
.../test/java/org/apache/tika/bundle/BundleIT.java | 17 +++++-----
2 files changed, 7 insertions(+), 46 deletions(-)
diff --git a/tika-bundles/tika-bundle-standard/pom.xml b/tika-bundles/tika-bundle-standard/pom.xml
index 5bbf024..c734691 100644
--- a/tika-bundles/tika-bundle-standard/pom.xml
+++ b/tika-bundles/tika-bundle-standard/pom.xml
@@ -216,16 +216,9 @@
!org.junit,
!org.junit.*,
!junit.*,
- org.apache.tika.detect,
- org.apache.tika.mime,
org.apache.tika.fork,
android.util;resolution:=optional,
- com.adobe.xmp;resolution:=optional,
- com.adobe.xmp.impl;resolution:=optional,
- com.adobe.xmp.options;resolution:=optional,
- com.adobe.xmp.properties;resolution:=optional,
com.apple.eawt;resolution:=optional,
- com.dd.plist;resolution:=optional,
com.github.luben.zstd;resolution:=optional,
com.github.jaiimageio.*;resolution:=optional,
com.google.common.util.concurrent.internal;resolution:=optional,
@@ -233,7 +226,6 @@
com.google.errorprone.annotations.concurrent;resolution:=optional,
com.google.protobuf;resolution:=optional,
com.ibm.icu.text;resolution:=optional,
- com.parso;resolution:=optional,
com.sleepycat.je;resolution:=optional,
com.sun.javadoc;resolution:=optional,
com.sun.xml.bind.marshaller;resolution:=optional,
@@ -268,9 +260,7 @@
org.apache.batik.ext.awt.image.renderable;resolution:=optional,
org.apache.batik.gvt;resolution:=optional,
org.apache.batik.util;resolution:=optional,
- org.apache.commons.exec;resolution:=optional,
org.apache.jcp.xml.dsig.internal.dom;resolution:=optional,
- org.apache.pdfbox.debugger;resolution:=optional,
org.apache.tools.ant;resolution:=optional,
org.apache.tools.ant.taskdefs;resolution:=optional,
org.apache.tools.ant.types;resolution:=optional,
@@ -290,27 +280,6 @@
org.apache.xml.security.utils;resolution:=optional,
org.apache.xmlbeans.impl.xpath.saxon;resolution:=optional,
org.apache.xmlbeans.impl.xquery.saxon;resolution:=optional,
- org.bouncycastle.asn1.bsi;resolution:=optional,
- org.bouncycastle.asn1.cmp;resolution:=optional,
- org.bouncycastle.asn1.cms;resolution:=optional,
- org.bouncycastle.asn1.cms.ecc;resolution:=optional,
- org.bouncycastle.asn1.crmf;resolution:=optional,
- org.bouncycastle.asn1.cryptlib;resolution:=optional,
- org.bouncycastle.asn1.cryptopro;resolution:=optional,
- org.bouncycastle.asn1.dvcs;resolution:=optional,
- org.bouncycastle.asn1.eac;resolution:=optional,
- org.bouncycastle.asn1.ess;resolution:=optional,
- org.bouncycastle.asn1.est;resolution:=optional,
- org.bouncycastle.asn1.sec;resolution:=optional,
- org.bouncycastle.asn1.smime;resolution:=optional,
- org.bouncycastle.asn1.tsp;resolution:=optional,
- org.bouncycastle.cert;resolution:=optional,
- org.bouncycastle.cert.jcajce;resolution:=optional,
- org.bouncycastle.cert.ocsp;resolution:=optional,
- org.bouncycastle.cms.bc;resolution:=optional,
- org.bouncycastle.operator;resolution:=optional,
- org.bouncycastle.operator.bc;resolution:=optional,
- org.bouncycastle.tsp;resolution:=optional,
org.brotli.dec;resolution:=optional,
org.cyberneko.html.xercesbridge;resolution:=optional,
org.etsi.uri.x01903.v14;resolution:=optional,
@@ -322,8 +291,6 @@
org.openxmlformats.schemas.officeDocument.x2006.math;resolution:=optional,
org.openxmlformats.schemas.schemaLibrary.x2006.main;resolution:=optional,
org.osgi.framework;resolution:=optional,
- org.quartz;resolution:=optional,
- org.quartz.impl;resolution:=optional,
org.slf4j;resolution:=optional,
org.slf4j.helpers;resolution:=optional,
org.w3c.dom;resolution:=optional,
@@ -335,7 +302,6 @@
sun.java2d.cmm.kcms;resolution:=optional,
sun.misc;resolution:=optional,
sun.nio.ch;resolution:=optional,
- com.jmatio.io;resolution:=optional,
colorspace;resolution:=optional,
com.sun.jna;resolution:=optional,
com.sun.jna.ptr;resolution:=optional,
@@ -359,8 +325,6 @@
com.google.common.base;resolution:=optional,
com.google.common.math;resolution:=optional,
sun.reflect.generics.reflectiveObjects;resolution:=optional,
- org.apache.commons.logging;resolution:=optional,
- org.apache.log4j;resolution:=optional,
*
</Import-Package>
</instructions>
diff --git a/tika-bundles/tika-bundle-standard/src/test/java/org/apache/tika/bundle/BundleIT.java b/tika-bundles/tika-bundle-standard/src/test/java/org/apache/tika/bundle/BundleIT.java
index a0e3f26..1635476 100644
--- a/tika-bundles/tika-bundle-standard/src/test/java/org/apache/tika/bundle/BundleIT.java
+++ b/tika-bundles/tika-bundle-standard/src/test/java/org/apache/tika/bundle/BundleIT.java
@@ -35,6 +35,7 @@ import java.io.StringWriter;
import java.io.Writer;
import java.net.URISyntaxException;
import java.nio.file.Paths;
+import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import java.util.jar.Attributes;
@@ -53,6 +54,7 @@ import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.CompositeParser;
import org.apache.tika.parser.DefaultParser;
import org.apache.tika.parser.ParseContext;
@@ -182,7 +184,6 @@ public class BundleIT {
}
@Test
- //@Ignore("until we can figure out why OverrideDetector is not loaded by osgi")
public void testBundleDetectors() throws Exception {
//For some reason, the detector created by OSGi has a flat
//list of detectors, whereas the detector created by the traditional
@@ -259,17 +260,15 @@ public class BundleIT {
try (InputStream stream = new FileInputStream("src/test/resources/testOCR.jpg")) {
tesseractParser.parse(stream, handler, new Metadata(), context);
}
-
}
@Test
public void testTikaBundle() throws Exception {
- Tika tika = new Tika();
// Package extraction
ContentHandler handler = new BodyContentHandler();
- Parser parser = tika.getParser();
+ Parser parser = new AutoDetectParser(defaultParser);
ParseContext context = new ParseContext();
context.set(Parser.class, parser);
@@ -300,12 +299,11 @@ public class BundleIT {
@Test
public void testPoiTikaBundle() throws Exception {
- Tika tika = new Tika();
// Package extraction
ContentHandler handler = new BodyContentHandler();
- Parser parser = tika.getParser();
+ Parser parser = new AutoDetectParser(defaultParser);
ParseContext context = new ParseContext();
context.set(Parser.class, parser);
@@ -320,12 +318,10 @@ public class BundleIT {
@Test
@Ignore
public void testAll() throws Exception {
- Tika tika = new Tika();
-
// Package extraction
ContentHandler handler = new BodyContentHandler();
- Parser parser = tika.getParser();
+ Parser parser = new AutoDetectParser(defaultParser);
ParseContext context = new ParseContext();
context.set(Parser.class, parser);
Set<String> needToFix = new HashSet<>();
@@ -345,11 +341,12 @@ public class BundleIT {
} catch (EncryptedDocumentException e) {
//swallow
} catch (SAXException e) {
- //
+ //swallow
} catch (TikaException e) {
System.err.println("tika Exception " + f.getName());
e.printStackTrace();
}
+ System.out.println(Arrays.asList(metadata.getValues(TikaCoreProperties.TIKA_PARSED_BY)));
}
}