You are viewing a plain text version of this content. The canonical link for it was not preserved in this plain-text version.
Posted to commits@tika.apache.org by ta...@apache.org on 2021/11/18 15:37:45 UTC

[tika] branch main updated (be8a121 -> d8fc8d4)

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a change to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git.


    from be8a121  TIKA-3591 -- revert override of commons-io bundle export versions
     new fabadd3  TIKA-3591 -- revert revert override of commons-io bundle export versions
     new d8fc8d4  TIKA-3595 -- avoid importing and embedding the same dependencies

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 tika-bundles/tika-bundle-standard/pom.xml          | 36 ----------------------
 .../test/java/org/apache/tika/bundle/BundleIT.java | 17 +++++-----
 tika-core/pom.xml                                  |  4 ++-
 3 files changed, 10 insertions(+), 47 deletions(-)

[tika] 01/02: TIKA-3591 -- revert revert override of commons-io bundle export versions

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git

commit fabadd31728013c6d90c72502a2e7a427d642ae0
Author: tallison <ta...@apache.org>
AuthorDate: Thu Nov 18 10:35:19 2021 -0500

    TIKA-3591 -- revert revert override of commons-io bundle export versions
---
 tika-core/pom.xml | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tika-core/pom.xml b/tika-core/pom.xml
index 6a0ff5e..f534ae3 100644
--- a/tika-core/pom.xml
+++ b/tika-core/pom.xml
@@ -150,7 +150,9 @@
               org.apache.tika.config.TikaActivator
             </Bundle-Activator>
             <Bundle-ActivationPolicy>lazy</Bundle-ActivationPolicy>
-            <Import-Package>!sun.misc,org.apache.xerces.util;resolution:=optional,*</Import-Package>
+            <Import-Package>!sun.misc,org.apache.xerces.util;resolution:=optional,
+              org.apache.commons.io.*;version="[2,3)",
+              *</Import-Package>
             <Export-Package>
               org.apache.tika.*
             </Export-Package>

[tika] 02/02: TIKA-3595 -- avoid importing and embedding the same dependencies

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git

commit d8fc8d4c1c026967c6c6571a4cd6bc387a7afc51
Author: tallison <ta...@apache.org>
AuthorDate: Thu Nov 18 10:36:21 2021 -0500

    TIKA-3595 -- avoid importing and embedding the same dependencies
---
 tika-bundles/tika-bundle-standard/pom.xml          | 36 ----------------------
 .../test/java/org/apache/tika/bundle/BundleIT.java | 17 +++++-----
 2 files changed, 7 insertions(+), 46 deletions(-)

diff --git a/tika-bundles/tika-bundle-standard/pom.xml b/tika-bundles/tika-bundle-standard/pom.xml
index 5bbf024..c734691 100644
--- a/tika-bundles/tika-bundle-standard/pom.xml
+++ b/tika-bundles/tika-bundle-standard/pom.xml
@@ -216,16 +216,9 @@
               !org.junit,
               !org.junit.*,
               !junit.*,
-              org.apache.tika.detect,
-              org.apache.tika.mime,
               org.apache.tika.fork,
               android.util;resolution:=optional,
-              com.adobe.xmp;resolution:=optional,
-              com.adobe.xmp.impl;resolution:=optional,
-              com.adobe.xmp.options;resolution:=optional,
-              com.adobe.xmp.properties;resolution:=optional,
               com.apple.eawt;resolution:=optional,
-              com.dd.plist;resolution:=optional,
               com.github.luben.zstd;resolution:=optional,
               com.github.jaiimageio.*;resolution:=optional,
               com.google.common.util.concurrent.internal;resolution:=optional,
@@ -233,7 +226,6 @@
               com.google.errorprone.annotations.concurrent;resolution:=optional,
               com.google.protobuf;resolution:=optional,
               com.ibm.icu.text;resolution:=optional,
-              com.parso;resolution:=optional,
               com.sleepycat.je;resolution:=optional,
               com.sun.javadoc;resolution:=optional,
               com.sun.xml.bind.marshaller;resolution:=optional,
@@ -268,9 +260,7 @@
               org.apache.batik.ext.awt.image.renderable;resolution:=optional,
               org.apache.batik.gvt;resolution:=optional,
               org.apache.batik.util;resolution:=optional,
-              org.apache.commons.exec;resolution:=optional,
               org.apache.jcp.xml.dsig.internal.dom;resolution:=optional,
-              org.apache.pdfbox.debugger;resolution:=optional,
               org.apache.tools.ant;resolution:=optional,
               org.apache.tools.ant.taskdefs;resolution:=optional,
               org.apache.tools.ant.types;resolution:=optional,
@@ -290,27 +280,6 @@
               org.apache.xml.security.utils;resolution:=optional,
               org.apache.xmlbeans.impl.xpath.saxon;resolution:=optional,
               org.apache.xmlbeans.impl.xquery.saxon;resolution:=optional,
-              org.bouncycastle.asn1.bsi;resolution:=optional,
-              org.bouncycastle.asn1.cmp;resolution:=optional,
-              org.bouncycastle.asn1.cms;resolution:=optional,
-              org.bouncycastle.asn1.cms.ecc;resolution:=optional,
-              org.bouncycastle.asn1.crmf;resolution:=optional,
-              org.bouncycastle.asn1.cryptlib;resolution:=optional,
-              org.bouncycastle.asn1.cryptopro;resolution:=optional,
-              org.bouncycastle.asn1.dvcs;resolution:=optional,
-              org.bouncycastle.asn1.eac;resolution:=optional,
-              org.bouncycastle.asn1.ess;resolution:=optional,
-              org.bouncycastle.asn1.est;resolution:=optional,
-              org.bouncycastle.asn1.sec;resolution:=optional,
-              org.bouncycastle.asn1.smime;resolution:=optional,
-              org.bouncycastle.asn1.tsp;resolution:=optional,
-              org.bouncycastle.cert;resolution:=optional,
-              org.bouncycastle.cert.jcajce;resolution:=optional,
-              org.bouncycastle.cert.ocsp;resolution:=optional,
-              org.bouncycastle.cms.bc;resolution:=optional,
-              org.bouncycastle.operator;resolution:=optional,
-              org.bouncycastle.operator.bc;resolution:=optional,
-              org.bouncycastle.tsp;resolution:=optional,
               org.brotli.dec;resolution:=optional,
               org.cyberneko.html.xercesbridge;resolution:=optional,
               org.etsi.uri.x01903.v14;resolution:=optional,
@@ -322,8 +291,6 @@
               org.openxmlformats.schemas.officeDocument.x2006.math;resolution:=optional,
               org.openxmlformats.schemas.schemaLibrary.x2006.main;resolution:=optional,
               org.osgi.framework;resolution:=optional,
-              org.quartz;resolution:=optional,
-              org.quartz.impl;resolution:=optional,
               org.slf4j;resolution:=optional,
               org.slf4j.helpers;resolution:=optional,
               org.w3c.dom;resolution:=optional,
@@ -335,7 +302,6 @@
               sun.java2d.cmm.kcms;resolution:=optional,
               sun.misc;resolution:=optional,
               sun.nio.ch;resolution:=optional,
-              com.jmatio.io;resolution:=optional,
               colorspace;resolution:=optional,
               com.sun.jna;resolution:=optional,
               com.sun.jna.ptr;resolution:=optional,
@@ -359,8 +325,6 @@
               com.google.common.base;resolution:=optional,
               com.google.common.math;resolution:=optional,
               sun.reflect.generics.reflectiveObjects;resolution:=optional,
-              org.apache.commons.logging;resolution:=optional,
-              org.apache.log4j;resolution:=optional,
               *
             </Import-Package>
           </instructions>
diff --git a/tika-bundles/tika-bundle-standard/src/test/java/org/apache/tika/bundle/BundleIT.java b/tika-bundles/tika-bundle-standard/src/test/java/org/apache/tika/bundle/BundleIT.java
index a0e3f26..1635476 100644
--- a/tika-bundles/tika-bundle-standard/src/test/java/org/apache/tika/bundle/BundleIT.java
+++ b/tika-bundles/tika-bundle-standard/src/test/java/org/apache/tika/bundle/BundleIT.java
@@ -35,6 +35,7 @@ import java.io.StringWriter;
 import java.io.Writer;
 import java.net.URISyntaxException;
 import java.nio.file.Paths;
+import java.util.Arrays;
 import java.util.HashSet;
 import java.util.Set;
 import java.util.jar.Attributes;
@@ -53,6 +54,7 @@ import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AutoDetectParser;
 import org.apache.tika.parser.CompositeParser;
 import org.apache.tika.parser.DefaultParser;
 import org.apache.tika.parser.ParseContext;
@@ -182,7 +184,6 @@ public class BundleIT {
     }
 
     @Test
-    //@Ignore("until we can figure out why OverrideDetector is not loaded by osgi")
     public void testBundleDetectors() throws Exception {
         //For some reason, the detector created by OSGi has a flat
         //list of detectors, whereas the detector created by the traditional
@@ -259,17 +260,15 @@ public class BundleIT {
         try (InputStream stream = new FileInputStream("src/test/resources/testOCR.jpg")) {
             tesseractParser.parse(stream, handler, new Metadata(), context);
         }
-
     }
 
     @Test
     public void testTikaBundle() throws Exception {
-        Tika tika = new Tika();
 
         // Package extraction
         ContentHandler handler = new BodyContentHandler();
 
-        Parser parser = tika.getParser();
+        Parser parser = new AutoDetectParser(defaultParser);
         ParseContext context = new ParseContext();
         context.set(Parser.class, parser);
 
@@ -300,12 +299,11 @@ public class BundleIT {
 
     @Test
     public void testPoiTikaBundle() throws Exception {
-        Tika tika = new Tika();
 
         // Package extraction
         ContentHandler handler = new BodyContentHandler();
 
-        Parser parser = tika.getParser();
+        Parser parser = new AutoDetectParser(defaultParser);
         ParseContext context = new ParseContext();
         context.set(Parser.class, parser);
 
@@ -320,12 +318,10 @@ public class BundleIT {
     @Test
     @Ignore
     public void testAll() throws Exception {
-        Tika tika = new Tika();
-
         // Package extraction
         ContentHandler handler = new BodyContentHandler();
 
-        Parser parser = tika.getParser();
+        Parser parser = new AutoDetectParser(defaultParser);
         ParseContext context = new ParseContext();
         context.set(Parser.class, parser);
         Set<String> needToFix = new HashSet<>();
@@ -345,11 +341,12 @@ public class BundleIT {
             } catch (EncryptedDocumentException e) {
                 //swallow
             } catch (SAXException e) {
-                //
+                //swallow
             } catch (TikaException e) {
                 System.err.println("tika Exception " + f.getName());
                 e.printStackTrace();
             }
+            System.out.println(Arrays.asList(metadata.getValues(TikaCoreProperties.TIKA_PARSED_BY)));
         }
     }