You are viewing a plain text version of this content. The canonical link to the original was not preserved in this plain text copy.
Posted to commits@tika.apache.org by ta...@apache.org on 2018/07/26 20:28:36 UTC

[tika] branch branch_1x updated (fc23648 -> 1438d8a)

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a change to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git.


    from fc23648  TIKA-2688 via Yury Kats
     new fe2b3ae  TIKA-2692 -- minimal upgrades to pass ossindex-maven module -- except for tika-nlp module, which requires significant work. fix conflicts
     new 6b37754  TIKA-2692 -- minimal upgrades to allow building with Java 11-ea
     new 1438d8a  TIKA-2692 -- general upgrades in prep for 1.19

The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../org/apache/tika/batch/fs/BatchDriverTest.java  |  5 +-
 tika-bundle/pom.xml                                |  1 +
 tika-example/pom.xml                               |  4 +-
 tika-nlp/pom.xml                                   | 10 ++-
 tika-parent/pom.xml                                | 11 +--
 tika-parsers/pom.xml                               | 62 +++++++++++++----
 .../java/org/apache/tika/parser/pkg/RarParser.java |  3 +-
 tika-server/pom.xml                                | 12 +---
 .../apache/tika/server/resource/TikaDetectors.java |  8 ++-
 .../apache/tika/server/resource/TikaMimeTypes.java | 20 ++++--
 .../apache/tika/server/resource/TikaParsers.java   |  7 +-
 .../apache/tika/server/resource/TikaResource.java  | 80 ++++++++++------------
 .../org/apache/tika/server/TikaDetectorsTest.java  | 16 +++--
 .../org/apache/tika/server/TikaMimeTypesTest.java  | 19 +++--
 .../org/apache/tika/server/TikaParsersTest.java    | 18 +++--
 .../org/apache/tika/server/TikaResourceTest.java   |  5 +-
 16 files changed, 174 insertions(+), 107 deletions(-)


[tika] 03/03: TIKA-2692 -- general upgrades in prep for 1.19

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 1438d8a22a634912d7a5ccbfd6f9c3d4543de583
Author: TALLISON <ta...@apache.org>
AuthorDate: Thu Jul 26 15:17:38 2018 -0400

    TIKA-2692 -- general upgrades in prep for 1.19
    
    # Conflicts:
    #	tika-dl/pom.xml
    #	tika-parsers/pom.xml
---
 tika-bundle/pom.xml                                |  1 +
 tika-parent/pom.xml                                |  8 ++--
 tika-parsers/pom.xml                               | 56 +++++++++++++++++-----
 .../java/org/apache/tika/parser/pkg/RarParser.java |  3 +-
 4 files changed, 51 insertions(+), 17 deletions(-)

diff --git a/tika-bundle/pom.xml b/tika-bundle/pom.xml
index b08cb9e..584d8e8 100644
--- a/tika-bundle/pom.xml
+++ b/tika-bundle/pom.xml
@@ -144,6 +144,7 @@
         <configuration>
           <instructions>
             <_runsystempackages>com.sun.xml.bind.marshaller, com.sun.xml.internal.bind.marshaller</_runsystempackages>
+            <_noee>true</_noee>
             <Bundle-Activator>
               org.apache.tika.parser.internal.Activator
             </Bundle-Activator>
diff --git a/tika-parent/pom.xml b/tika-parent/pom.xml
index 9110f42..baa7ec1 100644
--- a/tika-parent/pom.xml
+++ b/tika-parent/pom.xml
@@ -308,10 +308,10 @@
     <!-- NOTE: sync tukaani version with commons-compress in tika-parsers -->
     <commons.compress.version>1.17</commons.compress.version>
     <commons.io.version>2.6</commons.io.version>
-    <gson.version>2.8.1</gson.version>
+    <gson.version>2.8.5</gson.version>
     <cxf.version>3.2.5</cxf.version>
-    <slf4j.version>1.7.24</slf4j.version>
-    <jackson.version>2.9.5</jackson.version>
+    <slf4j.version>1.7.25</slf4j.version>
+    <jackson.version>2.9.6</jackson.version>
     <jaxb.version>2.3.0</jaxb.version>
     <mockito.version>2.20.0</mockito.version>
   </properties>
@@ -355,7 +355,7 @@
       <plugin>
         <groupId>org.apache.felix</groupId>
         <artifactId>maven-bundle-plugin</artifactId>
-        <version>3.3.0</version>
+        <version>3.5.1</version>
       </plugin>
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>
diff --git a/tika-parsers/pom.xml b/tika-parsers/pom.xml
index 38b15db..6b51f30 100644
--- a/tika-parsers/pom.xml
+++ b/tika-parsers/pom.xml
@@ -37,17 +37,18 @@
   <properties>
     <poi.version>3.17</poi.version>
     <!-- NOTE: sync codec version with POI -->
-    <codec.version>1.10</codec.version>
+    <codec.version>1.11</codec.version>
     <!-- NOTE: sync tukaani version with commons-compress in tika-parent-->
     <tukaani.version>1.8</tukaani.version>
     <!-- NOTE: sync brotli version with commons-compress in tika-parent-->
     <brotli.version>0.1.2</brotli.version>
-    <mime4j.version>0.8.1</mime4j.version>
+    <mime4j.version>0.8.2</mime4j.version>
     <vorbis.version>0.8</vorbis.version>
     <pdfbox.version>2.0.11</pdfbox.version>
     <jempbox.version>1.8.15</jempbox.version>
     <netcdf-java.version>4.5.5</netcdf-java.version>
     <sis.version>0.8</sis.version>
+    <parso.version>2.0.9</parso.version>
     <!-- used by POI, PDFBox and Jackcess ...try to sync -->
     <bouncycastle.version>1.60</bouncycastle.version>
     <commonsexec.version>1.3</commonsexec.version>
@@ -115,7 +116,7 @@
     <dependency>
       <groupId>com.healthmarketscience.jackcess</groupId>
       <artifactId>jackcess</artifactId>
-      <version>2.1.10</version>
+      <version>2.1.12</version>
       <exclusions>
         <exclusion>
           <groupId>commons-logging</groupId>
@@ -239,6 +240,12 @@
       <groupId>org.apache.poi</groupId>
       <artifactId>poi</artifactId>
       <version>${poi.version}</version>
+      <exclusions>
+        <exclusion>
+          <groupId>commons-codec</groupId>
+          <artifactId>commons-codec</artifactId>
+        </exclusion>
+      </exclusions>
     </dependency>
     <dependency>
       <groupId>org.apache.poi</groupId>
@@ -268,17 +275,17 @@
     <dependency>
       <groupId>org.ow2.asm</groupId>
       <artifactId>asm</artifactId>
-      <version>5.0.4</version>
+      <version>6.2</version>
     </dependency>
     <dependency>
       <groupId>com.googlecode.mp4parser</groupId>
       <artifactId>isoparser</artifactId>
-      <version>1.1.18</version>
+      <version>1.1.22</version>
     </dependency>
     <dependency>
       <groupId>com.drewnoakes</groupId>
       <artifactId>metadata-extractor</artifactId>
-      <version>2.10.1</version>
+      <version>2.11.0</version>
     </dependency>
     <dependency>
       <groupId>de.l3s.boilerpipe</groupId>
@@ -309,7 +316,13 @@
     <dependency>
       <groupId>org.codelibs</groupId>
       <artifactId>jhighlight</artifactId>
-      <version>1.0.2</version>
+      <version>1.0.3</version>
+      <exclusions>
+        <exclusion>
+          <groupId>commons-io</groupId>
+          <artifactId>commons-io</artifactId>
+        </exclusion>
+      </exclusions>
     </dependency>
     <!-- can't upgrade to java-libpst 0.9.3 because it requires Java 8
          and is buggy with OST TIKA-2415 -->
@@ -321,7 +334,7 @@
     <dependency>
       <groupId>com.github.junrar</groupId>
       <artifactId>junrar</artifactId>
-      <version>1.0.1</version>
+      <version>2.0.0</version>
       <exclusions>
         <exclusion>
           <groupId>commons-logging</groupId>
@@ -363,7 +376,7 @@
     <dependency>
       <groupId>org.apache.opennlp</groupId>
       <artifactId>opennlp-tools</artifactId>
-      <version>1.8.4</version>
+      <version>1.9.0</version>
     </dependency>
 
     <dependency>
@@ -446,6 +459,12 @@
           <groupId>org.jdom</groupId>
           <artifactId>jdom2</artifactId>
         </exclusion>
+        <!--TIKA 2672 exclude jna to resolve the dependency convergence with tika-dl's
+        deeplearning4j-nn:1.0.0-SNAPSHOT-->
+        <exclusion>
+          <groupId>net.java.dev.jna</groupId>
+          <artifactId>jna</artifactId>
+        </exclusion>
       </exclusions>
     </dependency>
     <dependency>
@@ -467,13 +486,22 @@
         </exclusion>
       </exclusions>
     </dependency>
+    <!--TIKA 2672 include a later version of jna as a direct dependency to resolve dependency convergence with tika-dl's
+    deeplearning4j-nn:1.0.0-SNAPSHOT -->
+    <dependency>
+      <groupId>net.java.dev.jna</groupId>
+      <artifactId>jna</artifactId>
+      <version>4.3.0</version>
+    </dependency>
+
     <!-- grib's current jsoup is vulnerable to xss
          exclude and import a more modern version TIKA-2561-->
     <dependency>
       <groupId>org.jsoup</groupId>
       <artifactId>jsoup</artifactId>
-      <version>1.11.2</version>
-    </dependency>    <dependency>
+      <version>1.11.3</version>
+    </dependency>
+    <dependency>
       <groupId>edu.ucar</groupId>
       <artifactId>cdm</artifactId>
       <version>${netcdf-java.version}</version>
@@ -529,6 +557,10 @@
           <groupId>commons-logging</groupId>
           <artifactId>commons-logging</artifactId>
         </exclusion>
+        <exclusion>
+          <groupId>commons-codec</groupId>
+          <artifactId>commons-codec</artifactId>
+        </exclusion>
       </exclusions>
     </dependency>
     <dependency>
@@ -541,7 +573,7 @@
     <dependency>
       <groupId>org.apache.commons</groupId>
       <artifactId>commons-csv</artifactId>
-      <version>1.0</version>
+      <version>1.5</version>
     </dependency>
 
     <dependency>
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/pkg/RarParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/pkg/RarParser.java
index cf80e47..633b2cc 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/pkg/RarParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/pkg/RarParser.java
@@ -23,6 +23,7 @@ import java.util.Set;
 
 import com.github.junrar.Archive;
 import com.github.junrar.exception.RarException;
+import com.github.junrar.impl.FileVolumeManager;
 import com.github.junrar.rarfile.FileHeader;
 import org.apache.tika.exception.EncryptedDocumentException;
 import org.apache.tika.exception.TikaException;
@@ -65,7 +66,7 @@ public class RarParser extends AbstractParser {
         Archive rar = null;
         try (TemporaryResources tmp = new TemporaryResources()) {
             TikaInputStream tis = TikaInputStream.get(stream, tmp);
-            rar = new Archive(tis.getFile());
+            rar = new Archive(new FileVolumeManager(tis.getFile()));
 
             if (rar.isEncrypted()) {
                 throw new EncryptedDocumentException();


[tika] 01/03: TIKA-2692 -- minimal upgrades to pass ossindex-maven module -- except for tika-nlp module, which requires significant work. fix conflicts

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git

commit fe2b3ae2d9e8d6ca9fe493fa39c4483b3e6b8f70
Author: TALLISON <ta...@apache.org>
AuthorDate: Thu Jul 26 10:40:00 2018 -0400

    TIKA-2692 -- minimal upgrades to pass ossindex-maven module -- except for tika-nlp module, which requires significant work.
    fix conflicts
---
 tika-example/pom.xml                               |  4 +-
 tika-parent/pom.xml                                |  2 +-
 tika-parsers/pom.xml                               |  4 +-
 tika-server/pom.xml                                | 12 +---
 .../apache/tika/server/resource/TikaDetectors.java |  8 ++-
 .../apache/tika/server/resource/TikaMimeTypes.java | 20 ++++--
 .../apache/tika/server/resource/TikaParsers.java   |  7 +-
 .../apache/tika/server/resource/TikaResource.java  | 80 ++++++++++------------
 .../org/apache/tika/server/TikaDetectorsTest.java  | 16 +++--
 .../org/apache/tika/server/TikaMimeTypesTest.java  | 19 +++--
 .../org/apache/tika/server/TikaParsersTest.java    | 18 +++--
 .../org/apache/tika/server/TikaResourceTest.java   |  5 +-
 12 files changed, 109 insertions(+), 86 deletions(-)

diff --git a/tika-example/pom.xml b/tika-example/pom.xml
index 9f555e8..86f5cee 100644
--- a/tika-example/pom.xml
+++ b/tika-example/pom.xml
@@ -89,7 +89,7 @@
     <dependency>
       <groupId>org.apache.jackrabbit</groupId>
       <artifactId>jackrabbit-jcr-server</artifactId>
-      <version>2.3.6</version>
+      <version>2.17.4</version>
       <exclusions>
         <exclusion>
           <groupId>org.apache.tika</groupId>
@@ -108,7 +108,7 @@
     <dependency>
       <groupId>org.apache.jackrabbit</groupId>
       <artifactId>jackrabbit-core</artifactId>
-      <version>2.3.6</version>
+      <version>2.17.4</version>
       <exclusions>
         <exclusion>
           <groupId>org.apache.tika</groupId>
diff --git a/tika-parent/pom.xml b/tika-parent/pom.xml
index ef02d27..bd335e0 100644
--- a/tika-parent/pom.xml
+++ b/tika-parent/pom.xml
@@ -309,7 +309,7 @@
     <commons.compress.version>1.17</commons.compress.version>
     <commons.io.version>2.6</commons.io.version>
     <gson.version>2.8.1</gson.version>
-    <cxf.version>3.0.16</cxf.version>
+    <cxf.version>3.2.5</cxf.version>
     <slf4j.version>1.7.24</slf4j.version>
     <jackson.version>2.9.5</jackson.version>
     <jaxb.version>2.3.0</jaxb.version>
diff --git a/tika-parsers/pom.xml b/tika-parsers/pom.xml
index f278302..58a5dfe 100644
--- a/tika-parsers/pom.xml
+++ b/tika-parsers/pom.xml
@@ -49,9 +49,9 @@
     <netcdf-java.version>4.5.5</netcdf-java.version>
     <sis.version>0.8</sis.version>
     <!-- used by POI, PDFBox and Jackcess ...try to sync -->
-    <bouncycastle.version>1.54</bouncycastle.version>
+    <bouncycastle.version>1.60</bouncycastle.version>
     <commonsexec.version>1.3</commonsexec.version>
-    <httpcomponents.version>4.5.4</httpcomponents.version>
+    <httpcomponents.version>4.5.6</httpcomponents.version>
   </properties>
 
   <dependencies>
diff --git a/tika-server/pom.xml b/tika-server/pom.xml
index 814375c..323ec6a 100644
--- a/tika-server/pom.xml
+++ b/tika-server/pom.xml
@@ -97,17 +97,7 @@
       <artifactId>cxf-rt-rs-security-cors</artifactId>
       <version>${cxf.version}</version>
     </dependency>
-    <dependency>
-      <groupId>javax.mail</groupId>
-      <artifactId>mail</artifactId>
-      <version>1.4.4</version>
-      <exclusions>
-        <exclusion>
-          <groupId>javax.activation</groupId>
-          <artifactId>activation</artifactId>
-        </exclusion>
-      </exclusions>
-    </dependency>
+
     <dependency>
       <groupId>commons-cli</groupId>
       <artifactId>commons-cli</artifactId>
diff --git a/tika-server/src/main/java/org/apache/tika/server/resource/TikaDetectors.java b/tika-server/src/main/java/org/apache/tika/server/resource/TikaDetectors.java
index e9c4348..e0cd6ba 100644
--- a/tika-server/src/main/java/org/apache/tika/server/resource/TikaDetectors.java
+++ b/tika-server/src/main/java/org/apache/tika/server/resource/TikaDetectors.java
@@ -24,10 +24,11 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
+import com.google.gson.Gson;
+import com.google.gson.GsonBuilder;
 import org.apache.tika.detect.CompositeDetector;
 import org.apache.tika.detect.Detector;
 import org.apache.tika.server.HTMLHelper;
-import org.eclipse.jetty.util.ajax.JSON;
 
 /**
  * <p>Provides details of all the {@link Detector}s registered with
@@ -35,6 +36,9 @@ import org.eclipse.jetty.util.ajax.JSON;
  */
 @Path("/detectors")
 public class TikaDetectors {
+    private static final Gson GSON = new GsonBuilder().disableHtmlEscaping().create();
+
+
     private HTMLHelper html;
 
     public TikaDetectors() {
@@ -76,7 +80,7 @@ public class TikaDetectors {
     public String getDetectorsJSON() {
         Map<String, Object> details = new HashMap<String, Object>();
         detectorAsMap(TikaResource.getConfig().getDetector(), details);
-        return JSON.toString(details);
+        return GSON.toJson(details);
     }
 
     private void detectorAsMap(Detector d, Map<String, Object> details) {
diff --git a/tika-server/src/main/java/org/apache/tika/server/resource/TikaMimeTypes.java b/tika-server/src/main/java/org/apache/tika/server/resource/TikaMimeTypes.java
index 0dacdf6..bc8c8ca 100644
--- a/tika-server/src/main/java/org/apache/tika/server/resource/TikaMimeTypes.java
+++ b/tika-server/src/main/java/org/apache/tika/server/resource/TikaMimeTypes.java
@@ -26,12 +26,13 @@ import java.util.Map;
 import java.util.SortedMap;
 import java.util.TreeMap;
 
+import com.google.gson.Gson;
+import com.google.gson.GsonBuilder;
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.mime.MediaTypeRegistry;
 import org.apache.tika.parser.CompositeParser;
 import org.apache.tika.parser.Parser;
 import org.apache.tika.server.HTMLHelper;
-import org.eclipse.jetty.util.ajax.JSON;
 
 /**
  * <p>Provides details of all the mimetypes known to Apache Tika,
@@ -39,6 +40,9 @@ import org.eclipse.jetty.util.ajax.JSON;
  */
 @Path("/mime-types")
 public class TikaMimeTypes {
+
+    private static final Gson GSON = new GsonBuilder().disableHtmlEscaping().create();
+
     private HTMLHelper html;
 
     public TikaMimeTypes() {
@@ -96,9 +100,9 @@ public class TikaMimeTypes {
         for (MediaTypeDetails type : getMediaTypes()) {
             Map<String, Object> typeDets = new HashMap<String, Object>();
 
-            typeDets.put("alias", type.aliases);
+            typeDets.put("alias", copyToStringArray(type.aliases));
             if (type.supertype != null) {
-                typeDets.put("supertype", type.supertype);
+                typeDets.put("supertype", type.supertype.toString());
             }
             if (type.parser != null) {
                 typeDets.put("parser", type.parser);
@@ -107,7 +111,15 @@ public class TikaMimeTypes {
             details.put(type.type.toString(), typeDets);
         }
 
-        return JSON.toString(details);
+        return GSON.toJson(details);
+    }
+
+    private static String[] copyToStringArray(MediaType[] aliases) {
+        String[] strings = new String[aliases.length];
+        for (int i = 0; i < aliases.length; i++) {
+            strings[i] = aliases[i].toString();
+        }
+        return strings;
     }
 
     @GET
diff --git a/tika-server/src/main/java/org/apache/tika/server/resource/TikaParsers.java b/tika-server/src/main/java/org/apache/tika/server/resource/TikaParsers.java
index f77de4d..6461cab 100644
--- a/tika-server/src/main/java/org/apache/tika/server/resource/TikaParsers.java
+++ b/tika-server/src/main/java/org/apache/tika/server/resource/TikaParsers.java
@@ -28,13 +28,14 @@ import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
+import com.google.gson.Gson;
+import com.google.gson.GsonBuilder;
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.parser.CompositeParser;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
 import org.apache.tika.parser.ParserDecorator;
 import org.apache.tika.server.HTMLHelper;
-import org.eclipse.jetty.util.ajax.JSON;
 
 /**
  * <p>Provides details of all the {@link Parser}s registered with
@@ -44,6 +45,7 @@ import org.eclipse.jetty.util.ajax.JSON;
 @Path("/parsers")
 public class TikaParsers {
     private static final ParseContext EMPTY_PC = new ParseContext();
+    private static final Gson GSON = new GsonBuilder().disableHtmlEscaping().create();
     private HTMLHelper html;
 
     public TikaParsers() {
@@ -127,7 +129,8 @@ public class TikaParsers {
     protected String getParsersJSON(boolean withMimeTypes) {
         Map<String, Object> details = new HashMap<String, Object>();
         parserAsMap(new ParserDetails(TikaResource.getConfig().getParser()), withMimeTypes, details);
-        return JSON.toString(details);
+
+        return GSON.toJson(details);
     }
 
     private void parserAsMap(ParserDetails p, boolean withMimeTypes, Map<String, Object> details) {
diff --git a/tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java b/tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java
index 0060738..7ee07c3 100644
--- a/tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java
+++ b/tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java
@@ -17,10 +17,36 @@
 
 package org.apache.tika.server.resource;
 
-import static java.nio.charset.StandardCharsets.UTF_8;
+import org.apache.commons.lang.StringUtils;
+import org.apache.cxf.attachment.ContentDisposition;
+import org.apache.cxf.jaxrs.ext.multipart.Attachment;
+import org.apache.poi.extractor.ExtractorFactory;
+import org.apache.tika.Tika;
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.detect.Detector;
+import org.apache.tika.exception.EncryptedDocumentException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.DigestingParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.parser.ParserDecorator;
+import org.apache.tika.parser.PasswordProvider;
+import org.apache.tika.parser.html.BoilerpipeContentHandler;
+import org.apache.tika.parser.ocr.TesseractOCRConfig;
+import org.apache.tika.parser.pdf.PDFParserConfig;
+import org.apache.tika.sax.BodyContentHandler;
+import org.apache.tika.sax.ExpandedTitleContentHandler;
+import org.apache.tika.sax.RichTextContentHandler;
+import org.apache.tika.server.InputStreamFactory;
+import org.apache.tika.server.TikaServerParseException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
 
-import javax.mail.internet.ContentDisposition;
-import javax.mail.internet.ParseException;
 import javax.ws.rs.Consumes;
 import javax.ws.rs.GET;
 import javax.ws.rs.POST;
@@ -52,35 +78,8 @@ import java.util.Set;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
-import org.apache.commons.lang.StringUtils;
-import org.apache.cxf.jaxrs.ext.multipart.Attachment;
-import org.apache.poi.extractor.ExtractorFactory;
-import org.apache.tika.Tika;
-import org.apache.tika.config.TikaConfig;
-import org.apache.tika.detect.Detector;
-import org.apache.tika.exception.EncryptedDocumentException;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.TikaMetadataKeys;
-import org.apache.tika.mime.MediaType;
-import org.apache.tika.parser.AutoDetectParser;
-import org.apache.tika.parser.DigestingParser;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
-import org.apache.tika.parser.ParserDecorator;
-import org.apache.tika.parser.PasswordProvider;
-import org.apache.tika.parser.html.BoilerpipeContentHandler;
-import org.apache.tika.parser.html.HtmlParser;
-import org.apache.tika.parser.ocr.TesseractOCRConfig;
-import org.apache.tika.parser.pdf.PDFParserConfig;
-import org.apache.tika.sax.BodyContentHandler;
-import org.apache.tika.sax.ExpandedTitleContentHandler;
-import org.apache.tika.sax.RichTextContentHandler;
-import org.apache.tika.server.InputStreamFactory;
-import org.apache.tika.server.TikaServerParseException;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.xml.sax.ContentHandler;
-import org.xml.sax.SAXException;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
 
 @Path("/tika")
 public class TikaResource {
@@ -139,19 +138,14 @@ public class TikaResource {
 
         String disposition = httpHeaders.getFirst("Content-Disposition");
         if (disposition != null) {
-            try {
-                ContentDisposition c = new ContentDisposition(disposition);
+            ContentDisposition c = new ContentDisposition(disposition);
 
-                // only support "attachment" dispositions
-                if ("attachment".equals(c.getDisposition())) {
-                    String fn = c.getParameter("filename");
-                    if (fn != null) {
-                        return fn;
-                    }
+            // only support "attachment" dispositions
+            if ("attachment".equals(c.getType())) {
+                String fn = c.getParameter("filename");
+                if (fn != null) {
+                    return fn;
                 }
-            } catch (ParseException e) {
-                // not a valid content-disposition field
-                LOG.warn("Parse exception {} determining content disposition", e.getMessage(), e);
             }
         }
 
diff --git a/tika-server/src/test/java/org/apache/tika/server/TikaDetectorsTest.java b/tika-server/src/test/java/org/apache/tika/server/TikaDetectorsTest.java
index 6bbfbf2..8fc384f 100644
--- a/tika-server/src/test/java/org/apache/tika/server/TikaDetectorsTest.java
+++ b/tika-server/src/test/java/org/apache/tika/server/TikaDetectorsTest.java
@@ -23,8 +23,12 @@ import static org.junit.Assert.assertTrue;
 import javax.ws.rs.core.Response;
 
 import java.io.InputStream;
+import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 
+import com.google.gson.Gson;
+import com.google.gson.GsonBuilder;
 import org.apache.cxf.jaxrs.JAXRSServerFactoryBean;
 import org.apache.cxf.jaxrs.client.WebClient;
 import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider;
@@ -32,11 +36,14 @@ import org.apache.tika.mime.MimeTypes;
 import org.apache.tika.parser.microsoft.POIFSContainerDetector;
 import org.apache.tika.parser.pkg.ZipContainerDetector;
 import org.apache.tika.server.resource.TikaDetectors;
-import org.eclipse.jetty.util.ajax.JSON;
 import org.gagravarr.tika.OggDetector;
 import org.junit.Test;
 
 public class TikaDetectorsTest extends CXFTestBase {
+
+    private static final Gson GSON = new GsonBuilder().create();
+
+
     private static final String DETECTORS_PATH = "/detectors";
 
     @Override
@@ -100,7 +107,7 @@ public class TikaDetectorsTest extends CXFTestBase {
                 .get();
 
         String jsonStr = getStringFromInputStream((InputStream) response.getEntity());
-        Map<String, Object> json = (Map<String, Object>) JSON.parse(jsonStr);
+        Map<String, Object> json = (Map<String, Object>) GSON.fromJson(jsonStr, Map.class);
 
         // Should have a nested structure
         assertTrue(json.containsKey("name"));
@@ -110,8 +117,8 @@ public class TikaDetectorsTest extends CXFTestBase {
         assertEquals(Boolean.TRUE, json.get("composite"));
 
         // At least 4 child detectors, none of them composite
-        Object[] children = (Object[]) json.get("children");
-        assertTrue(children.length >= 4);
+        List<Object> children = (List) json.get("children");
+        assertTrue(children.size() >= 4);
         boolean hasOgg = false, hasPOIFS = false, hasZIP = false, hasMime = false;
         for (Object o : children) {
             Map<String, Object> d = (Map<String, Object>) o;
@@ -139,4 +146,5 @@ public class TikaDetectorsTest extends CXFTestBase {
         assertTrue(hasZIP);
         assertTrue(hasMime);
     }
+
 }
diff --git a/tika-server/src/test/java/org/apache/tika/server/TikaMimeTypesTest.java b/tika-server/src/test/java/org/apache/tika/server/TikaMimeTypesTest.java
index b0b47fc..6b2be33 100644
--- a/tika-server/src/test/java/org/apache/tika/server/TikaMimeTypesTest.java
+++ b/tika-server/src/test/java/org/apache/tika/server/TikaMimeTypesTest.java
@@ -23,16 +23,21 @@ import static org.junit.Assert.assertTrue;
 import javax.ws.rs.core.Response;
 
 import java.io.InputStream;
+import java.util.List;
 import java.util.Map;
 
+import com.google.gson.Gson;
+import com.google.gson.GsonBuilder;
 import org.apache.cxf.jaxrs.JAXRSServerFactoryBean;
 import org.apache.cxf.jaxrs.client.WebClient;
 import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider;
 import org.apache.tika.server.resource.TikaMimeTypes;
-import org.eclipse.jetty.util.ajax.JSON;
 import org.junit.Test;
 
 public class TikaMimeTypesTest extends CXFTestBase {
+
+    private static final Gson GSON = new GsonBuilder().create();
+
     private static final String MIMETYPES_PATH = "/mime-types";
 
     @Override
@@ -97,7 +102,8 @@ public class TikaMimeTypesTest extends CXFTestBase {
                 .get();
 
         String jsonStr = getStringFromInputStream((InputStream) response.getEntity());
-        Map<String, Map<String, Object>> json = (Map<String, Map<String, Object>>) JSON.parse(jsonStr);
+        Map<String, Map<String, Object>> json = (Map<String, Map<String, Object>>)
+                GSON.fromJson(jsonStr, Map.class);
 
         assertEquals(true, json.containsKey("text/plain"));
         assertEquals(true, json.containsKey("application/xml"));
@@ -106,10 +112,11 @@ public class TikaMimeTypesTest extends CXFTestBase {
 
         Map<String, Object> bmp = json.get("image/bmp");
         assertEquals(true, bmp.containsKey("alias"));
-        Object[] aliases = (Object[]) bmp.get("alias");
-        assertEquals(2, aliases.length);
-        assertEquals("image/x-bmp", aliases[0]);
-        assertEquals("image/x-ms-bmp", aliases[1]);
+        List<Object> aliases = (List) bmp.get("alias");
+        assertEquals(2, aliases.size());
+
+        assertEquals("image/x-bmp", aliases.get(0));
+        assertEquals("image/x-ms-bmp", aliases.get(1));
 
         String whichParser = bmp.get("parser").toString();
         assertTrue("Which parser", whichParser.equals("org.apache.tika.parser.ocr.TesseractOCRParser") ||
diff --git a/tika-server/src/test/java/org/apache/tika/server/TikaParsersTest.java b/tika-server/src/test/java/org/apache/tika/server/TikaParsersTest.java
index eadacfa..cd3b083 100644
--- a/tika-server/src/test/java/org/apache/tika/server/TikaParsersTest.java
+++ b/tika-server/src/test/java/org/apache/tika/server/TikaParsersTest.java
@@ -23,8 +23,11 @@ import static org.junit.Assert.assertTrue;
 import javax.ws.rs.core.Response;
 
 import java.io.InputStream;
+import java.util.List;
 import java.util.Map;
 
+import com.google.gson.Gson;
+import com.google.gson.GsonBuilder;
 import org.apache.cxf.jaxrs.JAXRSServerFactoryBean;
 import org.apache.cxf.jaxrs.client.WebClient;
 import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider;
@@ -32,11 +35,13 @@ import org.apache.tika.parser.microsoft.ooxml.OOXMLParser;
 import org.apache.tika.parser.pdf.PDFParser;
 import org.apache.tika.parser.pkg.PackageParser;
 import org.apache.tika.server.resource.TikaParsers;
-import org.eclipse.jetty.util.ajax.JSON;
 import org.gagravarr.tika.OpusParser;
 import org.junit.Test;
 
 public class TikaParsersTest extends CXFTestBase {
+
+    private static final Gson GSON = new GsonBuilder().create();
+
     private static final String PARSERS_SUMMARY_PATH = "/parsers";
     private static final String PARSERS_DETAILS_PATH = "/parsers/details";
 
@@ -132,7 +137,8 @@ public class TikaParsersTest extends CXFTestBase {
                     .get();
 
             String jsonStr = getStringFromInputStream((InputStream) response.getEntity());
-            Map<String, Map<String, Object>> json = (Map<String, Map<String, Object>>) JSON.parse(jsonStr);
+            Map<String, Map<String, Object>> json = (Map<String, Map<String, Object>>)
+                    GSON.fromJson(jsonStr, Map.class);
 
             // Should have a nested structure
             assertEquals(true, json.containsKey("name"));
@@ -142,8 +148,8 @@ public class TikaParsersTest extends CXFTestBase {
             assertEquals(Boolean.TRUE, json.get("composite"));
 
             // At least 20 child parsers which aren't composite, except for CompositeExternalParser
-            Object[] children = (Object[]) (Object) json.get("children");
-            assertTrue(children.length >= 2);
+            List<Object> children = (List)json.get("children");
+            assertTrue(children.size() >= 2);
             boolean hasOpus = false, hasOOXML = false, hasZip = false;
             int nonComposite = 0;
             int composite = 0;
@@ -152,11 +158,11 @@ public class TikaParsersTest extends CXFTestBase {
                 assertEquals(true, child.containsKey("name"));
                 assertEquals(true, child.containsKey("composite"));
 
-                Object[] grandChildrenArr = (Object[]) child.get("children");
+                List<Object> grandChildrenArr = (List) child.get("children");
                 if (grandChildrenArr == null) {
                     continue;
                 }
-                assertTrue(grandChildrenArr.length > 50);
+                assertTrue(grandChildrenArr.size() > 50);
                 for (Object grandChildO : grandChildrenArr) {
                     Map<String, Object> grandChildren = (Map<String, Object>) grandChildO;
 
diff --git a/tika-server/src/test/java/org/apache/tika/server/TikaResourceTest.java b/tika-server/src/test/java/org/apache/tika/server/TikaResourceTest.java
index b519170..be5f02b 100644
--- a/tika-server/src/test/java/org/apache/tika/server/TikaResourceTest.java
+++ b/tika-server/src/test/java/org/apache/tika/server/TikaResourceTest.java
@@ -325,10 +325,9 @@ public class TikaResourceTest extends CXFTestBase {
                    .accept("text/plain")
                    .header(TikaResource.X_TIKA_OCR_HEADER_PREFIX +
                   "tesseractPath",
-
                           "C://tmp//hello.bat\u0000")
                 .put(ClassLoader.getSystemResourceAsStream("testOCR.pdf"));
-        assertEquals(500, response.getStatus());
+        assertEquals(400, response.getStatus());
 
         response = WebClient.create(endPoint + TIKA_PATH)
                 .type("application/pdf")
@@ -349,7 +348,7 @@ public class TikaResourceTest extends CXFTestBase {
                                 "trustedPageSeparator",
                         "\u0010")
                 .put(ClassLoader.getSystemResourceAsStream("testOCR.pdf"));
-        assertEquals(500, response.getStatus());
+        assertEquals(400, response.getStatus());
 
     }
 


[tika] 02/03: TIKA-2692 -- minimal upgrades to allow building w Java 11-ea

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 6b377548bd75cff248bffce95177b53cf414a881
Author: TALLISON <ta...@apache.org>
AuthorDate: Thu Jul 26 12:39:09 2018 -0400

    TIKA-2692 -- minimal upgrades to allow building w Java 11-ea
---
 .../test/java/org/apache/tika/batch/fs/BatchDriverTest.java    |  5 +++--
 tika-nlp/pom.xml                                               | 10 +++++++++-
 tika-parent/pom.xml                                            |  1 +
 tika-parsers/pom.xml                                           |  2 +-
 4 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/tika-batch/src/test/java/org/apache/tika/batch/fs/BatchDriverTest.java b/tika-batch/src/test/java/org/apache/tika/batch/fs/BatchDriverTest.java
index 654eea7..13e35e6 100644
--- a/tika-batch/src/test/java/org/apache/tika/batch/fs/BatchDriverTest.java
+++ b/tika-batch/src/test/java/org/apache/tika/batch/fs/BatchDriverTest.java
@@ -1,5 +1,3 @@
-package org.apache.tika.batch.fs;
-
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -16,6 +14,7 @@ package org.apache.tika.batch.fs;
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+package org.apache.tika.batch.fs;
 
 import static java.nio.charset.StandardCharsets.UTF_8;
 import static org.junit.Assert.assertEquals;
@@ -29,6 +28,7 @@ import java.util.HashMap;
 import java.util.Map;
 
 import org.apache.tika.batch.BatchProcessDriverCLI;
+import org.junit.Ignore;
 import org.junit.Test;
 
 
@@ -228,6 +228,7 @@ public class BatchDriverTest extends FSBatchTestBase {
     }
 
     @Test(timeout = 60000)
+    @Ignore("Java 11-ea+23 makes outputstreams uninterruptible")
     public void testThreadInterrupt() throws Exception {
         Path outputDir = getNewOutputDir("thread-interrupt");
         Map<String, String> args = new HashMap<>();
diff --git a/tika-nlp/pom.xml b/tika-nlp/pom.xml
index 492de17..c279875 100644
--- a/tika-nlp/pom.xml
+++ b/tika-nlp/pom.xml
@@ -137,6 +137,14 @@
           <groupId>org.apache.avro</groupId>
           <artifactId>avro</artifactId>
         </exclusion>
+        <exclusion>
+          <groupId>javax.ws.rs</groupId>
+          <artifactId>javax.ws.rs-api</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>javax.annotation</groupId>
+          <artifactId>javax.annotation-api</artifactId>
+        </exclusion>
       </exclusions>
     </dependency>
     <dependency>
@@ -208,7 +216,7 @@
     <dependency>
       <groupId>org.mockito</groupId>
       <artifactId>mockito-core</artifactId>
-      <version>2.15.0</version>
+      <version>${mockito.version}</version>
       <scope>test</scope>
     </dependency>
     <dependency>
diff --git a/tika-parent/pom.xml b/tika-parent/pom.xml
index bd335e0..9110f42 100644
--- a/tika-parent/pom.xml
+++ b/tika-parent/pom.xml
@@ -313,6 +313,7 @@
     <slf4j.version>1.7.24</slf4j.version>
     <jackson.version>2.9.5</jackson.version>
     <jaxb.version>2.3.0</jaxb.version>
+    <mockito.version>2.20.0</mockito.version>
   </properties>
 
   <build>
diff --git a/tika-parsers/pom.xml b/tika-parsers/pom.xml
index 58a5dfe..38b15db 100644
--- a/tika-parsers/pom.xml
+++ b/tika-parsers/pom.xml
@@ -422,7 +422,7 @@
     <dependency>
       <groupId>org.mockito</groupId>
       <artifactId>mockito-core</artifactId>
-      <version>2.15.0</version>
+      <version>${mockito.version}</version>
       <scope>test</scope>
     </dependency>
     <dependency>