You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2021/12/01 21:11:28 UTC

[tika] branch main updated: TIKA-3585 -- general updates for 2.1.1

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new cd66ecf  TIKA-3585 -- general updates for 2.1.1
cd66ecf is described below

commit cd66ecf08dbac6246013262793c88e75c93e50b0
Author: tballison <ta...@apache.org>
AuthorDate: Wed Dec 1 16:11:17 2021 -0500

    TIKA-3585 -- general updates for 2.1.1
---
 .../src/main/resources/comparison-reports-tags.xml |  6 ++---
 .../src/main/resources/comparison-reports.xml      |  6 ++---
 .../src/main/resources/profile-reports-tags.xml    |  6 ++---
 .../src/main/resources/profile-reports.xml         |  6 ++---
 tika-parent/pom.xml                                | 30 +++++++++++-----------
 .../apache/tika/parser/mail/RFC822ParserTest.java  |  7 ++---
 6 files changed, 31 insertions(+), 30 deletions(-)

diff --git a/tika-eval/tika-eval-app/src/main/resources/comparison-reports-tags.xml b/tika-eval/tika-eval-app/src/main/resources/comparison-reports-tags.xml
index a12e7c7..dd53a30 100644
--- a/tika-eval/tika-eval-app/src/main/resources/comparison-reports-tags.xml
+++ b/tika-eval/tika-eval-app/src/main/resources/comparison-reports-tags.xml
@@ -107,12 +107,12 @@
         </sql>
         <sql>
             update exceptions_compared
-            set exc_prcnt_a = cast(exc_cnt_a as decimal)/cast(total as decimal)
+            set exc_prcnt_a = cast(exc_cnt_a as double)/cast(total as double)
             where total > 0;
         </sql>
         <sql>
             update exceptions_compared
-            set exc_prcnt_b = cast(exc_cnt_b as decimal)/cast(total as decimal)
+            set exc_prcnt_b = cast(exc_cnt_b as double)/cast(total as double)
             where total > 0;
         </sql>
 
@@ -705,7 +705,7 @@
         </sql>
         <sql>
             update parse_time_compared ptc set prcnt_increase=(100.0 *
-            cast(total_b as decimal)/cast(total_a as decimal))
+            cast(total_b as double)/cast(total_a as double))
             where total_a > 0;
         </sql>
     </before>
diff --git a/tika-eval/tika-eval-app/src/main/resources/comparison-reports.xml b/tika-eval/tika-eval-app/src/main/resources/comparison-reports.xml
index 9a8e29b..8aa3363 100644
--- a/tika-eval/tika-eval-app/src/main/resources/comparison-reports.xml
+++ b/tika-eval/tika-eval-app/src/main/resources/comparison-reports.xml
@@ -107,12 +107,12 @@
         </sql>
         <sql>
             update exceptions_compared
-            set exc_prcnt_a = cast(exc_cnt_a as decimal)/cast(total as decimal)
+            set exc_prcnt_a = cast(exc_cnt_a as double)/cast(total as double)
             where total > 0;
         </sql>
         <sql>
             update exceptions_compared
-            set exc_prcnt_b = cast(exc_cnt_b as decimal)/cast(total as decimal)
+            set exc_prcnt_b = cast(exc_cnt_b as double)/cast(total as double)
             where total > 0;
         </sql>
 
@@ -257,7 +257,7 @@
         </sql>
         <sql>
             update parse_time_compared ptc set prcnt_increase=(100.0 *
-            cast(total_b as decimal)/cast(total_a as decimal))
+            cast(total_b as double)/cast(total_a as double))
             where total_a > 0;
         </sql>
     </before>
diff --git a/tika-eval/tika-eval-app/src/main/resources/profile-reports-tags.xml b/tika-eval/tika-eval-app/src/main/resources/profile-reports-tags.xml
index 028a7f4..a6b0942 100644
--- a/tika-eval/tika-eval-app/src/main/resources/profile-reports-tags.xml
+++ b/tika-eval/tika-eval-app/src/main/resources/profile-reports-tags.xml
@@ -122,7 +122,7 @@
             num_tokens, num_alphabetic_tokens, num_common_tokens,
             case
                 when num_alphabetic_tokens &gt; 0
-                then cast(num_common_tokens as decimal)/cast(num_alphabetic_tokens as decimal)
+                then cast(num_common_tokens as double)/cast(num_alphabetic_tokens as double)
                 else 0
             end as common_div_alphabetic
             from contents c
@@ -131,7 +131,7 @@
             join mimes m on p.mime_id=m.mime_id
             where
                 (num_alphabetic_tokens = 0
-                    or cast(num_common_tokens as decimal)/cast(num_alphabetic_tokens as decimal) &lt; 0.50
+                    or cast(num_common_tokens as double)/cast(num_alphabetic_tokens as double) &lt; 0.50
                 )
             and mime_string not like 'image%'
             and mime_string not like 'video%'
@@ -159,7 +159,7 @@
                 when num_tokens = 0
                     then 0
                 else
-                    cast(num_tokens as decimal)/cast(num_pages as decimal)
+                    cast(num_tokens as double)/cast(num_pages as double)
             end as num_tokens_div_num_pages
             from profiles p
             left join contents c on p.id=c.id
diff --git a/tika-eval/tika-eval-app/src/main/resources/profile-reports.xml b/tika-eval/tika-eval-app/src/main/resources/profile-reports.xml
index d31606f..db74481 100644
--- a/tika-eval/tika-eval-app/src/main/resources/profile-reports.xml
+++ b/tika-eval/tika-eval-app/src/main/resources/profile-reports.xml
@@ -122,7 +122,7 @@
             num_tokens, num_alphabetic_tokens, num_common_tokens,
             case
                 when num_alphabetic_tokens &gt; 0
-                then cast(num_common_tokens as decimal)/cast(num_alphabetic_tokens as decimal)
+                then cast(num_common_tokens as double)/cast(num_alphabetic_tokens as double)
                 else 0
             end as common_div_alphabetic
             from contents c
@@ -131,7 +131,7 @@
             join mimes m on p.mime_id=m.mime_id
             where
                 (num_alphabetic_tokens = 0
-                    or cast(num_common_tokens as decimal)/cast(num_alphabetic_tokens as decimal) &lt; 0.50
+                    or cast(num_common_tokens as double)/cast(num_alphabetic_tokens as double) &lt; 0.50
                 )
             and mime_string not like 'image%'
             and mime_string not like 'video%'
@@ -159,7 +159,7 @@
                 when num_tokens = 0
                     then 0
                 else
-                    cast(num_tokens as decimal)/cast(num_pages as decimal)
+                    cast(num_tokens as double)/cast(num_pages as double)
             end as num_tokens_div_num_pages
             from profiles p
             left join contents c on p.id=c.id
diff --git a/tika-parent/pom.xml b/tika-parent/pom.xml
index 08d98c8..4480fc1 100644
--- a/tika-parent/pom.xml
+++ b/tika-parent/pom.xml
@@ -274,19 +274,19 @@
     <maven.scr.version>1.26.4</maven.scr.version>
     <maven.surefire.version>3.0.0-M5</maven.surefire.version>
     <maven.shade.version>3.2.4</maven.shade.version>
-    <puppycrawl.version>8.45.1</puppycrawl.version>
+    <puppycrawl.version>9.2</puppycrawl.version>
     <rat.version>0.13</rat.version>
 
     <!-- dependency versions -->
-    <aws.s3.version>1.12.43</aws.s3.version>
-    <aws.transcribe.version>1.12.43</aws.transcribe.version>
+    <aws.s3.version>1.12.122</aws.s3.version>
+    <aws.transcribe.version>1.12.122</aws.transcribe.version>
     <asm.version>9.2</asm.version>
     <boilerpipe.version>1.1.0</boilerpipe.version>
     <!-- used by POI, PDFBox and Jackcess ...try to sync -->
-    <bouncycastle.version>1.69</bouncycastle.version>
+    <bouncycastle.version>1.70</bouncycastle.version>
     <!-- NOTE: sync brotli version with commons-compress-->
     <brotli.version>0.1.2</brotli.version>
-    <commons.cli.version>1.4</commons.cli.version>
+    <commons.cli.version>1.5.0</commons.cli.version>
     <commons.codec.version>1.15</commons.codec.version>
     <commons.collections4.version>4.4</commons.collections4.version>
     <commons.compress.version>1.21</commons.compress.version>
@@ -303,10 +303,10 @@
     <!-- fakeload versions &gt; 0.4.0 require java > 8 -->
     <fakeload.version>0.4.0</fakeload.version>
     <geoapi.version>3.0.1</geoapi.version>
-    <google.cloud.version>2.0.1</google.cloud.version>
-    <gson.version>2.8.8</gson.version>
+    <google.cloud.version>2.2.1</google.cloud.version>
+    <gson.version>2.8.9</gson.version>
     <guava.version>31.0.1-jre</guava.version>
-    <h2.version>1.4.200</h2.version>
+    <h2.version>2.0.202</h2.version>
     <httpcomponents.version>4.5.13</httpcomponents.version>
     <httpcore.version>4.4.14</httpcore.version>
     <imageio.version>1.4.0</imageio.version>
@@ -323,22 +323,22 @@
     <jempbox.version>1.8.16</jempbox.version>
     <jetty.version>9.4.44.v20210927</jetty.version>
     <jhighlight.version>1.0.3</jhighlight.version>
-    <jna.version>5.9.0</jna.version>
+    <jna.version>5.10.0</jna.version>
     <joda.time.version>2.10.10</joda.time.version>
     <json.simple.version>1.1.1</json.simple.version>
     <junit4.version>4.13.2</junit4.version>
-    <junit5.version>5.8.1</junit5.version>
+    <junit5.version>5.8.2</junit5.version>
     <juniversalchardet.version>1.0.3</juniversalchardet.version>
     <junrar.version>7.4.0</junrar.version>
     <libpst.version>0.9.3</libpst.version>
     <log4j2.version>2.14.1</log4j2.version>
     <lombok.version>1.18.20</lombok.version>
-    <lucene.version>8.10.1</lucene.version>
+    <lucene.version>8.11.0</lucene.version>
     <metadata.extractor.version>2.16.0</metadata.extractor.version>
     <microsoft.translator.version>0.6.2</microsoft.translator.version>
-    <!-- 0.8.5 is built with java 11 and does not work with Java 8 -->
+    <!-- 0.8.6 is built with Java 11 and does not work with Java 8 -->
     <mime4j.version>0.8.4</mime4j.version>
-    <mockito.version>3.11.2</mockito.version>
+    <mockito.version>4.1.0</mockito.version>
     <netcdf-java.version>4.5.5</netcdf-java.version>
     <opencsv.version>2.3</opencsv.version>
     <objenesis.version>3.2</objenesis.version>
@@ -360,9 +360,9 @@
     <sis.version>1.1</sis.version>
     <!-- we'll need to stay on 1.7 until we're java modularized ? -->
     <slf4j.version>1.7.32</slf4j.version>
-    <solrj.version>8.10.0</solrj.version>
+    <solrj.version>8.11.0</solrj.version>
     <spring.version>5.3.9</spring.version>
-    <sqlite.version>3.36.0.1</sqlite.version>
+    <sqlite.version>3.36.0.3</sqlite.version>
     <tagsoup.version>1.2.1</tagsoup.version>
     <test.containers.version>1.16.2</test.containers.version>
     <!-- NOTE: sync tukaani version with commons-compress in tika-parent-->
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-mail-module/src/test/java/org/apache/tika/parser/mail/RFC822ParserTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-mail-module/src/test/java/org/apache/tika/parser/mail/RFC822ParserTest.java
index 4b77de3..9f25776 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-mail-module/src/test/java/org/apache/tika/parser/mail/RFC822ParserTest.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-mail-module/src/test/java/org/apache/tika/parser/mail/RFC822ParserTest.java
@@ -22,8 +22,8 @@ import static org.junit.jupiter.api.Assertions.assertFalse;
 import static org.junit.jupiter.api.Assertions.assertNotNull;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 import static org.junit.jupiter.api.Assertions.fail;
-import static org.mockito.Matchers.any;
-import static org.mockito.Matchers.eq;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.ArgumentMatchers.eq;
 import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.never;
 import static org.mockito.Mockito.times;
@@ -385,7 +385,7 @@ public class RFC822ParserTest extends TikaTest {
                 "Sun, 15 May 2016 01:32:00", //no timezone
                 "Sunday, May 15 2016 1:32 AM", "May 15 2016 1:32am", "May 15 2016 1:32 am",
                 "2016-05-15 01:32:00", "      Sun, 15 May 2016 3:32:00 +0200",
-//format correctly handled by mime4j if no leading whitespace
+                //format correctly handled by mime4j if no leading whitespace
                 "      Sun, 14 May 2016 20:32:00 EST",}) {
             testDate(dateString, expected);
         }
@@ -613,4 +613,5 @@ public class RFC822ParserTest extends TikaTest {
         assertEquals(1, metadataList.size());
         assertContains("ssssss", metadataList.get(0).get(TikaCoreProperties.TIKA_CONTENT));
     }
+
 }