You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2021/12/01 21:11:28 UTC
[tika] branch main updated: TIKA-3585 -- general updates for 2.1.1
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new cd66ecf TIKA-3585 -- general updates for 2.1.1
cd66ecf is described below
commit cd66ecf08dbac6246013262793c88e75c93e50b0
Author: tballison <ta...@apache.org>
AuthorDate: Wed Dec 1 16:11:17 2021 -0500
TIKA-3585 -- general updates for 2.1.1
---
.../src/main/resources/comparison-reports-tags.xml | 6 ++---
.../src/main/resources/comparison-reports.xml | 6 ++---
.../src/main/resources/profile-reports-tags.xml | 6 ++---
.../src/main/resources/profile-reports.xml | 6 ++---
tika-parent/pom.xml | 30 +++++++++++-----------
.../apache/tika/parser/mail/RFC822ParserTest.java | 7 ++---
6 files changed, 31 insertions(+), 30 deletions(-)
diff --git a/tika-eval/tika-eval-app/src/main/resources/comparison-reports-tags.xml b/tika-eval/tika-eval-app/src/main/resources/comparison-reports-tags.xml
index a12e7c7..dd53a30 100644
--- a/tika-eval/tika-eval-app/src/main/resources/comparison-reports-tags.xml
+++ b/tika-eval/tika-eval-app/src/main/resources/comparison-reports-tags.xml
@@ -107,12 +107,12 @@
</sql>
<sql>
update exceptions_compared
- set exc_prcnt_a = cast(exc_cnt_a as decimal)/cast(total as decimal)
+ set exc_prcnt_a = cast(exc_cnt_a as double)/cast(total as double)
where total > 0;
</sql>
<sql>
update exceptions_compared
- set exc_prcnt_b = cast(exc_cnt_b as decimal)/cast(total as decimal)
+ set exc_prcnt_b = cast(exc_cnt_b as double)/cast(total as double)
where total > 0;
</sql>
@@ -705,7 +705,7 @@
</sql>
<sql>
update parse_time_compared ptc set prcnt_increase=(100.0 *
- cast(total_b as decimal)/cast(total_a as decimal))
+ cast(total_b as float)/cast(total_a as float))
where total_a > 0;
</sql>
</before>
diff --git a/tika-eval/tika-eval-app/src/main/resources/comparison-reports.xml b/tika-eval/tika-eval-app/src/main/resources/comparison-reports.xml
index 9a8e29b..8aa3363 100644
--- a/tika-eval/tika-eval-app/src/main/resources/comparison-reports.xml
+++ b/tika-eval/tika-eval-app/src/main/resources/comparison-reports.xml
@@ -107,12 +107,12 @@
</sql>
<sql>
update exceptions_compared
- set exc_prcnt_a = cast(exc_cnt_a as decimal)/cast(total as decimal)
+ set exc_prcnt_a = cast(exc_cnt_a as float)/cast(total as float)
where total > 0;
</sql>
<sql>
update exceptions_compared
- set exc_prcnt_b = cast(exc_cnt_b as decimal)/cast(total as decimal)
+ set exc_prcnt_b = cast(exc_cnt_b as float)/cast(total as float)
where total > 0;
</sql>
@@ -257,7 +257,7 @@
</sql>
<sql>
update parse_time_compared ptc set prcnt_increase=(100.0 *
- cast(total_b as decimal)/cast(total_a as decimal))
+ cast(total_b as double)/cast(total_a as double))
where total_a > 0;
</sql>
</before>
diff --git a/tika-eval/tika-eval-app/src/main/resources/profile-reports-tags.xml b/tika-eval/tika-eval-app/src/main/resources/profile-reports-tags.xml
index 028a7f4..a6b0942 100644
--- a/tika-eval/tika-eval-app/src/main/resources/profile-reports-tags.xml
+++ b/tika-eval/tika-eval-app/src/main/resources/profile-reports-tags.xml
@@ -122,7 +122,7 @@
num_tokens, num_alphabetic_tokens, num_common_tokens,
case
when num_alphabetic_tokens > 0
- then cast(num_common_tokens as decimal)/cast(num_alphabetic_tokens as decimal)
+ then cast(num_common_tokens as double)/cast(num_alphabetic_tokens as double)
else 0
end as common_div_alphabetic
from contents c
@@ -131,7 +131,7 @@
join mimes m on p.mime_id=m.mime_id
where
(num_alphabetic_tokens = 0
- or cast(num_common_tokens as decimal)/cast(num_alphabetic_tokens as decimal) < 0.50
+ or cast(num_common_tokens as double)/cast(num_alphabetic_tokens as double) < 0.50
)
and mime_string not like 'image%'
and mime_string not like 'video%'
@@ -159,7 +159,7 @@
when num_tokens = 0
then 0
else
- cast(num_tokens as decimal)/cast(num_pages as decimal)
+ cast(num_tokens as double)/cast(num_pages as double)
end as num_tokens_div_num_pages
from profiles p
left join contents c on p.id=c.id
diff --git a/tika-eval/tika-eval-app/src/main/resources/profile-reports.xml b/tika-eval/tika-eval-app/src/main/resources/profile-reports.xml
index d31606f..db74481 100644
--- a/tika-eval/tika-eval-app/src/main/resources/profile-reports.xml
+++ b/tika-eval/tika-eval-app/src/main/resources/profile-reports.xml
@@ -122,7 +122,7 @@
num_tokens, num_alphabetic_tokens, num_common_tokens,
case
when num_alphabetic_tokens > 0
- then cast(num_common_tokens as decimal)/cast(num_alphabetic_tokens as decimal)
+ then cast(num_common_tokens as double)/cast(num_alphabetic_tokens as double)
else 0
end as common_div_alphabetic
from contents c
@@ -131,7 +131,7 @@
join mimes m on p.mime_id=m.mime_id
where
(num_alphabetic_tokens = 0
- or cast(num_common_tokens as decimal)/cast(num_alphabetic_tokens as decimal) < 0.50
+ or cast(num_common_tokens as double)/cast(num_alphabetic_tokens as double) < 0.50
)
and mime_string not like 'image%'
and mime_string not like 'video%'
@@ -159,7 +159,7 @@
when num_tokens = 0
then 0
else
- cast(num_tokens as decimal)/cast(num_pages as decimal)
+ cast(num_tokens as double)/cast(num_pages as double)
end as num_tokens_div_num_pages
from profiles p
left join contents c on p.id=c.id
diff --git a/tika-parent/pom.xml b/tika-parent/pom.xml
index 08d98c8..4480fc1 100644
--- a/tika-parent/pom.xml
+++ b/tika-parent/pom.xml
@@ -274,19 +274,19 @@
<maven.scr.version>1.26.4</maven.scr.version>
<maven.surefire.version>3.0.0-M5</maven.surefire.version>
<maven.shade.version>3.2.4</maven.shade.version>
- <puppycrawl.version>8.45.1</puppycrawl.version>
+ <puppycrawl.version>9.2</puppycrawl.version>
<rat.version>0.13</rat.version>
<!-- dependency versions -->
- <aws.s3.version>1.12.43</aws.s3.version>
- <aws.transcribe.version>1.12.43</aws.transcribe.version>
+ <aws.s3.version>1.12.122</aws.s3.version>
+ <aws.transcribe.version>1.12.122</aws.transcribe.version>
<asm.version>9.2</asm.version>
<boilerpipe.version>1.1.0</boilerpipe.version>
<!-- used by POI, PDFBox and Jackcess ...try to sync -->
- <bouncycastle.version>1.69</bouncycastle.version>
+ <bouncycastle.version>1.70</bouncycastle.version>
<!-- NOTE: sync brotli version with commons-compress-->
<brotli.version>0.1.2</brotli.version>
- <commons.cli.version>1.4</commons.cli.version>
+ <commons.cli.version>1.5.0</commons.cli.version>
<commons.codec.version>1.15</commons.codec.version>
<commons.collections4.version>4.4</commons.collections4.version>
<commons.compress.version>1.21</commons.compress.version>
@@ -303,10 +303,10 @@
<!-- fakeload versions > 0.4.0 require java > 8 -->
<fakeload.version>0.4.0</fakeload.version>
<geoapi.version>3.0.1</geoapi.version>
- <google.cloud.version>2.0.1</google.cloud.version>
- <gson.version>2.8.8</gson.version>
+ <google.cloud.version>2.2.1</google.cloud.version>
+ <gson.version>2.8.9</gson.version>
<guava.version>31.0.1-jre</guava.version>
- <h2.version>1.4.200</h2.version>
+ <h2.version>2.0.202</h2.version>
<httpcomponents.version>4.5.13</httpcomponents.version>
<httpcore.version>4.4.14</httpcore.version>
<imageio.version>1.4.0</imageio.version>
@@ -323,22 +323,22 @@
<jempbox.version>1.8.16</jempbox.version>
<jetty.version>9.4.44.v20210927</jetty.version>
<jhighlight.version>1.0.3</jhighlight.version>
- <jna.version>5.9.0</jna.version>
+ <jna.version>5.10.0</jna.version>
<joda.time.version>2.10.10</joda.time.version>
<json.simple.version>1.1.1</json.simple.version>
<junit4.version>4.13.2</junit4.version>
- <junit5.version>5.8.1</junit5.version>
+ <junit5.version>5.8.2</junit5.version>
<juniversalchardet.version>1.0.3</juniversalchardet.version>
<junrar.version>7.4.0</junrar.version>
<libpst.version>0.9.3</libpst.version>
<log4j2.version>2.14.1</log4j2.version>
<lombok.version>1.18.20</lombok.version>
- <lucene.version>8.10.1</lucene.version>
+ <lucene.version>8.11.0</lucene.version>
<metadata.extractor.version>2.16.0</metadata.extractor.version>
<microsoft.translator.version>0.6.2</microsoft.translator.version>
- <!-- 0.8.5 is built with java 11 and does not work with Java 8 -->
+ <!-- 0.8.6 is built with java 11 and does not work with Java 8 -->
<mime4j.version>0.8.4</mime4j.version>
- <mockito.version>3.11.2</mockito.version>
+ <mockito.version>4.1.0</mockito.version>
<netcdf-java.version>4.5.5</netcdf-java.version>
<opencsv.version>2.3</opencsv.version>
<objenesis.version>3.2</objenesis.version>
@@ -360,9 +360,9 @@
<sis.version>1.1</sis.version>
<!-- we'll need to stay on 1.7 until we're java modularized ? -->
<slf4j.version>1.7.32</slf4j.version>
- <solrj.version>8.10.0</solrj.version>
+ <solrj.version>8.11.0</solrj.version>
<spring.version>5.3.9</spring.version>
- <sqlite.version>3.36.0.1</sqlite.version>
+ <sqlite.version>3.36.0.3</sqlite.version>
<tagsoup.version>1.2.1</tagsoup.version>
<test.containers.version>1.16.2</test.containers.version>
<!-- NOTE: sync tukaani version with commons-compress in tika-parent-->
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-mail-module/src/test/java/org/apache/tika/parser/mail/RFC822ParserTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-mail-module/src/test/java/org/apache/tika/parser/mail/RFC822ParserTest.java
index 4b77de3..9f25776 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-mail-module/src/test/java/org/apache/tika/parser/mail/RFC822ParserTest.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-mail-module/src/test/java/org/apache/tika/parser/mail/RFC822ParserTest.java
@@ -22,8 +22,8 @@ import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.fail;
-import static org.mockito.Matchers.any;
-import static org.mockito.Matchers.eq;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.ArgumentMatchers.eq;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.never;
import static org.mockito.Mockito.times;
@@ -385,7 +385,7 @@ public class RFC822ParserTest extends TikaTest {
"Sun, 15 May 2016 01:32:00", //no timezone
"Sunday, May 15 2016 1:32 AM", "May 15 2016 1:32am", "May 15 2016 1:32 am",
"2016-05-15 01:32:00", " Sun, 15 May 2016 3:32:00 +0200",
-//format correctly handled by mime4j if no leading whitespace
+ //format correctly handled by mime4j if no leading whitespace
" Sun, 14 May 2016 20:32:00 EST",}) {
testDate(dateString, expected);
}
@@ -613,4 +613,5 @@ public class RFC822ParserTest extends TikaTest {
assertEquals(1, metadataList.size());
assertContains("ssssss", metadataList.get(0).get(TikaCoreProperties.TIKA_CONTENT));
}
+
}