You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2020/09/08 17:51:16 UTC

[tika] branch main updated: TIKA-3119 -- general upgrades for 1.25

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new a504d7e  TIKA-3119 -- general upgrades for 1.25
a504d7e is described below

commit a504d7e7e8cfccbbda0d31c9362358ea4900c1ec
Author: tallison <ta...@apache.org>
AuthorDate: Tue Sep 8 13:49:09 2020 -0400

    TIKA-3119 -- general upgrades for 1.25
---
 tika-example/pom.xml                               |  6 ++
 tika-parent/pom.xml                                | 24 +++++---
 .../tika-parser-integration-tests/pom.xml          |  6 ++
 .../tika/parser/microsoft/XML2003ParserTest.java   |  6 +-
 .../tika-parser-microsoft-module/pom.xml           |  4 +-
 .../tika/parser/microsoft/JackcessExtractor.java   |  4 +-
 .../tika-parser-miscoffice-module/pom.xml          | 11 ++++
 tika-parser-modules/tika-parser-pkg-module/pom.xml |  4 +-
 .../java/org/apache/tika/parser/pkg/RarParser.java |  4 +-
 tika-parsers/pom.xml                               | 72 ++--------------------
 10 files changed, 54 insertions(+), 87 deletions(-)

diff --git a/tika-example/pom.xml b/tika-example/pom.xml
index b6d6c2b..682409d 100644
--- a/tika-example/pom.xml
+++ b/tika-example/pom.xml
@@ -66,6 +66,12 @@
       <groupId>org.apache.tika</groupId>
       <artifactId>tika-eval</artifactId>
       <version>${project.version}</version>
+      <exclusions>
+        <exclusion>
+          <groupId>commons-codec</groupId>
+          <artifactId>commons-codec</artifactId>
+        </exclusion>
+      </exclusions>
     </dependency>
     <dependency>
       <groupId>org.apache.tika</groupId>
diff --git a/tika-parent/pom.xml b/tika-parent/pom.xml
index 02a1be8..01c3697 100644
--- a/tika-parent/pom.xml
+++ b/tika-parent/pom.xml
@@ -276,21 +276,21 @@
     <!-- dependency versions -->
     <boilerpipe.version>1.1.0</boilerpipe.version>
     <!-- used by POI, PDFBox and Jackcess ...try to sync -->
-    <bouncycastle.version>1.65</bouncycastle.version>
+    <bouncycastle.version>1.66</bouncycastle.version>
     <!-- NOTE: sync brotli version with commons-compress-->
     <brotli.version>0.1.2</brotli.version>
     <commons.cli.version>1.4</commons.cli.version>
-    <commons.codec.version>1.13</commons.codec.version>
+    <commons.codec.version>1.15</commons.codec.version>
     <commons.collections4.version>4.4</commons.collections4.version>
     <commons.compress.version>1.20</commons.compress.version>
     <commons.csv.version>1.8</commons.csv.version>
     <commons.exec.version>1.3</commons.exec.version>
     <commons.io.version>2.7</commons.io.version>
-    <commons.lang3.version>3.10</commons.lang3.version>
+    <commons.lang3.version>3.11</commons.lang3.version>
     <commons.logging.version>1.2</commons.logging.version>
     <commons.math3.version>3.6.1</commons.math3.version>
     <ctakes.version>4.0.0</ctakes.version>
-    <cxf.version>3.3.7</cxf.version>
+    <cxf.version>3.4.0</cxf.version>
     <ddplist.version>1.23</ddplist.version>
     <dl4j.version>1.0.0-beta6</dl4j.version>
     <geoapi.version>3.0.1</geoapi.version>
@@ -300,8 +300,10 @@
     <httpcomponents.version>4.5.12</httpcomponents.version>
     <imageio.version>1.4.0</imageio.version>
     <isoparser.version>1.9.41.2</isoparser.version>
-    <jackrabbit.version>2.21.1</jackrabbit.version>
-    <jackson.version>2.11.0</jackson.version>
+    <jackcess.version>3.5.0</jackcess.version>
+    <jackcess.encrypt.version>3.0.0</jackcess.encrypt.version>
+    <jackrabbit.version>2.21.3</jackrabbit.version>
+    <jackson.version>2.11.2</jackson.version>
     <javax.annotation.version>1.3.2</javax.annotation.version>
     <javax.jcr.version>2.0</javax.jcr.version>
     <javax.rest.version>2.1.1</javax.rest.version>
@@ -313,14 +315,15 @@
     <joda.time.version>2.10.5</joda.time.version>
     <json.simple.version>1.1.1</json.simple.version>
     <juniversalchardet.version>1.0.3</juniversalchardet.version>
+    <junrar.version>7.3.0</junrar.version>
     <libpst.version>0.9.3</libpst.version>
     <log4j.version>1.2.17</log4j.version>
     <lombok.version>1.18.12</lombok.version>
-    <lucene.version>8.5.1</lucene.version>
+    <lucene.version>8.6.2</lucene.version>
     <metadata.extractor.version>2.14.0</metadata.extractor.version>
     <microsoft.translator.version>0.6.2</microsoft.translator.version>
     <mime4j.version>0.8.3</mime4j.version>
-    <mockito.version>3.3.3</mockito.version>
+    <mockito.version>3.5.10</mockito.version>
     <netcdf-java.version>4.5.5</netcdf-java.version>
     <opencsv.version>2.3</opencsv.version>
     <pdfbox.version>2.0.21</pdfbox.version>
@@ -335,8 +338,8 @@
     <rome.version>1.13.1</rome.version>
     <sis.version>1.0</sis.version>
     <slf4j.version>1.7.28</slf4j.version>
-    <spring.version>5.2.7.RELEASE</spring.version>
-    <sqlite.version>3.31.1</sqlite.version>
+    <spring.version>5.2.8.RELEASE</spring.version>
+    <sqlite.version>3.32.3.2</sqlite.version>
     <tagsoup.version>1.2.1</tagsoup.version>
     <!-- NOTE: sync tukaani version with commons-compress in tika-parent-->
     <tukaani.version>1.8</tukaani.version>
@@ -345,6 +348,7 @@
     <vorbis.version>0.8</vorbis.version>
     <xerces.version>2.12.0</xerces.version>
     <xmpcore.version>6.1.10</xmpcore.version>
+    <zstd.version>1.4.5-6</zstd.version>
   </properties>
 
   <dependencyManagement>
diff --git a/tika-parser-modules/tika-parser-integration-tests/pom.xml b/tika-parser-modules/tika-parser-integration-tests/pom.xml
index 9264b1a..2c64609 100644
--- a/tika-parser-modules/tika-parser-integration-tests/pom.xml
+++ b/tika-parser-modules/tika-parser-integration-tests/pom.xml
@@ -78,6 +78,12 @@
             <artifactId>tika-parser-cad-module</artifactId>
             <version>${project.version}</version>
             <scope>test</scope>
+            <exclusions>
+                <exclusion>
+                    <groupId>commons-codec</groupId>
+                    <artifactId>commons-codec</artifactId>
+                </exclusion>
+            </exclusions>
         </dependency>
         <dependency>
             <groupId>${project.groupId}</groupId>
diff --git a/tika-parser-modules/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/microsoft/XML2003ParserTest.java b/tika-parser-modules/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/microsoft/XML2003ParserTest.java
index 6ed98ea..044f832 100644
--- a/tika-parser-modules/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/microsoft/XML2003ParserTest.java
+++ b/tika-parser-modules/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/microsoft/XML2003ParserTest.java
@@ -33,7 +33,7 @@ public class XML2003ParserTest extends TikaTest {
     @Test
     public void testBasicWord() throws Exception {
         List<Metadata> list =  getRecursiveMetadata("testWORD2003.xml");
-        assertEquals(6, list.size());
+        assertEquals(8, list.size());
         Metadata m = list.get(0);//container doc
         String xml = m.get(RecursiveParserWrapper.TIKA_CONTENT);
         xml = xml.replaceAll("\\s+", " ");
@@ -66,9 +66,9 @@ public class XML2003ParserTest extends TikaTest {
 
         //make sure embedded docs were properly processed
         assertContains("moscow-birds",
-                Arrays.asList(list.get(5).getValues(TikaCoreProperties.SUBJECT)));
+                Arrays.asList(list.get(7).getValues(TikaCoreProperties.SUBJECT)));
 
-        assertEquals("testJPEG_EXIF.jpg", list.get(5).get(TikaCoreProperties.ORIGINAL_RESOURCE_NAME));
+        assertEquals("testJPEG_EXIF.jpg", list.get(7).get(TikaCoreProperties.ORIGINAL_RESOURCE_NAME));
 
         //check that text is extracted with breaks between elements
         String txt = getText(getResourceAsStream("/test-documents/testWORD2003.xml"),AUTO_DETECT_PARSER);
diff --git a/tika-parser-modules/tika-parser-microsoft-module/pom.xml b/tika-parser-modules/tika-parser-microsoft-module/pom.xml
index 3c9e744..79fd1af 100644
--- a/tika-parser-modules/tika-parser-microsoft-module/pom.xml
+++ b/tika-parser-modules/tika-parser-microsoft-module/pom.xml
@@ -119,7 +119,7 @@
         <dependency>
             <groupId>com.healthmarketscience.jackcess</groupId>
             <artifactId>jackcess</artifactId>
-            <version>3.0.1</version>
+            <version>${jackcess.version}</version>
             <exclusions>
                 <exclusion>
                     <groupId>org.apache.commons</groupId>
@@ -134,7 +134,7 @@
         <dependency>
             <groupId>com.healthmarketscience.jackcess</groupId>
             <artifactId>jackcess-encrypt</artifactId>
-            <version>3.0.0</version>
+            <version>${jackcess.encrypt.version}</version>
             <exclusions>
                 <exclusion>
                     <groupId>org.bouncycastle</groupId>
diff --git a/tika-parser-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/JackcessExtractor.java b/tika-parser-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/JackcessExtractor.java
index 1ae4ab8..1443d06 100644
--- a/tika-parser-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/JackcessExtractor.java
+++ b/tika-parser-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/JackcessExtractor.java
@@ -26,6 +26,7 @@ import java.io.InputStream;
 import java.math.BigDecimal;
 import java.text.DateFormat;
 import java.text.NumberFormat;
+import java.time.LocalDateTime;
 import java.util.Date;
 import java.util.HashSet;
 import java.util.Iterator;
@@ -36,6 +37,7 @@ import java.util.Set;
 import com.healthmarketscience.jackcess.Column;
 import com.healthmarketscience.jackcess.DataType;
 import com.healthmarketscience.jackcess.Database;
+import com.healthmarketscience.jackcess.DateTimeType;
 import com.healthmarketscience.jackcess.PropertyMap;
 import com.healthmarketscience.jackcess.Row;
 import com.healthmarketscience.jackcess.Table;
@@ -98,7 +100,7 @@ class JackcessExtractor extends AbstractPOIFSExtractor {
         if (pw != null) {
             parentMetadata.set(JackcessParser.MDB_PW, pw);
         }
-
+        db.setDateTimeType(DateTimeType.DATE);
         PropertyMap dbp = db.getDatabaseProperties();
         for (PropertyMap.Property p : dbp) {
             parentMetadata.add(JackcessParser.MDB_PROPERTY_PREFIX + p.getName(),
diff --git a/tika-parser-modules/tika-parser-miscoffice-module/pom.xml b/tika-parser-modules/tika-parser-miscoffice-module/pom.xml
index 443aeb2..8d79ef7 100644
--- a/tika-parser-modules/tika-parser-miscoffice-module/pom.xml
+++ b/tika-parser-modules/tika-parser-miscoffice-module/pom.xml
@@ -59,6 +59,17 @@
             <groupId>org.apache.poi</groupId>
             <artifactId>poi</artifactId>
             <version>${poi.version}</version>
+            <exclusions>
+                <exclusion>
+                    <groupId>commons-codec</groupId>
+                    <artifactId>commons-codec</artifactId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+        <dependency>
+            <groupId>commons-codec</groupId>
+            <artifactId>commons-codec</artifactId>
+            <version>${commons.codec.version}</version>
         </dependency>
         <dependency>
             <groupId>commons-io</groupId>
diff --git a/tika-parser-modules/tika-parser-pkg-module/pom.xml b/tika-parser-modules/tika-parser-pkg-module/pom.xml
index dfee3eb..c1e97ca 100644
--- a/tika-parser-modules/tika-parser-pkg-module/pom.xml
+++ b/tika-parser-modules/tika-parser-pkg-module/pom.xml
@@ -44,7 +44,7 @@
         <dependency>
             <groupId>com.github.luben</groupId>
             <artifactId>zstd-jni</artifactId>
-            <version>1.4.5-4</version>
+            <version>${zstd.version}</version>
             <scope>provided</scope>
         </dependency>
         <dependency>
@@ -55,7 +55,7 @@
         <dependency>
             <groupId>com.github.junrar</groupId>
             <artifactId>junrar</artifactId>
-            <version>4.0.0</version>
+            <version>${junrar.version}</version>
             <exclusions>
                 <!-- TIKA-2504 exclude to avoid vulnerability in plexus-utils -->
                 <exclusion>
diff --git a/tika-parser-modules/tika-parser-pkg-module/src/main/java/org/apache/tika/parser/pkg/RarParser.java b/tika-parser-modules/tika-parser-pkg-module/src/main/java/org/apache/tika/parser/pkg/RarParser.java
index 4cdcedd..ae68550 100644
--- a/tika-parser-modules/tika-parser-pkg-module/src/main/java/org/apache/tika/parser/pkg/RarParser.java
+++ b/tika-parser-modules/tika-parser-pkg-module/src/main/java/org/apache/tika/parser/pkg/RarParser.java
@@ -23,7 +23,6 @@ import java.util.Set;
 
 import com.github.junrar.Archive;
 import com.github.junrar.exception.RarException;
-import com.github.junrar.impl.FileVolumeManager;
 import com.github.junrar.rarfile.FileHeader;
 import org.apache.tika.exception.EncryptedDocumentException;
 import org.apache.tika.exception.TikaException;
@@ -33,7 +32,6 @@ import org.apache.tika.extractor.EmbeddedDocumentUtil;
 import org.apache.tika.io.TemporaryResources;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.parser.AbstractParser;
 import org.apache.tika.parser.ParseContext;
@@ -72,7 +70,7 @@ public class RarParser extends AbstractParser {
         Archive rar = null;
         try (TemporaryResources tmp = new TemporaryResources()) {
             TikaInputStream tis = TikaInputStream.get(stream, tmp);
-            rar = new Archive(new FileVolumeManager(tis.getFile()));
+            rar = new Archive(tis.getFile());
 
             if (rar.isEncrypted()) {
                 throw new EncryptedDocumentException();
diff --git a/tika-parsers/pom.xml b/tika-parsers/pom.xml
index 70e2fff..8f20259 100644
--- a/tika-parsers/pom.xml
+++ b/tika-parsers/pom.xml
@@ -45,6 +45,12 @@
             <groupId>${project.groupId}</groupId>
             <artifactId>tika-parser-cad-module</artifactId>
             <version>${project.version}</version>
+            <exclusions>
+                <exclusion>
+                    <groupId>commons-codec</groupId>
+                    <artifactId>commons-codec</artifactId>
+                </exclusion>
+            </exclusions>
         </dependency>
         <dependency>
             <groupId>${project.groupId}</groupId>
@@ -147,72 +153,6 @@
     <build>
         <plugins>
             <plugin>
-                <artifactId>maven-shade-plugin</artifactId>
-                <version>${maven.shade.version}</version>
-                <executions>
-                    <execution>
-                        <phase>package</phase>
-                        <goals>
-                            <goal>shade</goal>
-                        </goals>
-                        <configuration>
-                            <createDependencyReducedPom>
-                                false
-                            </createDependencyReducedPom>
-                            <filters>
-                                <filter>
-                                    <artifact>*:*</artifact>
-                                    <excludes>
-                                        <exclude>META-INF/*.SF</exclude>
-                                        <exclude>META-INF/*.DSA</exclude>
-                                        <exclude>META-INF/*.RSA</exclude>
-                                    </excludes>
-                                </filter>
-                            </filters>
-                            <transformers>
-                                <transformer
-                                        implementation="org.apache.maven.plugins.shade.resource.ApacheNoticeResourceTransformer">
-                                    <addHeader>false</addHeader>
-                                </transformer>
-                                <transformer
-                                        implementation="org.apache.maven.plugins.shade.resource.ApacheLicenseResourceTransformer"/>
-
-                                <transformer
-                                        implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
-                                <transformer
-                                        implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
-                                    <resource>META-INF/LICENSE</resource>
-                                    <file>target/classes/META-INF/LICENSE</file>
-                                </transformer>
-                                <!--                                <transformer
-                                                                        implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
-                                                                    <resource>META-INF/DEPENDENCIES</resource>
-                                                                    <file>target/classes/META-INF/DEPENDENCIES</file>
-                                                                </transformer> -->
-                            </transformers>
-                        </configuration>
-                    </execution>
-                </executions>
-            </plugin>
-            <plugin>
-                <groupId>org.apache.maven.plugins</groupId>
-                <artifactId>maven-jar-plugin</artifactId>
-                <configuration>
-                    <archive>
-                        <manifestEntries>
-                            <Automatic-Module-Name>org.apache.tika.parsers</Automatic-Module-Name>
-                        </manifestEntries>
-                    </archive>
-                </configuration>
-                <executions>
-                    <execution>
-                        <goals>
-                            <goal>test-jar</goal>
-                        </goals>
-                    </execution>
-                </executions>
-            </plugin>
-            <plugin>
                 <groupId>org.apache.rat</groupId>
                 <artifactId>apache-rat-plugin</artifactId>
                 <version>${rat.version}</version>