You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2020/09/08 17:51:16 UTC
[tika] branch main updated: TIKA-3119 -- general upgrades for 1.25
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new a504d7e TIKA-3119 -- general upgrades for 1.25
a504d7e is described below
commit a504d7e7e8cfccbbda0d31c9362358ea4900c1ec
Author: tallison <ta...@apache.org>
AuthorDate: Tue Sep 8 13:49:09 2020 -0400
TIKA-3119 -- general upgrades for 1.25
---
tika-example/pom.xml | 6 ++
tika-parent/pom.xml | 24 +++++---
.../tika-parser-integration-tests/pom.xml | 6 ++
.../tika/parser/microsoft/XML2003ParserTest.java | 6 +-
.../tika-parser-microsoft-module/pom.xml | 4 +-
.../tika/parser/microsoft/JackcessExtractor.java | 4 +-
.../tika-parser-miscoffice-module/pom.xml | 11 ++++
tika-parser-modules/tika-parser-pkg-module/pom.xml | 4 +-
.../java/org/apache/tika/parser/pkg/RarParser.java | 4 +-
tika-parsers/pom.xml | 72 ++--------------------
10 files changed, 54 insertions(+), 87 deletions(-)
diff --git a/tika-example/pom.xml b/tika-example/pom.xml
index b6d6c2b..682409d 100644
--- a/tika-example/pom.xml
+++ b/tika-example/pom.xml
@@ -66,6 +66,12 @@
<groupId>org.apache.tika</groupId>
<artifactId>tika-eval</artifactId>
<version>${project.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>commons-codec</groupId>
+ <artifactId>commons-codec</artifactId>
+ </exclusion>
+ </exclusions>
</dependency>
<dependency>
<groupId>org.apache.tika</groupId>
diff --git a/tika-parent/pom.xml b/tika-parent/pom.xml
index 02a1be8..01c3697 100644
--- a/tika-parent/pom.xml
+++ b/tika-parent/pom.xml
@@ -276,21 +276,21 @@
<!-- dependency versions -->
<boilerpipe.version>1.1.0</boilerpipe.version>
<!-- used by POI, PDFBox and Jackcess ...try to sync -->
- <bouncycastle.version>1.65</bouncycastle.version>
+ <bouncycastle.version>1.66</bouncycastle.version>
<!-- NOTE: sync brotli version with commons-compress-->
<brotli.version>0.1.2</brotli.version>
<commons.cli.version>1.4</commons.cli.version>
- <commons.codec.version>1.13</commons.codec.version>
+ <commons.codec.version>1.15</commons.codec.version>
<commons.collections4.version>4.4</commons.collections4.version>
<commons.compress.version>1.20</commons.compress.version>
<commons.csv.version>1.8</commons.csv.version>
<commons.exec.version>1.3</commons.exec.version>
<commons.io.version>2.7</commons.io.version>
- <commons.lang3.version>3.10</commons.lang3.version>
+ <commons.lang3.version>3.11</commons.lang3.version>
<commons.logging.version>1.2</commons.logging.version>
<commons.math3.version>3.6.1</commons.math3.version>
<ctakes.version>4.0.0</ctakes.version>
- <cxf.version>3.3.7</cxf.version>
+ <cxf.version>3.4.0</cxf.version>
<ddplist.version>1.23</ddplist.version>
<dl4j.version>1.0.0-beta6</dl4j.version>
<geoapi.version>3.0.1</geoapi.version>
@@ -300,8 +300,10 @@
<httpcomponents.version>4.5.12</httpcomponents.version>
<imageio.version>1.4.0</imageio.version>
<isoparser.version>1.9.41.2</isoparser.version>
- <jackrabbit.version>2.21.1</jackrabbit.version>
- <jackson.version>2.11.0</jackson.version>
+ <jackcess.version>3.5.0</jackcess.version>
+ <jackcess.encrypt.version>3.0.0</jackcess.encrypt.version>
+ <jackrabbit.version>2.21.3</jackrabbit.version>
+ <jackson.version>2.11.2</jackson.version>
<javax.annotation.version>1.3.2</javax.annotation.version>
<javax.jcr.version>2.0</javax.jcr.version>
<javax.rest.version>2.1.1</javax.rest.version>
@@ -313,14 +315,15 @@
<joda.time.version>2.10.5</joda.time.version>
<json.simple.version>1.1.1</json.simple.version>
<juniversalchardet.version>1.0.3</juniversalchardet.version>
+ <junrar.version>7.3.0</junrar.version>
<libpst.version>0.9.3</libpst.version>
<log4j.version>1.2.17</log4j.version>
<lombok.version>1.18.12</lombok.version>
- <lucene.version>8.5.1</lucene.version>
+ <lucene.version>8.6.2</lucene.version>
<metadata.extractor.version>2.14.0</metadata.extractor.version>
<microsoft.translator.version>0.6.2</microsoft.translator.version>
<mime4j.version>0.8.3</mime4j.version>
- <mockito.version>3.3.3</mockito.version>
+ <mockito.version>3.5.10</mockito.version>
<netcdf-java.version>4.5.5</netcdf-java.version>
<opencsv.version>2.3</opencsv.version>
<pdfbox.version>2.0.21</pdfbox.version>
@@ -335,8 +338,8 @@
<rome.version>1.13.1</rome.version>
<sis.version>1.0</sis.version>
<slf4j.version>1.7.28</slf4j.version>
- <spring.version>5.2.7.RELEASE</spring.version>
- <sqlite.version>3.31.1</sqlite.version>
+ <spring.version>5.2.8.RELEASE</spring.version>
+ <sqlite.version>3.32.3.2</sqlite.version>
<tagsoup.version>1.2.1</tagsoup.version>
<!-- NOTE: sync tukaani version with commons-compress in tika-parent-->
<tukaani.version>1.8</tukaani.version>
@@ -345,6 +348,7 @@
<vorbis.version>0.8</vorbis.version>
<xerces.version>2.12.0</xerces.version>
<xmpcore.version>6.1.10</xmpcore.version>
+ <zstd.version>1.4.5-6</zstd.version>
</properties>
<dependencyManagement>
diff --git a/tika-parser-modules/tika-parser-integration-tests/pom.xml b/tika-parser-modules/tika-parser-integration-tests/pom.xml
index 9264b1a..2c64609 100644
--- a/tika-parser-modules/tika-parser-integration-tests/pom.xml
+++ b/tika-parser-modules/tika-parser-integration-tests/pom.xml
@@ -78,6 +78,12 @@
<artifactId>tika-parser-cad-module</artifactId>
<version>${project.version}</version>
<scope>test</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>commons-codec</groupId>
+ <artifactId>commons-codec</artifactId>
+ </exclusion>
+ </exclusions>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
diff --git a/tika-parser-modules/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/microsoft/XML2003ParserTest.java b/tika-parser-modules/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/microsoft/XML2003ParserTest.java
index 6ed98ea..044f832 100644
--- a/tika-parser-modules/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/microsoft/XML2003ParserTest.java
+++ b/tika-parser-modules/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/microsoft/XML2003ParserTest.java
@@ -33,7 +33,7 @@ public class XML2003ParserTest extends TikaTest {
@Test
public void testBasicWord() throws Exception {
List<Metadata> list = getRecursiveMetadata("testWORD2003.xml");
- assertEquals(6, list.size());
+ assertEquals(8, list.size());
Metadata m = list.get(0);//container doc
String xml = m.get(RecursiveParserWrapper.TIKA_CONTENT);
xml = xml.replaceAll("\\s+", " ");
@@ -66,9 +66,9 @@ public class XML2003ParserTest extends TikaTest {
//make sure embedded docs were properly processed
assertContains("moscow-birds",
- Arrays.asList(list.get(5).getValues(TikaCoreProperties.SUBJECT)));
+ Arrays.asList(list.get(7).getValues(TikaCoreProperties.SUBJECT)));
- assertEquals("testJPEG_EXIF.jpg", list.get(5).get(TikaCoreProperties.ORIGINAL_RESOURCE_NAME));
+ assertEquals("testJPEG_EXIF.jpg", list.get(7).get(TikaCoreProperties.ORIGINAL_RESOURCE_NAME));
//check that text is extracted with breaks between elements
String txt = getText(getResourceAsStream("/test-documents/testWORD2003.xml"),AUTO_DETECT_PARSER);
diff --git a/tika-parser-modules/tika-parser-microsoft-module/pom.xml b/tika-parser-modules/tika-parser-microsoft-module/pom.xml
index 3c9e744..79fd1af 100644
--- a/tika-parser-modules/tika-parser-microsoft-module/pom.xml
+++ b/tika-parser-modules/tika-parser-microsoft-module/pom.xml
@@ -119,7 +119,7 @@
<dependency>
<groupId>com.healthmarketscience.jackcess</groupId>
<artifactId>jackcess</artifactId>
- <version>3.0.1</version>
+ <version>${jackcess.version}</version>
<exclusions>
<exclusion>
<groupId>org.apache.commons</groupId>
@@ -134,7 +134,7 @@
<dependency>
<groupId>com.healthmarketscience.jackcess</groupId>
<artifactId>jackcess-encrypt</artifactId>
- <version>3.0.0</version>
+ <version>${jackcess.encrypt.version}</version>
<exclusions>
<exclusion>
<groupId>org.bouncycastle</groupId>
diff --git a/tika-parser-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/JackcessExtractor.java b/tika-parser-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/JackcessExtractor.java
index 1ae4ab8..1443d06 100644
--- a/tika-parser-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/JackcessExtractor.java
+++ b/tika-parser-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/JackcessExtractor.java
@@ -26,6 +26,7 @@ import java.io.InputStream;
import java.math.BigDecimal;
import java.text.DateFormat;
import java.text.NumberFormat;
+import java.time.LocalDateTime;
import java.util.Date;
import java.util.HashSet;
import java.util.Iterator;
@@ -36,6 +37,7 @@ import java.util.Set;
import com.healthmarketscience.jackcess.Column;
import com.healthmarketscience.jackcess.DataType;
import com.healthmarketscience.jackcess.Database;
+import com.healthmarketscience.jackcess.DateTimeType;
import com.healthmarketscience.jackcess.PropertyMap;
import com.healthmarketscience.jackcess.Row;
import com.healthmarketscience.jackcess.Table;
@@ -98,7 +100,7 @@ class JackcessExtractor extends AbstractPOIFSExtractor {
if (pw != null) {
parentMetadata.set(JackcessParser.MDB_PW, pw);
}
-
+ db.setDateTimeType(DateTimeType.DATE);
PropertyMap dbp = db.getDatabaseProperties();
for (PropertyMap.Property p : dbp) {
parentMetadata.add(JackcessParser.MDB_PROPERTY_PREFIX + p.getName(),
diff --git a/tika-parser-modules/tika-parser-miscoffice-module/pom.xml b/tika-parser-modules/tika-parser-miscoffice-module/pom.xml
index 443aeb2..8d79ef7 100644
--- a/tika-parser-modules/tika-parser-miscoffice-module/pom.xml
+++ b/tika-parser-modules/tika-parser-miscoffice-module/pom.xml
@@ -59,6 +59,17 @@
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>${poi.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>commons-codec</groupId>
+ <artifactId>commons-codec</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>commons-codec</groupId>
+ <artifactId>commons-codec</artifactId>
+ <version>${commons.codec.version}</version>
</dependency>
<dependency>
<groupId>commons-io</groupId>
diff --git a/tika-parser-modules/tika-parser-pkg-module/pom.xml b/tika-parser-modules/tika-parser-pkg-module/pom.xml
index dfee3eb..c1e97ca 100644
--- a/tika-parser-modules/tika-parser-pkg-module/pom.xml
+++ b/tika-parser-modules/tika-parser-pkg-module/pom.xml
@@ -44,7 +44,7 @@
<dependency>
<groupId>com.github.luben</groupId>
<artifactId>zstd-jni</artifactId>
- <version>1.4.5-4</version>
+ <version>${zstd.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
@@ -55,7 +55,7 @@
<dependency>
<groupId>com.github.junrar</groupId>
<artifactId>junrar</artifactId>
- <version>4.0.0</version>
+ <version>${junrar.version}</version>
<exclusions>
<!-- TIKA-2504 exclude to avoid vulnerability in plexus-utils -->
<exclusion>
diff --git a/tika-parser-modules/tika-parser-pkg-module/src/main/java/org/apache/tika/parser/pkg/RarParser.java b/tika-parser-modules/tika-parser-pkg-module/src/main/java/org/apache/tika/parser/pkg/RarParser.java
index 4cdcedd..ae68550 100644
--- a/tika-parser-modules/tika-parser-pkg-module/src/main/java/org/apache/tika/parser/pkg/RarParser.java
+++ b/tika-parser-modules/tika-parser-pkg-module/src/main/java/org/apache/tika/parser/pkg/RarParser.java
@@ -23,7 +23,6 @@ import java.util.Set;
import com.github.junrar.Archive;
import com.github.junrar.exception.RarException;
-import com.github.junrar.impl.FileVolumeManager;
import com.github.junrar.rarfile.FileHeader;
import org.apache.tika.exception.EncryptedDocumentException;
import org.apache.tika.exception.TikaException;
@@ -33,7 +32,6 @@ import org.apache.tika.extractor.EmbeddedDocumentUtil;
import org.apache.tika.io.TemporaryResources;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
@@ -72,7 +70,7 @@ public class RarParser extends AbstractParser {
Archive rar = null;
try (TemporaryResources tmp = new TemporaryResources()) {
TikaInputStream tis = TikaInputStream.get(stream, tmp);
- rar = new Archive(new FileVolumeManager(tis.getFile()));
+ rar = new Archive(tis.getFile());
if (rar.isEncrypted()) {
throw new EncryptedDocumentException();
diff --git a/tika-parsers/pom.xml b/tika-parsers/pom.xml
index 70e2fff..8f20259 100644
--- a/tika-parsers/pom.xml
+++ b/tika-parsers/pom.xml
@@ -45,6 +45,12 @@
<groupId>${project.groupId}</groupId>
<artifactId>tika-parser-cad-module</artifactId>
<version>${project.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>commons-codec</groupId>
+ <artifactId>commons-codec</artifactId>
+ </exclusion>
+ </exclusions>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
@@ -147,72 +153,6 @@
<build>
<plugins>
<plugin>
- <artifactId>maven-shade-plugin</artifactId>
- <version>${maven.shade.version}</version>
- <executions>
- <execution>
- <phase>package</phase>
- <goals>
- <goal>shade</goal>
- </goals>
- <configuration>
- <createDependencyReducedPom>
- false
- </createDependencyReducedPom>
- <filters>
- <filter>
- <artifact>*:*</artifact>
- <excludes>
- <exclude>META-INF/*.SF</exclude>
- <exclude>META-INF/*.DSA</exclude>
- <exclude>META-INF/*.RSA</exclude>
- </excludes>
- </filter>
- </filters>
- <transformers>
- <transformer
- implementation="org.apache.maven.plugins.shade.resource.ApacheNoticeResourceTransformer">
- <addHeader>false</addHeader>
- </transformer>
- <transformer
- implementation="org.apache.maven.plugins.shade.resource.ApacheLicenseResourceTransformer"/>
-
- <transformer
- implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
- <transformer
- implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
- <resource>META-INF/LICENSE</resource>
- <file>target/classes/META-INF/LICENSE</file>
- </transformer>
- <!-- <transformer
- implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
- <resource>META-INF/DEPENDENCIES</resource>
- <file>target/classes/META-INF/DEPENDENCIES</file>
- </transformer> -->
- </transformers>
- </configuration>
- </execution>
- </executions>
- </plugin>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-jar-plugin</artifactId>
- <configuration>
- <archive>
- <manifestEntries>
- <Automatic-Module-Name>org.apache.tika.parsers</Automatic-Module-Name>
- </manifestEntries>
- </archive>
- </configuration>
- <executions>
- <execution>
- <goals>
- <goal>test-jar</goal>
- </goals>
- </execution>
- </executions>
- </plugin>
- <plugin>
<groupId>org.apache.rat</groupId>
<artifactId>apache-rat-plugin</artifactId>
<version>${rat.version}</version>