Posted to commits@tika.apache.org by ta...@apache.org on 2022/11/04 20:06:50 UTC

[tika] branch main updated (bdd66150b -> ec693f417)

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a change to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


    from bdd66150b Merge pull request #785 from apache/dependabot/maven/aws.version-1.12.335
     new 0bffbb44c TIKA-3918 -- special handling when spoolToDisk == 0
     new ec693f417 TIKA-3916 -- move nearly all dependency mgmt into parent pom

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 tika-bundles/pom.xml                               |   9 --
 .../org/apache/tika/parser/AutoDetectParser.java   |  44 +++---
 tika-example/pom.xml                               |  10 --
 tika-parent/pom.xml                                | 165 ++++++++++++++++++---
 .../tika-parsers-ml/tika-age-recogniser/pom.xml    |   2 +
 tika-parsers/tika-parsers-ml/tika-dl/pom.xml       |  34 -----
 .../tika-parsers-ml/tika-parser-nlp-module/pom.xml |  19 ---
 tika-pipes/pom.xml                                 | 106 -------------
 .../tika/pipes/emitter/jdbc/JDBCEmitter.java       |  97 +++++++++++-
 .../tika/pipes/emitter/jdbc/JDBCEmitterTest.java   |  14 ++
 .../resources/configs/tika-config-jdbc-emitter.xml |   9 +-
 11 files changed, 291 insertions(+), 218 deletions(-)


[tika] 01/02: TIKA-3918 -- special handling when spoolToDisk == 0

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 0bffbb44c34b6ccbbd2eda8b0e813e51750cf857
Author: tballison <ta...@apache.org>
AuthorDate: Fri Nov 4 15:44:03 2022 -0400

    TIKA-3918 -- special handling when spoolToDisk == 0
---
 .../org/apache/tika/parser/AutoDetectParser.java   | 44 +++++++++++++---------
 1 file changed, 27 insertions(+), 17 deletions(-)

diff --git a/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java b/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java
index 4d870d771..b8a0cb8aa 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java
@@ -30,6 +30,7 @@ import org.apache.tika.exception.ZeroByteFileException;
 import org.apache.tika.extractor.EmbeddedDocumentExtractor;
 import org.apache.tika.io.TemporaryResources;
 import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.HttpHeaders;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.mime.MediaType;
@@ -137,7 +138,8 @@ public class AutoDetectParser extends CompositeParser {
     public void parse(InputStream stream, ContentHandler handler, Metadata metadata,
                       ParseContext context) throws IOException, SAXException, TikaException {
         if (autoDetectParserConfig.getMetadataWriteFilterFactory() != null) {
-            metadata.setMetadataWriteFilter(autoDetectParserConfig.getMetadataWriteFilterFactory().newInstance());
+            metadata.setMetadataWriteFilter(
+                    autoDetectParserConfig.getMetadataWriteFilterFactory().newInstance());
         }
         TemporaryResources tmp = new TemporaryResources();
         try {
@@ -164,9 +166,8 @@ public class AutoDetectParser extends CompositeParser {
             }
             handler = decorateHandler(handler, metadata, context, autoDetectParserConfig);
             // TIKA-216: Zip bomb prevention
-            SecureContentHandler sch =
-                    handler != null ?
-                        createSecureContentHandler(handler, tis, autoDetectParserConfig) : null;
+            SecureContentHandler sch = handler != null ?
+                    createSecureContentHandler(handler, tis, autoDetectParserConfig) : null;
 
             initializeEmbeddedDocumentExtractor(metadata, context);
 
@@ -183,13 +184,12 @@ public class AutoDetectParser extends CompositeParser {
         }
     }
 
-    private ContentHandler decorateHandler(ContentHandler handler,
-                                           Metadata metadata, ParseContext context,
+    private ContentHandler decorateHandler(ContentHandler handler, Metadata metadata,
+                                           ParseContext context,
                                            AutoDetectParserConfig autoDetectParserConfig) {
         if (context.get(RecursiveParserWrapper.RecursivelySecureContentHandler.class) != null) {
             //using the recursiveparserwrapper. we should decorate this handler
-            return autoDetectParserConfig
-                    .getContentHandlerDecoratorFactory()
+            return autoDetectParserConfig.getContentHandlerDecoratorFactory()
                     .decorate(handler, metadata, context);
         }
         ParseRecord parseRecord = context.get(ParseRecord.class);
@@ -203,16 +203,26 @@ public class AutoDetectParser extends CompositeParser {
 
     private void maybeSpool(TikaInputStream tis, AutoDetectParserConfig autoDetectParserConfig,
                             Metadata metadata) throws IOException {
-        if (! tis.hasFile() && //if there's already a file, stop now
-                autoDetectParserConfig.getSpoolToDisk() != null && //if this is not
-                // configured, stop now
-                autoDetectParserConfig.getSpoolToDisk() > -1 &&
-                metadata.get(Metadata.CONTENT_LENGTH) != null) {
+        if (tis.hasFile()) {
+            return;
+        }
+        if (autoDetectParserConfig.getSpoolToDisk() == null) {
+            return;
+        }
+        //whether or not a content-length has been sent in,
+        //if spoolToDisk == 0, spool it
+        if (autoDetectParserConfig.getSpoolToDisk() == 0) {
+            tis.getPath();
+            metadata.set(HttpHeaders.CONTENT_LENGTH, Long.toString(tis.getLength()));
+            return;
+        }
+        if (metadata.get(Metadata.CONTENT_LENGTH) != null) {
             long len = -1l;
             try {
                 len = Long.parseLong(metadata.get(Metadata.CONTENT_LENGTH));
                 if (len > autoDetectParserConfig.getSpoolToDisk()) {
                     tis.getPath();
+                    metadata.set(HttpHeaders.CONTENT_LENGTH, Long.toString(tis.getLength()));
                 }
             } catch (NumberFormatException e) {
                 //swallow...maybe log?
@@ -230,9 +240,8 @@ public class AutoDetectParser extends CompositeParser {
         if (p == null) {
             context.set(Parser.class, this);
         }
-        EmbeddedDocumentExtractor edx =
-                autoDetectParserConfig.getEmbeddedDocumentExtractorFactory()
-                        .newInstance(metadata, context);
+        EmbeddedDocumentExtractor edx = autoDetectParserConfig.getEmbeddedDocumentExtractorFactory()
+                .newInstance(metadata, context);
         context.set(EmbeddedDocumentExtractor.class, edx);
     }
 
@@ -243,7 +252,8 @@ public class AutoDetectParser extends CompositeParser {
         parse(stream, handler, metadata, context);
     }
 
-    private SecureContentHandler createSecureContentHandler(ContentHandler handler, TikaInputStream tis,
+    private SecureContentHandler createSecureContentHandler(ContentHandler handler,
+                                                            TikaInputStream tis,
                                                             AutoDetectParserConfig config) {
         SecureContentHandler sch = new SecureContentHandler(handler, tis);
         if (config == null) {


[tika] 02/02: TIKA-3916 -- move nearly all dependency mgmt into parent pom

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git

commit ec693f417542e868968069a847f924ef5f7f9467
Author: tballison <ta...@apache.org>
AuthorDate: Fri Nov 4 16:06:38 2022 -0400

    TIKA-3916 -- move nearly all dependency mgmt into parent pom
---
 tika-bundles/pom.xml                               |   9 --
 tika-example/pom.xml                               |  10 --
 tika-parent/pom.xml                                | 165 ++++++++++++++++++---
 .../tika-parsers-ml/tika-age-recogniser/pom.xml    |   2 +
 tika-parsers/tika-parsers-ml/tika-dl/pom.xml       |  34 -----
 .../tika-parsers-ml/tika-parser-nlp-module/pom.xml |  19 ---
 tika-pipes/pom.xml                                 | 106 -------------
 .../tika/pipes/emitter/jdbc/JDBCEmitter.java       |  97 +++++++++++-
 .../tika/pipes/emitter/jdbc/JDBCEmitterTest.java   |  14 ++
 .../resources/configs/tika-config-jdbc-emitter.xml |   9 +-
 10 files changed, 264 insertions(+), 201 deletions(-)

diff --git a/tika-bundles/pom.xml b/tika-bundles/pom.xml
index 69dfefd6e..45d19f993 100644
--- a/tika-bundles/pom.xml
+++ b/tika-bundles/pom.xml
@@ -35,15 +35,6 @@
   <modules>
     <module>tika-bundle-standard</module>
   </modules>
-  <dependencyManagement>
-    <dependencies>
-      <dependency>
-        <groupId>org.osgi</groupId>
-        <artifactId>org.osgi.compendium</artifactId>
-        <version>${osgi.compendium.version}</version>
-      </dependency>
-    </dependencies>
-  </dependencyManagement>
 
   <scm>
     <tag>2.2.1-rc2</tag>
diff --git a/tika-example/pom.xml b/tika-example/pom.xml
index cc1f66fe2..b140b8404 100644
--- a/tika-example/pom.xml
+++ b/tika-example/pom.xml
@@ -33,16 +33,6 @@
   <name>Apache Tika examples</name>
   <url>https://tika.apache.org/</url>
 
-  <dependencyManagement>
-    <dependencies>
-      <dependency>
-        <groupId>org.osgi</groupId>
-        <artifactId>org.osgi.compendium</artifactId>
-        <version>${osgi.compendium.version}</version>
-      </dependency>
-    </dependencies>
-  </dependencyManagement>
-
   <!-- List of dependencies that we depend on for the examples. See the full list of Tika
        modules and how to use them at http://mvnrepository.com/artifact/org.apache.tika.-->
   <dependencies>
diff --git a/tika-parent/pom.xml b/tika-parent/pom.xml
index c4582e8a8..d49c3639a 100644
--- a/tika-parent/pom.xml
+++ b/tika-parent/pom.xml
@@ -404,17 +404,42 @@
         <artifactId>biz.aQute.bndlib</artifactId>
         <version>6.3.1</version>
       </dependency>
-      <!-- for bndlib -->
       <dependency>
-        <groupId>org.osgi</groupId>
-        <artifactId>org.osgi.util.function</artifactId>
-        <version>1.2.0</version>
+        <groupId>com.amazonaws</groupId>
+        <artifactId>aws-java-sdk-s3</artifactId>
+        <version>${aws.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>com.azure</groupId>
+        <artifactId>azure-storage-blob</artifactId>
+        <version>12.20.0</version>
+      </dependency>
+      <dependency>
+        <groupId>com.azure</groupId>
+        <artifactId>azure-core</artifactId>
+        <version>1.33.0</version>
+      </dependency>
+      <dependency>
+        <groupId>com.azure</groupId>
+        <artifactId>azure-core-http-netty</artifactId>
+        <version>1.12.6</version>
+        <exclusions>
+          <exclusion>
+            <groupId>io.netty</groupId>
+            <artifactId>netty-transport-native-epoll</artifactId>
+          </exclusion>
+        </exclusions>
       </dependency>
       <dependency>
         <groupId>com.drewnoakes</groupId>
         <artifactId>metadata-extractor</artifactId>
         <version>${metadata.extractor.version}</version>
       </dependency>
+      <dependency>
+        <groupId>com.google.cloud</groupId>
+        <artifactId>google-cloud-storage</artifactId>
+        <version>${google.cloud.version}</version>
+      </dependency>
       <dependency>
         <groupId>com.mchange</groupId>
         <artifactId>c3p0</artifactId>
@@ -512,6 +537,31 @@
         <artifactId>icu4j</artifactId>
         <version>${icu4j.version}</version>
       </dependency>
+      <dependency>
+        <groupId>com.twelvemonkeys.common</groupId>
+        <artifactId>common-io</artifactId>
+        <version>${twelvemonkeys.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>com.twelvemonkeys.imageio</groupId>
+        <artifactId>imageio-bmp</artifactId>
+        <version>${twelvemonkeys.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>com.twelvemonkeys.imageio</groupId>
+        <artifactId>imageio-jpeg</artifactId>
+        <version>${twelvemonkeys.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>com.twelvemonkeys.imageio</groupId>
+        <artifactId>imageio-psd</artifactId>
+        <version>${twelvemonkeys.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>com.twelvemonkeys.imageio</groupId>
+        <artifactId>imageio-tiff</artifactId>
+        <version>${twelvemonkeys.version}</version>
+      </dependency>
       <dependency>
         <groupId>commons-cli</groupId>
         <artifactId>commons-cli</artifactId>
@@ -522,16 +572,6 @@
         <artifactId>commons-codec</artifactId>
         <version>${commons.codec.version}</version>
       </dependency>
-      <dependency>
-        <groupId>org.apache.commons</groupId>
-        <artifactId>commons-collections4</artifactId>
-        <version>${commons.collections4.version}</version>
-      </dependency>
-      <dependency>
-        <groupId>org.apache.commons</groupId>
-        <artifactId>commons-csv</artifactId>
-        <version>${commons.csv.version}</version>
-      </dependency>
       <dependency>
         <groupId>commons-io</groupId>
         <artifactId>commons-io</artifactId>
@@ -543,9 +583,24 @@
         <version>${commons.logging.version}</version>
       </dependency>
       <dependency>
-        <groupId>org.apache.commons</groupId>
-        <artifactId>commons-math3</artifactId>
-        <version>${commons.math3.version}</version>
+        <groupId>io.netty</groupId>
+        <artifactId>netty-buffer</artifactId>
+        <version>${netty.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>io.netty</groupId>
+        <artifactId>netty-codec</artifactId>
+        <version>${netty.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>io.netty</groupId>
+        <artifactId>netty-codec-http</artifactId>
+        <version>${netty.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>io.netty</groupId>
+        <artifactId>netty-codec-http2</artifactId>
+        <version>${netty.version}</version>
       </dependency>
       <dependency>
         <groupId>io.netty</groupId>
@@ -562,6 +617,36 @@
         <artifactId>netty-transport-native-unix-common</artifactId>
         <version>${netty.version}</version>
       </dependency>
+      <dependency>
+        <groupId>io.netty</groupId>
+        <artifactId>netty-handler-proxy</artifactId>
+        <version>${netty.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>io.netty</groupId>
+        <artifactId>netty-resolver</artifactId>
+        <version>${netty.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>io.netty</groupId>
+        <artifactId>netty-transport</artifactId>
+        <version>${netty.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>io.netty</groupId>
+        <artifactId>netty-transport-native-unix-common</artifactId>
+        <version>${netty.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>io.netty</groupId>
+        <artifactId>netty-transport-native-epoll</artifactId>
+        <version>${netty.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>io.projectreactor</groupId>
+        <artifactId>reactor-core</artifactId>
+        <version>3.4.24</version>
+      </dependency>
       <dependency>
         <groupId>io.projectreactor.netty</groupId>
         <artifactId>reactor-netty-core</artifactId>
@@ -602,11 +687,21 @@
         <artifactId>jna</artifactId>
         <version>${jna.version}</version>
       </dependency>
+      <dependency>
+        <groupId>org.apache.commons</groupId>
+        <artifactId>commons-collections4</artifactId>
+        <version>${commons.collections4.version}</version>
+      </dependency>
       <dependency>
         <groupId>org.apache.commons</groupId>
         <artifactId>commons-compress</artifactId>
         <version>${commons.compress.version}</version>
       </dependency>
+      <dependency>
+        <groupId>org.apache.commons</groupId>
+        <artifactId>commons-csv</artifactId>
+        <version>${commons.csv.version}</version>
+      </dependency>
       <dependency>
         <groupId>org.apache.commons</groupId>
         <artifactId>commons-exec</artifactId>
@@ -617,6 +712,11 @@
         <artifactId>commons-lang3</artifactId>
         <version>${commons.lang3.version}</version>
       </dependency>
+      <dependency>
+        <groupId>org.apache.commons</groupId>
+        <artifactId>commons-math3</artifactId>
+        <version>${commons.math3.version}</version>
+      </dependency>
       <dependency>
         <groupId>org.apache.cxf</groupId>
         <artifactId>cxf-rt-frontend-jaxrs</artifactId>
@@ -764,6 +864,11 @@
         <artifactId>jetty-client</artifactId>
         <version>${jetty.version}</version>
       </dependency>
+      <dependency>
+        <groupId>org.freemarker</groupId>
+        <artifactId>freemarker</artifactId>
+        <version>2.3.31</version>
+      </dependency>
       <dependency>
         <groupId>org.glassfish.jaxb</groupId>
         <artifactId>jaxb-runtime</artifactId>
@@ -818,11 +923,22 @@
         <artifactId>ops4j-base-util-property</artifactId>
         <version>${ops4j.version}</version>
       </dependency>
+      <dependency>
+        <groupId>org.osgi</groupId>
+        <artifactId>org.osgi.compendium</artifactId>
+        <version>${osgi.compendium.version}</version>
+      </dependency>
       <dependency>
         <groupId>org.osgi</groupId>
         <artifactId>org.osgi.core</artifactId>
         <version>${osgi.core.version}</version>
       </dependency>
+      <!-- for bndlib -->
+      <dependency>
+        <groupId>org.osgi</groupId>
+        <artifactId>org.osgi.util.function</artifactId>
+        <version>1.2.0</version>
+      </dependency>
       <dependency>
         <groupId>org.quartz-scheduler</groupId>
         <artifactId>quartz</artifactId>
@@ -855,7 +971,20 @@
         <version>${test.containers.version}</version>
         <scope>test</scope>
       </dependency>
-
+      <!-- need to specify this to avoid
+      version clash within ctakes-core 4.0.0 -->
+      <dependency>
+        <groupId>org.apache.uima</groupId>
+        <artifactId>uimafit-core</artifactId>
+        <version>${uimafit.version}</version>
+        <scope>provided</scope>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.uima</groupId>
+        <artifactId>uimaj-core</artifactId>
+        <version>${uima.version}</version>
+        <scope>provided</scope>
+      </dependency>
       <dependency>
         <groupId>org.xerial.snappy</groupId>
         <artifactId>snappy-java</artifactId>
diff --git a/tika-parsers/tika-parsers-ml/tika-age-recogniser/pom.xml b/tika-parsers/tika-parsers-ml/tika-age-recogniser/pom.xml
index 4b5760a6f..d3d97dc5f 100644
--- a/tika-parsers/tika-parsers-ml/tika-age-recogniser/pom.xml
+++ b/tika-parsers/tika-parsers-ml/tika-age-recogniser/pom.xml
@@ -32,6 +32,8 @@
   <url>http://maven.apache.org</url>
 
 
+  <!-- we're not maintaining this module.
+  Keep this here instead of cluttering the parent pom -->
   <dependencyManagement>
     <dependencies>
       <dependency>
diff --git a/tika-parsers/tika-parsers-ml/tika-dl/pom.xml b/tika-parsers/tika-parsers-ml/tika-dl/pom.xml
index 1d8e2cd82..d12002ec8 100644
--- a/tika-parsers/tika-parsers-ml/tika-dl/pom.xml
+++ b/tika-parsers/tika-parsers-ml/tika-dl/pom.xml
@@ -31,40 +31,6 @@
   <name>Apache Tika Deep Learning (powered by DL4J)</name>
   <url>http://maven.apache.org</url>
 
-  <dependencyManagement>
-    <dependencies>
-      <dependency>
-        <groupId>com.twelvemonkeys.common</groupId>
-        <artifactId>common-io</artifactId>
-        <version>${twelvemonkeys.version}</version>
-      </dependency>
-      <dependency>
-        <groupId>com.twelvemonkeys.imageio</groupId>
-        <artifactId>imageio-bmp</artifactId>
-        <version>${twelvemonkeys.version}</version>
-      </dependency>
-      <dependency>
-        <groupId>com.twelvemonkeys.imageio</groupId>
-        <artifactId>imageio-jpeg</artifactId>
-        <version>${twelvemonkeys.version}</version>
-      </dependency>
-      <dependency>
-        <groupId>com.twelvemonkeys.imageio</groupId>
-        <artifactId>imageio-psd</artifactId>
-        <version>${twelvemonkeys.version}</version>
-      </dependency>
-      <dependency>
-        <groupId>com.twelvemonkeys.imageio</groupId>
-        <artifactId>imageio-tiff</artifactId>
-        <version>${twelvemonkeys.version}</version>
-      </dependency>
-      <dependency>
-        <groupId>org.freemarker</groupId>
-        <artifactId>freemarker</artifactId>
-        <version>2.3.31</version>
-      </dependency>
-    </dependencies>
-  </dependencyManagement>
   <dependencies>
     <dependency>
       <groupId>${project.groupId}</groupId>
diff --git a/tika-parsers/tika-parsers-ml/tika-parser-nlp-module/pom.xml b/tika-parsers/tika-parsers-ml/tika-parser-nlp-module/pom.xml
index b33250eb9..9301a3e46 100644
--- a/tika-parsers/tika-parsers-ml/tika-parser-nlp-module/pom.xml
+++ b/tika-parsers/tika-parsers-ml/tika-parser-nlp-module/pom.xml
@@ -29,25 +29,6 @@
     <name>Apache Tika natural language process module</name>
 
 
-    <dependencyManagement>
-        <dependencies>
-            <!-- need to specify this to avoid
-                  version clash within ctakes-core 4.0.0 -->
-            <dependency>
-                <groupId>org.apache.uima</groupId>
-                <artifactId>uimafit-core</artifactId>
-                <version>${uimafit.version}</version>
-                <scope>provided</scope>
-            </dependency>
-            <dependency>
-                <groupId>org.apache.uima</groupId>
-                <artifactId>uimaj-core</artifactId>
-                <version>${uima.version}</version>
-                <scope>provided</scope>
-            </dependency>
-        </dependencies>
-    </dependencyManagement>
-
     <dependencies>
         <dependency>
             <groupId>${project.groupId}</groupId>
diff --git a/tika-pipes/pom.xml b/tika-pipes/pom.xml
index e79e26450..1b4eb9f66 100644
--- a/tika-pipes/pom.xml
+++ b/tika-pipes/pom.xml
@@ -38,112 +38,6 @@
     <module>tika-async-cli</module>
   </modules>
 
-  <dependencyManagement>
-    <!-- this is caused by convergence errors in
-        azure-storage-blob since 12.15.0 and its dependencies.
-        Hopefully, we can get rid of this all with the
-        next upgrade.
-
-        maven enforcer, for reasons unknown, is not
-        seeing netty-transport-native-epoll here, and it
-        has to be excluded from azure-storage-blob
-        in the submodules.
-        -->
-    <dependencies>
-      <dependency>
-        <groupId>com.amazonaws</groupId>
-        <artifactId>aws-java-sdk-s3</artifactId>
-        <version>${aws.version}</version>
-      </dependency>
-      <dependency>
-        <groupId>com.azure</groupId>
-        <artifactId>azure-storage-blob</artifactId>
-        <!-- when upgrading this, remove it from ossindex exclusions in parent -->
-        <version>12.20.0</version>
-      </dependency>
-      <dependency>
-        <groupId>com.azure</groupId>
-        <artifactId>azure-core</artifactId>
-        <version>1.33.0</version>
-      </dependency>
-      <dependency>
-        <groupId>com.azure</groupId>
-        <artifactId>azure-core-http-netty</artifactId>
-        <version>1.12.6</version>
-        <exclusions>
-            <exclusion>
-                <groupId>io.netty</groupId>
-                <artifactId>netty-transport-native-epoll</artifactId>
-            </exclusion>
-        </exclusions>
-      </dependency>
-      <dependency>
-        <groupId>com.google.cloud</groupId>
-        <artifactId>google-cloud-storage</artifactId>
-        <version>${google.cloud.version}</version>
-      </dependency>
-      <dependency>
-        <groupId>io.projectreactor</groupId>
-        <artifactId>reactor-core</artifactId>
-        <version>3.4.24</version>
-      </dependency>
-      <dependency>
-        <groupId>io.netty</groupId>
-        <artifactId>netty-buffer</artifactId>
-        <version>${netty.version}</version>
-      </dependency>
-      <dependency>
-        <groupId>io.netty</groupId>
-        <artifactId>netty-codec</artifactId>
-        <version>${netty.version}</version>
-      </dependency>
-      <dependency>
-        <groupId>io.netty</groupId>
-        <artifactId>netty-codec-http</artifactId>
-        <version>${netty.version}</version>
-      </dependency>
-      <dependency>
-        <groupId>io.netty</groupId>
-        <artifactId>netty-codec-http2</artifactId>
-        <version>${netty.version}</version>
-      </dependency>
-      <dependency>
-        <groupId>io.netty</groupId>
-        <artifactId>netty-common</artifactId>
-        <version>${netty.version}</version>
-      </dependency>
-      <dependency>
-        <groupId>io.netty</groupId>
-        <artifactId>netty-handler</artifactId>
-        <version>${netty.version}</version>
-      </dependency>
-      <dependency>
-        <groupId>io.netty</groupId>
-        <artifactId>netty-handler-proxy</artifactId>
-        <version>${netty.version}</version>
-      </dependency>
-      <dependency>
-        <groupId>io.netty</groupId>
-        <artifactId>netty-resolver</artifactId>
-        <version>${netty.version}</version>
-      </dependency>
-      <dependency>
-        <groupId>io.netty</groupId>
-        <artifactId>netty-transport</artifactId>
-        <version>${netty.version}</version>
-      </dependency>
-      <dependency>
-        <groupId>io.netty</groupId>
-        <artifactId>netty-transport-native-unix-common</artifactId>
-        <version>${netty.version}</version>
-      </dependency>
-      <dependency>
-        <groupId>io.netty</groupId>
-        <artifactId>netty-transport-native-epoll</artifactId>
-        <version>${netty.version}</version>
-      </dependency>
-    </dependencies>
-  </dependencyManagement>
   <build>
     <plugins>
       <plugin>
diff --git a/tika-pipes/tika-emitters/tika-emitter-jdbc/src/main/java/org/apache/tika/pipes/emitter/jdbc/JDBCEmitter.java b/tika-pipes/tika-emitters/tika-emitter-jdbc/src/main/java/org/apache/tika/pipes/emitter/jdbc/JDBCEmitter.java
index cc56b6cbb..ccd66445a 100644
--- a/tika-pipes/tika-emitters/tika-emitter-jdbc/src/main/java/org/apache/tika/pipes/emitter/jdbc/JDBCEmitter.java
+++ b/tika-pipes/tika-emitters/tika-emitter-jdbc/src/main/java/org/apache/tika/pipes/emitter/jdbc/JDBCEmitter.java
@@ -23,13 +23,21 @@ import java.sql.DriverManager;
 import java.sql.PreparedStatement;
 import java.sql.SQLException;
 import java.sql.Statement;
+import java.sql.Timestamp;
 import java.sql.Types;
+import java.text.DateFormat;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.Date;
 import java.util.HashSet;
 import java.util.List;
+import java.util.Locale;
 import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.locks.ReadWriteLock;
 import java.util.concurrent.locks.ReentrantReadWriteLock;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -57,6 +65,12 @@ import org.apache.tika.utils.StringUtils;
 public class JDBCEmitter extends AbstractEmitter implements Initializable, Closeable {
 
     private static final Logger LOGGER = LoggerFactory.getLogger(JDBCEmitter.class);
+
+    //some file formats do not have time zones...
+    //try both
+    private static final String[] TIKA_DATE_PATTERNS = new String[] {
+            "yyyy-MM-dd'T'HH:mm:ss'Z'", "yyyy-MM-dd'T'HH:mm:ss"
+    };
     //the "write" lock is used for creating the table
     private static ReadWriteLock READ_WRITE_LOCK = new ReentrantReadWriteLock();
     //this keeps track of which table + connection string have been created
@@ -73,6 +87,17 @@ public class JDBCEmitter extends AbstractEmitter implements Initializable, Close
     private PreparedStatement insertStatement;
     private AttachmentStrategy attachmentStrategy = AttachmentStrategy.FIRST_ONLY;
 
+    //emitters are run in a single thread.  If we ever start running them
+    //multithreaded, this will be a big problem.
+    private final DateFormat[] dateFormats;
+
+    public JDBCEmitter() {
+        dateFormats = new DateFormat[TIKA_DATE_PATTERNS.length];
+        int i = 0;
+        for (String p : TIKA_DATE_PATTERNS) {
+            dateFormats[i++] = new SimpleDateFormat(p, Locale.US);
+        }
+    }
     /**
      * This is called immediately after the table is created.
      * The purpose of this is to allow for adding a complex primary key or
@@ -192,6 +217,7 @@ public class JDBCEmitter extends AbstractEmitter implements Initializable, Close
     }
 
     private void insertAll(String emitKey, List<Metadata> metadataList) throws SQLException {
+
         for (int i = 0; i < metadataList.size(); i++) {
             insertStatement.clearParameters();
             int col = 0;
@@ -207,6 +233,10 @@ public class JDBCEmitter extends AbstractEmitter implements Initializable, Close
     private void insertFirstOnly(String emitKey, List<Metadata> metadataList) throws SQLException {
         insertStatement.clearParameters();
         int i = 0;
+        DateFormat[] dateFormats = new DateFormat[TIKA_DATE_PATTERNS.length];
+        for (int j = 0; j < TIKA_DATE_PATTERNS.length; j++) {
+            dateFormats[j] = new SimpleDateFormat(TIKA_DATE_PATTERNS[j], Locale.US);
+        }
         insertStatement.setString(++i, emitKey);
         for (Map.Entry<String, String> e : keys.entrySet()) {
             updateValue(insertStatement, ++i, e.getKey(), e.getValue(), 0, metadataList);
@@ -234,7 +264,13 @@ public class JDBCEmitter extends AbstractEmitter implements Initializable, Close
         //for now we're only taking the info from the container document.
         Metadata metadata = metadataList.get(metadataListIndex);
         String val = metadata.get(key);
-        switch (type) {
+
+        String lcType = type.toLowerCase(Locale.US);
+        if (lcType.startsWith("varchar")) {
+            updateVarchar(lcType, insertStatement, i, val);
+            return;
+        }
+        switch (lcType) {
             case "string":
                 updateString(insertStatement, i, val);
                 break;
@@ -246,16 +282,73 @@ public class JDBCEmitter extends AbstractEmitter implements Initializable, Close
             case "integer":
                 updateInteger(insertStatement, i, val);
                 break;
+            case "bigint":
             case "long":
                 updateLong(insertStatement, i, val);
                 break;
             case "float":
                 updateFloat(insertStatement, i, val);
                 break;
+            case "double":
+                updateDouble(insertStatement, i, val);
+                break;
+            case "timestamp":
+                updateTimestamp(insertStatement, i, val, dateFormats);
+                break;
             default:
                 throw new IllegalArgumentException("Can only process: 'string', 'boolean', 'int' " +
-                        "and 'long' types so far.  Please open a ticket to request other types");
+                        "and 'long' types so far.  Please open a ticket to request: " + type);
+        }
+    }
+
+    private void updateDouble(PreparedStatement insertStatement, int i, String val) throws SQLException {
+        if (StringUtils.isBlank(val)) {
+            insertStatement.setNull(i, Types.DOUBLE);
+            return;
+        }
+        Double d = Double.parseDouble(val);
+        insertStatement.setDouble(i, d);
+    }
+
+    private void updateVarchar(String type, PreparedStatement insertStatement, int i, String val)
+            throws SQLException {
+        if (StringUtils.isBlank(val)) {
+            updateString(insertStatement, i, val);
+            return;
+        }
+        Matcher m = Pattern.compile("varchar\\((\\d+)\\)").matcher(type);
+        if (m.find()) {
+            int len = Integer.parseInt(m.group(1));
+            if (val.length() > len) {
+                int origLength = val.length();
+                val = val.substring(0, len);
+                LOGGER.warn("truncating varchar from {} to {}", origLength, len);
+            }
+            updateString(insertStatement, i, val);
+            return;
+        }
+        LOGGER.warn("couldn't parse varchar?! {}", type);
+        updateString(insertStatement, i, null);
+    }
+
+    private void updateTimestamp(PreparedStatement insertStatement, int i, String val,
+                                 DateFormat[] dateFormats) throws SQLException {
+        if (StringUtils.isBlank(val)) {
+            insertStatement.setNull(i, Types.TIMESTAMP);
+            return;
+        }
+
+        for (DateFormat df : dateFormats) {
+            try {
+                Date d = df.parse(val);
+                insertStatement.setTimestamp(i, new Timestamp(d.getTime()));
+                return;
+            } catch (ParseException e) {
+                //ignore
+            }
         }
+        LOGGER.warn("Couldn't parse {}", val);
+        insertStatement.setNull(i, Types.TIMESTAMP);
     }
 
     private void updateFloat(PreparedStatement insertStatement, int i, String val)
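
The two TIKA_DATE_PATTERNS above are tried in order so that timestamps
with and without a trailing literal 'Z' both parse; note that 'Z' is
quoted in the pattern, so both values are read in the JVM's default time
zone. A minimal standalone sketch of that fallback (class name invented;
a fresh SimpleDateFormat per call because SimpleDateFormat is not
thread-safe):

    import java.text.ParseException;
    import java.text.SimpleDateFormat;
    import java.util.Date;
    import java.util.Locale;

    public class DateFallbackSketch {
        private static final String[] PATTERNS = {
                "yyyy-MM-dd'T'HH:mm:ss'Z'", "yyyy-MM-dd'T'HH:mm:ss"
        };

        static Date parseOrNull(String val) {
            for (String p : PATTERNS) {
                try {
                    return new SimpleDateFormat(p, Locale.US).parse(val);
                } catch (ParseException e) {
                    //try the next pattern
                }
            }
            return null; //updateTimestamp() sets SQL NULL in this case
        }

        public static void main(String[] args) {
            System.out.println(parseOrNull("2022-11-04T17:10:15Z"));
            System.out.println(parseOrNull("2022-11-04T17:10:15"));
        }
    }
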
diff --git a/tika-pipes/tika-emitters/tika-emitter-jdbc/src/test/java/org/apache/tika/pipes/emitter/jdbc/JDBCEmitterTest.java b/tika-pipes/tika-emitters/tika-emitter-jdbc/src/test/java/org/apache/tika/pipes/emitter/jdbc/JDBCEmitterTest.java
index a34251ebc..b0629fa40 100644
--- a/tika-pipes/tika-emitters/tika-emitter-jdbc/src/test/java/org/apache/tika/pipes/emitter/jdbc/JDBCEmitterTest.java
+++ b/tika-pipes/tika-emitters/tika-emitter-jdbc/src/test/java/org/apache/tika/pipes/emitter/jdbc/JDBCEmitterTest.java
@@ -26,6 +26,8 @@ import java.sql.Connection;
 import java.sql.DriverManager;
 import java.sql.ResultSet;
 import java.sql.Statement;
+import java.sql.Timestamp;
+import java.time.ZoneId;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
@@ -57,6 +59,12 @@ public class JDBCEmitterTest {
         data.add(new String[]{"k1", "true", "k2", "some string1", "k3", "4", "k4", "100"});
         data.add(new String[]{"k1", "false", "k2", "some string2", "k3", "5", "k4", "101"});
         data.add(new String[]{"k1", "true", "k2", "some string3", "k3", "6", "k4", "102"});
+        //test dates with and without timezones
+        data.add(new String[]{"k1", "false", "k2", "some string4", "k3", "7", "k4", "103", "k5",
+                "100002", "k6", "2022-11-04T17:10:15Z"});
+
+        data.add(new String[]{"k1", "true", "k2", "some string5", "k3", "8", "k4", "104", "k5",
+                "100002", "k6", "2022-11-04T17:10:15"});
         int id = 0;
         for (String[] d : data) {
             emitter.emit("id" + id++, Collections.singletonList(m(d)));
@@ -72,6 +80,12 @@ public class JDBCEmitterTest {
                         assertEquals("some string" + (rows + 1), rs.getString(3));
                         assertEquals(rows + 4, rs.getInt(4));
                         assertEquals(100 + rows, rs.getLong(5));
+                        if (rows > 2) {
+                            assertEquals(100002, rs.getLong(6));
+                            Timestamp timestamp = rs.getTimestamp(7);
+                            String str = timestamp.toInstant().atZone(ZoneId.of("UTC")).toString();
+                            assertEquals("2022-11-04T21:10:15Z[UTC]", str);
+                        }
                         rows++;
                     }
                 }
diff --git a/tika-pipes/tika-emitters/tika-emitter-jdbc/src/test/resources/configs/tika-config-jdbc-emitter.xml b/tika-pipes/tika-emitters/tika-emitter-jdbc/src/test/resources/configs/tika-config-jdbc-emitter.xml
index d86903992..7ec4c96db 100644
--- a/tika-pipes/tika-emitters/tika-emitter-jdbc/src/test/resources/configs/tika-config-jdbc-emitter.xml
+++ b/tika-pipes/tika-emitters/tika-emitter-jdbc/src/test/resources/configs/tika-config-jdbc-emitter.xml
@@ -28,23 +28,26 @@
           k1 boolean,
           k2 varchar(512),
           k3 integer,
-          k4 long);
+          k4 long,
+          k5 bigint,
+          k6 timestamp);
         </createTable>
        <!-- the jdbc emitter always puts the emitKey value as the first
              item -->
-        <insert>insert into test (path, k1, k2, k3, k4) values (?,?,?,?,?);
+        <insert>insert into test (path, k1, k2, k3, k4, k5, k6) values (?,?,?,?,?,?,?);
         </insert>
         <!-- these are the keys in the metadata object.
             The emitKey is added as the first element in the insert statement.
            Then these values are added in order.
             They must be in the order of the insert statement.
-            The emit key is added as
             -->
         <keys>
           <key k="k1" v="boolean"/>
           <key k="k2" v="string"/>
           <key k="k3" v="int"/>
           <key k="k4" v="long"/>
+          <key k="k5" v="bigint"/>
+          <key k="k6" v="timestamp"/>
         </keys>
         <attachmentStrategy>first_only</attachmentStrategy>
       </params>
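
With the versions consolidated into tika-parent's dependencyManagement,
submodules now declare only groupId and artifactId and inherit the
managed version. A hypothetical child-pom fragment for illustration:

    <dependencies>
      <!-- version omitted: inherited from dependencyManagement
           in tika-parent (currently 12.20.0 there) -->
      <dependency>
        <groupId>com.azure</groupId>
        <artifactId>azure-storage-blob</artifactId>
      </dependency>
    </dependencies>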