You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2022/11/04 20:06:52 UTC

[tika] 02/02: TIKA-3916 -- move nearly all dependency mgmt into parent pom

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git

commit ec693f417542e868968069a847f924ef5f7f9467
Author: tballison <ta...@apache.org>
AuthorDate: Fri Nov 4 16:06:38 2022 -0400

    TIKA-3916 -- move nearly all dependency mgmt into parent pom
---
 tika-bundles/pom.xml                               |   9 --
 tika-example/pom.xml                               |  10 --
 tika-parent/pom.xml                                | 165 ++++++++++++++++++---
 .../tika-parsers-ml/tika-age-recogniser/pom.xml    |   2 +
 tika-parsers/tika-parsers-ml/tika-dl/pom.xml       |  34 -----
 .../tika-parsers-ml/tika-parser-nlp-module/pom.xml |  19 ---
 tika-pipes/pom.xml                                 | 106 -------------
 .../tika/pipes/emitter/jdbc/JDBCEmitter.java       |  97 +++++++++++-
 .../tika/pipes/emitter/jdbc/JDBCEmitterTest.java   |  14 ++
 .../resources/configs/tika-config-jdbc-emitter.xml |   9 +-
 10 files changed, 264 insertions(+), 201 deletions(-)

diff --git a/tika-bundles/pom.xml b/tika-bundles/pom.xml
index 69dfefd6e..45d19f993 100644
--- a/tika-bundles/pom.xml
+++ b/tika-bundles/pom.xml
@@ -35,15 +35,6 @@
   <modules>
     <module>tika-bundle-standard</module>
   </modules>
-  <dependencyManagement>
-    <dependencies>
-      <dependency>
-        <groupId>org.osgi</groupId>
-        <artifactId>org.osgi.compendium</artifactId>
-        <version>${osgi.compendium.version}</version>
-      </dependency>
-    </dependencies>
-  </dependencyManagement>
 
   <scm>
     <tag>2.2.1-rc2</tag>
diff --git a/tika-example/pom.xml b/tika-example/pom.xml
index cc1f66fe2..b140b8404 100644
--- a/tika-example/pom.xml
+++ b/tika-example/pom.xml
@@ -33,16 +33,6 @@
   <name>Apache Tika examples</name>
   <url>https://tika.apache.org/</url>
 
-  <dependencyManagement>
-    <dependencies>
-      <dependency>
-        <groupId>org.osgi</groupId>
-        <artifactId>org.osgi.compendium</artifactId>
-        <version>${osgi.compendium.version}</version>
-      </dependency>
-    </dependencies>
-  </dependencyManagement>
-
   <!-- List of dependencies that we depend on for the examples. See the full list of Tika
        modules and how to use them at http://mvnrepository.com/artifact/org.apache.tika.-->
   <dependencies>
diff --git a/tika-parent/pom.xml b/tika-parent/pom.xml
index c4582e8a8..d49c3639a 100644
--- a/tika-parent/pom.xml
+++ b/tika-parent/pom.xml
@@ -404,17 +404,42 @@
         <artifactId>biz.aQute.bndlib</artifactId>
         <version>6.3.1</version>
       </dependency>
-      <!-- for bndlib -->
       <dependency>
-        <groupId>org.osgi</groupId>
-        <artifactId>org.osgi.util.function</artifactId>
-        <version>1.2.0</version>
+        <groupId>com.amazonaws</groupId>
+        <artifactId>aws-java-sdk-s3</artifactId>
+        <version>${aws.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>com.azure</groupId>
+        <artifactId>azure-storage-blob</artifactId>
+        <version>12.20.0</version>
+      </dependency>
+      <dependency>
+        <groupId>com.azure</groupId>
+        <artifactId>azure-core</artifactId>
+        <version>1.33.0</version>
+      </dependency>
+      <dependency>
+        <groupId>com.azure</groupId>
+        <artifactId>azure-core-http-netty</artifactId>
+        <version>1.12.6</version>
+        <exclusions>
+          <exclusion>
+            <groupId>io.netty</groupId>
+            <artifactId>netty-transport-native-epoll</artifactId>
+          </exclusion>
+        </exclusions>
       </dependency>
       <dependency>
         <groupId>com.drewnoakes</groupId>
         <artifactId>metadata-extractor</artifactId>
         <version>${metadata.extractor.version}</version>
       </dependency>
+      <dependency>
+        <groupId>com.google.cloud</groupId>
+        <artifactId>google-cloud-storage</artifactId>
+        <version>${google.cloud.version}</version>
+      </dependency>
       <dependency>
         <groupId>com.mchange</groupId>
         <artifactId>c3p0</artifactId>
@@ -512,6 +537,31 @@
         <artifactId>icu4j</artifactId>
         <version>${icu4j.version}</version>
       </dependency>
+      <dependency>
+        <groupId>com.twelvemonkeys.common</groupId>
+        <artifactId>common-io</artifactId>
+        <version>${twelvemonkeys.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>com.twelvemonkeys.imageio</groupId>
+        <artifactId>imageio-bmp</artifactId>
+        <version>${twelvemonkeys.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>com.twelvemonkeys.imageio</groupId>
+        <artifactId>imageio-jpeg</artifactId>
+        <version>${twelvemonkeys.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>com.twelvemonkeys.imageio</groupId>
+        <artifactId>imageio-psd</artifactId>
+        <version>${twelvemonkeys.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>com.twelvemonkeys.imageio</groupId>
+        <artifactId>imageio-tiff</artifactId>
+        <version>${twelvemonkeys.version}</version>
+      </dependency>
       <dependency>
         <groupId>commons-cli</groupId>
         <artifactId>commons-cli</artifactId>
@@ -522,16 +572,6 @@
         <artifactId>commons-codec</artifactId>
         <version>${commons.codec.version}</version>
       </dependency>
-      <dependency>
-        <groupId>org.apache.commons</groupId>
-        <artifactId>commons-collections4</artifactId>
-        <version>${commons.collections4.version}</version>
-      </dependency>
-      <dependency>
-        <groupId>org.apache.commons</groupId>
-        <artifactId>commons-csv</artifactId>
-        <version>${commons.csv.version}</version>
-      </dependency>
       <dependency>
         <groupId>commons-io</groupId>
         <artifactId>commons-io</artifactId>
@@ -543,9 +583,24 @@
         <version>${commons.logging.version}</version>
       </dependency>
       <dependency>
-        <groupId>org.apache.commons</groupId>
-        <artifactId>commons-math3</artifactId>
-        <version>${commons.math3.version}</version>
+        <groupId>io.netty</groupId>
+        <artifactId>netty-buffer</artifactId>
+        <version>${netty.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>io.netty</groupId>
+        <artifactId>netty-codec</artifactId>
+        <version>${netty.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>io.netty</groupId>
+        <artifactId>netty-codec-http</artifactId>
+        <version>${netty.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>io.netty</groupId>
+        <artifactId>netty-codec-http2</artifactId>
+        <version>${netty.version}</version>
       </dependency>
       <dependency>
         <groupId>io.netty</groupId>
@@ -562,6 +617,36 @@
         <artifactId>netty-transport-native-unix-common</artifactId>
         <version>${netty.version}</version>
       </dependency>
+      <dependency>
+        <groupId>io.netty</groupId>
+        <artifactId>netty-handler-proxy</artifactId>
+        <version>${netty.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>io.netty</groupId>
+        <artifactId>netty-resolver</artifactId>
+        <version>${netty.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>io.netty</groupId>
+        <artifactId>netty-transport</artifactId>
+        <version>${netty.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>io.netty</groupId>
+        <artifactId>netty-transport-native-unix-common</artifactId>
+        <version>${netty.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>io.netty</groupId>
+        <artifactId>netty-transport-native-epoll</artifactId>
+        <version>${netty.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>io.projectreactor</groupId>
+        <artifactId>reactor-core</artifactId>
+        <version>3.4.24</version>
+      </dependency>
       <dependency>
         <groupId>io.projectreactor.netty</groupId>
         <artifactId>reactor-netty-core</artifactId>
@@ -602,11 +687,21 @@
         <artifactId>jna</artifactId>
         <version>${jna.version}</version>
       </dependency>
+      <dependency>
+        <groupId>org.apache.commons</groupId>
+        <artifactId>commons-collections4</artifactId>
+        <version>${commons.collections4.version}</version>
+      </dependency>
       <dependency>
         <groupId>org.apache.commons</groupId>
         <artifactId>commons-compress</artifactId>
         <version>${commons.compress.version}</version>
       </dependency>
+      <dependency>
+        <groupId>org.apache.commons</groupId>
+        <artifactId>commons-csv</artifactId>
+        <version>${commons.csv.version}</version>
+      </dependency>
       <dependency>
         <groupId>org.apache.commons</groupId>
         <artifactId>commons-exec</artifactId>
@@ -617,6 +712,11 @@
         <artifactId>commons-lang3</artifactId>
         <version>${commons.lang3.version}</version>
       </dependency>
+      <dependency>
+        <groupId>org.apache.commons</groupId>
+        <artifactId>commons-math3</artifactId>
+        <version>${commons.math3.version}</version>
+      </dependency>
       <dependency>
         <groupId>org.apache.cxf</groupId>
         <artifactId>cxf-rt-frontend-jaxrs</artifactId>
@@ -764,6 +864,11 @@
         <artifactId>jetty-client</artifactId>
         <version>${jetty.version}</version>
       </dependency>
+      <dependency>
+        <groupId>org.freemarker</groupId>
+        <artifactId>freemarker</artifactId>
+        <version>2.3.31</version>
+      </dependency>
       <dependency>
         <groupId>org.glassfish.jaxb</groupId>
         <artifactId>jaxb-runtime</artifactId>
@@ -818,11 +923,22 @@
         <artifactId>ops4j-base-util-property</artifactId>
         <version>${ops4j.version}</version>
       </dependency>
+      <dependency>
+        <groupId>org.osgi</groupId>
+        <artifactId>org.osgi.compendium</artifactId>
+        <version>${osgi.compendium.version}</version>
+      </dependency>
       <dependency>
         <groupId>org.osgi</groupId>
         <artifactId>org.osgi.core</artifactId>
         <version>${osgi.core.version}</version>
       </dependency>
+      <!-- for bndlib -->
+      <dependency>
+        <groupId>org.osgi</groupId>
+        <artifactId>org.osgi.util.function</artifactId>
+        <version>1.2.0</version>
+      </dependency>
       <dependency>
         <groupId>org.quartz-scheduler</groupId>
         <artifactId>quartz</artifactId>
@@ -855,7 +971,20 @@
         <version>${test.containers.version}</version>
         <scope>test</scope>
       </dependency>
-
+      <!-- need to specify this to avoid
+      version clash within ctakes-core 4.0.0 -->
+      <dependency>
+        <groupId>org.apache.uima</groupId>
+        <artifactId>uimafit-core</artifactId>
+        <version>${uimafit.version}</version>
+        <scope>provided</scope>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.uima</groupId>
+        <artifactId>uimaj-core</artifactId>
+        <version>${uima.version}</version>
+        <scope>provided</scope>
+      </dependency>
       <dependency>
         <groupId>org.xerial.snappy</groupId>
         <artifactId>snappy-java</artifactId>
diff --git a/tika-parsers/tika-parsers-ml/tika-age-recogniser/pom.xml b/tika-parsers/tika-parsers-ml/tika-age-recogniser/pom.xml
index 4b5760a6f..d3d97dc5f 100644
--- a/tika-parsers/tika-parsers-ml/tika-age-recogniser/pom.xml
+++ b/tika-parsers/tika-parsers-ml/tika-age-recogniser/pom.xml
@@ -32,6 +32,8 @@
   <url>http://maven.apache.org</url>
 
 
+  <!-- we're not maintaining this module.
+  Keep this here instead of cluttering the parent pom -->
   <dependencyManagement>
     <dependencies>
       <dependency>
diff --git a/tika-parsers/tika-parsers-ml/tika-dl/pom.xml b/tika-parsers/tika-parsers-ml/tika-dl/pom.xml
index 1d8e2cd82..d12002ec8 100644
--- a/tika-parsers/tika-parsers-ml/tika-dl/pom.xml
+++ b/tika-parsers/tika-parsers-ml/tika-dl/pom.xml
@@ -31,40 +31,6 @@
   <name>Apache Tika Deep Learning (powered by DL4J)</name>
   <url>http://maven.apache.org</url>
 
-  <dependencyManagement>
-    <dependencies>
-      <dependency>
-        <groupId>com.twelvemonkeys.common</groupId>
-        <artifactId>common-io</artifactId>
-        <version>${twelvemonkeys.version}</version>
-      </dependency>
-      <dependency>
-        <groupId>com.twelvemonkeys.imageio</groupId>
-        <artifactId>imageio-bmp</artifactId>
-        <version>${twelvemonkeys.version}</version>
-      </dependency>
-      <dependency>
-        <groupId>com.twelvemonkeys.imageio</groupId>
-        <artifactId>imageio-jpeg</artifactId>
-        <version>${twelvemonkeys.version}</version>
-      </dependency>
-      <dependency>
-        <groupId>com.twelvemonkeys.imageio</groupId>
-        <artifactId>imageio-psd</artifactId>
-        <version>${twelvemonkeys.version}</version>
-      </dependency>
-      <dependency>
-        <groupId>com.twelvemonkeys.imageio</groupId>
-        <artifactId>imageio-tiff</artifactId>
-        <version>${twelvemonkeys.version}</version>
-      </dependency>
-      <dependency>
-        <groupId>org.freemarker</groupId>
-        <artifactId>freemarker</artifactId>
-        <version>2.3.31</version>
-      </dependency>
-    </dependencies>
-  </dependencyManagement>
   <dependencies>
     <dependency>
       <groupId>${project.groupId}</groupId>
diff --git a/tika-parsers/tika-parsers-ml/tika-parser-nlp-module/pom.xml b/tika-parsers/tika-parsers-ml/tika-parser-nlp-module/pom.xml
index b33250eb9..9301a3e46 100644
--- a/tika-parsers/tika-parsers-ml/tika-parser-nlp-module/pom.xml
+++ b/tika-parsers/tika-parsers-ml/tika-parser-nlp-module/pom.xml
@@ -29,25 +29,6 @@
     <name>Apache Tika natural language process module</name>
 
 
-    <dependencyManagement>
-        <dependencies>
-            <!-- need to specify this to avoid
-                  version clash within ctakes-core 4.0.0 -->
-            <dependency>
-                <groupId>org.apache.uima</groupId>
-                <artifactId>uimafit-core</artifactId>
-                <version>${uimafit.version}</version>
-                <scope>provided</scope>
-            </dependency>
-            <dependency>
-                <groupId>org.apache.uima</groupId>
-                <artifactId>uimaj-core</artifactId>
-                <version>${uima.version}</version>
-                <scope>provided</scope>
-            </dependency>
-        </dependencies>
-    </dependencyManagement>
-
     <dependencies>
         <dependency>
             <groupId>${project.groupId}</groupId>
diff --git a/tika-pipes/pom.xml b/tika-pipes/pom.xml
index e79e26450..1b4eb9f66 100644
--- a/tika-pipes/pom.xml
+++ b/tika-pipes/pom.xml
@@ -38,112 +38,6 @@
     <module>tika-async-cli</module>
   </modules>
 
-  <dependencyManagement>
-    <!-- this is caused by convergence errors in
-        azure-storage-blob since 12.15.0 and its dependencies.
-        Hopefully, we can get rid of this all with the
-        next upgrade.
-
-        maven enforcer, for reasons unknown, is not
-        seeing netty-transport-native-epoll here, and it
-        has to be excluded from azure-storage-blob
-        in the submodules.
-        -->
-    <dependencies>
-      <dependency>
-        <groupId>com.amazonaws</groupId>
-        <artifactId>aws-java-sdk-s3</artifactId>
-        <version>${aws.version}</version>
-      </dependency>
-      <dependency>
-        <groupId>com.azure</groupId>
-        <artifactId>azure-storage-blob</artifactId>
-        <!-- when upgrading this, remove it from ossindex exclusions in parent -->
-        <version>12.20.0</version>
-      </dependency>
-      <dependency>
-        <groupId>com.azure</groupId>
-        <artifactId>azure-core</artifactId>
-        <version>1.33.0</version>
-      </dependency>
-      <dependency>
-        <groupId>com.azure</groupId>
-        <artifactId>azure-core-http-netty</artifactId>
-        <version>1.12.6</version>
-        <exclusions>
-            <exclusion>
-                <groupId>io.netty</groupId>
-                <artifactId>netty-transport-native-epoll</artifactId>
-            </exclusion>
-        </exclusions>
-      </dependency>
-      <dependency>
-        <groupId>com.google.cloud</groupId>
-        <artifactId>google-cloud-storage</artifactId>
-        <version>${google.cloud.version}</version>
-      </dependency>
-      <dependency>
-        <groupId>io.projectreactor</groupId>
-        <artifactId>reactor-core</artifactId>
-        <version>3.4.24</version>
-      </dependency>
-      <dependency>
-        <groupId>io.netty</groupId>
-        <artifactId>netty-buffer</artifactId>
-        <version>${netty.version}</version>
-      </dependency>
-      <dependency>
-        <groupId>io.netty</groupId>
-        <artifactId>netty-codec</artifactId>
-        <version>${netty.version}</version>
-      </dependency>
-      <dependency>
-        <groupId>io.netty</groupId>
-        <artifactId>netty-codec-http</artifactId>
-        <version>${netty.version}</version>
-      </dependency>
-      <dependency>
-        <groupId>io.netty</groupId>
-        <artifactId>netty-codec-http2</artifactId>
-        <version>${netty.version}</version>
-      </dependency>
-      <dependency>
-        <groupId>io.netty</groupId>
-        <artifactId>netty-common</artifactId>
-        <version>${netty.version}</version>
-      </dependency>
-      <dependency>
-        <groupId>io.netty</groupId>
-        <artifactId>netty-handler</artifactId>
-        <version>${netty.version}</version>
-      </dependency>
-      <dependency>
-        <groupId>io.netty</groupId>
-        <artifactId>netty-handler-proxy</artifactId>
-        <version>${netty.version}</version>
-      </dependency>
-      <dependency>
-        <groupId>io.netty</groupId>
-        <artifactId>netty-resolver</artifactId>
-        <version>${netty.version}</version>
-      </dependency>
-      <dependency>
-        <groupId>io.netty</groupId>
-        <artifactId>netty-transport</artifactId>
-        <version>${netty.version}</version>
-      </dependency>
-      <dependency>
-        <groupId>io.netty</groupId>
-        <artifactId>netty-transport-native-unix-common</artifactId>
-        <version>${netty.version}</version>
-      </dependency>
-      <dependency>
-        <groupId>io.netty</groupId>
-        <artifactId>netty-transport-native-epoll</artifactId>
-        <version>${netty.version}</version>
-      </dependency>
-    </dependencies>
-  </dependencyManagement>
   <build>
     <plugins>
       <plugin>
diff --git a/tika-pipes/tika-emitters/tika-emitter-jdbc/src/main/java/org/apache/tika/pipes/emitter/jdbc/JDBCEmitter.java b/tika-pipes/tika-emitters/tika-emitter-jdbc/src/main/java/org/apache/tika/pipes/emitter/jdbc/JDBCEmitter.java
index cc56b6cbb..ccd66445a 100644
--- a/tika-pipes/tika-emitters/tika-emitter-jdbc/src/main/java/org/apache/tika/pipes/emitter/jdbc/JDBCEmitter.java
+++ b/tika-pipes/tika-emitters/tika-emitter-jdbc/src/main/java/org/apache/tika/pipes/emitter/jdbc/JDBCEmitter.java
@@ -23,13 +23,21 @@ import java.sql.DriverManager;
 import java.sql.PreparedStatement;
 import java.sql.SQLException;
 import java.sql.Statement;
+import java.sql.Timestamp;
 import java.sql.Types;
+import java.text.DateFormat;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.Date;
 import java.util.HashSet;
 import java.util.List;
+import java.util.Locale;
 import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.locks.ReadWriteLock;
 import java.util.concurrent.locks.ReentrantReadWriteLock;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -57,6 +65,12 @@ import org.apache.tika.utils.StringUtils;
 public class JDBCEmitter extends AbstractEmitter implements Initializable, Closeable {
 
     private static final Logger LOGGER = LoggerFactory.getLogger(JDBCEmitter.class);
+
+    //Date strings may or may not end in 'Z'; one pattern is provided for each form.
+    //NOTE(review): 'Z' is quoted, i.e. a literal rather than a zone -- confirm UTC handling.
+    private static final String[] TIKA_DATE_PATTERNS = {
+            "yyyy-MM-dd'T'HH:mm:ss'Z'", "yyyy-MM-dd'T'HH:mm:ss"
+    };
     //the "write" lock is used for creating the table
     private static ReadWriteLock READ_WRITE_LOCK = new ReentrantReadWriteLock();
     //this keeps track of which table + connection string have been created
@@ -73,6 +87,17 @@ public class JDBCEmitter extends AbstractEmitter implements Initializable, Close
     private PreparedStatement insertStatement;
     private AttachmentStrategy attachmentStrategy = AttachmentStrategy.FIRST_ONLY;
 
+    //SimpleDateFormat is not thread-safe. Sharing these instances is only OK because
+    //emitters are run in a single thread; revisit this if emitters ever go multithreaded.
+    private final DateFormat[] dateFormats;
+
+    public JDBCEmitter() {
+        //one formatter per pattern, in the same order as TIKA_DATE_PATTERNS
+        dateFormats = new DateFormat[TIKA_DATE_PATTERNS.length];
+        for (int idx = 0; idx < dateFormats.length; idx++) {
+            dateFormats[idx] = new SimpleDateFormat(TIKA_DATE_PATTERNS[idx], Locale.US);
+        }
+    }
     /**
      * This is called immediately after the table is created.
      * The purpose of this is to allow for adding a complex primary key or
@@ -192,6 +217,7 @@ public class JDBCEmitter extends AbstractEmitter implements Initializable, Close
     }
 
     private void insertAll(String emitKey, List<Metadata> metadataList) throws SQLException {
+
         for (int i = 0; i < metadataList.size(); i++) {
             insertStatement.clearParameters();
             int col = 0;
@@ -207,6 +233,10 @@ public class JDBCEmitter extends AbstractEmitter implements Initializable, Close
     private void insertFirstOnly(String emitKey, List<Metadata> metadataList) throws SQLException {
         insertStatement.clearParameters();
         int i = 0;
+        DateFormat[] dateFormats = new DateFormat[TIKA_DATE_PATTERNS.length];
+        for (int j = 0; j < TIKA_DATE_PATTERNS.length; j++) {
+            dateFormats[i] = new SimpleDateFormat(TIKA_DATE_PATTERNS[j], Locale.US);
+        }
         insertStatement.setString(++i, emitKey);
         for (Map.Entry<String, String> e : keys.entrySet()) {
             updateValue(insertStatement, ++i, e.getKey(), e.getValue(), 0, metadataList);
@@ -234,7 +264,13 @@ public class JDBCEmitter extends AbstractEmitter implements Initializable, Close
         //for now we're only taking the info from the container document.
         Metadata metadata = metadataList.get(metadataListIndex);
         String val = metadata.get(key);
-        switch (type) {
+
+        String lcType = type.toLowerCase(Locale.US);
+        if (lcType.startsWith("varchar")) {
+            updateVarchar(lcType, insertStatement, i, val);
+            return;
+        }
+        switch (lcType) {
             case "string":
                 updateString(insertStatement, i, val);
                 break;
@@ -246,16 +282,73 @@ public class JDBCEmitter extends AbstractEmitter implements Initializable, Close
             case "integer":
                 updateInteger(insertStatement, i, val);
                 break;
+            case "bigint":
             case "long":
                 updateLong(insertStatement, i, val);
                 break;
             case "float":
                 updateFloat(insertStatement, i, val);
                 break;
+            case "double":
+                updateDouble(insertStatement, i, val);
+                break;
+            case "timestamp":
+                updateTimestamp(insertStatement, i, val, dateFormats);
+                break;
             default:
                 throw new IllegalArgumentException("Can only process: 'string', 'boolean', 'int' " +
-                        "and 'long' types so far.  Please open a ticket to request other types");
+                        "and 'long' types so far.  Please open a ticket to request: " + type);
+        }
+    }
+
+    private void updateDouble(PreparedStatement insertStatement, int i, String val) throws SQLException {
+        //a blank value is written as SQL NULL rather than being parsed
+        if (!StringUtils.isBlank(val)) {
+            insertStatement.setDouble(i, Double.parseDouble(val));
+            return;
+        }
+        insertStatement.setNull(i, Types.DOUBLE);
+    }
+
+    private void updateVarchar(String type, PreparedStatement insertStatement, int i, String val)
+            throws SQLException {
+        if (StringUtils.isBlank(val)) {
+            updateString(insertStatement, i, val);
+            return;
+        }
+        //the declared column width is read from the type string, e.g. "varchar(512)"
+        Matcher widthMatcher = Pattern.compile("varchar\\((\\d+)\\)").matcher(type);
+        if (!widthMatcher.find()) {
+            LOGGER.warn("couldn't parse varchar?! {}", type);
+            updateString(insertStatement, i, null);
+            return;
+        }
+        int maxLength = Integer.parseInt(widthMatcher.group(1));
+        if (val.length() > maxLength) {
+            LOGGER.warn("truncating varchar from {} to {}", val.length(), maxLength);
+            val = val.substring(0, maxLength);
+        }
+        updateString(insertStatement, i, val);
+    }
+
+    private void updateTimestamp(PreparedStatement insertStatement, int i, String val,
+                                 DateFormat[] dateFormats) throws SQLException {
+        if (StringUtils.isBlank(val)) {
+            insertStatement.setNull(i, Types.TIMESTAMP);
+            return;
+        }
+        //try each format in turn; the first one that parses the value wins
+        for (DateFormat df : dateFormats) {
+            try {
+                Date d = df.parse(val);
+                insertStatement.setTimestamp(i, new Timestamp(d.getTime()));
+                return;
+            } catch (ParseException e) {
+                //this format did not match; fall through to the next one
+            }
         }
+        LOGGER.warn("Couldn't parse {}", val);
+        insertStatement.setNull(i, Types.TIMESTAMP);
     }
 
     private void updateFloat(PreparedStatement insertStatement, int i, String val)
diff --git a/tika-pipes/tika-emitters/tika-emitter-jdbc/src/test/java/org/apache/tika/pipes/emitter/jdbc/JDBCEmitterTest.java b/tika-pipes/tika-emitters/tika-emitter-jdbc/src/test/java/org/apache/tika/pipes/emitter/jdbc/JDBCEmitterTest.java
index a34251ebc..b0629fa40 100644
--- a/tika-pipes/tika-emitters/tika-emitter-jdbc/src/test/java/org/apache/tika/pipes/emitter/jdbc/JDBCEmitterTest.java
+++ b/tika-pipes/tika-emitters/tika-emitter-jdbc/src/test/java/org/apache/tika/pipes/emitter/jdbc/JDBCEmitterTest.java
@@ -26,6 +26,8 @@ import java.sql.Connection;
 import java.sql.DriverManager;
 import java.sql.ResultSet;
 import java.sql.Statement;
+import java.sql.Timestamp;
+import java.time.ZoneId;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
@@ -57,6 +59,12 @@ public class JDBCEmitterTest {
         data.add(new String[]{"k1", "true", "k2", "some string1", "k3", "4", "k4", "100"});
         data.add(new String[]{"k1", "false", "k2", "some string2", "k3", "5", "k4", "101"});
         data.add(new String[]{"k1", "true", "k2", "some string3", "k3", "6", "k4", "102"});
+        //test dates with and without timezones
+        data.add(new String[]{"k1", "false", "k2", "some string4", "k3", "7", "k4", "103", "k5",
+                "100002", "k6", "2022-11-04T17:10:15Z"});
+
+        data.add(new String[]{"k1", "true", "k2", "some string5", "k3", "8", "k4", "104", "k5",
+                "100002", "k6", "2022-11-04T17:10:15"});
         int id = 0;
         for (String[] d : data) {
             emitter.emit("id" + id++, Collections.singletonList(m(d)));
@@ -72,6 +80,12 @@ public class JDBCEmitterTest {
                         assertEquals("some string" + (rows + 1), rs.getString(3));
                         assertEquals(rows + 4, rs.getInt(4));
                         assertEquals(100 + rows, rs.getLong(5));
+                        if (rows > 2) {
+                            assertEquals(100002, rs.getLong(6));
+                            Timestamp timestamp = rs.getTimestamp(7);
+                            String str = timestamp.toInstant().atZone(ZoneId.of("UTC")).toString();
+                            assertEquals("2022-11-04T21:10:15Z[UTC]", str);
+                        }
                         rows++;
                     }
                 }
diff --git a/tika-pipes/tika-emitters/tika-emitter-jdbc/src/test/resources/configs/tika-config-jdbc-emitter.xml b/tika-pipes/tika-emitters/tika-emitter-jdbc/src/test/resources/configs/tika-config-jdbc-emitter.xml
index d86903992..7ec4c96db 100644
--- a/tika-pipes/tika-emitters/tika-emitter-jdbc/src/test/resources/configs/tika-config-jdbc-emitter.xml
+++ b/tika-pipes/tika-emitters/tika-emitter-jdbc/src/test/resources/configs/tika-config-jdbc-emitter.xml
@@ -28,23 +28,26 @@
           k1 boolean,
           k2 varchar(512),
           k3 integer,
-          k4 long);
+          k4 long,
+          k5 bigint,
+          k6 timestamp);
         </createTable>
         <!-- the jdbc emitter always puts ths emitKey value as the first
              item -->
-        <insert>insert into test (path, k1, k2, k3, k4) values (?,?,?,?,?);
+        <insert>insert into test (path, k1, k2, k3, k4, k5, k6) values (?,?,?,?,?,?,?);
         </insert>
         <!-- these are the keys in the metadata object.
             The emitKey is added as the first element in the insert statement.
             Then the these values are added in order.
             They must be in the order of the insert statement.
-            The emit key is added as
             -->
         <keys>
           <key k="k1" v="boolean"/>
           <key k="k2" v="string"/>
           <key k="k3" v="int"/>
           <key k="k4" v="long"/>
+          <key k="k5" v="bigint"/>
+          <key k="k6" v="timestamp"/>
         </keys>
         <attachmentStrategy>first_only</attachmentStrategy>
       </params>