You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@gobblin.apache.org by le...@apache.org on 2021/06/28 16:52:54 UTC

[gobblin] branch master updated: [GOBBLIN-1479] Add support to convert zeroDateTime to null when configured (#3319)

This is an automated email from the ASF dual-hosted git repository.

lesun pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/gobblin.git


The following commit(s) were added to refs/heads/master by this push:
     new 2407be6  [GOBBLIN-1479] Add support to convert zeroDateTime to null when configured (#3319)
2407be6 is described below

commit 2407be647d3073063c4b589e656cf53f439aca8b
Author: Lemuel Chik <le...@gmail.com>
AuthorDate: Mon Jun 28 09:52:42 2021 -0700

    [GOBBLIN-1479] Add support to convert zeroDateTime to null when configured (#3319)
    
    * Add support to convert zero DateTime values to null when configured
    
    In MySQL Connector 8, calling ResultSet.getString(int) on a record with a zero timestamp
    returns the string "0000-00-00 00:00:00", even when the connection property
    `zeroDateTimeBehavior=CONVERT_TO_NULL` is set.
    This change is a workaround to return null for these timestamps when the property is set.
    
    * Updated change to handle other cases for zeroDateTimeBehavior
    
    * Added unit tests
    
    * Added apache license agreement to MockTimestampResultSet
    
    Co-authored-by: Lemuel Chik <lc...@lchik-mn1.linkedin.biz>
---
 .../apache/gobblin/source/jdbc/JdbcExtractor.java  |  28 ++++-
 .../gobblin/source/jdbc/JdbcExtractorTest.java     | 140 ++++++++++++++++++---
 .../source/jdbc/MockTimestampResultSet.java        |  80 ++++++++++++
 3 files changed, 230 insertions(+), 18 deletions(-)

diff --git a/gobblin-modules/gobblin-sql/src/main/java/org/apache/gobblin/source/jdbc/JdbcExtractor.java b/gobblin-modules/gobblin-sql/src/main/java/org/apache/gobblin/source/jdbc/JdbcExtractor.java
index 7a28157..331eb0d 100644
--- a/gobblin-modules/gobblin-sql/src/main/java/org/apache/gobblin/source/jdbc/JdbcExtractor.java
+++ b/gobblin-modules/gobblin-sql/src/main/java/org/apache/gobblin/source/jdbc/JdbcExtractor.java
@@ -26,6 +26,7 @@ import java.sql.ResultSet;
 import java.sql.ResultSetMetaData;
 import java.sql.SQLException;
 import java.sql.Statement;
+import java.sql.Timestamp;
 import java.sql.Types;
 import java.text.ParseException;
 import java.text.SimpleDateFormat;
@@ -959,6 +960,9 @@ public abstract class JdbcExtractor extends QueryBasedExtractor<JsonArray, JsonE
       int batchSize = this.workUnitState.getPropAsInt(ConfigurationKeys.SOURCE_QUERYBASED_FETCH_SIZE, 0);
       batchSize = (batchSize == 0 ? ConfigurationKeys.DEFAULT_SOURCE_FETCH_SIZE : batchSize);
 
+      String sourceConnProps = this.workUnitState.getProp(ConfigurationKeys.SOURCE_CONN_PROPERTIES);
+      boolean convertZeroDateTime = sourceConnProps != null && sourceConnProps.contains("zeroDateTimeBehavior");
+
       int recordCount = 0;
       while (resultset.next()) {
 
@@ -967,8 +971,7 @@ public abstract class JdbcExtractor extends QueryBasedExtractor<JsonArray, JsonE
 
         for (int i = 1; i < numColumns + 1; i++) {
           final String columnName = this.getHeaderRecord().get(i - 1);
-          jsonObject.addProperty(columnName, parseColumnAsString(resultset, resultsetMetadata, i));
-
+          jsonObject.addProperty(columnName, parseColumnAsString(resultset, resultsetMetadata, i, convertZeroDateTime));
         }
 
         recordSet.add(jsonObject);
@@ -1028,7 +1031,7 @@ public abstract class JdbcExtractor extends QueryBasedExtractor<JsonArray, JsonE
    * treat tinyint(1) as BIT.
    *
    * Currently, {@link MysqlExtractor#getDataTypeMap()} uses the information_schema to check types.
-   * That does not do the above conversion. {@link #parseColumnAsString(ResultSet, ResultSetMetaData, int)}
+   * That does not do the above conversion. {@link #parseColumnAsString(ResultSet, ResultSetMetaData, int, boolean)}
    * which does the above type mapping.
    *
    * On the other hand, SqlServerExtractor treats BIT columns as Booleans. So we can be in a bind
@@ -1041,7 +1044,8 @@ public abstract class JdbcExtractor extends QueryBasedExtractor<JsonArray, JsonE
     return true;
   }
 
-  private String parseColumnAsString(final ResultSet resultset, final ResultSetMetaData resultsetMetadata, int i)
+  private String parseColumnAsString(final ResultSet resultset, final ResultSetMetaData resultsetMetadata, int i,
+      boolean convertZeroDateTime)
       throws SQLException {
 
     if (isBlob(resultsetMetadata.getColumnType(i))) {
@@ -1055,6 +1059,18 @@ public abstract class JdbcExtractor extends QueryBasedExtractor<JsonArray, JsonE
         && convertBitToBoolean()) {
       return Boolean.toString(resultset.getBoolean(i));
     }
+
+    // Workaround for when `zeroDateTimeBehavior` is set
+    // returns null or a rounded timestamp instead of "0000-00-00 00:00:00" for zero timestamps
+    if (convertZeroDateTime && isTimestamp(resultsetMetadata.getColumnType(i))) {
+        Timestamp ts = resultset.getTimestamp(i);
+        if (ts == null) {
+          return null;
+        } else {
+          return new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(ts);
+        }
+    }
+
     return resultset.getString(i);
   }
 
@@ -1066,6 +1082,10 @@ public abstract class JdbcExtractor extends QueryBasedExtractor<JsonArray, JsonE
     return columnType == Types.CLOB;
   }
 
+  private static boolean isTimestamp(int columnType) {
+    return columnType == Types.TIMESTAMP || columnType == Types.TIMESTAMP_WITH_TIMEZONE;
+  }
+
   protected static Command getCommand(String query, JdbcCommandType commandType) {
     return new JdbcCommand().build(Arrays.asList(query), commandType);
   }
diff --git a/gobblin-modules/gobblin-sql/src/test/java/org/apache/gobblin/source/jdbc/JdbcExtractorTest.java b/gobblin-modules/gobblin-sql/src/test/java/org/apache/gobblin/source/jdbc/JdbcExtractorTest.java
index 53c35df..9ec5a70 100644
--- a/gobblin-modules/gobblin-sql/src/test/java/org/apache/gobblin/source/jdbc/JdbcExtractorTest.java
+++ b/gobblin-modules/gobblin-sql/src/test/java/org/apache/gobblin/source/jdbc/JdbcExtractorTest.java
@@ -17,38 +17,40 @@
 
 package org.apache.gobblin.source.jdbc;
 
-import static org.testng.Assert.assertEquals;
-import static org.testng.Assert.assertTrue;
-
-import java.sql.ResultSet;
-import java.sql.Types;
-import java.util.Iterator;
-import java.util.List;
-
-import org.apache.commons.lang.StringUtils;
-import org.testng.Assert;
-import org.testng.annotations.Test;
-
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.Lists;
 import com.google.gson.JsonElement;
 import com.google.gson.JsonObject;
 import com.mockrunner.mock.jdbc.MockResultSet;
-
+import com.mockrunner.mock.jdbc.MockResultSetMetaData;
+import java.sql.ResultSet;
+import java.sql.Types;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.List;
+import org.apache.commons.lang.StringUtils;
 import org.apache.gobblin.configuration.ConfigurationKeys;
 import org.apache.gobblin.configuration.State;
 import org.apache.gobblin.configuration.WorkUnitState;
 import org.apache.gobblin.source.extractor.exception.SchemaException;
 import org.apache.gobblin.source.extractor.extract.Command;
 import org.apache.gobblin.source.extractor.extract.CommandOutput;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+import static org.testng.Assert.*;
 
 
 @Test(groups = { "gobblin.source.jdbc" })
 public class JdbcExtractorTest {
 
-  private final static List<MockJdbcColumn> COLUMNS = ImmutableList.of(new MockJdbcColumn("id", "1", Types.INTEGER),
+  private static final List<MockJdbcColumn> COLUMNS = ImmutableList.of(new MockJdbcColumn("id", "1", Types.INTEGER),
       new MockJdbcColumn("name", "name_1", Types.VARCHAR), new MockJdbcColumn("age", "20", Types.INTEGER));
 
+  private static final String TIME_COLUMN = "time";
+
   @Test
   public void testGetData() throws Exception {
 
@@ -157,4 +159,114 @@ public class JdbcExtractorTest {
         "select a.fromLoc from (Select dest as fromLoc, id from b) as a limit 10");
     Assert.assertFalse(result);
   }
+
+  /**
+   * Helper function to build MockTimestampResultSet containing a single timestamp column.
+   * @param testCases the list of test cases
+   * @param behavior the expected behavior for the MockTimestampResultSet
+   * @return a MockTimestampResultSet containing the test cases
+   */
+  private ResultSet buildMockResultSetForTimeColumn(List<String> testCases, String behavior) {
+    MockResultSetMetaData mrsMetaData = new MockResultSetMetaData();
+    mrsMetaData.setColumnCount(1);
+    mrsMetaData.setColumnName(1, TIME_COLUMN);
+    mrsMetaData.setColumnType(1, Types.TIMESTAMP);
+
+    MockTimestampResultSet mrs = new MockTimestampResultSet(StringUtils.EMPTY, behavior);
+    mrs.setResultSetMetaData(mrsMetaData);
+    mrs.addColumn(TIME_COLUMN, testCases);
+
+    return mrs;
+  }
+
+  /**
+   * Helper function to test when zeroDateTimeBehavior is set.
+   * @param testCases A LinkedHashMap mapping the input timestamp as a string to the expected output.
+   *                  We use LinkedHashMap to preserve the order of the inputs/outputs.
+   * @param zeroDateTimeBehavior the expected behavior of a zero timestamp. Should be one of these values:
+   *                             null, "CONVERT_TO_NULL", "ROUND", "EXCEPTION"
+   * @throws Exception
+   */
+  private void testZeroDateTimeBehavior(LinkedHashMap<String, String> testCases, String zeroDateTimeBehavior) throws Exception {
+    WorkUnitState workUnitState = new WorkUnitState();
+    workUnitState.setId("id");
+    if (zeroDateTimeBehavior != null) {
+      workUnitState.setProp(ConfigurationKeys.SOURCE_CONN_PROPERTIES, "zeroDateTimeBehavior=" + zeroDateTimeBehavior);
+    }
+
+    JdbcExtractor jdbcExtractor = new MysqlExtractor(workUnitState);
+    jdbcExtractor.setHeaderRecord(Collections.singletonList(TIME_COLUMN));
+
+    CommandOutput<JdbcCommand, ResultSet> output = new JdbcCommandOutput();
+    output.put(new JdbcCommand(), buildMockResultSetForTimeColumn(new ArrayList<>(testCases.keySet()), zeroDateTimeBehavior));
+
+
+    Iterator<JsonElement> dataIterator = jdbcExtractor.getData(output);
+
+    // Make sure there is an element in the iterator
+    assertTrue(dataIterator.hasNext());
+
+    // Iterate through the output and verify that they are equal to the expected output
+    Iterator<String> expectedIterator = testCases.values().iterator();
+    while (dataIterator.hasNext()) {
+      JsonElement element = dataIterator.next().getAsJsonObject().get(TIME_COLUMN);
+      String expectedString = expectedIterator.next();
+
+      if (element.isJsonNull()) {
+        assert expectedString == null;
+      } else {
+        assertEquals(element.getAsString(), expectedString);
+      }
+    }
+  }
+
+  // zeroDateTimeBehavior=CONVERT_TO_NULL
+  // Zero timestamps should be converted to null.
+  // Other timestamps should be returned formatted as "yyyy-MM-dd HH:mm:ss".
+  public void testZeroDateTimeBehaviorConvertToNull() throws Exception {
+    LinkedHashMap<String, String> testCases = new LinkedHashMap<>();
+    testCases.put("2000-01-01 12:34:56.789", "2000-01-01 12:34:56");
+    testCases.put("1999-12-12 13:14:15.16", "1999-12-12 13:14:15");
+    testCases.put("0000-00-00 00:00:00.0", null);
+
+    testZeroDateTimeBehavior(testCases, "CONVERT_TO_NULL");
+  }
+
+  // zeroDateTimeBehavior=ROUND
+  // Zero timestamps should be converted to "0001-01-01 00:00:00".
+  // Other timestamps should be returned formatted as "yyyy-MM-dd HH:mm:ss".
+  public void testZeroDateTimeBehaviorRound() throws Exception {
+    LinkedHashMap<String, String> testCases = new LinkedHashMap<>();
+    testCases.put("2000-01-01 12:34:56.789", "2000-01-01 12:34:56");
+    testCases.put("1999-12-12 13:14:15.16", "1999-12-12 13:14:15");
+    testCases.put("0000-00-00 00:00:00.0", "0001-01-01 00:00:00");
+
+    testZeroDateTimeBehavior(testCases, "ROUND");
+  }
+
+  // zeroDateTimeBehavior=EXCEPTION
+  // Zero timestamps should cause an exception to be thrown.
+  // Other timestamps should be returned formatted as "yyyy-MM-dd HH:mm:ss".
+  public void testZeroDateTimeBehaviorException() throws Exception {
+    LinkedHashMap<String, String> testThrows = new LinkedHashMap<>();
+    testThrows.put("0000-00-00 00:00:00", "this value is irrelevant");
+
+    Assert.assertThrows(() -> testZeroDateTimeBehavior(testThrows, "EXCEPTION"));
+
+    LinkedHashMap<String, String> testPasses = new LinkedHashMap<>();
+    testPasses.put("2000-01-01 12:34:56.789", "2000-01-01 12:34:56");
+
+    testZeroDateTimeBehavior(testPasses, "EXCEPTION");
+  }
+
+  // zeroDateTimeBehavior is not set.
+  // All timestamps should be returned formatted as "yyyy-MM-dd HH:mm:ss"
+  public void testZeroDateTimeBehaviorNotSpecified() throws Exception {
+    LinkedHashMap<String, String> testCases = new LinkedHashMap<>();
+    testCases.put("2000-01-01 12:34:56.789", "2000-01-01 12:34:56");
+    testCases.put("1999-12-12 13:14:15.16", "1999-12-12 13:14:15");
+    testCases.put("0000-00-00 00:00:00.0", "0000-00-00 00:00:00");
+
+    testZeroDateTimeBehavior(testCases, null);
+  }
 }
diff --git a/gobblin-modules/gobblin-sql/src/test/java/org/apache/gobblin/source/jdbc/MockTimestampResultSet.java b/gobblin-modules/gobblin-sql/src/test/java/org/apache/gobblin/source/jdbc/MockTimestampResultSet.java
new file mode 100644
index 0000000..4bf717b
--- /dev/null
+++ b/gobblin-modules/gobblin-sql/src/test/java/org/apache/gobblin/source/jdbc/MockTimestampResultSet.java
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.gobblin.source.jdbc;
+
+import com.mockrunner.mock.jdbc.MockResultSet;
+import java.sql.SQLException;
+import java.sql.Timestamp;
+import java.sql.Types;
+import java.text.SimpleDateFormat;
+
+
+/**
+ * A class that mocks the getTimestamp() behavior of a ResultSet that is returned by mysql-connector-8.
+ * This class expects that all entries in the ResultSet are timestamps
+ */
+class MockTimestampResultSet extends MockResultSet {
+  enum ZeroDateTimeBehavior {
+    CONVERT_TO_NULL, ROUND, EXCEPTION
+  };
+
+  private ZeroDateTimeBehavior _behavior;
+  MockTimestampResultSet(String id, String behavior) {
+    super(id);
+    this._behavior = ZeroDateTimeBehavior.EXCEPTION; // default behavior is to throw an exception on a zero timestamp
+    if (behavior != null) {
+      this._behavior = ZeroDateTimeBehavior.valueOf(behavior);
+    }
+  }
+
+  private boolean isZeroTimestamp(String s) {
+    return s.startsWith("0000-00-00 00:00:00");
+  }
+
+  // mimic the behavior of mysql-connector-8's getTimestamp().
+  // see com.mysql.cj.result.AbstractDateTimeValueFactory for details.
+  @Override
+  public Timestamp getTimestamp(int columnIndex) throws SQLException {
+    Object obj = getObject(columnIndex);
+    if (isZeroTimestamp(obj.toString())) {
+      switch (_behavior) {
+        case ROUND:
+          return Timestamp.valueOf("0001-01-01 00:00:00.0");
+        case CONVERT_TO_NULL:
+          return null;
+        case EXCEPTION:
+          return Timestamp.valueOf(obj.toString()); // this throws an exception since timestamps cannot be zero in Java.
+      }
+    }
+
+    return super.getTimestamp(columnIndex);
+  }
+
+  // mimic the behavior of mysql-connector-8's getString()
+  // for timestamps, getString() formats the timestamp to "yyyy-MM-dd HH:mm:ss".
+  @Override
+  public String getString(int columnIndex) throws SQLException {
+    if (this.getMetaData().getColumnType(columnIndex) == Types.TIMESTAMP) {
+      if (isZeroTimestamp(getObject(columnIndex).toString())) {
+        return "0000-00-00 00:00:00";
+      }
+      return new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(getTimestamp(columnIndex));
+    }
+    return super.getString(columnIndex);
+  }
+}