You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@gobblin.apache.org by le...@apache.org on 2021/06/28 16:52:54 UTC
[gobblin] branch master updated: [GOBBLIN-1479] Add support to
convert zeroDateTime to null when configured (#3319)
This is an automated email from the ASF dual-hosted git repository.
lesun pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/gobblin.git
The following commit(s) were added to refs/heads/master by this push:
new 2407be6 [GOBBLIN-1479] Add support to convert zeroDateTime to null when configured (#3319)
2407be6 is described below
commit 2407be647d3073063c4b589e656cf53f439aca8b
Author: Lemuel Chik <le...@gmail.com>
AuthorDate: Mon Jun 28 09:52:42 2021 -0700
[GOBBLIN-1479] Add support to convert zeroDateTime to null when configured (#3319)
* Add support to convert zero DateTime values to null when configured
In MySQL Connector 8, calling ResultSet.getString(int) on a record with a zero timestamp
returns the string "0000-00-00 00:00:00", even when the connection property
`zeroDateTimeBehavior=CONVERT_TO_NULL` is set.
This change is a workaround to return null for these timestamps when the property is set.
* Updated change to handle other cases for zeroDateTimeBehavior
* Added unit tests
* Added apache license agreement to MockTimestampResultSet
Co-authored-by: Lemuel Chik <lc...@lchik-mn1.linkedin.biz>
---
.../apache/gobblin/source/jdbc/JdbcExtractor.java | 28 ++++-
.../gobblin/source/jdbc/JdbcExtractorTest.java | 140 ++++++++++++++++++---
.../source/jdbc/MockTimestampResultSet.java | 80 ++++++++++++
3 files changed, 230 insertions(+), 18 deletions(-)
diff --git a/gobblin-modules/gobblin-sql/src/main/java/org/apache/gobblin/source/jdbc/JdbcExtractor.java b/gobblin-modules/gobblin-sql/src/main/java/org/apache/gobblin/source/jdbc/JdbcExtractor.java
index 7a28157..331eb0d 100644
--- a/gobblin-modules/gobblin-sql/src/main/java/org/apache/gobblin/source/jdbc/JdbcExtractor.java
+++ b/gobblin-modules/gobblin-sql/src/main/java/org/apache/gobblin/source/jdbc/JdbcExtractor.java
@@ -26,6 +26,7 @@ import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.sql.Statement;
+import java.sql.Timestamp;
import java.sql.Types;
import java.text.ParseException;
import java.text.SimpleDateFormat;
@@ -959,6 +960,9 @@ public abstract class JdbcExtractor extends QueryBasedExtractor<JsonArray, JsonE
int batchSize = this.workUnitState.getPropAsInt(ConfigurationKeys.SOURCE_QUERYBASED_FETCH_SIZE, 0);
batchSize = (batchSize == 0 ? ConfigurationKeys.DEFAULT_SOURCE_FETCH_SIZE : batchSize);
+ String sourceConnProps = this.workUnitState.getProp(ConfigurationKeys.SOURCE_CONN_PROPERTIES);
+ boolean convertZeroDateTime = sourceConnProps != null && sourceConnProps.contains("zeroDateTimeBehavior");
+
int recordCount = 0;
while (resultset.next()) {
@@ -967,8 +971,7 @@ public abstract class JdbcExtractor extends QueryBasedExtractor<JsonArray, JsonE
for (int i = 1; i < numColumns + 1; i++) {
final String columnName = this.getHeaderRecord().get(i - 1);
- jsonObject.addProperty(columnName, parseColumnAsString(resultset, resultsetMetadata, i));
-
+ jsonObject.addProperty(columnName, parseColumnAsString(resultset, resultsetMetadata, i, convertZeroDateTime));
}
recordSet.add(jsonObject);
@@ -1028,7 +1031,7 @@ public abstract class JdbcExtractor extends QueryBasedExtractor<JsonArray, JsonE
* treat tinyint(1) as BIT.
*
* Currently, {@link MysqlExtractor#getDataTypeMap()} uses the information_schema to check types.
- * That does not do the above conversion. {@link #parseColumnAsString(ResultSet, ResultSetMetaData, int)}
+ * That does not do the above conversion. {@link #parseColumnAsString(ResultSet, ResultSetMetaData, int, boolean)}
* which does the above type mapping.
*
* On the other hand, SqlServerExtractor treats BIT columns as Booleans. So we can be in a bind
@@ -1041,7 +1044,8 @@ public abstract class JdbcExtractor extends QueryBasedExtractor<JsonArray, JsonE
return true;
}
- private String parseColumnAsString(final ResultSet resultset, final ResultSetMetaData resultsetMetadata, int i)
+ private String parseColumnAsString(final ResultSet resultset, final ResultSetMetaData resultsetMetadata, int i,
+ boolean convertZeroDateTime)
throws SQLException {
if (isBlob(resultsetMetadata.getColumnType(i))) {
@@ -1055,6 +1059,18 @@ public abstract class JdbcExtractor extends QueryBasedExtractor<JsonArray, JsonE
&& convertBitToBoolean()) {
return Boolean.toString(resultset.getBoolean(i));
}
+
+ // Workaround for when `zeroDateTimeBehavior` is set
+ // returns null or a rounded timestamp instead of "0000-00-00 00:00:00" for zero timestamps
+ if (convertZeroDateTime && isTimestamp(resultsetMetadata.getColumnType(i))) {
+ Timestamp ts = resultset.getTimestamp(i);
+ if (ts == null) {
+ return null;
+ } else {
+ return new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(ts);
+ }
+ }
+
return resultset.getString(i);
}
@@ -1066,6 +1082,10 @@ public abstract class JdbcExtractor extends QueryBasedExtractor<JsonArray, JsonE
return columnType == Types.CLOB;
}
+ private static boolean isTimestamp(int columnType) {
+ return columnType == Types.TIMESTAMP || columnType == Types.TIMESTAMP_WITH_TIMEZONE;
+ }
+
protected static Command getCommand(String query, JdbcCommandType commandType) {
return new JdbcCommand().build(Arrays.asList(query), commandType);
}
diff --git a/gobblin-modules/gobblin-sql/src/test/java/org/apache/gobblin/source/jdbc/JdbcExtractorTest.java b/gobblin-modules/gobblin-sql/src/test/java/org/apache/gobblin/source/jdbc/JdbcExtractorTest.java
index 53c35df..9ec5a70 100644
--- a/gobblin-modules/gobblin-sql/src/test/java/org/apache/gobblin/source/jdbc/JdbcExtractorTest.java
+++ b/gobblin-modules/gobblin-sql/src/test/java/org/apache/gobblin/source/jdbc/JdbcExtractorTest.java
@@ -17,38 +17,40 @@
package org.apache.gobblin.source.jdbc;
-import static org.testng.Assert.assertEquals;
-import static org.testng.Assert.assertTrue;
-
-import java.sql.ResultSet;
-import java.sql.Types;
-import java.util.Iterator;
-import java.util.List;
-
-import org.apache.commons.lang.StringUtils;
-import org.testng.Assert;
-import org.testng.annotations.Test;
-
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.mockrunner.mock.jdbc.MockResultSet;
-
+import com.mockrunner.mock.jdbc.MockResultSetMetaData;
+import java.sql.ResultSet;
+import java.sql.Types;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.List;
+import org.apache.commons.lang.StringUtils;
import org.apache.gobblin.configuration.ConfigurationKeys;
import org.apache.gobblin.configuration.State;
import org.apache.gobblin.configuration.WorkUnitState;
import org.apache.gobblin.source.extractor.exception.SchemaException;
import org.apache.gobblin.source.extractor.extract.Command;
import org.apache.gobblin.source.extractor.extract.CommandOutput;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+import static org.testng.Assert.*;
@Test(groups = { "gobblin.source.jdbc" })
public class JdbcExtractorTest {
- private final static List<MockJdbcColumn> COLUMNS = ImmutableList.of(new MockJdbcColumn("id", "1", Types.INTEGER),
+ private static final List<MockJdbcColumn> COLUMNS = ImmutableList.of(new MockJdbcColumn("id", "1", Types.INTEGER),
new MockJdbcColumn("name", "name_1", Types.VARCHAR), new MockJdbcColumn("age", "20", Types.INTEGER));
+ private static final String TIME_COLUMN = "time";
+
@Test
public void testGetData() throws Exception {
@@ -157,4 +159,114 @@ public class JdbcExtractorTest {
"select a.fromLoc from (Select dest as fromLoc, id from b) as a limit 10");
Assert.assertFalse(result);
}
+
+ /**
+ * Helper function to build MockTimestampResultSet containing a single timestamp column.
+ * @param testCases the list of test cases
+ * @param behavior the expected behavior for the MockTimestampResultSet
+ * @return a MockTimestampResultSet containing the test cases
+ */
+ private ResultSet buildMockResultSetForTimeColumn(List<String> testCases, String behavior) {
+ MockResultSetMetaData mrsMetaData = new MockResultSetMetaData();
+ mrsMetaData.setColumnCount(1);
+ mrsMetaData.setColumnName(1, TIME_COLUMN);
+ mrsMetaData.setColumnType(1, Types.TIMESTAMP);
+
+ MockTimestampResultSet mrs = new MockTimestampResultSet(StringUtils.EMPTY, behavior);
+ mrs.setResultSetMetaData(mrsMetaData);
+ mrs.addColumn(TIME_COLUMN, testCases);
+
+ return mrs;
+ }
+
+ /**
+ * Helper function to test when zeroDateTimeBehavior is set.
+ * @param testCases A LinkedHashMap mapping the input timestamp as a string to the expected output.
+ * We use LinkedHashMap to preserve the order of the inputs/outputs.
+ * @param zeroDateTimeBehavior the expected behavior of a zero timestamp. Should be one of these values:
+ * null, "CONVERT_TO_NULL", "ROUND", "EXCEPTION"
+ * @throws Exception
+ */
+ private void testZeroDateTimeBehavior(LinkedHashMap<String, String> testCases, String zeroDateTimeBehavior) throws Exception {
+ WorkUnitState workUnitState = new WorkUnitState();
+ workUnitState.setId("id");
+ if (zeroDateTimeBehavior != null) {
+ workUnitState.setProp(ConfigurationKeys.SOURCE_CONN_PROPERTIES, "zeroDateTimeBehavior=" + zeroDateTimeBehavior);
+ }
+
+ JdbcExtractor jdbcExtractor = new MysqlExtractor(workUnitState);
+ jdbcExtractor.setHeaderRecord(Collections.singletonList(TIME_COLUMN));
+
+ CommandOutput<JdbcCommand, ResultSet> output = new JdbcCommandOutput();
+ output.put(new JdbcCommand(), buildMockResultSetForTimeColumn(new ArrayList<>(testCases.keySet()), zeroDateTimeBehavior));
+
+
+ Iterator<JsonElement> dataIterator = jdbcExtractor.getData(output);
+
+ // Make sure there is an element in the iterator
+ assertTrue(dataIterator.hasNext());
+
+ // Iterate through the output and verify that they are equal to the expected output
+ Iterator<String> expectedIterator = testCases.values().iterator();
+ while (dataIterator.hasNext()) {
+ JsonElement element = dataIterator.next().getAsJsonObject().get(TIME_COLUMN);
+ String expectedString = expectedIterator.next();
+
+ if (element.isJsonNull()) {
+ assert expectedString == null;
+ } else {
+ assertEquals(element.getAsString(), expectedString);
+ }
+ }
+ }
+
+ // zeroDateTimeBehavior=CONVERT_TO_NULL
+ // Zero timestamps should be converted to null.
+ // Other timestamps should be returned formatted as "yyyy-MM-dd HH:mm:ss".
+ public void testZeroDateTimeBehaviorConvertToNull() throws Exception {
+ LinkedHashMap<String, String> testCases = new LinkedHashMap<>();
+ testCases.put("2000-01-01 12:34:56.789", "2000-01-01 12:34:56");
+ testCases.put("1999-12-12 13:14:15.16", "1999-12-12 13:14:15");
+ testCases.put("0000-00-00 00:00:00.0", null);
+
+ testZeroDateTimeBehavior(testCases, "CONVERT_TO_NULL");
+ }
+
+ // zeroDateTimeBehavior=ROUND
+ // Zero timestamps should be converted to "0001-01-01 00:00:00".
+ // Other timestamps should be returned formatted as "yyyy-MM-dd HH:mm:ss".
+ public void testZeroDateTimeBehaviorRound() throws Exception {
+ LinkedHashMap<String, String> testCases = new LinkedHashMap<>();
+ testCases.put("2000-01-01 12:34:56.789", "2000-01-01 12:34:56");
+ testCases.put("1999-12-12 13:14:15.16", "1999-12-12 13:14:15");
+ testCases.put("0000-00-00 00:00:00.0", "0001-01-01 00:00:00");
+
+ testZeroDateTimeBehavior(testCases, "ROUND");
+ }
+
+ // zeroDateTimeBehavior=EXCEPTION
+ // Zero timestamps should cause an exception to be thrown.
+ // Other timestamps should be returned formatted as "yyyy-MM-dd HH:mm:ss".
+ public void testZeroDateTimeBehaviorException() throws Exception {
+ LinkedHashMap<String, String> testThrows = new LinkedHashMap<>();
+ testThrows.put("0000-00-00 00:00:00", "this value is irrelevant");
+
+ Assert.assertThrows(() -> testZeroDateTimeBehavior(testThrows, "EXCEPTION"));
+
+ LinkedHashMap<String, String> testPasses = new LinkedHashMap<>();
+ testPasses.put("2000-01-01 12:34:56.789", "2000-01-01 12:34:56");
+
+ testZeroDateTimeBehavior(testPasses, "EXCEPTION");
+ }
+
+ // zeroDateTimeBehavior is not set.
+ // All timestamps should be returned formatted as "yyyy-MM-dd HH:mm:ss"
+ public void testZeroDateTimeBehaviorNotSpecified() throws Exception {
+ LinkedHashMap<String, String> testCases = new LinkedHashMap<>();
+ testCases.put("2000-01-01 12:34:56.789", "2000-01-01 12:34:56");
+ testCases.put("1999-12-12 13:14:15.16", "1999-12-12 13:14:15");
+ testCases.put("0000-00-00 00:00:00.0", "0000-00-00 00:00:00");
+
+ testZeroDateTimeBehavior(testCases, null);
+ }
}
diff --git a/gobblin-modules/gobblin-sql/src/test/java/org/apache/gobblin/source/jdbc/MockTimestampResultSet.java b/gobblin-modules/gobblin-sql/src/test/java/org/apache/gobblin/source/jdbc/MockTimestampResultSet.java
new file mode 100644
index 0000000..4bf717b
--- /dev/null
+++ b/gobblin-modules/gobblin-sql/src/test/java/org/apache/gobblin/source/jdbc/MockTimestampResultSet.java
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.gobblin.source.jdbc;
+
+import com.mockrunner.mock.jdbc.MockResultSet;
+import java.sql.SQLException;
+import java.sql.Timestamp;
+import java.sql.Types;
+import java.text.SimpleDateFormat;
+
+
+/**
+ * A class that mocks the getTimestamp() behavior of a ResultSet that is returned by mysql-connector-8.
+ * This class expects that all entries in the ResultSet are timestamps
+ */
+class MockTimestampResultSet extends MockResultSet {
+ enum ZeroDateTimeBehavior {
+ CONVERT_TO_NULL, ROUND, EXCEPTION
+ };
+
+ private ZeroDateTimeBehavior _behavior;
+ MockTimestampResultSet(String id, String behavior) {
+ super(id);
+ this._behavior = ZeroDateTimeBehavior.EXCEPTION; // default behavior is to throw an exception on a zero timestamp
+ if (behavior != null) {
+ this._behavior = ZeroDateTimeBehavior.valueOf(behavior);
+ }
+ }
+
+ private boolean isZeroTimestamp(String s) {
+ return s.startsWith("0000-00-00 00:00:00");
+ }
+
+ // mimic the behavior of mysql-connector-8's getTimestamp().
+ // see com.mysql.cj.result.AbstractDateTimeValueFactory for details.
+ @Override
+ public Timestamp getTimestamp(int columnIndex) throws SQLException {
+ Object obj = getObject(columnIndex);
+ if (isZeroTimestamp(obj.toString())) {
+ switch (_behavior) {
+ case ROUND:
+ return Timestamp.valueOf("0001-01-01 00:00:00.0");
+ case CONVERT_TO_NULL:
+ return null;
+ case EXCEPTION:
+ return Timestamp.valueOf(obj.toString()); // this throws an exception since timestamps cannot be zero in Java.
+ }
+ }
+
+ return super.getTimestamp(columnIndex);
+ }
+
+ // mimic the behavior of mysql-connector-8's getString()
+ // for timestamps, getString() formats the timestamp to "yyyy-MM-dd HH:mm:ss".
+ @Override
+ public String getString(int columnIndex) throws SQLException {
+ if (this.getMetaData().getColumnType(columnIndex) == Types.TIMESTAMP) {
+ if (isZeroTimestamp(getObject(columnIndex).toString())) {
+ return "0000-00-00 00:00:00";
+ }
+ return new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(getTimestamp(columnIndex));
+ }
+ return super.getString(columnIndex);
+ }
+}