You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by vi...@apache.org on 2020/01/12 23:45:32 UTC
[incubator-hudi] branch master updated: [HUDI-502] provide a custom
time zone definition for TimestampBasedKeyGenerator (#1188)
This is an automated email from the ASF dual-hosted git repository.
vinoth pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-hudi.git
The following commit(s) were added to refs/heads/master by this push:
new a44c61b [HUDI-502] provide a custom time zone definition for TimestampBasedKeyGenerator (#1188)
a44c61b is described below
commit a44c61b81356e93340710ccc0022e576b7b6e077
Author: openopen2 <a2...@outlook.com>
AuthorDate: Mon Jan 13 07:45:23 2020 +0800
[HUDI-502] provide a custom time zone definition for TimestampBasedKeyGenerator (#1188)
---
.../apache/hudi/common/util/SchemaTestUtil.java | 4 ++
.../src/test/resources/timestamp-test-evolved.avsc | 26 +++++++
.../keygen/TimestampBasedKeyGenerator.java | 11 ++-
.../utilities/TestTimestampBasedKeyGenerator.java | 83 ++++++++++++++++++++++
4 files changed, 122 insertions(+), 2 deletions(-)
diff --git a/hudi-common/src/test/java/org/apache/hudi/common/util/SchemaTestUtil.java b/hudi-common/src/test/java/org/apache/hudi/common/util/SchemaTestUtil.java
index 2b3de27..18d6b73 100644
--- a/hudi-common/src/test/java/org/apache/hudi/common/util/SchemaTestUtil.java
+++ b/hudi-common/src/test/java/org/apache/hudi/common/util/SchemaTestUtil.java
@@ -169,6 +169,10 @@ public class SchemaTestUtil {
return new Schema.Parser().parse(SchemaTestUtil.class.getResourceAsStream("/complex-test-evolved.avsc"));
}
+  /**
+   * Parses the Avro schema stored in the {@code /timestamp-test-evolved.avsc} classpath resource.
+   *
+   * @return the parsed schema
+   * @throws IOException if the resource is missing or cannot be read
+   */
+  public static Schema getTimestampEvolvedSchema() throws IOException {
+    final String resource = "/timestamp-test-evolved.avsc";
+    // try-with-resources closes the stream after parsing; the type is fully qualified so that
+    // this method does not depend on an additional import.
+    try (java.io.InputStream in = SchemaTestUtil.class.getResourceAsStream(resource)) {
+      // getResourceAsStream returns null when the resource is absent; fail with a clear message.
+      if (in == null) {
+        throw new IOException("Schema resource not found on classpath: " + resource);
+      }
+      return new Schema.Parser().parse(in);
+    }
+  }
+
public static GenericRecord generateAvroRecordFromJson(Schema schema, int recordNumber, String commitTime,
String fileId) throws IOException {
TestRecord record = new TestRecord(commitTime, recordNumber, fileId);
diff --git a/hudi-common/src/test/resources/timestamp-test-evolved.avsc b/hudi-common/src/test/resources/timestamp-test-evolved.avsc
new file mode 100644
index 0000000..421c672
--- /dev/null
+++ b/hudi-common/src/test/resources/timestamp-test-evolved.avsc
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+{
+ "namespace": "example.avro",
+ "type": "record",
+ "name": "User",
+ "fields": [
+ {"name": "field1", "type": ["null", "string"], "default": null},
+ {"name": "createTime", "type": ["null", "string"], "default": null}
+ ]
+}
\ No newline at end of file
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/keygen/TimestampBasedKeyGenerator.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/keygen/TimestampBasedKeyGenerator.java
index 6d3a6e3..7f1380e 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/keygen/TimestampBasedKeyGenerator.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/keygen/TimestampBasedKeyGenerator.java
@@ -51,6 +51,10 @@ public class TimestampBasedKeyGenerator extends SimpleKeyGenerator {
private final String outputDateFormat;
+ // Time zone applied when parsing input date strings and when formatting the partition path.
+ // Accepted time zone IDs are described at https://docs.oracle.com/javase/8/docs/api/java/util/TimeZone.html
+ private final TimeZone timeZone;
+
/**
* Supported configs.
*/
@@ -62,6 +66,8 @@ public class TimestampBasedKeyGenerator extends SimpleKeyGenerator {
"hoodie.deltastreamer.keygen.timebased.input.dateformat";
private static final String TIMESTAMP_OUTPUT_DATE_FORMAT_PROP =
"hoodie.deltastreamer.keygen.timebased.output.dateformat";
+ private static final String TIMESTAMP_TIMEZONE_FORMAT_PROP =
+ "hoodie.deltastreamer.keygen.timebased.timezone";
}
public TimestampBasedKeyGenerator(TypedProperties config) {
@@ -70,12 +76,13 @@ public class TimestampBasedKeyGenerator extends SimpleKeyGenerator {
Arrays.asList(Config.TIMESTAMP_TYPE_FIELD_PROP, Config.TIMESTAMP_OUTPUT_DATE_FORMAT_PROP));
this.timestampType = TimestampType.valueOf(config.getString(Config.TIMESTAMP_TYPE_FIELD_PROP));
this.outputDateFormat = config.getString(Config.TIMESTAMP_OUTPUT_DATE_FORMAT_PROP);
+ this.timeZone = TimeZone.getTimeZone(config.getString(Config.TIMESTAMP_TIMEZONE_FORMAT_PROP, "GMT"));
if (timestampType == TimestampType.DATE_STRING || timestampType == TimestampType.MIXED) {
DataSourceUtils.checkRequiredProperties(config,
Collections.singletonList(Config.TIMESTAMP_INPUT_DATE_FORMAT_PROP));
this.inputDateFormat = new SimpleDateFormat(config.getString(Config.TIMESTAMP_INPUT_DATE_FORMAT_PROP));
- this.inputDateFormat.setTimeZone(TimeZone.getTimeZone("GMT"));
+ this.inputDateFormat.setTimeZone(timeZone);
}
}
@@ -86,7 +93,7 @@ public class TimestampBasedKeyGenerator extends SimpleKeyGenerator {
partitionVal = 1L;
}
SimpleDateFormat partitionPathFormat = new SimpleDateFormat(outputDateFormat);
- partitionPathFormat.setTimeZone(TimeZone.getTimeZone("GMT"));
+ partitionPathFormat.setTimeZone(timeZone);
try {
long unixTime;
diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestTimestampBasedKeyGenerator.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestTimestampBasedKeyGenerator.java
new file mode 100644
index 0000000..cb0c822
--- /dev/null
+++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestTimestampBasedKeyGenerator.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.utilities;
+
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.hudi.DataSourceWriteOptions;
+import org.apache.hudi.common.model.HoodieKey;
+import org.apache.hudi.common.util.SchemaTestUtil;
+import org.apache.hudi.common.util.TypedProperties;
+import org.apache.hudi.utilities.keygen.TimestampBasedKeyGenerator;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.io.IOException;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Tests the partition path produced by {@link TimestampBasedKeyGenerator} when the
+ * {@code hoodie.deltastreamer.keygen.timebased.timezone} property is set.
+ */
+public class TestTimestampBasedKeyGenerator {
+  private Schema schema;
+  private GenericRecord baseRecord;
+  private TypedProperties properties = new TypedProperties();
+
+  /**
+   * Loads the timestamp test schema, builds the base record from it, and sets the record-key field,
+   * partition-path field and hive-style-partitioning options shared by all scenarios.
+   *
+   * @throws IOException if the schema or the record cannot be loaded
+   */
+  @Before
+  public void initialize() throws IOException {
+    schema = SchemaTestUtil.getTimestampEvolvedSchema();
+    baseRecord = SchemaTestUtil
+        .generateAvroRecordFromJson(schema, 1, "001", "f1");
+
+    properties.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY(), "field1");
+    properties.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY(), "createTime");
+    properties.setProperty(DataSourceWriteOptions.HIVE_STYLE_PARTITIONING_OPT_KEY(), "false");
+  }
+
+  /**
+   * Sets the timestamp type, output date format and time zone on the shared properties.
+   *
+   * @param timestampType value for the {@code timestamp.type} property
+   * @param dateFormat value for the {@code output.dateformat} property
+   * @param timezone value for the {@code timezone} property
+   * @return the same (mutated) properties instance held by this test
+   */
+  private TypedProperties getBaseKeyConfig(String timestampType, String dateFormat, String timezone) {
+    properties.setProperty("hoodie.deltastreamer.keygen.timebased.timestamp.type", timestampType);
+    properties.setProperty("hoodie.deltastreamer.keygen.timebased.output.dateformat", dateFormat);
+    properties.setProperty("hoodie.deltastreamer.keygen.timebased.timezone", timezone);
+    return properties;
+  }
+
+  /**
+   * Checks the generated partition path for epoch-millisecond and date-string input, each with
+   * the time zone set to GMT+8:00 and to GMT.
+   *
+   * <p>The formats use {@code HH} (hour of day, 0-23) rather than {@code hh} (hour in am/pm, 1-12)
+   * so that the asserted hour is unambiguous.
+   */
+  @Test
+  public void testTimestampBasedKeyGenerator() {
+    // 1578283932000 ms is 2020-01-06 04:12:12 GMT, i.e. 12:12:12 at GMT+8:00.
+    baseRecord.put("createTime", 1578283932000L);
+    properties = getBaseKeyConfig("EPOCHMILLISECONDS", "yyyy-MM-dd HH", "GMT+8:00");
+    HoodieKey hk1 = new TimestampBasedKeyGenerator(properties).getKey(baseRecord);
+    assertEquals("2020-01-06 12", hk1.getPartitionPath());
+
+    // Same instant, time zone is GMT.
+    properties = getBaseKeyConfig("EPOCHMILLISECONDS", "yyyy-MM-dd HH", "GMT");
+    HoodieKey hk2 = new TimestampBasedKeyGenerator(properties).getKey(baseRecord);
+    assertEquals("2020-01-06 04", hk2.getPartitionPath());
+
+    // Timestamp type is DATE_STRING, time zone is GMT+8:00 (used for both parsing and formatting).
+    baseRecord.put("createTime", "2020-01-06 12:12:12");
+    properties = getBaseKeyConfig("DATE_STRING", "yyyy-MM-dd HH", "GMT+8:00");
+    properties.setProperty("hoodie.deltastreamer.keygen.timebased.input.dateformat", "yyyy-MM-dd HH:mm:ss");
+    HoodieKey hk3 = new TimestampBasedKeyGenerator(properties).getKey(baseRecord);
+    assertEquals("2020-01-06 12", hk3.getPartitionPath());
+
+    // DATE_STRING again, time zone is GMT; the input date format set above is still in effect.
+    properties = getBaseKeyConfig("DATE_STRING", "yyyy-MM-dd HH", "GMT");
+    HoodieKey hk4 = new TimestampBasedKeyGenerator(properties).getKey(baseRecord);
+    assertEquals("2020-01-06 12", hk4.getPartitionPath());
+  }
+}