You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by vi...@apache.org on 2020/10/29 03:30:06 UTC

[hudi] branch master updated: [HUDI-1274] Make hive synchronization supports hourly partition (#2122)

This is an automated email from the ASF dual-hosted git repository.

vinoyang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new 736a940  [HUDI-1274] Make hive synchronization supports hourly partition (#2122)
736a940 is described below

commit 736a9408549f66014bfdfdc72832be11aa9fd39b
Author: liujinhui <96...@qq.com>
AuthorDate: Thu Oct 29 11:29:50 2020 +0800

    [HUDI-1274] Make hive synchronization supports hourly partition (#2122)
---
 .../SlashEncodedHourPartitionValueExtractor.java   | 67 ++++++++++++++++++++++
 .../hudi/hive/TestPartitionValueExtractor.java     | 38 ++++++++++++
 2 files changed, 105 insertions(+)

diff --git a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/SlashEncodedHourPartitionValueExtractor.java b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/SlashEncodedHourPartitionValueExtractor.java
new file mode 100644
index 0000000..dcb2c6d
--- /dev/null
+++ b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/SlashEncodedHourPartitionValueExtractor.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.hive;
+
+import org.joda.time.DateTime;
+import org.joda.time.format.DateTimeFormat;
+import org.joda.time.format.DateTimeFormatter;
+
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * Extracts the Hive partition value from an hourly, slash-encoded partition path.
+ * <p>
+ * A partition path of the form {@code yyyy/MM/dd/HH} is mapped to the single partition value
+ * {@code yyyy-MM-dd-HH}. Each path segment may also be written Hive-style as {@code key=value}, in which case only
+ * the value part is used.
+ */
+public class SlashEncodedHourPartitionValueExtractor implements PartitionValueExtractor {
+
+  private static final long serialVersionUID = 1L;
+  // Pattern of the emitted partition value; shared by the constructor and the lazy getter.
+  private static final String OUTPUT_PATTERN = "yyyy-MM-dd-HH";
+  // Marked transient, so it may be null on an instance that was not built through the constructor;
+  // always read it through getDtfOut().
+  private transient DateTimeFormatter dtfOut;
+
+  public SlashEncodedHourPartitionValueExtractor() {
+    this.dtfOut = DateTimeFormat.forPattern(OUTPUT_PATTERN);
+  }
+
+  /**
+   * Returns the output formatter, creating it first if the field is currently null.
+   */
+  private DateTimeFormatter getDtfOut() {
+    if (dtfOut == null) {
+      dtfOut = DateTimeFormat.forPattern(OUTPUT_PATTERN);
+    }
+    return dtfOut;
+  }
+
+  /**
+   * Parses one path segment, given either as a plain number ({@code 2020}) or Hive-style ({@code year=2020}).
+   *
+   * @param segment a single segment of the partition path
+   * @return the numeric value of the segment
+   * @throws IllegalArgumentException if a Hive-style segment has no value, or the value is not an integer
+   */
+  private static int parseSegment(String segment) {
+    if (!segment.contains("=")) {
+      return Integer.parseInt(segment);
+    }
+    // String.split drops trailing empty strings, so "hour=" yields a single element; check before indexing.
+    String[] keyValue = segment.split("=");
+    if (keyValue.length < 2) {
+      throw new IllegalArgumentException("Partition path segment " + segment + " has no value after '='");
+    }
+    return Integer.parseInt(keyValue[1]);
+  }
+
+  /**
+   * Converts a partition path into the list of Hive partition values it encodes.
+   *
+   * @param partitionPath partition path in the form {@code yyyy/MM/dd/HH}; segments may be {@code key=value}
+   * @return a singleton list holding the value formatted as {@code yyyy-MM-dd-HH}
+   * @throws IllegalArgumentException if the path does not have exactly four segments or a segment cannot be parsed
+   */
+  @Override
+  public List<String> extractPartitionValuesInPath(String partitionPath) {
+    // partition path is expected to be in this format yyyy/MM/dd/HH
+    String[] splits = partitionPath.split("/");
+    if (splits.length != 4) {
+      throw new IllegalArgumentException("Partition path " + partitionPath + " is not in the form  yyyy/mm/dd/HH");
+    }
+    // Hive style partitions contain '=', plain partitions do not; parseSegment handles both.
+    int year = parseSegment(splits[0]);
+    int mm = parseSegment(splits[1]);
+    int dd = parseSegment(splits[2]);
+    int hh = parseSegment(splits[3]);
+
+    // The path carries wall-clock fields only, so build the value in UTC. With the JVM default zone, an hour
+    // that falls into a daylight-saving gap of that zone does not exist and Joda-Time refuses to construct it.
+    // The formatter prints in the zone of the DateTime it is given, so the output fields equal the input fields.
+    DateTime dateTime = new DateTime(year, mm, dd, hh, 0, org.joda.time.DateTimeZone.UTC);
+
+    return Collections.singletonList(getDtfOut().print(dateTime));
+  }
+}
diff --git a/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestPartitionValueExtractor.java b/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestPartitionValueExtractor.java
new file mode 100644
index 0000000..a248e49
--- /dev/null
+++ b/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestPartitionValueExtractor.java
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.hive;
+
+import org.junit.jupiter.api.Test;
+import java.util.ArrayList;
+import java.util.List;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+/**
+ * Unit tests for {@link SlashEncodedHourPartitionValueExtractor}.
+ */
+public class TestPartitionValueExtractor {
+
+  /**
+   * Checks that plain and Hive-style hourly paths map to the same partition value, and that paths which do not
+   * have exactly four segments are rejected.
+   */
+  @Test
+  public void testHourPartition() {
+    SlashEncodedHourPartitionValueExtractor hourPartition = new SlashEncodedHourPartitionValueExtractor();
+    List<String> expected = new ArrayList<>();
+    expected.add("2020-12-20-01");
+
+    // assertEquals takes (expected, actual); keeping that order makes failure messages read correctly.
+    assertEquals(expected, hourPartition.extractPartitionValuesInPath("2020/12/20/01"));
+    // Hive-style key on the first segment only.
+    assertEquals(expected, hourPartition.extractPartitionValuesInPath("update_time=2020/12/20/01"));
+    // Hive-style key on every segment.
+    assertEquals(expected, hourPartition.extractPartitionValuesInPath("year=2020/month=12/day=20/hour=01"));
+
+    // Too few and too many segments are both invalid.
+    assertThrows(IllegalArgumentException.class, () -> hourPartition.extractPartitionValuesInPath("2020/12/20"));
+    assertThrows(IllegalArgumentException.class, () -> hourPartition.extractPartitionValuesInPath("2020/12/20/01/30"));
+  }
+}
\ No newline at end of file