You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by xu...@apache.org on 2022/10/03 05:06:57 UTC

[hudi] branch master updated: [HUDI-4966] Add a partition extractor to handle partition values with slashes (#6851)

This is an automated email from the ASF dual-hosted git repository.

xushiyan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new 41b397c104 [HUDI-4966] Add a partition extractor to handle partition values with slashes (#6851)
41b397c104 is described below

commit 41b397c1042b332066b52c45e00bccb0c6150a5c
Author: Y Ethan Guo <et...@gmail.com>
AuthorDate: Sun Oct 2 22:06:45 2022 -0700

    [HUDI-4966] Add a partition extractor to handle partition values with slashes (#6851)
---
 .../hive/SinglePartPartitionValueExtractor.java    | 40 ++++++++++++++++++++++
 .../hudi/hive/TestPartitionValueExtractor.java     | 12 +++++++
 .../apache/hudi/sync/common/HoodieSyncConfig.java  | 11 +++---
 .../hudi/sync/common/TestHoodieSyncConfig.java     |  9 ++++-
 4 files changed, 67 insertions(+), 5 deletions(-)

diff --git a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/SinglePartPartitionValueExtractor.java b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/SinglePartPartitionValueExtractor.java
new file mode 100644
index 0000000000..abbccfcc53
--- /dev/null
+++ b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/SinglePartPartitionValueExtractor.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hudi.hive;
+
+import org.apache.hudi.sync.common.model.PartitionValueExtractor;
+
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * Extracts a single partition value from a partition path generated from one partition column.
+ * <p>
+ * This implementation extracts the partition value from the partition path as a single part
+ * even if the relative partition path contains slashes, e.g., the `TimestampBasedKeyGenerator`
+ * transforms the timestamp column into the partition path in the format of "yyyyMM/dd/HH".
+ * Each slash (`/`) is replaced with a dash (`-`), e.g., `202210/01/20` -> `202210-01-20`.
+ */
+public class SinglePartPartitionValueExtractor implements PartitionValueExtractor {
+  @Override
+  public List<String> extractPartitionValuesInPath(String partitionPath) {
+    return Collections.singletonList(partitionPath.replace('/', '-'));
+  }
+}
diff --git a/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestPartitionValueExtractor.java b/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestPartitionValueExtractor.java
index ba5a544af1..075542d596 100644
--- a/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestPartitionValueExtractor.java
+++ b/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestPartitionValueExtractor.java
@@ -18,8 +18,12 @@
 
 package org.apache.hudi.hive;
 
+import org.apache.hudi.sync.common.model.PartitionValueExtractor;
+
 import org.junit.jupiter.api.Test;
+
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.List;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
@@ -46,4 +50,12 @@ public class TestPartitionValueExtractor {
         IllegalArgumentException.class,
         () -> hiveStylePartition.extractPartitionValuesInPath("2021/04/02"));
   }
+
+  @Test
+  public void testSinglePartPartition() {
+    PartitionValueExtractor extractor = new SinglePartPartitionValueExtractor();
+    assertEquals(
+        Collections.singletonList("202210-01-20"),
+        extractor.extractPartitionValuesInPath("202210/01/20"));
+  }
 }
diff --git a/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncConfig.java b/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncConfig.java
index 43502f612f..b927cdb0c3 100644
--- a/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncConfig.java
+++ b/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncConfig.java
@@ -104,10 +104,13 @@ public class HoodieSyncConfig extends HoodieConfig {
         String partitionFields = partitionFieldsOpt.get();
         if (StringUtils.nonEmpty(partitionFields)) {
           int numOfPartFields = partitionFields.split(",").length;
-          if (numOfPartFields == 1
-              && cfg.contains(HIVE_STYLE_PARTITIONING_ENABLE)
-              && cfg.getString(HIVE_STYLE_PARTITIONING_ENABLE).equals("true")) {
-            return Option.of("org.apache.hudi.hive.HiveStylePartitionValueExtractor");
+          if (numOfPartFields == 1) {
+            if (cfg.contains(HIVE_STYLE_PARTITIONING_ENABLE)
+                && cfg.getString(HIVE_STYLE_PARTITIONING_ENABLE).equals("true")) {
+              return Option.of("org.apache.hudi.hive.HiveStylePartitionValueExtractor");
+            } else {
+              return Option.of("org.apache.hudi.hive.SinglePartPartitionValueExtractor");
+            }
           } else {
             return Option.of("org.apache.hudi.hive.MultiPartKeysValueExtractor");
           }
diff --git a/hudi-sync/hudi-sync-common/src/test/java/org/apache/hudi/sync/common/TestHoodieSyncConfig.java b/hudi-sync/hudi-sync-common/src/test/java/org/apache/hudi/sync/common/TestHoodieSyncConfig.java
index f8e4eff30a..aef283e595 100644
--- a/hudi-sync/hudi-sync-common/src/test/java/org/apache/hudi/sync/common/TestHoodieSyncConfig.java
+++ b/hudi-sync/hudi-sync-common/src/test/java/org/apache/hudi/sync/common/TestHoodieSyncConfig.java
@@ -104,7 +104,7 @@ class TestHoodieSyncConfig {
   }
 
   @Test
-  void testInferPartitonExtractorClass() {
+  void testInferPartitionExtractorClass() {
     Properties props0 = new Properties();
     HoodieSyncConfig config0 = new HoodieSyncConfig(props0, new Configuration());
     assertEquals("org.apache.hudi.hive.MultiPartKeysValueExtractor",
@@ -140,6 +140,13 @@ class TestHoodieSyncConfig {
     HoodieSyncConfig config4 = new HoodieSyncConfig(props4, new Configuration());
     assertEquals("org.apache.hudi.hive.HiveStylePartitionValueExtractor",
         config4.getStringOrDefault(META_SYNC_PARTITION_EXTRACTOR_CLASS));
+
+    Properties props5 = new Properties();
+    props5.setProperty(HoodieTableConfig.PARTITION_FIELDS.key(), "foo");
+    props5.setProperty(HoodieTableConfig.HIVE_STYLE_PARTITIONING_ENABLE.key(), "false");
+    HoodieSyncConfig config5 = new HoodieSyncConfig(props5, new Configuration());
+    assertEquals("org.apache.hudi.hive.SinglePartPartitionValueExtractor",
+        config5.getStringOrDefault(META_SYNC_PARTITION_EXTRACTOR_CLASS));
   }
 
   @Test