You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by le...@apache.org on 2020/06/15 11:02:13 UTC

[hudi] branch master updated: [HUDI-1003] Handle partitions correctly for syncing hudi non-parititioned table to hive (#1720)

This is an automated email from the ASF dual-hosted git repository.

leesf pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new 043eb56  [HUDI-1003] Handle partitions correctly for syncing hudi non-parititioned table to hive (#1720)
043eb56 is described below

commit 043eb564c2c7e4578237b5d0f2bbad8276db51f4
Author: Yajun Luo <ya...@163.com>
AuthorDate: Mon Jun 15 19:02:03 2020 +0800

    [HUDI-1003] Handle partitions correctly for syncing hudi non-parititioned table to hive (#1720)
---
 .../java/org/apache/hudi/hive/HiveSyncTool.java    |  6 +++++
 .../org/apache/hudi/hive/TestHiveSyncTool.java     | 29 ++++++++++++++++++++++
 2 files changed, 35 insertions(+)

diff --git a/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncTool.java b/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncTool.java
index 273635c..c3849d7 100644
--- a/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncTool.java
+++ b/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncTool.java
@@ -38,6 +38,7 @@ import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;
 import org.apache.parquet.schema.MessageType;
 
+import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
 import java.util.stream.Collectors;
@@ -64,6 +65,11 @@ public class HiveSyncTool {
   public HiveSyncTool(HiveSyncConfig cfg, HiveConf configuration, FileSystem fs) {
     this.hoodieHiveClient = new HoodieHiveClient(cfg, configuration, fs);
     this.cfg = cfg;
+    // Set partitionFields to empty, when the NonPartitionedExtractor is used
+    if (NonPartitionedExtractor.class.getName().equals(cfg.partitionValueExtractorClass)) {
+      LOG.warn("Set partitionFields to empty, since the NonPartitionedExtractor is used");
+      cfg.partitionFields = new ArrayList<>();
+    }
     switch (hoodieHiveClient.getTableType()) {
       case COPY_ON_WRITE:
         this.snapshotTableName = cfg.tableName;
diff --git a/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestHiveSyncTool.java b/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestHiveSyncTool.java
index bf383c4..cab26e0 100644
--- a/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestHiveSyncTool.java
+++ b/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestHiveSyncTool.java
@@ -459,6 +459,35 @@ public class TestHiveSyncTool {
 
   @ParameterizedTest
   @MethodSource("useJdbc")
+  public void testNonPartitionedSync(boolean useJdbc) throws Exception {
+    HiveTestUtil.hiveSyncConfig.useJdbc = useJdbc;
+    String instantTime = "100";
+    HiveTestUtil.createCOWTable(instantTime, 5, true);
+
+    HiveSyncConfig hiveSyncConfig = HiveSyncConfig.copy(HiveTestUtil.hiveSyncConfig);
+    // Select NonPartitionedExtractor while still supplying partition fields; the sync is expected to ignore them
+    hiveSyncConfig.partitionValueExtractorClass = NonPartitionedExtractor.class.getCanonicalName();
+    hiveSyncConfig.tableName = "non_partitioned";
+    hiveSyncConfig.partitionFields = Arrays.asList("year", "month", "day");
+    HiveTestUtil.getCreatedTablesSet().add(hiveSyncConfig.databaseName + "." + hiveSyncConfig.tableName);
+
+    HoodieHiveClient hiveClient = new HoodieHiveClient(hiveSyncConfig, HiveTestUtil.getHiveConf(), HiveTestUtil.fileSystem);
+    assertFalse(hiveClient.doesTableExist(hiveSyncConfig.tableName),
+            "Table " + hiveSyncConfig.tableName + " should not exist initially");
+    // Run the sync; the HiveSyncTool constructor clears partitionFields when NonPartitionedExtractor is configured
+    HiveSyncTool tool = new HiveSyncTool(hiveSyncConfig, HiveTestUtil.getHiveConf(), HiveTestUtil.fileSystem);
+    tool.syncHoodieTable();
+    assertTrue(hiveClient.doesTableExist(hiveSyncConfig.tableName),
+            "Table " + hiveSyncConfig.tableName + " should exist after sync completes");
+    assertEquals(hiveClient.getTableSchema(hiveSyncConfig.tableName).size(),
+            hiveClient.getDataSchema().getColumns().size(),
+            "Hive Schema should match the table schema,ignoring the partition fields");
+    assertEquals(0, hiveClient.scanTablePartitions(hiveSyncConfig.tableName).size(),
+            "Table should not have partitions because of the NonPartitionedExtractor");
+  }
+
+  @ParameterizedTest
+  @MethodSource("useJdbc")
   public void testReadSchemaForMOR(boolean useJdbc) throws Exception {
     HiveTestUtil.hiveSyncConfig.useJdbc = useJdbc;
     String commitTime = "100";