Posted to commits@hudi.apache.org by si...@apache.org on 2022/01/19 23:08:21 UTC

[hudi] branch release-0.10.1 updated (1d30495 -> b670801)

This is an automated email from the ASF dual-hosted git repository.

sivabalan pushed a change to branch release-0.10.1
in repository https://gitbox.apache.org/repos/asf/hudi.git.


    from 1d30495  [HUDI-3268] Fixing NullPointerException with HoodieFileIndex when keygenclass is null in table config (#4633)
     new 101be1a  [HUDI-3257] Excluding clustering instants from pending rollback info (#4616)
     new a8ee57f  [HUDI-3194] fix MOR snapshot query during compaction (#4540)
     new f28ea4d  [HUDI-3263] Do not nullify members in HoodieTableFileSystemView#resetViewState to avoid NPE (#4625)
     new 6df4639  [HUDI-3245] Convert uppercase letters to lowercase in storage configs (#4602)
     new 5dada43  Removing a extraneous test class
     new b670801  Bumping release candidate number 2

The 6 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 docker/hoodie/hadoop/base/pom.xml                  |  2 +-
 docker/hoodie/hadoop/datanode/pom.xml              |  2 +-
 docker/hoodie/hadoop/historyserver/pom.xml         |  2 +-
 docker/hoodie/hadoop/hive_base/pom.xml             |  2 +-
 docker/hoodie/hadoop/namenode/pom.xml              |  2 +-
 docker/hoodie/hadoop/pom.xml                       |  2 +-
 docker/hoodie/hadoop/prestobase/pom.xml            |  2 +-
 docker/hoodie/hadoop/spark_base/pom.xml            |  2 +-
 docker/hoodie/hadoop/sparkadhoc/pom.xml            |  2 +-
 docker/hoodie/hadoop/sparkmaster/pom.xml           |  2 +-
 docker/hoodie/hadoop/sparkworker/pom.xml           |  2 +-
 hudi-aws/pom.xml                                   |  4 +--
 hudi-cli/pom.xml                                   |  2 +-
 hudi-client/hudi-client-common/pom.xml             |  4 +--
 .../hudi/client/AbstractHoodieWriteClient.java     | 12 ++++++---
 .../apache/hudi/config/HoodieStorageConfig.java    |  4 +--
 hudi-client/hudi-flink-client/pom.xml              |  4 +--
 hudi-client/hudi-java-client/pom.xml               |  4 +--
 hudi-client/hudi-spark-client/pom.xml              |  4 +--
 hudi-client/pom.xml                                |  2 +-
 hudi-common/pom.xml                                |  2 +-
 .../table/view/HoodieTableFileSystemView.java      | 30 +++++++++++++++-------
 hudi-examples/pom.xml                              |  2 +-
 hudi-flink/pom.xml                                 |  2 +-
 hudi-hadoop-mr/pom.xml                             |  2 +-
 .../utils/HoodieRealtimeInputFormatUtils.java      |  8 +++---
 hudi-integ-test/pom.xml                            |  2 +-
 hudi-kafka-connect/pom.xml                         |  4 +--
 hudi-spark-datasource/hudi-spark-common/pom.xml    |  4 +--
 .../main/java/org/apache/hudi/DataSourceUtils.java |  9 ++++---
 hudi-spark-datasource/hudi-spark/pom.xml           |  4 +--
 .../java/org/apache/hudi/TestDataSourceUtils.java  |  8 +++---
 hudi-spark-datasource/hudi-spark2/pom.xml          |  4 +--
 .../org/apache/hudi/internal/DefaultSource.java    |  2 +-
 hudi-spark-datasource/hudi-spark3/pom.xml          |  4 +--
 .../apache/hudi/spark3/internal/DefaultSource.java |  7 +++--
 hudi-spark-datasource/pom.xml                      |  2 +-
 hudi-sync/hudi-dla-sync/pom.xml                    |  2 +-
 hudi-sync/hudi-hive-sync/pom.xml                   |  2 +-
 hudi-sync/hudi-sync-common/pom.xml                 |  2 +-
 hudi-sync/pom.xml                                  |  2 +-
 hudi-timeline-service/pom.xml                      |  2 +-
 hudi-utilities/pom.xml                             |  2 +-
 packaging/hudi-flink-bundle/pom.xml                |  2 +-
 packaging/hudi-hadoop-mr-bundle/pom.xml            |  2 +-
 packaging/hudi-hive-sync-bundle/pom.xml            |  2 +-
 packaging/hudi-integ-test-bundle/pom.xml           |  2 +-
 packaging/hudi-kafka-connect-bundle/pom.xml        |  2 +-
 packaging/hudi-presto-bundle/pom.xml               |  2 +-
 packaging/hudi-spark-bundle/pom.xml                |  2 +-
 packaging/hudi-timeline-server-bundle/pom.xml      |  2 +-
 packaging/hudi-utilities-bundle/pom.xml            |  2 +-
 pom.xml                                            |  2 +-
 53 files changed, 105 insertions(+), 85 deletions(-)

[hudi] 03/06: [HUDI-3263] Do not nullify members in HoodieTableFileSystemView#resetViewState to avoid NPE (#4625)

Posted by si...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sivabalan pushed a commit to branch release-0.10.1
in repository https://gitbox.apache.org/repos/asf/hudi.git

commit f28ea4d5a94d93d8fa7dd562e927b110312459c1
Author: Danny Chan <yu...@gmail.com>
AuthorDate: Tue Jan 18 17:46:40 2022 +0800

    [HUDI-3263] Do not nullify members in HoodieTableFileSystemView#resetViewState to avoid NPE (#4625)
---
 .../table/view/HoodieTableFileSystemView.java      | 30 +++++++++++++++-------
 1 file changed, 21 insertions(+), 9 deletions(-)

diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/view/HoodieTableFileSystemView.java b/hudi-common/src/main/java/org/apache/hudi/common/table/view/HoodieTableFileSystemView.java
index b8f7f31..299dbab 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/table/view/HoodieTableFileSystemView.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/table/view/HoodieTableFileSystemView.java
@@ -114,11 +114,22 @@ public class HoodieTableFileSystemView extends IncrementalTimelineSyncFileSystem
 
   @Override
   protected void resetViewState() {
-    this.fgIdToPendingCompaction = null;
-    this.partitionToFileGroupsMap = null;
-    this.fgIdToBootstrapBaseFile = null;
-    this.fgIdToReplaceInstants = null;
-    this.fgIdToPendingClustering = null;
+    // do not nullify the members to avoid NPE.
+
+    // there are two cases that #resetViewState is called:
+    // 1. when #sync is invoked, the view clear the state through calling #resetViewState,
+    // then re-initialize the view;
+    // 2. when #close is invoked.
+    // (see AbstractTableFileSystemView for details.)
+
+    // for the 1st case, we better do not nullify the members when #resetViewState
+    // because there is possibility that this in-memory view is a backend view under TimelineServer,
+    // and many methods in the RequestHandler is not thread safe, when performRefreshCheck flag in ViewHandler
+    // is set as false, the view does not perform refresh check, if #sync is called just before and the members
+    // are nullified, the methods that use these members would throw NPE.
+
+    // actually there is no need to nullify the members here for 1st case, the members are assigned with new values
+    // when calling #init, for 2nd case, the #close method already nullify the members.
   }
 
   protected Map<String, List<HoodieFileGroup>> createPartitionToFileGroups() {
@@ -350,10 +361,11 @@ public class HoodieTableFileSystemView extends IncrementalTimelineSyncFileSystem
   @Override
   public void close() {
     super.close();
-    partitionToFileGroupsMap = null;
-    fgIdToPendingCompaction = null;
-    fgIdToBootstrapBaseFile = null;
-    fgIdToReplaceInstants = null;
+    this.fgIdToPendingCompaction = null;
+    this.partitionToFileGroupsMap = null;
+    this.fgIdToBootstrapBaseFile = null;
+    this.fgIdToReplaceInstants = null;
+    this.fgIdToPendingClustering = null;
     closed = true;
   }
 

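Editor's note on the HUDI-3263 hunk above: for readers who want the gist without parsing the long comment block, here is a minimal, hypothetical lifecycle sketch. The field and method names echo HoodieTableFileSystemView, but the classes and bodies are illustrative only (the real view keeps several such maps and much more state).

import java.util.HashMap;
import java.util.List;
import java.util.Map;

// Illustrative stand-in; the real view tracks several such maps (see the hunk above).
class FileGroup {}

class FileSystemViewLifecycleSketch {
  private Map<String, List<FileGroup>> partitionToFileGroupsMap = new HashMap<>();
  private boolean closed = false;

  // Case 1: called from sync(). After this patch it deliberately leaves the members
  // non-null, because a timeline-server request handler that skips its refresh check
  // may still be reading them concurrently.
  void resetViewState() {
    // intentionally a no-op for the in-memory members; init() re-assigns them next
  }

  // The re-initialization that follows resetViewState() during sync().
  void init() {
    this.partitionToFileGroupsMap = new HashMap<>();
  }

  // Case 2: only close() nullifies the members, once the view is being torn down.
  void close() {
    this.partitionToFileGroupsMap = null;
    this.closed = true;
  }
}

In short, the two call sites now diverge on purpose: sync() clears and immediately rebuilds the state, while close() alone releases the references.
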
[hudi] 06/06: Bumping release candidate number 2

Posted by si...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sivabalan pushed a commit to branch release-0.10.1
in repository https://gitbox.apache.org/repos/asf/hudi.git

commit b670801afc110870f354766c872f386d18261add
Author: sivabalan <n....@gmail.com>
AuthorDate: Wed Jan 19 18:05:51 2022 -0500

    Bumping release candidate number 2
---
 docker/hoodie/hadoop/base/pom.xml               | 2 +-
 docker/hoodie/hadoop/datanode/pom.xml           | 2 +-
 docker/hoodie/hadoop/historyserver/pom.xml      | 2 +-
 docker/hoodie/hadoop/hive_base/pom.xml          | 2 +-
 docker/hoodie/hadoop/namenode/pom.xml           | 2 +-
 docker/hoodie/hadoop/pom.xml                    | 2 +-
 docker/hoodie/hadoop/prestobase/pom.xml         | 2 +-
 docker/hoodie/hadoop/spark_base/pom.xml         | 2 +-
 docker/hoodie/hadoop/sparkadhoc/pom.xml         | 2 +-
 docker/hoodie/hadoop/sparkmaster/pom.xml        | 2 +-
 docker/hoodie/hadoop/sparkworker/pom.xml        | 2 +-
 hudi-aws/pom.xml                                | 4 ++--
 hudi-cli/pom.xml                                | 2 +-
 hudi-client/hudi-client-common/pom.xml          | 4 ++--
 hudi-client/hudi-flink-client/pom.xml           | 4 ++--
 hudi-client/hudi-java-client/pom.xml            | 4 ++--
 hudi-client/hudi-spark-client/pom.xml           | 4 ++--
 hudi-client/pom.xml                             | 2 +-
 hudi-common/pom.xml                             | 2 +-
 hudi-examples/pom.xml                           | 2 +-
 hudi-flink/pom.xml                              | 2 +-
 hudi-hadoop-mr/pom.xml                          | 2 +-
 hudi-integ-test/pom.xml                         | 2 +-
 hudi-kafka-connect/pom.xml                      | 4 ++--
 hudi-spark-datasource/hudi-spark-common/pom.xml | 4 ++--
 hudi-spark-datasource/hudi-spark/pom.xml        | 4 ++--
 hudi-spark-datasource/hudi-spark2/pom.xml       | 4 ++--
 hudi-spark-datasource/hudi-spark3/pom.xml       | 4 ++--
 hudi-spark-datasource/pom.xml                   | 2 +-
 hudi-sync/hudi-dla-sync/pom.xml                 | 2 +-
 hudi-sync/hudi-hive-sync/pom.xml                | 2 +-
 hudi-sync/hudi-sync-common/pom.xml              | 2 +-
 hudi-sync/pom.xml                               | 2 +-
 hudi-timeline-service/pom.xml                   | 2 +-
 hudi-utilities/pom.xml                          | 2 +-
 packaging/hudi-flink-bundle/pom.xml             | 2 +-
 packaging/hudi-hadoop-mr-bundle/pom.xml         | 2 +-
 packaging/hudi-hive-sync-bundle/pom.xml         | 2 +-
 packaging/hudi-integ-test-bundle/pom.xml        | 2 +-
 packaging/hudi-kafka-connect-bundle/pom.xml     | 2 +-
 packaging/hudi-presto-bundle/pom.xml            | 2 +-
 packaging/hudi-spark-bundle/pom.xml             | 2 +-
 packaging/hudi-timeline-server-bundle/pom.xml   | 2 +-
 packaging/hudi-utilities-bundle/pom.xml         | 2 +-
 pom.xml                                         | 2 +-
 45 files changed, 55 insertions(+), 55 deletions(-)

diff --git a/docker/hoodie/hadoop/base/pom.xml b/docker/hoodie/hadoop/base/pom.xml
index 24b0357..1abafa8 100644
--- a/docker/hoodie/hadoop/base/pom.xml
+++ b/docker/hoodie/hadoop/base/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>hudi-hadoop-docker</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>0.10.1-rc1</version>
+    <version>0.10.1-rc2</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
   <packaging>pom</packaging>
diff --git a/docker/hoodie/hadoop/datanode/pom.xml b/docker/hoodie/hadoop/datanode/pom.xml
index 61c63f3..54c560b 100644
--- a/docker/hoodie/hadoop/datanode/pom.xml
+++ b/docker/hoodie/hadoop/datanode/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>hudi-hadoop-docker</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>0.10.1-rc1</version>
+    <version>0.10.1-rc2</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
   <packaging>pom</packaging>
diff --git a/docker/hoodie/hadoop/historyserver/pom.xml b/docker/hoodie/hadoop/historyserver/pom.xml
index 856f776..8e7ca1a 100644
--- a/docker/hoodie/hadoop/historyserver/pom.xml
+++ b/docker/hoodie/hadoop/historyserver/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>hudi-hadoop-docker</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>0.10.1-rc1</version>
+    <version>0.10.1-rc2</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
   <packaging>pom</packaging>
diff --git a/docker/hoodie/hadoop/hive_base/pom.xml b/docker/hoodie/hadoop/hive_base/pom.xml
index 408bbc5..a3a800f 100644
--- a/docker/hoodie/hadoop/hive_base/pom.xml
+++ b/docker/hoodie/hadoop/hive_base/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>hudi-hadoop-docker</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>0.10.1-rc1</version>
+    <version>0.10.1-rc2</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
   <packaging>pom</packaging>
diff --git a/docker/hoodie/hadoop/namenode/pom.xml b/docker/hoodie/hadoop/namenode/pom.xml
index a47e81e..33e60b4 100644
--- a/docker/hoodie/hadoop/namenode/pom.xml
+++ b/docker/hoodie/hadoop/namenode/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>hudi-hadoop-docker</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>0.10.1-rc1</version>
+    <version>0.10.1-rc2</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
   <packaging>pom</packaging>
diff --git a/docker/hoodie/hadoop/pom.xml b/docker/hoodie/hadoop/pom.xml
index 6b47d0d..9872b58 100644
--- a/docker/hoodie/hadoop/pom.xml
+++ b/docker/hoodie/hadoop/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>hudi</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>0.10.1-rc1</version>
+    <version>0.10.1-rc2</version>
     <relativePath>../../../pom.xml</relativePath>
   </parent>
   <modelVersion>4.0.0</modelVersion>
diff --git a/docker/hoodie/hadoop/prestobase/pom.xml b/docker/hoodie/hadoop/prestobase/pom.xml
index 7fe63c9..66acfdb 100644
--- a/docker/hoodie/hadoop/prestobase/pom.xml
+++ b/docker/hoodie/hadoop/prestobase/pom.xml
@@ -20,7 +20,7 @@
   <parent>
     <artifactId>hudi-hadoop-docker</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>0.10.1-rc1</version>
+    <version>0.10.1-rc2</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
   <packaging>pom</packaging>
diff --git a/docker/hoodie/hadoop/spark_base/pom.xml b/docker/hoodie/hadoop/spark_base/pom.xml
index d4d0ca5..654a539 100644
--- a/docker/hoodie/hadoop/spark_base/pom.xml
+++ b/docker/hoodie/hadoop/spark_base/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>hudi-hadoop-docker</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>0.10.1-rc1</version>
+    <version>0.10.1-rc2</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
   <packaging>pom</packaging>
diff --git a/docker/hoodie/hadoop/sparkadhoc/pom.xml b/docker/hoodie/hadoop/sparkadhoc/pom.xml
index 220ab1e..e40ff71 100644
--- a/docker/hoodie/hadoop/sparkadhoc/pom.xml
+++ b/docker/hoodie/hadoop/sparkadhoc/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>hudi-hadoop-docker</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>0.10.1-rc1</version>
+    <version>0.10.1-rc2</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
   <packaging>pom</packaging>
diff --git a/docker/hoodie/hadoop/sparkmaster/pom.xml b/docker/hoodie/hadoop/sparkmaster/pom.xml
index b54ead3..865fb04 100644
--- a/docker/hoodie/hadoop/sparkmaster/pom.xml
+++ b/docker/hoodie/hadoop/sparkmaster/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>hudi-hadoop-docker</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>0.10.1-rc1</version>
+    <version>0.10.1-rc2</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
   <packaging>pom</packaging>
diff --git a/docker/hoodie/hadoop/sparkworker/pom.xml b/docker/hoodie/hadoop/sparkworker/pom.xml
index b862fe0..1eb987c 100644
--- a/docker/hoodie/hadoop/sparkworker/pom.xml
+++ b/docker/hoodie/hadoop/sparkworker/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>hudi-hadoop-docker</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>0.10.1-rc1</version>
+    <version>0.10.1-rc2</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
   <packaging>pom</packaging>
diff --git a/hudi-aws/pom.xml b/hudi-aws/pom.xml
index 34f0662..224de1e 100644
--- a/hudi-aws/pom.xml
+++ b/hudi-aws/pom.xml
@@ -19,12 +19,12 @@
     <parent>
         <artifactId>hudi</artifactId>
         <groupId>org.apache.hudi</groupId>
-        <version>0.10.1-rc1</version>
+        <version>0.10.1-rc2</version>
     </parent>
     <modelVersion>4.0.0</modelVersion>
 
     <artifactId>hudi-aws</artifactId>
-    <version>0.10.1-rc1</version>
+    <version>0.10.1-rc2</version>
 
     <name>hudi-aws</name>
     <packaging>jar</packaging>
diff --git a/hudi-cli/pom.xml b/hudi-cli/pom.xml
index 3332d67..3f134f2 100644
--- a/hudi-cli/pom.xml
+++ b/hudi-cli/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>hudi</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>0.10.1-rc1</version>
+    <version>0.10.1-rc2</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
 
diff --git a/hudi-client/hudi-client-common/pom.xml b/hudi-client/hudi-client-common/pom.xml
index 540ac82..34838bd 100644
--- a/hudi-client/hudi-client-common/pom.xml
+++ b/hudi-client/hudi-client-common/pom.xml
@@ -19,12 +19,12 @@
   <parent>
     <artifactId>hudi-client</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>0.10.1-rc1</version>
+    <version>0.10.1-rc2</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
 
   <artifactId>hudi-client-common</artifactId>
-  <version>0.10.1-rc1</version>
+  <version>0.10.1-rc2</version>
 
   <name>hudi-client-common</name>
   <packaging>jar</packaging>
diff --git a/hudi-client/hudi-flink-client/pom.xml b/hudi-client/hudi-flink-client/pom.xml
index ff14665..f071442 100644
--- a/hudi-client/hudi-flink-client/pom.xml
+++ b/hudi-client/hudi-flink-client/pom.xml
@@ -19,12 +19,12 @@
   <parent>
     <artifactId>hudi-client</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>0.10.1-rc1</version>
+    <version>0.10.1-rc2</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
 
   <artifactId>hudi-flink-client</artifactId>
-  <version>0.10.1-rc1</version>
+  <version>0.10.1-rc2</version>
 
   <name>hudi-flink-client</name>
   <packaging>jar</packaging>
diff --git a/hudi-client/hudi-java-client/pom.xml b/hudi-client/hudi-java-client/pom.xml
index abf5b64..525a1f6 100644
--- a/hudi-client/hudi-java-client/pom.xml
+++ b/hudi-client/hudi-java-client/pom.xml
@@ -19,12 +19,12 @@
     <parent>
         <artifactId>hudi-client</artifactId>
         <groupId>org.apache.hudi</groupId>
-        <version>0.10.1-rc1</version>
+        <version>0.10.1-rc2</version>
     </parent>
     <modelVersion>4.0.0</modelVersion>
 
     <artifactId>hudi-java-client</artifactId>
-    <version>0.10.1-rc1</version>
+    <version>0.10.1-rc2</version>
 
     <name>hudi-java-client</name>
     <packaging>jar</packaging>
diff --git a/hudi-client/hudi-spark-client/pom.xml b/hudi-client/hudi-spark-client/pom.xml
index b0d2b4b..993c223 100644
--- a/hudi-client/hudi-spark-client/pom.xml
+++ b/hudi-client/hudi-spark-client/pom.xml
@@ -19,12 +19,12 @@
   <parent>
     <artifactId>hudi-client</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>0.10.1-rc1</version>
+    <version>0.10.1-rc2</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
 
   <artifactId>hudi-spark-client</artifactId>
-  <version>0.10.1-rc1</version>
+  <version>0.10.1-rc2</version>
 
   <name>hudi-spark-client</name>
   <packaging>jar</packaging>
diff --git a/hudi-client/pom.xml b/hudi-client/pom.xml
index 5bf4224..4f86e0f 100644
--- a/hudi-client/pom.xml
+++ b/hudi-client/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>hudi</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>0.10.1-rc1</version>
+    <version>0.10.1-rc2</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
 
diff --git a/hudi-common/pom.xml b/hudi-common/pom.xml
index 5c574b2..d9ca236 100644
--- a/hudi-common/pom.xml
+++ b/hudi-common/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>hudi</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>0.10.1-rc1</version>
+    <version>0.10.1-rc2</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
 
diff --git a/hudi-examples/pom.xml b/hudi-examples/pom.xml
index 89e8ec6..e912a5f 100644
--- a/hudi-examples/pom.xml
+++ b/hudi-examples/pom.xml
@@ -20,7 +20,7 @@
   <parent>
     <artifactId>hudi</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>0.10.1-rc1</version>
+    <version>0.10.1-rc2</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
 
diff --git a/hudi-flink/pom.xml b/hudi-flink/pom.xml
index 7b84900..f7d9d8e 100644
--- a/hudi-flink/pom.xml
+++ b/hudi-flink/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <artifactId>hudi</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>0.10.1-rc1</version>
+    <version>0.10.1-rc2</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
 
diff --git a/hudi-hadoop-mr/pom.xml b/hudi-hadoop-mr/pom.xml
index 51e930d..a66a273 100644
--- a/hudi-hadoop-mr/pom.xml
+++ b/hudi-hadoop-mr/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>hudi</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>0.10.1-rc1</version>
+    <version>0.10.1-rc2</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
 
diff --git a/hudi-integ-test/pom.xml b/hudi-integ-test/pom.xml
index 9c93e7a..6ef0c27 100644
--- a/hudi-integ-test/pom.xml
+++ b/hudi-integ-test/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <artifactId>hudi</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>0.10.1-rc1</version>
+    <version>0.10.1-rc2</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
   <artifactId>hudi-integ-test</artifactId>
diff --git a/hudi-kafka-connect/pom.xml b/hudi-kafka-connect/pom.xml
index 14eb135..9360ca8 100644
--- a/hudi-kafka-connect/pom.xml
+++ b/hudi-kafka-connect/pom.xml
@@ -19,13 +19,13 @@
     <parent>
         <artifactId>hudi</artifactId>
         <groupId>org.apache.hudi</groupId>
-        <version>0.10.1-rc1</version>
+        <version>0.10.1-rc2</version>
     </parent>
     <modelVersion>4.0.0</modelVersion>
 
     <artifactId>hudi-kafka-connect</artifactId>
     <description>Kafka Connect Sink Connector for Hudi</description>
-    <version>0.10.1-rc1</version>
+    <version>0.10.1-rc2</version>
     <packaging>jar</packaging>
 
     <properties>
diff --git a/hudi-spark-datasource/hudi-spark-common/pom.xml b/hudi-spark-datasource/hudi-spark-common/pom.xml
index 1eae147..f20a4a4 100644
--- a/hudi-spark-datasource/hudi-spark-common/pom.xml
+++ b/hudi-spark-datasource/hudi-spark-common/pom.xml
@@ -17,12 +17,12 @@
   <parent>
     <artifactId>hudi-spark-datasource</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>0.10.1-rc1</version>
+    <version>0.10.1-rc2</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
 
   <artifactId>hudi-spark-common_${scala.binary.version}</artifactId>
-  <version>0.10.1-rc1</version>
+  <version>0.10.1-rc2</version>
 
   <name>hudi-spark-common_${scala.binary.version}</name>
   <packaging>jar</packaging>
diff --git a/hudi-spark-datasource/hudi-spark/pom.xml b/hudi-spark-datasource/hudi-spark/pom.xml
index 4044a5e..ae21c6a 100644
--- a/hudi-spark-datasource/hudi-spark/pom.xml
+++ b/hudi-spark-datasource/hudi-spark/pom.xml
@@ -19,12 +19,12 @@
   <parent>
     <artifactId>hudi-spark-datasource</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>0.10.1-rc1</version>
+    <version>0.10.1-rc2</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
 
   <artifactId>hudi-spark_${scala.binary.version}</artifactId>
-  <version>0.10.1-rc1</version>
+  <version>0.10.1-rc2</version>
 
   <name>hudi-spark_${scala.binary.version}</name>
   <packaging>jar</packaging>
diff --git a/hudi-spark-datasource/hudi-spark2/pom.xml b/hudi-spark-datasource/hudi-spark2/pom.xml
index 62de422..4ab0e4d 100644
--- a/hudi-spark-datasource/hudi-spark2/pom.xml
+++ b/hudi-spark-datasource/hudi-spark2/pom.xml
@@ -17,12 +17,12 @@
   <parent>
     <artifactId>hudi-spark-datasource</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>0.10.1-rc1</version>
+    <version>0.10.1-rc2</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
 
   <artifactId>hudi-spark2_${scala.binary.version}</artifactId>
-  <version>0.10.1-rc1</version>
+  <version>0.10.1-rc2</version>
 
   <name>hudi-spark2_${scala.binary.version}</name>
   <packaging>jar</packaging>
diff --git a/hudi-spark-datasource/hudi-spark3/pom.xml b/hudi-spark-datasource/hudi-spark3/pom.xml
index 3a0f34f..ec87190 100644
--- a/hudi-spark-datasource/hudi-spark3/pom.xml
+++ b/hudi-spark-datasource/hudi-spark3/pom.xml
@@ -17,12 +17,12 @@
   <parent>
     <artifactId>hudi-spark-datasource</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>0.10.1-rc1</version>
+    <version>0.10.1-rc2</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
 
   <artifactId>hudi-spark3_2.12</artifactId>
-  <version>0.10.1-rc1</version>
+  <version>0.10.1-rc2</version>
 
   <name>hudi-spark3_2.12</name>
   <packaging>jar</packaging>
diff --git a/hudi-spark-datasource/pom.xml b/hudi-spark-datasource/pom.xml
index 9706c98..8e06dbc 100644
--- a/hudi-spark-datasource/pom.xml
+++ b/hudi-spark-datasource/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>hudi</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>0.10.1-rc1</version>
+    <version>0.10.1-rc2</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
 
diff --git a/hudi-sync/hudi-dla-sync/pom.xml b/hudi-sync/hudi-dla-sync/pom.xml
index f5d3b63..32f88ab 100644
--- a/hudi-sync/hudi-dla-sync/pom.xml
+++ b/hudi-sync/hudi-dla-sync/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>hudi</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>0.10.1-rc1</version>
+    <version>0.10.1-rc2</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 
diff --git a/hudi-sync/hudi-hive-sync/pom.xml b/hudi-sync/hudi-hive-sync/pom.xml
index f9ab728..f05e5d9 100644
--- a/hudi-sync/hudi-hive-sync/pom.xml
+++ b/hudi-sync/hudi-hive-sync/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>hudi</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>0.10.1-rc1</version>
+    <version>0.10.1-rc2</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 
diff --git a/hudi-sync/hudi-sync-common/pom.xml b/hudi-sync/hudi-sync-common/pom.xml
index 80876dd..7314c9e 100644
--- a/hudi-sync/hudi-sync-common/pom.xml
+++ b/hudi-sync/hudi-sync-common/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>hudi</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>0.10.1-rc1</version>
+    <version>0.10.1-rc2</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
   <modelVersion>4.0.0</modelVersion>
diff --git a/hudi-sync/pom.xml b/hudi-sync/pom.xml
index 9b30053..8142537 100644
--- a/hudi-sync/pom.xml
+++ b/hudi-sync/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>hudi</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>0.10.1-rc1</version>
+    <version>0.10.1-rc2</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
 
diff --git a/hudi-timeline-service/pom.xml b/hudi-timeline-service/pom.xml
index 23dfb55..bb6d979 100644
--- a/hudi-timeline-service/pom.xml
+++ b/hudi-timeline-service/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>hudi</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>0.10.1-rc1</version>
+    <version>0.10.1-rc2</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
 
diff --git a/hudi-utilities/pom.xml b/hudi-utilities/pom.xml
index e497a9e..70923a6 100644
--- a/hudi-utilities/pom.xml
+++ b/hudi-utilities/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>hudi</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>0.10.1-rc1</version>
+    <version>0.10.1-rc2</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
 
diff --git a/packaging/hudi-flink-bundle/pom.xml b/packaging/hudi-flink-bundle/pom.xml
index cdf9614..5560508 100644
--- a/packaging/hudi-flink-bundle/pom.xml
+++ b/packaging/hudi-flink-bundle/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <artifactId>hudi</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>0.10.1-rc1</version>
+    <version>0.10.1-rc2</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
   <modelVersion>4.0.0</modelVersion>
diff --git a/packaging/hudi-hadoop-mr-bundle/pom.xml b/packaging/hudi-hadoop-mr-bundle/pom.xml
index 129559a..f9364412 100644
--- a/packaging/hudi-hadoop-mr-bundle/pom.xml
+++ b/packaging/hudi-hadoop-mr-bundle/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>hudi</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>0.10.1-rc1</version>
+    <version>0.10.1-rc2</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
   <modelVersion>4.0.0</modelVersion>
diff --git a/packaging/hudi-hive-sync-bundle/pom.xml b/packaging/hudi-hive-sync-bundle/pom.xml
index 3f7a3be..cb16097 100644
--- a/packaging/hudi-hive-sync-bundle/pom.xml
+++ b/packaging/hudi-hive-sync-bundle/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>hudi</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>0.10.1-rc1</version>
+    <version>0.10.1-rc2</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
   <modelVersion>4.0.0</modelVersion>
diff --git a/packaging/hudi-integ-test-bundle/pom.xml b/packaging/hudi-integ-test-bundle/pom.xml
index fab8498..7e5d0b0 100644
--- a/packaging/hudi-integ-test-bundle/pom.xml
+++ b/packaging/hudi-integ-test-bundle/pom.xml
@@ -17,7 +17,7 @@
   <parent>
     <artifactId>hudi</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>0.10.1-rc1</version>
+    <version>0.10.1-rc2</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
   <modelVersion>4.0.0</modelVersion>
diff --git a/packaging/hudi-kafka-connect-bundle/pom.xml b/packaging/hudi-kafka-connect-bundle/pom.xml
index 497dc7d..0b16c0c 100644
--- a/packaging/hudi-kafka-connect-bundle/pom.xml
+++ b/packaging/hudi-kafka-connect-bundle/pom.xml
@@ -20,7 +20,7 @@
     <parent>
         <artifactId>hudi</artifactId>
         <groupId>org.apache.hudi</groupId>
-        <version>0.10.1-rc1</version>
+        <version>0.10.1-rc2</version>
         <relativePath>../../pom.xml</relativePath>
     </parent>
     <modelVersion>4.0.0</modelVersion>
diff --git a/packaging/hudi-presto-bundle/pom.xml b/packaging/hudi-presto-bundle/pom.xml
index c6e7b95..fd4626b 100644
--- a/packaging/hudi-presto-bundle/pom.xml
+++ b/packaging/hudi-presto-bundle/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>hudi</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>0.10.1-rc1</version>
+    <version>0.10.1-rc2</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
   <modelVersion>4.0.0</modelVersion>
diff --git a/packaging/hudi-spark-bundle/pom.xml b/packaging/hudi-spark-bundle/pom.xml
index ff3dda5..8959e6a 100644
--- a/packaging/hudi-spark-bundle/pom.xml
+++ b/packaging/hudi-spark-bundle/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>hudi</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>0.10.1-rc1</version>
+    <version>0.10.1-rc2</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
   <modelVersion>4.0.0</modelVersion>
diff --git a/packaging/hudi-timeline-server-bundle/pom.xml b/packaging/hudi-timeline-server-bundle/pom.xml
index d3cc994..655c227 100644
--- a/packaging/hudi-timeline-server-bundle/pom.xml
+++ b/packaging/hudi-timeline-server-bundle/pom.xml
@@ -21,7 +21,7 @@
     <parent>
         <artifactId>hudi</artifactId>
         <groupId>org.apache.hudi</groupId>
-        <version>0.10.1-rc1</version>
+        <version>0.10.1-rc2</version>
         <relativePath>../../pom.xml</relativePath>
     </parent>
     <modelVersion>4.0.0</modelVersion>
diff --git a/packaging/hudi-utilities-bundle/pom.xml b/packaging/hudi-utilities-bundle/pom.xml
index 7e4471e..32e448b 100644
--- a/packaging/hudi-utilities-bundle/pom.xml
+++ b/packaging/hudi-utilities-bundle/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>hudi</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>0.10.1-rc1</version>
+    <version>0.10.1-rc2</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
   <modelVersion>4.0.0</modelVersion>
diff --git a/pom.xml b/pom.xml
index cea8745..4bae3b3 100644
--- a/pom.xml
+++ b/pom.xml
@@ -29,7 +29,7 @@
   <groupId>org.apache.hudi</groupId>
   <artifactId>hudi</artifactId>
   <packaging>pom</packaging>
-  <version>0.10.1-rc1</version>
+  <version>0.10.1-rc2</version>
   <description>Apache Hudi brings stream style processing on big data</description>
   <url>https://github.com/apache/hudi</url>
   <name>Hudi</name>

[hudi] 02/06: [HUDI-3194] fix MOR snapshot query during compaction (#4540)

Posted by si...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sivabalan pushed a commit to branch release-0.10.1
in repository https://gitbox.apache.org/repos/asf/hudi.git

commit a8ee57f3ea1f9a47d42d92ab508025aa968e26e7
Author: Yuwei XIAO <yw...@gmail.com>
AuthorDate: Tue Jan 18 06:24:24 2022 +0800

    [HUDI-3194] fix MOR snapshot query during compaction (#4540)
---
 .../TestHoodieSparkMergeOnReadTableCompaction.java | 125 +++++++++++++++++++++
 .../utils/HoodieRealtimeInputFormatUtils.java      |   8 +-
 2 files changed, 129 insertions(+), 4 deletions(-)

diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestHoodieSparkMergeOnReadTableCompaction.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestHoodieSparkMergeOnReadTableCompaction.java
new file mode 100644
index 0000000..13903bf
--- /dev/null
+++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestHoodieSparkMergeOnReadTableCompaction.java
@@ -0,0 +1,125 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hudi.table.functional;
+
+import org.apache.hudi.client.SparkRDDWriteClient;
+import org.apache.hudi.client.WriteStatus;
+import org.apache.hudi.common.model.HoodieTableType;
+import org.apache.hudi.common.table.HoodieTableMetaClient;
+import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
+import org.apache.hudi.common.testutils.HoodieTestDataGenerator;
+import org.apache.hudi.common.util.Option;
+import org.apache.hudi.config.HoodieCompactionConfig;
+import org.apache.hudi.config.HoodieIndexConfig;
+import org.apache.hudi.config.HoodieLayoutConfig;
+import org.apache.hudi.config.HoodieStorageConfig;
+import org.apache.hudi.config.HoodieWriteConfig;
+import org.apache.hudi.index.HoodieIndex;
+import org.apache.hudi.table.action.commit.SparkBucketIndexPartitioner;
+import org.apache.hudi.table.storage.HoodieStorageLayout;
+import org.apache.hudi.testutils.HoodieMergeOnReadTestUtils;
+import org.apache.hudi.testutils.SparkClientFunctionalTestHarness;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Tag;
+import org.junit.jupiter.api.Test;
+
+import java.io.IOException;
+import java.nio.file.Paths;
+import java.util.Arrays;
+import java.util.List;
+import java.util.stream.Collectors;
+
+import static org.apache.hudi.common.testutils.HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA;
+import static org.apache.hudi.config.HoodieWriteConfig.AUTO_COMMIT_ENABLE;
+
+@Tag("functional")
+public class TestHoodieSparkMergeOnReadTableCompaction extends SparkClientFunctionalTestHarness {
+
+  private HoodieTestDataGenerator dataGen;
+  private SparkRDDWriteClient client;
+  private HoodieTableMetaClient metaClient;
+
+  @BeforeEach
+  public void setup() {
+    dataGen = new HoodieTestDataGenerator();
+  }
+
+  @Test
+  public void testWriteDuringCompaction() throws IOException {
+    HoodieWriteConfig config = HoodieWriteConfig.newBuilder()
+        .forTable("test-trip-table")
+        .withPath(basePath())
+        .withSchema(TRIP_EXAMPLE_SCHEMA)
+        .withParallelism(2, 2)
+        .withAutoCommit(false)
+        .withCompactionConfig(HoodieCompactionConfig.newBuilder()
+            .withMaxNumDeltaCommitsBeforeCompaction(1).build())
+        .withStorageConfig(HoodieStorageConfig.newBuilder()
+            .parquetMaxFileSize(1024).build())
+        .withLayoutConfig(HoodieLayoutConfig.newBuilder()
+            .withLayoutType(HoodieStorageLayout.LayoutType.BUCKET.name())
+            .withLayoutPartitioner(SparkBucketIndexPartitioner.class.getName()).build())
+        .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BUCKET).withBucketNum("1").build()).build();
+    metaClient = getHoodieMetaClient(HoodieTableType.MERGE_ON_READ, config.getProps());
+    client = getHoodieWriteClient(config);
+
+    // write data and commit
+    writeData(HoodieActiveTimeline.createNewInstantTime(), 100, true);
+    // write data again, and in the case of bucket index, all records will go into log files (we use a small max_file_size)
+    writeData(HoodieActiveTimeline.createNewInstantTime(), 100, true);
+    Assertions.assertEquals(200, readTableTotalRecordsNum());
+    // schedule compaction
+    String compactionTime = (String) client.scheduleCompaction(Option.empty()).get();
+    // write data, and do not commit. those records should not visible to reader
+    String insertTime = HoodieActiveTimeline.createNewInstantTime();
+    List<WriteStatus> writeStatuses = writeData(insertTime, 100, false);
+    Assertions.assertEquals(200, readTableTotalRecordsNum());
+    // commit the write. The records should be visible now even though the compaction does not complete.
+    client.commitStats(insertTime, writeStatuses.stream().map(WriteStatus::getStat).collect(Collectors.toList()), Option.empty(), metaClient.getCommitActionType());
+    Assertions.assertEquals(300, readTableTotalRecordsNum());
+    // after the compaction, total records should remain the same
+    config.setValue(AUTO_COMMIT_ENABLE, "true");
+    client.compact(compactionTime);
+    Assertions.assertEquals(300, readTableTotalRecordsNum());
+  }
+
+  private long readTableTotalRecordsNum() {
+    return HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(hadoopConf(),
+        Arrays.stream(dataGen.getPartitionPaths()).map(p -> Paths.get(basePath(), p).toString()).collect(Collectors.toList()), basePath()).size();
+  }
+
+  private List<WriteStatus> writeData(String instant, int numRecords, boolean doCommit) {
+    metaClient = HoodieTableMetaClient.reload(metaClient);
+    JavaRDD records = jsc().parallelize(dataGen.generateInserts(instant, numRecords), 2);
+    metaClient = HoodieTableMetaClient.reload(metaClient);
+    client.startCommitWithTime(instant);
+    List<WriteStatus> writeStatues = client.upsert(records, instant).collect();
+    org.apache.hudi.testutils.Assertions.assertNoWriteErrors(writeStatues);
+    if (doCommit) {
+      Assertions.assertTrue(client.commitStats(instant, writeStatues.stream().map(WriteStatus::getStat).collect(Collectors.toList()),
+          Option.empty(), metaClient.getCommitActionType()));
+    }
+    metaClient = HoodieTableMetaClient.reload(metaClient);
+    return writeStatues;
+  }
+}
\ No newline at end of file
diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieRealtimeInputFormatUtils.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieRealtimeInputFormatUtils.java
index 09338d3..6718642 100644
--- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieRealtimeInputFormatUtils.java
+++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieRealtimeInputFormatUtils.java
@@ -69,7 +69,7 @@ public class HoodieRealtimeInputFormatUtils extends HoodieInputFormatUtils {
 
   private static final Logger LOG = LogManager.getLogger(HoodieRealtimeInputFormatUtils.class);
 
-  public static InputSplit[] getRealtimeSplits(Configuration conf, Stream<FileSplit> fileSplits) throws IOException {
+  public static InputSplit[] getRealtimeSplits(Configuration conf, Stream<FileSplit> fileSplits) {
     Map<Path, List<FileSplit>> partitionsToParquetSplits =
         fileSplits.collect(Collectors.groupingBy(split -> split.getPath().getParent()));
     // TODO(vc): Should we handle also non-hoodie splits here?
@@ -94,8 +94,8 @@ public class HoodieRealtimeInputFormatUtils extends HoodieInputFormatUtils {
         HoodieTableMetaClient metaClient = partitionsToMetaClient.get(partitionPath);
         if (!fsCache.containsKey(metaClient)) {
           HoodieLocalEngineContext engineContext = new HoodieLocalEngineContext(conf);
-          HoodieTableFileSystemView fsView = FileSystemViewManager.createInMemoryFileSystemView(engineContext,
-              metaClient, HoodieInputFormatUtils.buildMetadataConfig(conf));
+          HoodieTableFileSystemView fsView = FileSystemViewManager.createInMemoryFileSystemViewWithTimeline(engineContext,
+              metaClient, HoodieInputFormatUtils.buildMetadataConfig(conf), metaClient.getActiveTimeline());
           fsCache.put(metaClient, fsView);
         }
         HoodieTableFileSystemView fsView = fsCache.get(metaClient);
@@ -103,7 +103,7 @@ public class HoodieRealtimeInputFormatUtils extends HoodieInputFormatUtils {
         String relPartitionPath = FSUtils.getRelativePartitionPath(new Path(metaClient.getBasePath()), partitionPath);
         // Both commit and delta-commits are included - pick the latest completed one
         Option<HoodieInstant> latestCompletedInstant =
-            metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants().lastInstant();
+            metaClient.getActiveTimeline().getWriteTimeline().filterCompletedInstants().lastInstant();
 
         Stream<FileSlice> latestFileSlices = latestCompletedInstant
             .map(instant -> fsView.getLatestMergedFileSlicesBeforeOrOn(relPartitionPath, instant.getTimestamp()))

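Editor's note on the HUDI-3194 hunks above: a hedged before/after fragment of the input-format change. The identifiers (engineContext, metaClient, conf, FileSystemViewManager, HoodieInputFormatUtils) are used exactly as they appear in the hunk; the surrounding method and imports are omitted, and the comments describe the patch's stated purpose rather than a verified API contract.

// Before (sketch): the in-memory view was created without an explicitly supplied timeline.
HoodieTableFileSystemView before = FileSystemViewManager.createInMemoryFileSystemView(
    engineContext, metaClient, HoodieInputFormatUtils.buildMetadataConfig(conf));

// After (sketch): the view is pinned to the caller's active timeline, and the latest
// completed instant is taken from getWriteTimeline() rather than getCommitsTimeline(),
// which is what the patch relies on to keep MOR snapshot queries correct while a
// compaction is pending.
HoodieTableFileSystemView after = FileSystemViewManager.createInMemoryFileSystemViewWithTimeline(
    engineContext, metaClient, HoodieInputFormatUtils.buildMetadataConfig(conf),
    metaClient.getActiveTimeline());
Option<HoodieInstant> latestCompletedInstant =
    metaClient.getActiveTimeline().getWriteTimeline().filterCompletedInstants().lastInstant();
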
[hudi] 01/06: [HUDI-3257] Excluding clustering instants from pending rollback info (#4616)

Posted by si...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sivabalan pushed a commit to branch release-0.10.1
in repository https://gitbox.apache.org/repos/asf/hudi.git

commit 101be1a0139ad550a509cf3d2c2a101ea62930b4
Author: Danny Chan <yu...@gmail.com>
AuthorDate: Mon Jan 17 18:18:45 2022 +0800

    [HUDI-3257] Excluding clustering instants from pending rollback info (#4616)
---
 .../org/apache/hudi/client/AbstractHoodieWriteClient.java    | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/AbstractHoodieWriteClient.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/AbstractHoodieWriteClient.java
index 76b10fd..8da1db5 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/AbstractHoodieWriteClient.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/AbstractHoodieWriteClient.java
@@ -906,13 +906,17 @@ public abstract class AbstractHoodieWriteClient<T extends HoodieRecordPayload, I
   protected Map<String, Option<HoodiePendingRollbackInfo>> getPendingRollbackInfos(HoodieTableMetaClient metaClient) {
     List<HoodieInstant> instants = metaClient.getActiveTimeline().filterPendingRollbackTimeline().getInstants().collect(Collectors.toList());
     Map<String, Option<HoodiePendingRollbackInfo>> infoMap = new HashMap<>();
-    HoodieTimeline pendingCompactionTimeline = metaClient.getActiveTimeline().filterPendingCompactionTimeline();
     for (HoodieInstant instant : instants) {
       try {
         HoodieRollbackPlan rollbackPlan = RollbackUtils.getRollbackPlan(metaClient, instant);
-        String instantToRollback = rollbackPlan.getInstantToRollback().getCommitTime();
-        if (!pendingCompactionTimeline.containsInstant(instantToRollback)) {
-          infoMap.putIfAbsent(instantToRollback, Option.of(new HoodiePendingRollbackInfo(instant, rollbackPlan)));
+        String action = rollbackPlan.getInstantToRollback().getAction();
+        if (!HoodieTimeline.COMPACTION_ACTION.equals(action)) {
+          boolean isClustering = HoodieTimeline.REPLACE_COMMIT_ACTION.equals(action)
+              && ClusteringUtils.getClusteringPlan(metaClient, instant).isPresent();
+          if (!isClustering) {
+            String instantToRollback = rollbackPlan.getInstantToRollback().getCommitTime();
+            infoMap.putIfAbsent(instantToRollback, Option.of(new HoodiePendingRollbackInfo(instant, rollbackPlan)));
+          }
         }
       } catch (IOException e) {
         LOG.warn("Fetching rollback plan failed for " + infoMap + ", skip the plan", e);

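Editor's note on the HUDI-3257 hunk above: the added exclusion, distilled into a hypothetical helper for readability. The helper itself does not exist in the patch; HoodieTimeline, ClusteringUtils, HoodieRollbackPlan and the accessors are used exactly as in the hunk, with imports and the surrounding class omitted.

// Hypothetical helper mirroring the inline checks added to getPendingRollbackInfos().
private static boolean shouldSkipPendingRollback(HoodieTableMetaClient metaClient,
                                                 HoodieInstant rollbackInstant,
                                                 HoodieRollbackPlan rollbackPlan) {
  String action = rollbackPlan.getInstantToRollback().getAction();
  if (HoodieTimeline.COMPACTION_ACTION.equals(action)) {
    // Compaction targets were already excluded before this patch
    // (previously via a pending-compaction timeline lookup).
    return true;
  }
  // New in this patch: also skip when the action is a replace-commit and
  // ClusteringUtils resolves a clustering plan, i.e. a pending clustering instant.
  return HoodieTimeline.REPLACE_COMMIT_ACTION.equals(action)
      && ClusteringUtils.getClusteringPlan(metaClient, rollbackInstant).isPresent();
}
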
[hudi] 04/06: [HUDI-3245] Convert uppercase letters to lowercase in storage configs (#4602)

Posted by si...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sivabalan pushed a commit to branch release-0.10.1
in repository https://gitbox.apache.org/repos/asf/hudi.git

commit 6df46394eec649658d355c4df4ca0acb77467db4
Author: Thinking Chen <cd...@hotmail.com>
AuthorDate: Wed Jan 19 03:51:09 2022 +0800

    [HUDI-3245] Convert uppercase letters to lowercase in storage configs (#4602)
---
 .../main/java/org/apache/hudi/config/HoodieStorageConfig.java    | 4 ++--
 .../src/main/java/org/apache/hudi/DataSourceUtils.java           | 9 +++++----
 .../src/test/java/org/apache/hudi/TestDataSourceUtils.java       | 8 ++++----
 .../src/main/java/org/apache/hudi/internal/DefaultSource.java    | 2 +-
 .../main/java/org/apache/hudi/spark3/internal/DefaultSource.java | 7 +++++--
 5 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieStorageConfig.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieStorageConfig.java
index 22118da..42689ec 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieStorageConfig.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieStorageConfig.java
@@ -116,14 +116,14 @@ public class HoodieStorageConfig extends HoodieConfig {
       .withDocumentation("Whether to use dictionary encoding");
 
   public static final ConfigProperty<String> PARQUET_WRITE_LEGACY_FORMAT_ENABLED = ConfigProperty
-          .key("hoodie.parquet.writeLegacyFormat.enabled")
+          .key("hoodie.parquet.writelegacyformat.enabled")
           .defaultValue("false")
           .withDocumentation("Sets spark.sql.parquet.writeLegacyFormat. If true, data will be written in a way of Spark 1.4 and earlier. "
                   + "For example, decimal values will be written in Parquet's fixed-length byte array format which other systems such as Apache Hive and Apache Impala use. "
                   + "If false, the newer format in Parquet will be used. For example, decimals will be written in int-based format.");
 
   public static final ConfigProperty<String> PARQUET_OUTPUT_TIMESTAMP_TYPE = ConfigProperty
-          .key("hoodie.parquet.outputTimestampType")
+          .key("hoodie.parquet.outputtimestamptype")
           .defaultValue("TIMESTAMP_MILLIS")
           .withDocumentation("Sets spark.sql.parquet.outputTimestampType. Parquet timestamp type to use when Spark writes data to Parquet files.");
 
diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/DataSourceUtils.java b/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/DataSourceUtils.java
index 3af6ccc..b3bc3b2 100644
--- a/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/DataSourceUtils.java
+++ b/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/DataSourceUtils.java
@@ -38,6 +38,7 @@ import org.apache.hudi.config.HoodieClusteringConfig;
 import org.apache.hudi.config.HoodieCompactionConfig;
 import org.apache.hudi.config.HoodieIndexConfig;
 import org.apache.hudi.config.HoodiePayloadConfig;
+import org.apache.hudi.config.HoodieStorageConfig;
 import org.apache.hudi.config.HoodieWriteConfig;
 import org.apache.hudi.exception.HoodieException;
 import org.apache.hudi.exception.HoodieNotSupportedException;
@@ -316,12 +317,12 @@ public class DataSourceUtils {
 
   // Now by default ParquetWriteSupport will write DecimalType to parquet as int32/int64 when the scale of decimalType < Decimal.MAX_LONG_DIGITS(),
   // but AvroParquetReader which used by HoodieParquetReader cannot support read int32/int64 as DecimalType.
-  // try to find current schema whether contains that DecimalType, and auto set the value of "hoodie.parquet.writeLegacyFormat.enabled"
+  // try to find current schema whether contains that DecimalType, and auto set the value of "hoodie.parquet.writelegacyformat.enabled"
   public static void mayBeOverwriteParquetWriteLegacyFormatProp(Map<String, String> properties, StructType schema) {
     if (DataTypeUtils.foundSmallPrecisionDecimalType(schema)
-        && !Boolean.parseBoolean(properties.getOrDefault("hoodie.parquet.writeLegacyFormat.enabled", "false"))) {
-      properties.put("hoodie.parquet.writeLegacyFormat.enabled", "true");
-      LOG.warn("Small Decimal Type found in current schema, auto set the value of hoodie.parquet.writeLegacyFormat.enabled to true");
+        && !Boolean.parseBoolean(properties.getOrDefault(HoodieStorageConfig.PARQUET_WRITE_LEGACY_FORMAT_ENABLED.key(), "false"))) {
+      properties.put(HoodieStorageConfig.PARQUET_WRITE_LEGACY_FORMAT_ENABLED.key(), "true");
+      LOG.warn("Small Decimal Type found in current schema, auto set the value of hoodie.parquet.writelegacyformat.enabled to true");
     }
   }
 }
diff --git a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/TestDataSourceUtils.java b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/TestDataSourceUtils.java
index a130c3a..0c5a212 100644
--- a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/TestDataSourceUtils.java
+++ b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/TestDataSourceUtils.java
@@ -299,18 +299,18 @@ public class TestDataSourceUtils {
     StructType structType = StructType$.MODULE$.apply(structFields);
     // create write options
     Map<String, String> options = new HashMap<>();
-    options.put("hoodie.parquet.writeLegacyFormat.enabled", String.valueOf(defaultWriteValue));
+    options.put("hoodie.parquet.writelegacyformat.enabled", String.valueOf(defaultWriteValue));
 
     // start test
     mayBeOverwriteParquetWriteLegacyFormatProp(options, structType);
 
     // check result
-    boolean res = Boolean.parseBoolean(options.get("hoodie.parquet.writeLegacyFormat.enabled"));
+    boolean res = Boolean.parseBoolean(options.get("hoodie.parquet.writelegacyformat.enabled"));
     if (smallDecimal) {
-      // should auto modify "hoodie.parquet.writeLegacyFormat.enabled" = "true".
+      // should auto modify "hoodie.parquet.writelegacyformat.enabled" = "true".
       assertEquals(true, res);
     } else {
-      // should not modify the value of "hoodie.parquet.writeLegacyFormat.enabled".
+      // should not modify the value of "hoodie.parquet.writelegacyformat.enabled".
       assertEquals(defaultWriteValue, res);
     }
   }
diff --git a/hudi-spark-datasource/hudi-spark2/src/main/java/org/apache/hudi/internal/DefaultSource.java b/hudi-spark-datasource/hudi-spark2/src/main/java/org/apache/hudi/internal/DefaultSource.java
index e607b2f..e9ed609 100644
--- a/hudi-spark-datasource/hudi-spark2/src/main/java/org/apache/hudi/internal/DefaultSource.java
+++ b/hudi-spark-datasource/hudi-spark2/src/main/java/org/apache/hudi/internal/DefaultSource.java
@@ -68,7 +68,7 @@ public class DefaultSource extends BaseDefaultSource implements DataSourceV2,
     boolean populateMetaFields = options.getBoolean(HoodieTableConfig.POPULATE_META_FIELDS.key(),
         Boolean.parseBoolean(HoodieTableConfig.POPULATE_META_FIELDS.defaultValue()));
     Map<String, String> properties = options.asMap();
-    // Auto set the value of "hoodie.parquet.writeLegacyFormat.enabled"
+    // Auto set the value of "hoodie.parquet.writelegacyformat.enabled"
     mayBeOverwriteParquetWriteLegacyFormatProp(properties, schema);
     // 1st arg to createHoodieConfig is not really required to be set. but passing it anyways.
     HoodieWriteConfig config = DataSourceUtils.createHoodieConfig(options.get(HoodieWriteConfig.AVRO_SCHEMA_STRING.key()).get(), path, tblName, properties);
diff --git a/hudi-spark-datasource/hudi-spark3/src/main/java/org/apache/hudi/spark3/internal/DefaultSource.java b/hudi-spark-datasource/hudi-spark3/src/main/java/org/apache/hudi/spark3/internal/DefaultSource.java
index 63c09e0..3071894 100644
--- a/hudi-spark-datasource/hudi-spark3/src/main/java/org/apache/hudi/spark3/internal/DefaultSource.java
+++ b/hudi-spark-datasource/hudi-spark3/src/main/java/org/apache/hudi/spark3/internal/DefaultSource.java
@@ -31,6 +31,7 @@ import org.apache.spark.sql.connector.expressions.Transform;
 import org.apache.spark.sql.types.StructType;
 import org.apache.spark.sql.util.CaseInsensitiveStringMap;
 
+import java.util.HashMap;
 import java.util.Map;
 
 import static org.apache.hudi.DataSourceUtils.mayBeOverwriteParquetWriteLegacyFormatProp;
@@ -55,8 +56,10 @@ public class DefaultSource extends BaseDefaultSource implements TableProvider {
         HoodieTableConfig.POPULATE_META_FIELDS.defaultValue()));
     boolean arePartitionRecordsSorted = Boolean.parseBoolean(properties.getOrDefault(HoodieInternalConfig.BULKINSERT_ARE_PARTITIONER_RECORDS_SORTED,
         Boolean.toString(HoodieInternalConfig.DEFAULT_BULKINSERT_ARE_PARTITIONER_RECORDS_SORTED)));
-    // Auto set the value of "hoodie.parquet.writeLegacyFormat.enabled"
-    mayBeOverwriteParquetWriteLegacyFormatProp(properties, schema);
+    // Auto set the value of "hoodie.parquet.writelegacyformat.enabled"
+    // Create a new map as the properties is an unmodifiableMap on Spark 3.2.0
+    Map<String, String> newProps = new HashMap<>(properties);
+    mayBeOverwriteParquetWriteLegacyFormatProp(newProps, schema);
     // 1st arg to createHoodieConfig is not really required to be set. but passing it anyways.
     HoodieWriteConfig config = DataSourceUtils.createHoodieConfig(properties.get(HoodieWriteConfig.AVRO_SCHEMA_STRING.key()), path, tblName, properties);
     return new HoodieDataSourceInternalTable(instantTime, config, schema, getSparkSession(),

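Editor's note on the HUDI-3245 hunks above: since this renames user-facing config keys, a short hedged usage sketch may help. The constant and the lowercase literal come straight from the hunks; the sketch class, method, and the write-options map are illustrative assumptions. Going through HoodieStorageConfig keeps callers insulated from the exact casing.

import java.util.HashMap;
import java.util.Map;

import org.apache.hudi.config.HoodieStorageConfig;

class LegacyFormatOptionSketch {
  // Builds write options that enable Parquet legacy-format output.
  static Map<String, String> writeOptions() {
    Map<String, String> options = new HashMap<>();
    // Resolves to "hoodie.parquet.writelegacyformat.enabled" (all lowercase) after this patch;
    // the old camel-cased "hoodie.parquet.writeLegacyFormat.enabled" is no longer the key.
    options.put(HoodieStorageConfig.PARQUET_WRITE_LEGACY_FORMAT_ENABLED.key(), "true");
    return options;
  }
}

Note also that the Spark 3 DefaultSource hunk copies the incoming properties into a new HashMap before mayBeOverwriteParquetWriteLegacyFormatProp mutates them, because the map handed over by Spark 3.2.0 is unmodifiable.
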
[hudi] 05/06: Removing a extraneous test class

Posted by si...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sivabalan pushed a commit to branch release-0.10.1
in repository https://gitbox.apache.org/repos/asf/hudi.git

commit 5dada430ec3b2e34cd64b2f589163cebcf51c469
Author: sivabalan <n....@gmail.com>
AuthorDate: Wed Jan 19 18:02:07 2022 -0500

    Removing a extraneous test class
---
 .../TestHoodieSparkMergeOnReadTableCompaction.java | 125 ---------------------
 1 file changed, 125 deletions(-)

diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestHoodieSparkMergeOnReadTableCompaction.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestHoodieSparkMergeOnReadTableCompaction.java
deleted file mode 100644
index 13903bf..0000000
--- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestHoodieSparkMergeOnReadTableCompaction.java
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.hudi.table.functional;
-
-import org.apache.hudi.client.SparkRDDWriteClient;
-import org.apache.hudi.client.WriteStatus;
-import org.apache.hudi.common.model.HoodieTableType;
-import org.apache.hudi.common.table.HoodieTableMetaClient;
-import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
-import org.apache.hudi.common.testutils.HoodieTestDataGenerator;
-import org.apache.hudi.common.util.Option;
-import org.apache.hudi.config.HoodieCompactionConfig;
-import org.apache.hudi.config.HoodieIndexConfig;
-import org.apache.hudi.config.HoodieLayoutConfig;
-import org.apache.hudi.config.HoodieStorageConfig;
-import org.apache.hudi.config.HoodieWriteConfig;
-import org.apache.hudi.index.HoodieIndex;
-import org.apache.hudi.table.action.commit.SparkBucketIndexPartitioner;
-import org.apache.hudi.table.storage.HoodieStorageLayout;
-import org.apache.hudi.testutils.HoodieMergeOnReadTestUtils;
-import org.apache.hudi.testutils.SparkClientFunctionalTestHarness;
-
-import org.apache.spark.api.java.JavaRDD;
-import org.junit.jupiter.api.Assertions;
-import org.junit.jupiter.api.BeforeEach;
-import org.junit.jupiter.api.Tag;
-import org.junit.jupiter.api.Test;
-
-import java.io.IOException;
-import java.nio.file.Paths;
-import java.util.Arrays;
-import java.util.List;
-import java.util.stream.Collectors;
-
-import static org.apache.hudi.common.testutils.HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA;
-import static org.apache.hudi.config.HoodieWriteConfig.AUTO_COMMIT_ENABLE;
-
-@Tag("functional")
-public class TestHoodieSparkMergeOnReadTableCompaction extends SparkClientFunctionalTestHarness {
-
-  private HoodieTestDataGenerator dataGen;
-  private SparkRDDWriteClient client;
-  private HoodieTableMetaClient metaClient;
-
-  @BeforeEach
-  public void setup() {
-    dataGen = new HoodieTestDataGenerator();
-  }
-
-  @Test
-  public void testWriteDuringCompaction() throws IOException {
-    HoodieWriteConfig config = HoodieWriteConfig.newBuilder()
-        .forTable("test-trip-table")
-        .withPath(basePath())
-        .withSchema(TRIP_EXAMPLE_SCHEMA)
-        .withParallelism(2, 2)
-        .withAutoCommit(false)
-        .withCompactionConfig(HoodieCompactionConfig.newBuilder()
-            .withMaxNumDeltaCommitsBeforeCompaction(1).build())
-        .withStorageConfig(HoodieStorageConfig.newBuilder()
-            .parquetMaxFileSize(1024).build())
-        .withLayoutConfig(HoodieLayoutConfig.newBuilder()
-            .withLayoutType(HoodieStorageLayout.LayoutType.BUCKET.name())
-            .withLayoutPartitioner(SparkBucketIndexPartitioner.class.getName()).build())
-        .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BUCKET).withBucketNum("1").build()).build();
-    metaClient = getHoodieMetaClient(HoodieTableType.MERGE_ON_READ, config.getProps());
-    client = getHoodieWriteClient(config);
-
-    // write data and commit
-    writeData(HoodieActiveTimeline.createNewInstantTime(), 100, true);
-    // write data again, and in the case of bucket index, all records will go into log files (we use a small max_file_size)
-    writeData(HoodieActiveTimeline.createNewInstantTime(), 100, true);
-    Assertions.assertEquals(200, readTableTotalRecordsNum());
-    // schedule compaction
-    String compactionTime = (String) client.scheduleCompaction(Option.empty()).get();
-    // write data, and do not commit. those records should not visible to reader
-    String insertTime = HoodieActiveTimeline.createNewInstantTime();
-    List<WriteStatus> writeStatuses = writeData(insertTime, 100, false);
-    Assertions.assertEquals(200, readTableTotalRecordsNum());
-    // commit the write. The records should be visible now even though the compaction does not complete.
-    client.commitStats(insertTime, writeStatuses.stream().map(WriteStatus::getStat).collect(Collectors.toList()), Option.empty(), metaClient.getCommitActionType());
-    Assertions.assertEquals(300, readTableTotalRecordsNum());
-    // after the compaction, total records should remain the same
-    config.setValue(AUTO_COMMIT_ENABLE, "true");
-    client.compact(compactionTime);
-    Assertions.assertEquals(300, readTableTotalRecordsNum());
-  }
-
-  private long readTableTotalRecordsNum() {
-    return HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(hadoopConf(),
-        Arrays.stream(dataGen.getPartitionPaths()).map(p -> Paths.get(basePath(), p).toString()).collect(Collectors.toList()), basePath()).size();
-  }
-
-  private List<WriteStatus> writeData(String instant, int numRecords, boolean doCommit) {
-    metaClient = HoodieTableMetaClient.reload(metaClient);
-    JavaRDD records = jsc().parallelize(dataGen.generateInserts(instant, numRecords), 2);
-    metaClient = HoodieTableMetaClient.reload(metaClient);
-    client.startCommitWithTime(instant);
-    List<WriteStatus> writeStatues = client.upsert(records, instant).collect();
-    org.apache.hudi.testutils.Assertions.assertNoWriteErrors(writeStatues);
-    if (doCommit) {
-      Assertions.assertTrue(client.commitStats(instant, writeStatues.stream().map(WriteStatus::getStat).collect(Collectors.toList()),
-          Option.empty(), metaClient.getCommitActionType()));
-    }
-    metaClient = HoodieTableMetaClient.reload(metaClient);
-    return writeStatues;
-  }
-}
\ No newline at end of file