You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by xu...@apache.org on 2023/01/25 21:33:31 UTC

[hudi] branch master updated: [HUDI-5620] Fix metaserver bundle validation (#7749)

This is an automated email from the ASF dual-hosted git repository.

xushiyan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new e00ee2d88e2 [HUDI-5620] Fix metaserver bundle validation (#7749)
e00ee2d88e2 is described below

commit e00ee2d88e2def54e455547a2d243909334c6e86
Author: Shiyan Xu <27...@users.noreply.github.com>
AuthorDate: Wed Jan 25 15:33:20 2023 -0600

    [HUDI-5620] Fix metaserver bundle validation (#7749)
---
 .../hudi/common/config/HoodieMetaserverConfig.java |  8 +++-
 packaging/bundle-validation/service/read.scala     |  2 +-
 packaging/bundle-validation/validate.sh            | 50 +++++++++++++---------
 3 files changed, 37 insertions(+), 23 deletions(-)

diff --git a/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetaserverConfig.java b/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetaserverConfig.java
index f25d1ef8378..52b113ba862 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetaserverConfig.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetaserverConfig.java
@@ -38,6 +38,7 @@ public class HoodieMetaserverConfig extends HoodieConfig {
   public static final ConfigProperty<Boolean> METASERVER_ENABLE = ConfigProperty
       .key(METASERVER_PREFIX + ".enabled")
       .defaultValue(false)
+      .sinceVersion("0.13.0")
       .withDocumentation("Enable Hudi metaserver for storing Hudi tables' metadata.");
 
   public static final ConfigProperty<String> DATABASE_NAME = HoodieTableConfig.DATABASE_NAME;
@@ -47,16 +48,19 @@ public class HoodieMetaserverConfig extends HoodieConfig {
   public static final ConfigProperty<String> METASERVER_URLS = ConfigProperty
       .key(METASERVER_PREFIX + ".uris")
       .defaultValue("thrift://localhost:9090")
+      .sinceVersion("0.13.0")
       .withDocumentation("Metastore server uris");
 
   public static final ConfigProperty<Integer> METASERVER_CONNECTION_RETRIES = ConfigProperty
       .key(METASERVER_PREFIX + ".connect.retries")
       .defaultValue(3)
+      .sinceVersion("0.13.0")
       .withDocumentation("Number of retries while opening a connection to metastore");
 
   public static final ConfigProperty<Integer> METASERVER_CONNECTION_RETRY_DELAY = ConfigProperty
       .key(METASERVER_PREFIX + ".connect.retry.delay")
       .defaultValue(1)
+      .sinceVersion("0.13.0")
       .withDocumentation("Number of seconds for the client to wait between consecutive connection attempts");
 
   public static HoodieMetaserverConfig.Builder newBuilder() {
@@ -68,11 +72,11 @@ public class HoodieMetaserverConfig extends HoodieConfig {
   }
 
   public String getDatabaseName() {
-    return getStringOrDefault(DATABASE_NAME);
+    return getString(DATABASE_NAME);
   }
 
   public String getTableName() {
-    return getStringOrDefault(TABLE_NAME);
+    return getString(TABLE_NAME);
   }
 
   public String getMetaserverUris() {
diff --git a/packaging/bundle-validation/service/read.scala b/packaging/bundle-validation/service/read.scala
index 471ae4ac68f..275d26349e2 100644
--- a/packaging/bundle-validation/service/read.scala
+++ b/packaging/bundle-validation/service/read.scala
@@ -24,5 +24,5 @@ spark.read.format("hudi").
   option("hoodie.metadata.enabled", "false").
   option("hoodie.metaserver.enabled", "true").
   option("hoodie.metaserver.uris", "thrift://localhost:9090").
-  load(basePath).coalesce(1).write.csv("/tmp/sparksql/trips/results")
+  load(basePath).coalesce(1).write.csv("/tmp/metaserver-bundle/sparkdatasource/trips/results")
 System.exit(0)
diff --git a/packaging/bundle-validation/validate.sh b/packaging/bundle-validation/validate.sh
index 4363072b481..fbc35ddb497 100755
--- a/packaging/bundle-validation/validate.sh
+++ b/packaging/bundle-validation/validate.sh
@@ -58,15 +58,15 @@ test_spark_hadoop_mr_bundles () {
     echo "::warning::validate.sh Query and validate the results using Spark SQL"
     # save Spark SQL query results
     $SPARK_HOME/bin/spark-shell --jars $JARS_DIR/spark.jar \
-      -i <(echo 'spark.sql("select * from trips").coalesce(1).write.csv("/tmp/sparksql/trips/results"); System.exit(0)')
-    numRecordsSparkSQL=$(cat /tmp/sparksql/trips/results/*.csv | wc -l)
-    if [ "$numRecordsSparkSQL" -ne 10 ]; then
+      -i <(echo 'spark.sql("select * from trips").coalesce(1).write.csv("/tmp/spark-bundle/sparksql/trips/results"); System.exit(0)')
+    numRecords=$(cat /tmp/spark-bundle/sparksql/trips/results/*.csv | wc -l)
+    if [ "$numRecords" -ne 10 ]; then
         echo "::error::validate.sh Spark SQL validation failed."
         exit 1
     fi
     echo "::warning::validate.sh Query and validate the results using HiveQL"
     # save HiveQL query results
-    hiveqlresultsdir=/tmp/hiveql/trips/results
+    hiveqlresultsdir=/tmp/hadoop-mr-bundle/hiveql/trips/results
     mkdir -p $hiveqlresultsdir
     $HIVE_HOME/bin/beeline --hiveconf hive.input.format=org.apache.hudi.hadoop.HoodieParquetInputFormat \
       -u jdbc:hive2://localhost:10000/default --showHeader=false --outputformat=csv2 \
@@ -153,7 +153,11 @@ test_flink_bundle() {
     local EXIT_CODE=$?
     $FLINK_HOME/bin/stop-cluster.sh
     unset HADOOP_CLASSPATH
-    exit $EXIT_CODE
+    if [ "$EXIT_CODE" -ne 0 ]; then
+        echo "::error::validate.sh Flink bundle validation failed."
+        exit 1
+    fi
+    echo "::warning::validate.sh Flink bundle validation was successful."
 }
 
 
@@ -184,6 +188,11 @@ test_kafka_connect_bundle() {
     $WORKDIR/kafka/consume.sh
     local EXIT_CODE=$?
     kill $ZOOKEEPER_PID $KAFKA_SERVER_PID $SCHEMA_REG_PID
+    if [ "$EXIT_CODE" -ne 0 ]; then
+        echo "::error::validate.sh Kafka Connect bundle validation failed."
+        exit 1
+    fi
+    echo "::warning::validate.sh Kafka Connect bundle validation was successful."
 }
 
 ##
@@ -192,11 +201,12 @@ test_kafka_connect_bundle() {
 # env vars (defined in container):
 #   SPARK_HOME: path to the spark directory
 ##
-test_hudi_metaserver_bundles () {
-    echo "::warning::validate.sh setting up hudi metaserver bundles validation"
+test_metaserver_bundle () {
+    echo "::warning::validate.sh setting up Metaserver bundle validation"
 
-    echo "::warning::validate.sh Start hudi metaserver"
-    java -jar $JARS_DIR/metaserver.jar & local METASEVER=$!
+    echo "::warning::validate.sh Start Metaserver"
+    java -jar $JARS_DIR/metaserver.jar &
+    local METASEVER_PID=$!
 
     echo "::warning::validate.sh Start hive server"
     $DERBY_HOME/bin/startNetworkServer -h 0.0.0.0 &
@@ -208,18 +218,18 @@ test_hudi_metaserver_bundles () {
     $SPARK_HOME/bin/spark-shell --jars $JARS_DIR/spark.jar < $WORKDIR/service/write.scala
     ls /tmp/hudi-bundles/tests/trips
 
-    echo "::warning::validate.sh Query and validate the results using Spark SQL"
-    # save Spark SQL query results
+    echo "::warning::validate.sh Query and validate the results using Spark DataSource"
+    # save Spark DataSource query results
     $SPARK_HOME/bin/spark-shell --jars $JARS_DIR/spark.jar  < $WORKDIR/service/read.scala
-    numRecordsSparkSQL=$(cat /tmp/sparksql/trips/results/*.csv | wc -l)
-    echo $numRecordsSparkSQL
-    if [ "$numRecordsSparkSQL" -ne 10 ]; then
-        echo "::error::validate.sh Spark SQL validation failed."
+    numRecords=$(cat /tmp/metaserver-bundle/sparkdatasource/trips/results/*.csv | wc -l)
+    echo $numRecords
+    if [ "$numRecords" -ne 10 ]; then
+        echo "::error::validate.sh Metaserver bundle validation failed."
         exit 1
     fi
 
-    echo "::warning::validate.sh hudi metaserver validation was successful."
-    kill $DERBY_PID $HIVE_PID $METASEVER
+    echo "::warning::validate.sh Metaserver bundle validation was successful."
+    kill $DERBY_PID $HIVE_PID $METASEVER_PID
 }
 
 
@@ -267,9 +277,9 @@ if [ "$?" -ne 0 ]; then
 fi
 echo "::warning::validate.sh done validating kafka connect bundle"
 
-echo "::warning::validate.sh validating hudi metaserver bundle"
-test_hudi_metaserver_bundles
+echo "::warning::validate.sh validating metaserver bundle"
+test_metaserver_bundle
 if [ "$?" -ne 0 ]; then
     exit 1
 fi
-echo "::warning::validate.sh done validating hudi metaserver bundle"
+echo "::warning::validate.sh done validating metaserver bundle"