You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by xu...@apache.org on 2023/01/25 21:33:31 UTC
[hudi] branch master updated: [HUDI-5620] Fix metaserver bundle validation (#7749)
This is an automated email from the ASF dual-hosted git repository.
xushiyan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new e00ee2d88e2 [HUDI-5620] Fix metaserver bundle validation (#7749)
e00ee2d88e2 is described below
commit e00ee2d88e2def54e455547a2d243909334c6e86
Author: Shiyan Xu <27...@users.noreply.github.com>
AuthorDate: Wed Jan 25 15:33:20 2023 -0600
[HUDI-5620] Fix metaserver bundle validation (#7749)
---
.../hudi/common/config/HoodieMetaserverConfig.java | 8 +++-
packaging/bundle-validation/service/read.scala | 2 +-
packaging/bundle-validation/validate.sh | 50 +++++++++++++---------
3 files changed, 37 insertions(+), 23 deletions(-)
diff --git a/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetaserverConfig.java b/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetaserverConfig.java
index f25d1ef8378..52b113ba862 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetaserverConfig.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetaserverConfig.java
@@ -38,6 +38,7 @@ public class HoodieMetaserverConfig extends HoodieConfig {
public static final ConfigProperty<Boolean> METASERVER_ENABLE = ConfigProperty
.key(METASERVER_PREFIX + ".enabled")
.defaultValue(false)
+ .sinceVersion("0.13.0")
.withDocumentation("Enable Hudi metaserver for storing Hudi tables' metadata.");
public static final ConfigProperty<String> DATABASE_NAME = HoodieTableConfig.DATABASE_NAME;
@@ -47,16 +48,19 @@ public class HoodieMetaserverConfig extends HoodieConfig {
public static final ConfigProperty<String> METASERVER_URLS = ConfigProperty
.key(METASERVER_PREFIX + ".uris")
.defaultValue("thrift://localhost:9090")
+ .sinceVersion("0.13.0")
.withDocumentation("Metastore server uris");
public static final ConfigProperty<Integer> METASERVER_CONNECTION_RETRIES = ConfigProperty
.key(METASERVER_PREFIX + ".connect.retries")
.defaultValue(3)
+ .sinceVersion("0.13.0")
.withDocumentation("Number of retries while opening a connection to metastore");
public static final ConfigProperty<Integer> METASERVER_CONNECTION_RETRY_DELAY = ConfigProperty
.key(METASERVER_PREFIX + ".connect.retry.delay")
.defaultValue(1)
+ .sinceVersion("0.13.0")
.withDocumentation("Number of seconds for the client to wait between consecutive connection attempts");
public static HoodieMetaserverConfig.Builder newBuilder() {
@@ -68,11 +72,11 @@ public class HoodieMetaserverConfig extends HoodieConfig {
}
public String getDatabaseName() {
- return getStringOrDefault(DATABASE_NAME);
+ return getString(DATABASE_NAME);
}
public String getTableName() {
- return getStringOrDefault(TABLE_NAME);
+ return getString(TABLE_NAME);
}
public String getMetaserverUris() {
diff --git a/packaging/bundle-validation/service/read.scala b/packaging/bundle-validation/service/read.scala
index 471ae4ac68f..275d26349e2 100644
--- a/packaging/bundle-validation/service/read.scala
+++ b/packaging/bundle-validation/service/read.scala
@@ -24,5 +24,5 @@ spark.read.format("hudi").
option("hoodie.metadata.enabled", "false").
option("hoodie.metaserver.enabled", "true").
option("hoodie.metaserver.uris", "thrift://localhost:9090").
- load(basePath).coalesce(1).write.csv("/tmp/sparksql/trips/results")
+ load(basePath).coalesce(1).write.csv("/tmp/metaserver-bundle/sparkdatasource/trips/results")
System.exit(0)
diff --git a/packaging/bundle-validation/validate.sh b/packaging/bundle-validation/validate.sh
index 4363072b481..fbc35ddb497 100755
--- a/packaging/bundle-validation/validate.sh
+++ b/packaging/bundle-validation/validate.sh
@@ -58,15 +58,15 @@ test_spark_hadoop_mr_bundles () {
echo "::warning::validate.sh Query and validate the results using Spark SQL"
# save Spark SQL query results
$SPARK_HOME/bin/spark-shell --jars $JARS_DIR/spark.jar \
- -i <(echo 'spark.sql("select * from trips").coalesce(1).write.csv("/tmp/sparksql/trips/results"); System.exit(0)')
- numRecordsSparkSQL=$(cat /tmp/sparksql/trips/results/*.csv | wc -l)
- if [ "$numRecordsSparkSQL" -ne 10 ]; then
+ -i <(echo 'spark.sql("select * from trips").coalesce(1).write.csv("/tmp/spark-bundle/sparksql/trips/results"); System.exit(0)')
+ numRecords=$(cat /tmp/spark-bundle/sparksql/trips/results/*.csv | wc -l)
+ if [ "$numRecords" -ne 10 ]; then
echo "::error::validate.sh Spark SQL validation failed."
exit 1
fi
echo "::warning::validate.sh Query and validate the results using HiveQL"
# save HiveQL query results
- hiveqlresultsdir=/tmp/hiveql/trips/results
+ hiveqlresultsdir=/tmp/hadoop-mr-bundle/hiveql/trips/results
mkdir -p $hiveqlresultsdir
$HIVE_HOME/bin/beeline --hiveconf hive.input.format=org.apache.hudi.hadoop.HoodieParquetInputFormat \
-u jdbc:hive2://localhost:10000/default --showHeader=false --outputformat=csv2 \
@@ -153,7 +153,11 @@ test_flink_bundle() {
local EXIT_CODE=$?
$FLINK_HOME/bin/stop-cluster.sh
unset HADOOP_CLASSPATH
- exit $EXIT_CODE
+ if [ "$EXIT_CODE" -ne 0 ]; then
+ echo "::error::validate.sh Flink bundle validation failed."
+ exit 1
+ fi
+ echo "::warning::validate.sh done validating Flink bundle validation was successful."
}
@@ -184,6 +188,11 @@ test_kafka_connect_bundle() {
$WORKDIR/kafka/consume.sh
local EXIT_CODE=$?
kill $ZOOKEEPER_PID $KAFKA_SERVER_PID $SCHEMA_REG_PID
+ if [ "$EXIT_CODE" -ne 0 ]; then
+ echo "::error::validate.sh Kafka Connect bundle validation failed."
+ exit 1
+ fi
+ echo "::warning::validate.sh done validating Kafka Connect bundle validation was successful."
}
##
@@ -192,11 +201,12 @@ test_kafka_connect_bundle() {
# env vars (defined in container):
# SPARK_HOME: path to the spark directory
##
-test_hudi_metaserver_bundles () {
- echo "::warning::validate.sh setting up hudi metaserver bundles validation"
+test_metaserver_bundle () {
+ echo "::warning::validate.sh setting up Metaserver bundle validation"
- echo "::warning::validate.sh Start hudi metaserver"
- java -jar $JARS_DIR/metaserver.jar & local METASEVER=$!
+ echo "::warning::validate.sh Start Metaserver"
+ java -jar $JARS_DIR/metaserver.jar &
+ local METASEVER_PID=$!
echo "::warning::validate.sh Start hive server"
$DERBY_HOME/bin/startNetworkServer -h 0.0.0.0 &
@@ -208,18 +218,18 @@ test_hudi_metaserver_bundles () {
$SPARK_HOME/bin/spark-shell --jars $JARS_DIR/spark.jar < $WORKDIR/service/write.scala
ls /tmp/hudi-bundles/tests/trips
- echo "::warning::validate.sh Query and validate the results using Spark SQL"
- # save Spark SQL query results
+ echo "::warning::validate.sh Query and validate the results using Spark DataSource"
+ # save Spark DataSource query results
$SPARK_HOME/bin/spark-shell --jars $JARS_DIR/spark.jar < $WORKDIR/service/read.scala
- numRecordsSparkSQL=$(cat /tmp/sparksql/trips/results/*.csv | wc -l)
- echo $numRecordsSparkSQL
- if [ "$numRecordsSparkSQL" -ne 10 ]; then
- echo "::error::validate.sh Spark SQL validation failed."
+ numRecords=$(cat /tmp/metaserver-bundle/sparkdatasource/trips/results/*.csv | wc -l)
+ echo $numRecords
+ if [ "$numRecords" -ne 10 ]; then
+ echo "::error::validate.sh Metaserver bundle validation failed."
exit 1
fi
- echo "::warning::validate.sh hudi metaserver validation was successful."
- kill $DERBY_PID $HIVE_PID $METASEVER
+ echo "::warning::validate.sh Metaserver bundle validation was successful."
+ kill $DERBY_PID $HIVE_PID $METASEVER_PID
}
@@ -267,9 +277,9 @@ if [ "$?" -ne 0 ]; then
fi
echo "::warning::validate.sh done validating kafka connect bundle"
-echo "::warning::validate.sh validating hudi metaserver bundle"
-test_hudi_metaserver_bundles
+echo "::warning::validate.sh validating metaserver bundle"
+test_metaserver_bundle
if [ "$?" -ne 0 ]; then
exit 1
fi
-echo "::warning::validate.sh done validating hudi metaserver bundle"
+echo "::warning::validate.sh done validating metaserver bundle"