You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by yi...@apache.org on 2023/02/16 16:27:11 UTC
[hudi] branch master updated: [MINOR] Fix format name and remove redundant line in examples (#7952)
This is an automated email from the ASF dual-hosted git repository.
yihua pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new c9395adeb21 [MINOR] Fix format name and remove redundant line in examples (#7952)
c9395adeb21 is described below
commit c9395adeb21eca40c4094d79f21df59af39b5c93
Author: Y Ethan Guo <et...@gmail.com>
AuthorDate: Thu Feb 16 08:26:59 2023 -0800
[MINOR] Fix format name and remove redundant line in examples (#7952)
---
.../examples/quickstart/HoodieSparkQuickstart.java | 18 ++++----
.../examples/spark/HoodieDataSourceExample.scala | 54 +++++++++++-----------
.../examples/spark/HoodieMorCompactionJob.scala | 4 +-
.../src/test/python/HoodiePySparkQuickstart.py | 1 -
4 files changed, 38 insertions(+), 39 deletions(-)
diff --git a/hudi-examples/hudi-examples-spark/src/main/java/org/apache/hudi/examples/quickstart/HoodieSparkQuickstart.java b/hudi-examples/hudi-examples-spark/src/main/java/org/apache/hudi/examples/quickstart/HoodieSparkQuickstart.java
index 325aad437ad..3eea50e4961 100644
--- a/hudi-examples/hudi-examples-spark/src/main/java/org/apache/hudi/examples/quickstart/HoodieSparkQuickstart.java
+++ b/hudi-examples/hudi-examples-spark/src/main/java/org/apache/hudi/examples/quickstart/HoodieSparkQuickstart.java
@@ -115,7 +115,7 @@ public final class HoodieSparkQuickstart {
List<String> inserts = dataGen.convertToStringList(dataGen.generateInserts(commitTime, 20));
Dataset<Row> df = spark.read().json(jsc.parallelize(inserts, 1));
- df.write().format("org.apache.hudi")
+ df.write().format("hudi")
.options(QuickstartUtils.getQuickstartWriteConfigs())
.option(HoodieWriteConfig.PRECOMBINE_FIELD_NAME.key(), "ts")
.option(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key(), "uuid")
@@ -135,7 +135,7 @@ public final class HoodieSparkQuickstart {
List<String> inserts = dataGen.convertToStringList(dataGen.generateInsertsOnPartition(commitTime, 20, HoodieExampleDataGenerator.DEFAULT_THIRD_PARTITION_PATH));
Dataset<Row> df = spark.read().json(jsc.parallelize(inserts, 1));
- df.write().format("org.apache.hudi")
+ df.write().format("hudi")
.options(QuickstartUtils.getQuickstartWriteConfigs())
.option("hoodie.datasource.write.operation", WriteOperationType.INSERT_OVERWRITE.name())
.option(HoodieWriteConfig.PRECOMBINE_FIELD_NAME.key(), "ts")
@@ -154,7 +154,7 @@ public final class HoodieSparkQuickstart {
HoodieExampleDataGenerator<HoodieAvroPayload> dataGen) {
Dataset<Row> roViewDF = spark
.read()
- .format("org.apache.hudi")
+ .format("hudi")
.load(tablePath + "/*/*/*/*");
roViewDF.createOrReplaceTempView("hudi_ro_table");
@@ -186,7 +186,7 @@ public final class HoodieSparkQuickstart {
String commitTime = Long.toString(System.currentTimeMillis());
List<String> updates = dataGen.convertToStringList(dataGen.generateUniqueUpdates(commitTime));
Dataset<Row> df = spark.read().json(jsc.parallelize(updates, 1));
- df.write().format("org.apache.hudi")
+ df.write().format("hudi")
.options(QuickstartUtils.getQuickstartWriteConfigs())
.option(HoodieWriteConfig.PRECOMBINE_FIELD_NAME.key(), "ts")
.option(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key(), "uuid")
@@ -202,12 +202,12 @@ public final class HoodieSparkQuickstart {
*/
public static Dataset<Row> delete(SparkSession spark, String tablePath, String tableName) {
- Dataset<Row> roViewDF = spark.read().format("org.apache.hudi").load(tablePath + "/*/*/*/*");
+ Dataset<Row> roViewDF = spark.read().format("hudi").load(tablePath + "/*/*/*/*");
roViewDF.createOrReplaceTempView("hudi_ro_table");
Dataset<Row> toBeDeletedDf = spark.sql("SELECT begin_lat, begin_lon, driver, end_lat, end_lon, fare, partitionpath, rider, ts, uuid FROM hudi_ro_table limit 2");
Dataset<Row> df = toBeDeletedDf.select("uuid", "partitionpath", "ts");
- df.write().format("org.apache.hudi")
+ df.write().format("hudi")
.options(QuickstartUtils.getQuickstartWriteConfigs())
.option(HoodieWriteConfig.PRECOMBINE_FIELD_NAME.key(), "ts")
.option(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key(), "uuid")
@@ -224,7 +224,7 @@ public final class HoodieSparkQuickstart {
*/
public static void deleteByPartition(SparkSession spark, String tablePath, String tableName) {
Dataset<Row> df = spark.emptyDataFrame();
- df.write().format("org.apache.hudi")
+ df.write().format("hudi")
.options(QuickstartUtils.getQuickstartWriteConfigs())
.option(HoodieWriteConfig.PRECOMBINE_FIELD_NAME.key(), "ts")
.option(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key(), "uuid")
@@ -253,7 +253,7 @@ public final class HoodieSparkQuickstart {
// incrementally query data
Dataset<Row> incViewDF = spark
.read()
- .format("org.apache.hudi")
+ .format("hudi")
.option("hoodie.datasource.query.type", "incremental")
.option("hoodie.datasource.read.begin.instanttime", beginTime)
.load(tablePath);
@@ -278,7 +278,7 @@ public final class HoodieSparkQuickstart {
String endTime = commits.get(commits.size() - 1); // commit time we are interested in
//incrementally query data
- Dataset<Row> incViewDF = spark.read().format("org.apache.hudi")
+ Dataset<Row> incViewDF = spark.read().format("hudi")
.option("hoodie.datasource.query.type", "incremental")
.option("hoodie.datasource.read.begin.instanttime", beginTime)
.option("hoodie.datasource.read.end.instanttime", endTime)
diff --git a/hudi-examples/hudi-examples-spark/src/main/scala/org/apache/hudi/examples/spark/HoodieDataSourceExample.scala b/hudi-examples/hudi-examples-spark/src/main/scala/org/apache/hudi/examples/spark/HoodieDataSourceExample.scala
index 33c085cba3e..fe5164f33a5 100644
--- a/hudi-examples/hudi-examples-spark/src/main/scala/org/apache/hudi/examples/spark/HoodieDataSourceExample.scala
+++ b/hudi-examples/hudi-examples-spark/src/main/scala/org/apache/hudi/examples/spark/HoodieDataSourceExample.scala
@@ -30,16 +30,16 @@ import org.apache.spark.sql.SparkSession
import scala.collection.JavaConversions._
/**
- * Simple examples of [[org.apache.hudi.DefaultSource]]
- *
- * To run this example, you should
- * 1. For running in IDE, set VM options `-Dspark.master=local[2]`
- * 2. For running in shell, using `spark-submit`
- *
- * Usage: HoodieWriteClientExample <tablePath> <tableName>.
- * <tablePath> and <tableName> describe root path of hudi and table name
- * for example, `HoodieDataSourceExample file:///tmp/hoodie/hudi_cow_table hudi_cow_table`
- */
+ * Simple examples of [[org.apache.hudi.DefaultSource]]
+ *
+ * To run this example, you should
+ * 1. For running in IDE, set VM options `-Dspark.master=local[2]`
+ * 2. For running in shell, using `spark-submit`
+ *
+ * Usage: HoodieWriteClientExample <tablePath> <tableName>.
+ * <tablePath> and <tableName> describe root path of hudi and table name
+ * for example, `HoodieDataSourceExample file:///tmp/hoodie/hudi_cow_table hudi_cow_table`
+ */
object HoodieDataSourceExample {
def main(args: Array[String]): Unit = {
@@ -75,7 +75,7 @@ object HoodieDataSourceExample {
val commitTime: String = System.currentTimeMillis().toString
val inserts = dataGen.convertToStringList(dataGen.generateInserts(commitTime, 20))
val df = spark.read.json(spark.sparkContext.parallelize(inserts, 1))
- df.write.format("org.apache.hudi").
+ df.write.format("hudi").
options(getQuickstartWriteConfigs).
option(PRECOMBINE_FIELD.key, "ts").
option(RECORDKEY_FIELD.key, "uuid").
@@ -90,9 +90,9 @@ object HoodieDataSourceExample {
*/
def queryData(spark: SparkSession, tablePath: String, tableName: String, dataGen: HoodieExampleDataGenerator[HoodieAvroPayload]): Unit = {
val roViewDF = spark.
- read.
- format("org.apache.hudi").
- load(tablePath + "/*/*/*/*")
+ read.
+ format("hudi").
+ load(tablePath + "/*/*/*/*")
roViewDF.createOrReplaceTempView("hudi_ro_table")
@@ -120,7 +120,7 @@ object HoodieDataSourceExample {
val commitTime: String = System.currentTimeMillis().toString
val updates = dataGen.convertToStringList(dataGen.generateUpdates(commitTime, 10))
val df = spark.read.json(spark.sparkContext.parallelize(updates, 1))
- df.write.format("org.apache.hudi").
+ df.write.format("hudi").
options(getQuickstartWriteConfigs).
option(PRECOMBINE_FIELD.key, "ts").
option(RECORDKEY_FIELD.key, "uuid").
@@ -135,11 +135,11 @@ object HoodieDataSourceExample {
*/
def delete(spark: SparkSession, tablePath: String, tableName: String): Unit = {
- val roViewDF = spark.read.format("org.apache.hudi").load(tablePath + "/*/*/*/*")
+ val roViewDF = spark.read.format("hudi").load(tablePath + "/*/*/*/*")
roViewDF.createOrReplaceTempView("hudi_ro_table")
val df = spark.sql("select uuid, partitionpath, ts from hudi_ro_table limit 2")
- df.write.format("org.apache.hudi").
+ df.write.format("hudi").
options(getQuickstartWriteConfigs).
option(PRECOMBINE_FIELD.key, "ts").
option(RECORDKEY_FIELD.key, "uuid").
@@ -155,7 +155,7 @@ object HoodieDataSourceExample {
*/
def deleteByPartition(spark: SparkSession, tablePath: String, tableName: String): Unit = {
val df = spark.emptyDataFrame
- df.write.format("org.apache.hudi").
+ df.write.format("hudi").
options(getQuickstartWriteConfigs).
option(PRECOMBINE_FIELD.key, "ts").
option(RECORDKEY_FIELD.key, "uuid").
@@ -179,10 +179,10 @@ object HoodieDataSourceExample {
// incrementally query data
val incViewDF = spark.
- read.
- format("org.apache.hudi").
- option(QUERY_TYPE.key, QUERY_TYPE_INCREMENTAL_OPT_VAL).
- option(BEGIN_INSTANTTIME.key, beginTime).
+ read.
+ format("hudi").
+ option(QUERY_TYPE.key, QUERY_TYPE_INCREMENTAL_OPT_VAL).
+ option(BEGIN_INSTANTTIME.key, beginTime).
load(tablePath)
incViewDF.createOrReplaceTempView("hudi_incr_table")
spark.sql("select `_hoodie_commit_time`, fare, begin_lon, begin_lat, ts from hudi_incr_table where fare > 20.0").show()
@@ -200,11 +200,11 @@ object HoodieDataSourceExample {
val endTime = commits(commits.length - 2) // commit time we are interested in
//incrementally query data
- val incViewDF = spark.read.format("org.apache.hudi").
- option(QUERY_TYPE.key, QUERY_TYPE_INCREMENTAL_OPT_VAL).
- option(BEGIN_INSTANTTIME.key, beginTime).
- option(END_INSTANTTIME.key, endTime).
- load(tablePath)
+ val incViewDF = spark.read.format("hudi").
+ option(QUERY_TYPE.key, QUERY_TYPE_INCREMENTAL_OPT_VAL).
+ option(BEGIN_INSTANTTIME.key, beginTime).
+ option(END_INSTANTTIME.key, endTime).
+ load(tablePath)
incViewDF.createOrReplaceTempView("hudi_incr_table")
spark.sql("select `_hoodie_commit_time`, fare, begin_lon, begin_lat, ts from hudi_incr_table where fare > 20.0").show()
}
diff --git a/hudi-examples/hudi-examples-spark/src/main/scala/org/apache/hudi/examples/spark/HoodieMorCompactionJob.scala b/hudi-examples/hudi-examples-spark/src/main/scala/org/apache/hudi/examples/spark/HoodieMorCompactionJob.scala
index 8a2c8715b30..4802632ad03 100644
--- a/hudi-examples/hudi-examples-spark/src/main/scala/org/apache/hudi/examples/spark/HoodieMorCompactionJob.scala
+++ b/hudi-examples/hudi-examples-spark/src/main/scala/org/apache/hudi/examples/spark/HoodieMorCompactionJob.scala
@@ -85,7 +85,7 @@ object HoodieMorCompactionJob {
val commitTime: String = System.currentTimeMillis().toString
val inserts = dataGen.convertToStringList(dataGen.generateInserts(commitTime, 20))
val df = spark.read.json(spark.sparkContext.parallelize(inserts.asScala, 1))
- df.write.format("org.apache.hudi").
+ df.write.format("hudi").
options(getQuickstartWriteConfigs).
option(PRECOMBINE_FIELD.key, "ts").
option(RECORDKEY_FIELD.key, "uuid").
@@ -101,7 +101,7 @@ object HoodieMorCompactionJob {
val commitTime: String = System.currentTimeMillis().toString
val updates = dataGen.convertToStringList(dataGen.generateUpdates(commitTime, 10))
val df = spark.read.json(spark.sparkContext.parallelize(updates.asScala, 1))
- df.write.format("org.apache.hudi").
+ df.write.format("hudi").
options(getQuickstartWriteConfigs).
option(PRECOMBINE_FIELD.key, "ts").
option(RECORDKEY_FIELD.key, "uuid").
diff --git a/hudi-examples/hudi-examples-spark/src/test/python/HoodiePySparkQuickstart.py b/hudi-examples/hudi-examples-spark/src/test/python/HoodiePySparkQuickstart.py
index c1303f6365d..caad76ca600 100644
--- a/hudi-examples/hudi-examples-spark/src/test/python/HoodiePySparkQuickstart.py
+++ b/hudi-examples/hudi-examples-spark/src/test/python/HoodiePySparkQuickstart.py
@@ -256,7 +256,6 @@ if __name__ == "__main__":
.appName("Hudi Spark basic example") \
.config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") \
.config("spark.kryo.registrator", "org.apache.spark.HoodieSparkKryoRegistrar") \
- .config("spark.sql.extensions", "org.apache.spark.sql.hudi.HoodieSparkSessionExtension") \
.config("spark.kryoserializer.buffer.max", "512m") \
.config("spark.sql.extensions", "org.apache.spark.sql.hudi.HoodieSparkSessionExtension") \
.getOrCreate()