You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by na...@apache.org on 2019/03/13 23:14:36 UTC
[incubator-hudi] branch master updated: Fix hive sync (libfb version mismatch) and deltastreamer issue (missing cmdline argument) in demo
This is an automated email from the ASF dual-hosted git repository.
nagarwal pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-hudi.git
The following commit(s) were added to refs/heads/master by this push:
new adc8cac Fix hive sync (libfb version mismatch) and deltastreamer issue (missing cmdline argument) in demo
adc8cac is described below
commit adc8cac74378c16f2508adbf16a6a51d241a3e35
Author: Balaji Varadarajan <va...@uber.com>
AuthorDate: Fri Mar 1 11:17:53 2019 -0800
Fix hive sync (libfb version mismatch) and deltastreamer issue (missing cmdline argument) in demo
---
docs/quickstart.md | 9 +++++----
hoodie-hive/pom.xml | 2 +-
packaging/hoodie-hive-bundle/pom.xml | 7 ++++++-
packaging/hoodie-presto-bundle/pom.xml | 2 +-
pom.xml | 1 +
5 files changed, 14 insertions(+), 7 deletions(-)
diff --git a/docs/quickstart.md b/docs/quickstart.md
index 70848d0..882e660 100644
--- a/docs/quickstart.md
+++ b/docs/quickstart.md
@@ -450,13 +450,14 @@ automatically initializes the datasets in the file-system if they do not exist y
docker exec -it adhoc-2 /bin/bash
# Run the following spark-submit command to execute the delta-streamer and ingest to stock_ticks_cow dataset in HDFS
-spark-submit --class com.uber.hoodie.utilities.deltastreamer.HoodieDeltaStreamer $HUDI_UTILITIES_BUNDLE --storage-type COPY_ON_WRITE --source-class com.uber.hoodie.utilities.sources.JsonKafkaSource --source-ordering-field ts --target-base-path /user/hive/warehouse/stock_ticks_cow --target-table stock_ticks_cow --props /var/demo/config/kafka-source.properties
+spark-submit --class com.uber.hoodie.utilities.deltastreamer.HoodieDeltaStreamer $HUDI_UTILITIES_BUNDLE --storage-type COPY_ON_WRITE --source-class com.uber.hoodie.utilities.sources.JsonKafkaSource --source-ordering-field ts --target-base-path /user/hive/warehouse/stock_ticks_cow --target-table stock_ticks_cow --schemaprovider-class com.uber.hoodie.utilities.schema.FilebasedSchemaProvider --props /var/demo/config/kafka-source.properties
....
....
2018-09-24 22:20:00 INFO OutputCommitCoordinator$OutputCommitCoordinatorEndpoint:54 - OutputCommitCoordinator stopped!
2018-09-24 22:20:00 INFO SparkContext:54 - Successfully stopped SparkContext
+
# Run the following spark-submit command to execute the delta-streamer and ingest to stock_ticks_mor dataset in HDFS
-spark-submit --class com.uber.hoodie.utilities.deltastreamer.HoodieDeltaStreamer $HUDI_UTILITIES_BUNDLE --storage-type MERGE_ON_READ --source-class com.uber.hoodie.utilities.sources.JsonKafkaSource --source-ordering-field ts --target-base-path /user/hive/warehouse/stock_ticks_mor --target-table stock_ticks_mor --props /var/demo/config/kafka-source.properties
+spark-submit --class com.uber.hoodie.utilities.deltastreamer.HoodieDeltaStreamer $HUDI_UTILITIES_BUNDLE --storage-type MERGE_ON_READ --source-class com.uber.hoodie.utilities.sources.JsonKafkaSource --source-ordering-field ts --target-base-path /user/hive/warehouse/stock_ticks_mor --target-table stock_ticks_mor --schemaprovider-class com.uber.hoodie.utilities.schema.FilebasedSchemaProvider --props /var/demo/config/kafka-source.properties
....
2018-09-24 22:22:01 INFO OutputCommitCoordinator$OutputCommitCoordinatorEndpoint:54 - OutputCommitCoordinator stopped!
2018-09-24 22:22:01 INFO SparkContext:54 - Successfully stopped SparkContext
@@ -724,10 +725,10 @@ cat docker/demo/data/batch_2.json | kafkacat -b kafkabroker -t stock_ticks -P
docker exec -it adhoc-2 /bin/bash
# Run the following spark-submit command to execute the delta-streamer and ingest to stock_ticks_cow dataset in HDFS
-spark-submit --class com.uber.hoodie.utilities.deltastreamer.HoodieDeltaStreamer $HUDI_UTILITIES_BUNDLE --storage-type COPY_ON_WRITE --source-class com.uber.hoodie.utilities.sources.JsonKafkaSource --source-ordering-field ts --target-base-path /user/hive/warehouse/stock_ticks_cow --target-table stock_ticks_cow --props /var/demo/config/kafka-source.properties
+spark-submit --class com.uber.hoodie.utilities.deltastreamer.HoodieDeltaStreamer $HUDI_UTILITIES_BUNDLE --storage-type COPY_ON_WRITE --source-class com.uber.hoodie.utilities.sources.JsonKafkaSource --source-ordering-field ts --target-base-path /user/hive/warehouse/stock_ticks_cow --target-table stock_ticks_cow --schemaprovider-class com.uber.hoodie.utilities.schema.FilebasedSchemaProvider --props /var/demo/config/kafka-source.properties
# Run the following spark-submit command to execute the delta-streamer and ingest to stock_ticks_mor dataset in HDFS
-spark-submit --class com.uber.hoodie.utilities.deltastreamer.HoodieDeltaStreamer $HUDI_UTILITIES_BUNDLE --storage-type MERGE_ON_READ --source-class com.uber.hoodie.utilities.sources.JsonKafkaSource --source-ordering-field ts --target-base-path /user/hive/warehouse/stock_ticks_mor --target-table stock_ticks_mor --props /var/demo/config/kafka-source.properties
+spark-submit --class com.uber.hoodie.utilities.deltastreamer.HoodieDeltaStreamer $HUDI_UTILITIES_BUNDLE --storage-type MERGE_ON_READ --source-class com.uber.hoodie.utilities.sources.JsonKafkaSource --source-ordering-field ts --target-base-path /user/hive/warehouse/stock_ticks_mor --target-table stock_ticks_mor --schemaprovider-class com.uber.hoodie.utilities.schema.FilebasedSchemaProvider --props /var/demo/config/kafka-source.properties
exit
```
diff --git a/hoodie-hive/pom.xml b/hoodie-hive/pom.xml
index ff7487a..a856a5e 100644
--- a/hoodie-hive/pom.xml
+++ b/hoodie-hive/pom.xml
@@ -50,7 +50,7 @@
<dependency>
<groupId>org.apache.thrift</groupId>
<artifactId>libthrift</artifactId>
- <version>0.12.0</version>
+ <version>${thrift.version}</version>
</dependency>
<dependency>
diff --git a/packaging/hoodie-hive-bundle/pom.xml b/packaging/hoodie-hive-bundle/pom.xml
index 52021c3..6146236 100644
--- a/packaging/hoodie-hive-bundle/pom.xml
+++ b/packaging/hoodie-hive-bundle/pom.xml
@@ -71,7 +71,12 @@
<dependency>
<groupId>org.apache.thrift</groupId>
<artifactId>libthrift</artifactId>
- <version>0.9.2</version>
+ <version>${thrift.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.thrift</groupId>
+ <artifactId>libfb303</artifactId>
+ <version>0.9.3</version>
</dependency>
<dependency>
diff --git a/packaging/hoodie-presto-bundle/pom.xml b/packaging/hoodie-presto-bundle/pom.xml
index c4a2bdb..750fba9 100644
--- a/packaging/hoodie-presto-bundle/pom.xml
+++ b/packaging/hoodie-presto-bundle/pom.xml
@@ -51,7 +51,7 @@
<dependency>
<groupId>org.apache.thrift</groupId>
<artifactId>libthrift</artifactId>
- <version>0.9.2</version>
+ <version>${thrift.version}</version>
</dependency>
<dependency>
diff --git a/pom.xml b/pom.xml
index b9e9bb8..d379920 100644
--- a/pom.xml
+++ b/pom.xml
@@ -138,6 +138,7 @@
<scala.version>2.11.8</scala.version>
<scala.libversion>2.11</scala.libversion>
<surefire-log4j.file>file://${project.basedir}/src/test/resources/log4j-surefire.properties</surefire-log4j.file>
+ <thrift.version>0.12.0</thrift.version>
</properties>
<scm>