You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by xu...@apache.org on 2022/07/21 06:00:19 UTC
[hudi] 01/01: Revert "[HUDI-4324] Remove use_jdbc config from hudi sync (#6072)"
This is an automated email from the ASF dual-hosted git repository.
xushiyan pushed a commit to branch revert-6072-HUDI-4324-remove-use-jdbc
in repository https://gitbox.apache.org/repos/asf/hudi.git
commit a64557126cfb05c57b34142e449569f25959a12d
Author: Shiyan Xu <27...@users.noreply.github.com>
AuthorDate: Wed Jul 20 23:00:12 2022 -0700
Revert "[HUDI-4324] Remove use_jdbc config from hudi sync (#6072)"
This reverts commit 046044c83d6382d455cfb0ff5c1130ddf926fcf3.
---
conf/hudi-defaults.conf.template | 2 +-
docker/demo/config/hoodie-incr.properties | 3 +-
docker/demo/sparksql-incremental.commands | 2 --
.../util/TestDFSPropertiesConfiguration.java | 2 +-
.../resources/external-config/hudi-defaults.conf | 2 +-
.../apache/hudi/configuration/FlinkOptions.java | 6 ++++
.../apache/hudi/sink/utils/HiveSyncContext.java | 2 ++
.../apache/hudi/streamer/FlinkStreamerConfig.java | 4 +++
hudi-kafka-connect/demo/config-sink-hive.json | 1 +
.../hudi/connect/writers/KafkaConnectConfigs.java | 1 +
.../scala/org/apache/hudi/DataSourceOptions.scala | 10 +++++++
.../scala/org/apache/hudi/HoodieWriterUtils.scala | 1 +
.../java/org/apache/hudi/hive/HiveSyncConfig.java | 5 ++++
.../org/apache/hudi/hive/HiveSyncConfigHolder.java | 11 +++++++-
.../org/apache/hudi/hive/HoodieHiveSyncClient.java | 32 +++++++++++++---------
.../apache/hudi/hive/ddl/HiveQueryDDLExecutor.java | 2 +-
.../org/apache/hudi/hive/ddl/JDBCExecutor.java | 2 +-
.../replication/HiveSyncGlobalCommitParams.java | 2 --
.../org/apache/hudi/hive/TestHiveSyncTool.java | 2 +-
.../hudi/utilities/HoodieDropPartitionsTool.java | 9 ++++--
20 files changed, 72 insertions(+), 29 deletions(-)
diff --git a/conf/hudi-defaults.conf.template b/conf/hudi-defaults.conf.template
index fbcedb3f18..175dbaf23d 100644
--- a/conf/hudi-defaults.conf.template
+++ b/conf/hudi-defaults.conf.template
@@ -20,7 +20,7 @@
# Example:
# hoodie.datasource.hive_sync.jdbcurl jdbc:hive2://localhost:10000
-# hoodie.datasource.hive_sync.mode jdbc
+# hoodie.datasource.hive_sync.use_jdbc true
# hoodie.datasource.hive_sync.support_timestamp false
# hoodie.index.type BLOOM
# hoodie.metadata.enable false
diff --git a/docker/demo/config/hoodie-incr.properties b/docker/demo/config/hoodie-incr.properties
index 47bfc95b53..80f474b1e7 100644
--- a/docker/demo/config/hoodie-incr.properties
+++ b/docker/demo/config/hoodie-incr.properties
@@ -28,6 +28,5 @@ hoodie.deltastreamer.source.hoodieincr.path=/docker_hoodie_sync_valid_test
hoodie.deltastreamer.source.hoodieincr.read_latest_on_missing_ckpt=true
# hive sync
hoodie.datasource.hive_sync.table=docker_hoodie_sync_valid_test_2
-hoodie.datasource.hive_sync.mode=jdbc
hoodie.datasource.hive_sync.jdbcurl=jdbc:hive2://hiveserver:10000
-hoodie.datasource.hive_sync.partition_fields=partition
+hoodie.datasource.hive_sync.partition_fields=partition
\ No newline at end of file
diff --git a/docker/demo/sparksql-incremental.commands b/docker/demo/sparksql-incremental.commands
index 3d7da63703..9ec586e49d 100644
--- a/docker/demo/sparksql-incremental.commands
+++ b/docker/demo/sparksql-incremental.commands
@@ -47,7 +47,6 @@ spark.sql("select key, `_hoodie_partition_path` as datestr, symbol, ts, open, cl
option(HoodieWriteConfig.TBL_NAME.key(), "stock_ticks_derived_mor").
option(HoodieSyncConfig.META_SYNC_TABLE_NAME.key(), "stock_ticks_derived_mor").
option(HoodieSyncConfig.META_SYNC_DATABASE_NAME.key(), "default").
- option(HiveSyncConfigHolder.HIVE_SYNC_MODE.key(), "jdbc").
option(HiveSyncConfigHolder.HIVE_URL.key(), "jdbc:hive2://hiveserver:10000").
option(HiveSyncConfigHolder.HIVE_USER.key(), "hive").
option(HiveSyncConfigHolder.HIVE_PASS.key(), "hive").
@@ -80,7 +79,6 @@ spark.sql("select key, `_hoodie_partition_path` as datestr, symbol, ts, open, cl
option(HoodieWriteConfig.TBL_NAME.key(), "stock_ticks_derived_mor_bs").
option(HoodieSyncConfig.META_SYNC_TABLE_NAME.key(), "stock_ticks_derived_mor_bs").
option(HoodieSyncConfig.META_SYNC_DATABASE_NAME.key(), "default").
- option(HiveSyncConfigHolder.HIVE_SYNC_MODE.key(), "jdbc").
option(HiveSyncConfigHolder.HIVE_URL.key(), "jdbc:hive2://hiveserver:10000").
option(HiveSyncConfigHolder.HIVE_USER.key(), "hive").
option(HiveSyncConfigHolder.HIVE_PASS.key(), "hive").
diff --git a/hudi-common/src/test/java/org/apache/hudi/common/util/TestDFSPropertiesConfiguration.java b/hudi-common/src/test/java/org/apache/hudi/common/util/TestDFSPropertiesConfiguration.java
index a122f414f9..465739340d 100644
--- a/hudi-common/src/test/java/org/apache/hudi/common/util/TestDFSPropertiesConfiguration.java
+++ b/hudi-common/src/test/java/org/apache/hudi/common/util/TestDFSPropertiesConfiguration.java
@@ -185,7 +185,7 @@ public class TestDFSPropertiesConfiguration {
DFSPropertiesConfiguration.refreshGlobalProps();
assertEquals(5, DFSPropertiesConfiguration.getGlobalProps().size());
assertEquals("jdbc:hive2://localhost:10000", DFSPropertiesConfiguration.getGlobalProps().get("hoodie.datasource.hive_sync.jdbcurl"));
- assertEquals("jdbc", DFSPropertiesConfiguration.getGlobalProps().get("hoodie.datasource.hive_sync.mode"));
+ assertEquals("true", DFSPropertiesConfiguration.getGlobalProps().get("hoodie.datasource.hive_sync.use_jdbc"));
assertEquals("false", DFSPropertiesConfiguration.getGlobalProps().get("hoodie.datasource.hive_sync.support_timestamp"));
assertEquals("BLOOM", DFSPropertiesConfiguration.getGlobalProps().get("hoodie.index.type"));
assertEquals("true", DFSPropertiesConfiguration.getGlobalProps().get("hoodie.metadata.enable"));
diff --git a/hudi-common/src/test/resources/external-config/hudi-defaults.conf b/hudi-common/src/test/resources/external-config/hudi-defaults.conf
index 2e4c3a5d75..1133adb4d7 100644
--- a/hudi-common/src/test/resources/external-config/hudi-defaults.conf
+++ b/hudi-common/src/test/resources/external-config/hudi-defaults.conf
@@ -20,7 +20,7 @@
# Example:
hoodie.datasource.hive_sync.jdbcurl jdbc:hive2://localhost:10000
-hoodie.datasource.hive_sync.mode jdbc
+hoodie.datasource.hive_sync.use_jdbc true
hoodie.datasource.hive_sync.support_timestamp false
hoodie.index.type BLOOM
hoodie.metadata.enable true
diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/configuration/FlinkOptions.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/configuration/FlinkOptions.java
index 7425540de9..71b7976e14 100644
--- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/configuration/FlinkOptions.java
+++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/configuration/FlinkOptions.java
@@ -755,6 +755,12 @@ public class FlinkOptions extends HoodieConfig {
.defaultValue(false)
.withDescription("Assume partitioning is yyyy/mm/dd, default false");
+ public static final ConfigOption<Boolean> HIVE_SYNC_USE_JDBC = ConfigOptions
+ .key("hive_sync.use_jdbc")
+ .booleanType()
+ .defaultValue(true)
+ .withDescription("Use JDBC when hive synchronization is enabled, default true");
+
public static final ConfigOption<Boolean> HIVE_SYNC_AUTO_CREATE_DB = ConfigOptions
.key("hive_sync.auto_create_db")
.booleanType()
diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/utils/HiveSyncContext.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/utils/HiveSyncContext.java
index cceab5a615..e34adac580 100644
--- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/utils/HiveSyncContext.java
+++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/utils/HiveSyncContext.java
@@ -43,6 +43,7 @@ import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_TABLE_PROPERTIES;
import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_TABLE_SERDE_PROPERTIES;
import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_URL;
import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_USER;
+import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_USE_JDBC;
import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_USE_PRE_APACHE_INPUT_FORMAT;
import static org.apache.hudi.hive.HiveSyncConfigHolder.METASTORE_URIS;
import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_ASSUME_DATE_PARTITION;
@@ -104,6 +105,7 @@ public class HiveSyncContext {
props.setPropertyIfNonNull(HIVE_TABLE_SERDE_PROPERTIES.key(), conf.getString(FlinkOptions.HIVE_SYNC_TABLE_SERDE_PROPERTIES));
props.setPropertyIfNonNull(META_SYNC_PARTITION_FIELDS.key(), String.join(",", FilePathUtils.extractHivePartitionFields(conf)));
props.setPropertyIfNonNull(META_SYNC_PARTITION_EXTRACTOR_CLASS.key(), conf.getString(FlinkOptions.HIVE_SYNC_PARTITION_EXTRACTOR_CLASS_NAME));
+ props.setPropertyIfNonNull(HIVE_USE_JDBC.key(), String.valueOf(conf.getBoolean(FlinkOptions.HIVE_SYNC_USE_JDBC)));
props.setPropertyIfNonNull(META_SYNC_USE_FILE_LISTING_FROM_METADATA.key(), String.valueOf(conf.getBoolean(FlinkOptions.METADATA_ENABLED)));
props.setPropertyIfNonNull(HIVE_IGNORE_EXCEPTIONS.key(), String.valueOf(conf.getBoolean(FlinkOptions.HIVE_SYNC_IGNORE_EXCEPTIONS)));
props.setPropertyIfNonNull(HIVE_SUPPORT_TIMESTAMP_TYPE.key(), String.valueOf(conf.getBoolean(FlinkOptions.HIVE_SYNC_SUPPORT_TIMESTAMP)));
diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/streamer/FlinkStreamerConfig.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/streamer/FlinkStreamerConfig.java
index 1083754ca2..e9574dd52b 100644
--- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/streamer/FlinkStreamerConfig.java
+++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/streamer/FlinkStreamerConfig.java
@@ -320,6 +320,9 @@ public class FlinkStreamerConfig extends Configuration {
@Parameter(names = {"--hive-sync-assume-date-partitioning"}, description = "Assume partitioning is yyyy/mm/dd, default false")
public Boolean hiveSyncAssumeDatePartition = false;
+ @Parameter(names = {"--hive-sync-use-jdbc"}, description = "Use JDBC when hive synchronization is enabled, default true")
+ public Boolean hiveSyncUseJdbc = true;
+
@Parameter(names = {"--hive-sync-auto-create-db"}, description = "Auto create hive database if it does not exists, default true")
public Boolean hiveSyncAutoCreateDb = true;
@@ -416,6 +419,7 @@ public class FlinkStreamerConfig extends Configuration {
conf.setString(FlinkOptions.HIVE_SYNC_PARTITION_FIELDS, config.hiveSyncPartitionFields);
conf.setString(FlinkOptions.HIVE_SYNC_PARTITION_EXTRACTOR_CLASS_NAME, config.hiveSyncPartitionExtractorClass);
conf.setBoolean(FlinkOptions.HIVE_SYNC_ASSUME_DATE_PARTITION, config.hiveSyncAssumeDatePartition);
+ conf.setBoolean(FlinkOptions.HIVE_SYNC_USE_JDBC, config.hiveSyncUseJdbc);
conf.setBoolean(FlinkOptions.HIVE_SYNC_AUTO_CREATE_DB, config.hiveSyncAutoCreateDb);
conf.setBoolean(FlinkOptions.HIVE_SYNC_IGNORE_EXCEPTIONS, config.hiveSyncIgnoreExceptions);
conf.setBoolean(FlinkOptions.HIVE_SYNC_SKIP_RO_SUFFIX, config.hiveSyncSkipRoSuffix);
diff --git a/hudi-kafka-connect/demo/config-sink-hive.json b/hudi-kafka-connect/demo/config-sink-hive.json
index 7c49784cbf..214fd18919 100644
--- a/hudi-kafka-connect/demo/config-sink-hive.json
+++ b/hudi-kafka-connect/demo/config-sink-hive.json
@@ -21,6 +21,7 @@
"hoodie.datasource.hive_sync.table": "huditesttopic",
"hoodie.datasource.hive_sync.partition_fields": "date",
"hoodie.datasource.hive_sync.partition_extractor_class": "org.apache.hudi.hive.MultiPartKeysValueExtractor",
+ "hoodie.datasource.hive_sync.use_jdbc": "false",
"hoodie.datasource.hive_sync.mode": "hms",
"dfs.client.use.datanode.hostname": "true",
"hive.metastore.uris": "thrift://hivemetastore:9083",
diff --git a/hudi-kafka-connect/src/main/java/org/apache/hudi/connect/writers/KafkaConnectConfigs.java b/hudi-kafka-connect/src/main/java/org/apache/hudi/connect/writers/KafkaConnectConfigs.java
index 3b51fddfa8..e4543c692d 100644
--- a/hudi-kafka-connect/src/main/java/org/apache/hudi/connect/writers/KafkaConnectConfigs.java
+++ b/hudi-kafka-connect/src/main/java/org/apache/hudi/connect/writers/KafkaConnectConfigs.java
@@ -172,6 +172,7 @@ public class KafkaConnectConfigs extends HoodieConfig {
public static final String HIVE_URL = "hoodie.datasource.hive_sync.jdbcurl";
public static final String HIVE_PARTITION_FIELDS = "hoodie.datasource.hive_sync.partition_fields";
public static final String HIVE_PARTITION_EXTRACTOR_CLASS = "hoodie.datasource.hive_sync.partition_extractor_class";
+ public static final String HIVE_USE_JDBC = "hoodie.datasource.hive_sync.use_jdbc";
public static final String HIVE_SYNC_MODE = "hoodie.datasource.hive_sync.mode";
public static final String HIVE_AUTO_CREATE_DATABASE = "hoodie.datasource.hive_sync.auto_create_database";
public static final String HIVE_IGNORE_EXCEPTIONS = "hoodie.datasource.hive_sync.ignore_exceptions";
diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala
index feeb572126..6adc66265f 100644
--- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala
+++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala
@@ -437,6 +437,10 @@ object DataSourceWriteOptions {
val HIVE_ASSUME_DATE_PARTITION: ConfigProperty[String] = HoodieSyncConfig.META_SYNC_ASSUME_DATE_PARTITION
@Deprecated
val HIVE_USE_PRE_APACHE_INPUT_FORMAT: ConfigProperty[String] = HiveSyncConfigHolder.HIVE_USE_PRE_APACHE_INPUT_FORMAT
+
+ /** @deprecated Use {@link HIVE_SYNC_MODE} instead of this config from 0.9.0 */
+ @Deprecated
+ val HIVE_USE_JDBC: ConfigProperty[String] = HiveSyncConfigHolder.HIVE_USE_JDBC
@Deprecated
val HIVE_AUTO_CREATE_DATABASE: ConfigProperty[String] = HiveSyncConfigHolder.HIVE_AUTO_CREATE_DATABASE
@Deprecated
@@ -496,6 +500,9 @@ object DataSourceWriteOptions {
/** @deprecated Use {@link HIVE_USE_PRE_APACHE_INPUT_FORMAT} and its methods instead */
@Deprecated
val HIVE_USE_PRE_APACHE_INPUT_FORMAT_OPT_KEY = HiveSyncConfigHolder.HIVE_USE_PRE_APACHE_INPUT_FORMAT.key()
+ /** @deprecated Use {@link HIVE_USE_JDBC} and its methods instead */
+ @Deprecated
+ val HIVE_USE_JDBC_OPT_KEY = HiveSyncConfigHolder.HIVE_USE_JDBC.key()
/** @deprecated Use {@link HIVE_AUTO_CREATE_DATABASE} and its methods instead */
@Deprecated
val HIVE_AUTO_CREATE_DATABASE_OPT_KEY = HiveSyncConfigHolder.HIVE_AUTO_CREATE_DATABASE.key()
@@ -686,6 +693,9 @@ object DataSourceWriteOptions {
val DEFAULT_HIVE_ASSUME_DATE_PARTITION_OPT_VAL = HoodieSyncConfig.META_SYNC_ASSUME_DATE_PARTITION.defaultValue()
@Deprecated
val DEFAULT_USE_PRE_APACHE_INPUT_FORMAT_OPT_VAL = "false"
+ /** @deprecated Use {@link HIVE_USE_JDBC} and its methods instead */
+ @Deprecated
+ val DEFAULT_HIVE_USE_JDBC_OPT_VAL = HiveSyncConfigHolder.HIVE_USE_JDBC.defaultValue()
/** @deprecated Use {@link HIVE_AUTO_CREATE_DATABASE} and its methods instead */
@Deprecated
val DEFAULT_HIVE_AUTO_CREATE_DATABASE_OPT_KEY = HiveSyncConfigHolder.HIVE_AUTO_CREATE_DATABASE.defaultValue()
diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieWriterUtils.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieWriterUtils.scala
index f6473c2b89..f9d8a60004 100644
--- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieWriterUtils.scala
+++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieWriterUtils.scala
@@ -78,6 +78,7 @@ object HoodieWriterUtils {
hoodieConfig.setDefaultValue(HoodieSyncConfig.META_SYNC_PARTITION_FIELDS)
hoodieConfig.setDefaultValue(HoodieSyncConfig.META_SYNC_PARTITION_EXTRACTOR_CLASS)
hoodieConfig.setDefaultValue(HIVE_STYLE_PARTITIONING)
+ hoodieConfig.setDefaultValue(HiveSyncConfigHolder.HIVE_USE_JDBC)
hoodieConfig.setDefaultValue(HiveSyncConfigHolder.HIVE_CREATE_MANAGED_TABLE)
hoodieConfig.setDefaultValue(HiveSyncConfigHolder.HIVE_SYNC_AS_DATA_SOURCE_TABLE)
hoodieConfig.setDefaultValue(ASYNC_COMPACT_ENABLE)
diff --git a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncConfig.java b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncConfig.java
index cdb192f9fe..6f2cc50a0a 100644
--- a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncConfig.java
+++ b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncConfig.java
@@ -45,6 +45,7 @@ import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_TABLE_PROPERTIES;
import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_TABLE_SERDE_PROPERTIES;
import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_URL;
import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_USER;
+import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_USE_JDBC;
import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_USE_PRE_APACHE_INPUT_FORMAT;
import static org.apache.hudi.hive.HiveSyncConfigHolder.METASTORE_URIS;
@@ -94,6 +95,9 @@ public class HiveSyncConfig extends HoodieSyncConfig {
+ "com.uber.hoodie to org.apache.hudi. Stop using this after you migrated the table definition to "
+ "org.apache.hudi input format.")
public Boolean usePreApacheInputFormat;
+ @Deprecated
+ @Parameter(names = {"--use-jdbc"}, description = "Hive jdbc connect url")
+ public Boolean useJdbc;
@Parameter(names = {"--metastore-uris"}, description = "Hive metastore uris")
public String metastoreUris;
@Parameter(names = {"--sync-mode"}, description = "Mode to choose for Hive ops. Valid values are hms,glue,jdbc and hiveql")
@@ -138,6 +142,7 @@ public class HiveSyncConfig extends HoodieSyncConfig {
props.setPropertyIfNonNull(HIVE_PASS.key(), hivePass);
props.setPropertyIfNonNull(HIVE_URL.key(), jdbcUrl);
props.setPropertyIfNonNull(HIVE_USE_PRE_APACHE_INPUT_FORMAT.key(), usePreApacheInputFormat);
+ props.setPropertyIfNonNull(HIVE_USE_JDBC.key(), useJdbc);
props.setPropertyIfNonNull(HIVE_SYNC_MODE.key(), syncMode);
props.setPropertyIfNonNull(METASTORE_URIS.key(), metastoreUris);
props.setPropertyIfNonNull(HIVE_AUTO_CREATE_DATABASE.key(), autoCreateDatabase);
diff --git a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncConfigHolder.java b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncConfigHolder.java
index 8c14cdfe29..3877782c92 100644
--- a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncConfigHolder.java
+++ b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncConfigHolder.java
@@ -52,6 +52,15 @@ public class HiveSyncConfigHolder {
.withDocumentation("Flag to choose InputFormat under com.uber.hoodie package instead of org.apache.hudi package. "
+ "Use this when you are in the process of migrating from "
+ "com.uber.hoodie to org.apache.hudi. Stop using this after you migrated the table definition to org.apache.hudi input format");
+ /**
+ * @deprecated Use {@link #HIVE_SYNC_MODE} instead of this config from 0.9.0
+ */
+ @Deprecated
+ public static final ConfigProperty<String> HIVE_USE_JDBC = ConfigProperty
+ .key("hoodie.datasource.hive_sync.use_jdbc")
+ .defaultValue("true")
+ .deprecatedAfter("0.9.0")
+ .withDocumentation("Use JDBC when hive synchronization is enabled");
public static final ConfigProperty<String> METASTORE_URIS = ConfigProperty
.key("hoodie.datasource.hive_sync.metastore.uris")
.defaultValue("thrift://localhost:9083")
@@ -100,7 +109,7 @@ public class HiveSyncConfigHolder {
.withDocumentation("The number of partitions one batch when synchronous partitions to hive.");
public static final ConfigProperty<String> HIVE_SYNC_MODE = ConfigProperty
.key("hoodie.datasource.hive_sync.mode")
- .defaultValue("jdbc")
+ .noDefaultValue()
.withDocumentation("Mode to choose for Hive ops. Valid values are hms, jdbc and hiveql.");
public static final ConfigProperty<Boolean> HIVE_SYNC_BUCKET_SYNC = ConfigProperty
.key("hoodie.datasource.hive_sync.bucket_sync")
diff --git a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HoodieHiveSyncClient.java b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HoodieHiveSyncClient.java
index 26ba4ae8e1..d5a85adcba 100644
--- a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HoodieHiveSyncClient.java
+++ b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HoodieHiveSyncClient.java
@@ -21,6 +21,7 @@ package org.apache.hudi.hive;
import org.apache.hudi.common.table.TableSchemaResolver;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.util.Option;
+import org.apache.hudi.common.util.StringUtils;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.hive.ddl.DDLExecutor;
import org.apache.hudi.hive.ddl.HMSDDLExecutor;
@@ -48,6 +49,7 @@ import java.util.stream.Collectors;
import static org.apache.hudi.hadoop.utils.HoodieHiveUtils.GLOBALLY_CONSISTENT_READ_TIMESTAMP;
import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_SYNC_MODE;
+import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_USE_JDBC;
import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_DATABASE_NAME;
import static org.apache.hudi.sync.common.util.TableUtils.tableId;
@@ -70,19 +72,23 @@ public class HoodieHiveSyncClient extends HoodieSyncClient {
// Support JDBC, HiveQL and metastore based implementations for backwards compatibility. Future users should
// disable jdbc and depend on metastore client for all hive registrations
try {
- HiveSyncMode syncMode = HiveSyncMode.of(config.getStringOrDefault(HIVE_SYNC_MODE));
- switch (syncMode) {
- case HMS:
- ddlExecutor = new HMSDDLExecutor(config);
- break;
- case HIVEQL:
- ddlExecutor = new HiveQueryDDLExecutor(config);
- break;
- case JDBC:
- ddlExecutor = new JDBCExecutor(config);
- break;
- default:
- throw new HoodieHiveSyncException("Invalid sync mode given " + config.getString(HIVE_SYNC_MODE));
+ if (!StringUtils.isNullOrEmpty(config.getString(HIVE_SYNC_MODE))) {
+ HiveSyncMode syncMode = HiveSyncMode.of(config.getString(HIVE_SYNC_MODE));
+ switch (syncMode) {
+ case HMS:
+ ddlExecutor = new HMSDDLExecutor(config);
+ break;
+ case HIVEQL:
+ ddlExecutor = new HiveQueryDDLExecutor(config);
+ break;
+ case JDBC:
+ ddlExecutor = new JDBCExecutor(config);
+ break;
+ default:
+ throw new HoodieHiveSyncException("Invalid sync mode given " + config.getString(HIVE_SYNC_MODE));
+ }
+ } else {
+ ddlExecutor = config.getBoolean(HIVE_USE_JDBC) ? new JDBCExecutor(config) : new HiveQueryDDLExecutor(config);
}
this.client = Hive.get(config.getHiveConf()).getMSC();
} catch (Exception e) {
diff --git a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/HiveQueryDDLExecutor.java b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/HiveQueryDDLExecutor.java
index 619a417c33..90efd2701c 100644
--- a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/HiveQueryDDLExecutor.java
+++ b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/HiveQueryDDLExecutor.java
@@ -47,7 +47,7 @@ import java.util.stream.Collectors;
import static org.apache.hudi.sync.common.util.TableUtils.tableId;
/**
- * This class offers DDL executor backed by the HiveQL Driver.
+ * This class offers DDL executor backed by the hive.ql Driver This class preserves the old useJDBC = false way of doing things.
*/
public class HiveQueryDDLExecutor extends QueryBasedDDLExecutor {
diff --git a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/JDBCExecutor.java b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/JDBCExecutor.java
index 1fc8f082d8..2673e46a9f 100644
--- a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/JDBCExecutor.java
+++ b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/JDBCExecutor.java
@@ -43,7 +43,7 @@ import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_USER;
import static org.apache.hudi.hive.util.HiveSchemaUtil.HIVE_ESCAPE_CHARACTER;
/**
- * This class offers DDL executor backed by the jdbc.
+ * This class offers DDL executor backed by the jdbc This class preserves the old useJDBC = true way of doing things.
*/
public class JDBCExecutor extends QueryBasedDDLExecutor {
diff --git a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/replication/HiveSyncGlobalCommitParams.java b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/replication/HiveSyncGlobalCommitParams.java
index b8b2de73e7..58188f578e 100644
--- a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/replication/HiveSyncGlobalCommitParams.java
+++ b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/replication/HiveSyncGlobalCommitParams.java
@@ -32,7 +32,6 @@ import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;
-import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_SYNC_MODE;
import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_URL;
import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_BASE_PATH;
@@ -93,7 +92,6 @@ public class HiveSyncGlobalCommitParams {
String jdbcUrl = forRemote ? loadedProps.getProperty(REMOTE_HIVE_SERVER_JDBC_URLS)
: loadedProps.getProperty(LOCAL_HIVE_SERVER_JDBC_URLS, loadedProps.getProperty(HIVE_URL.key()));
props.setPropertyIfNonNull(HIVE_URL.key(), jdbcUrl);
- props.setProperty(HIVE_SYNC_MODE.key(), "jdbc");
LOG.info("building hivesync config forRemote: " + forRemote + " " + jdbcUrl + " "
+ basePath);
return props;
diff --git a/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestHiveSyncTool.java b/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestHiveSyncTool.java
index 7abeed480c..072feeb663 100644
--- a/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestHiveSyncTool.java
+++ b/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestHiveSyncTool.java
@@ -120,7 +120,7 @@ public class TestHiveSyncTool {
return opts;
}
- // (useSchemaFromCommitMetadata, syncAsDataSource, syncMode)
+ // (useJdbc, useSchemaFromCommitMetadata, syncAsDataSource)
private static Iterable<Object[]> syncDataSourceTableParams() {
List<Object[]> opts = new ArrayList<>();
for (Object mode : SYNC_MODES) {
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieDropPartitionsTool.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieDropPartitionsTool.java
index fc16bcaa1b..95e84e413c 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieDropPartitionsTool.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieDropPartitionsTool.java
@@ -174,6 +174,8 @@ public class HoodieDropPartitionsTool implements Serializable {
public String hiveURL = "jdbc:hive2://localhost:10000";
@Parameter(names = {"--hive-partition-field"}, description = "Comma separated list of field in the hive table to use for determining hive partition columns.", required = false)
public String hivePartitionsField = "";
+ @Parameter(names = {"--hive-sync-use-jdbc"}, description = "Use JDBC when hive synchronization.", required = false)
+ public boolean hiveUseJdbc = true;
@Parameter(names = {"--hive-metastore-uris"}, description = "hive meta store uris to use.", required = false)
public String hiveHMSUris = null;
@Parameter(names = {"--hive-sync-mode"}, description = "Mode to choose for Hive ops. Valid values are hms, jdbc and hiveql.", required = false)
@@ -213,7 +215,7 @@ public class HoodieDropPartitionsTool implements Serializable {
+ " --hive-pass-word " + "Masked" + ", \n"
+ " --hive-jdbc-url " + hiveURL + ", \n"
+ " --hive-partition-field " + hivePartitionsField + ", \n"
- + " --hive-sync-mode " + hiveSyncMode + ", \n"
+ + " --hive-sync-use-jdbc " + hiveUseJdbc + ", \n"
+ " --hive-metastore-uris " + hiveHMSUris + ", \n"
+ " --hive-sync-ignore-exception " + hiveSyncIgnoreException + ", \n"
+ " --hive-partition-value-extractor-class " + partitionValueExtractorClass + ", \n"
@@ -245,7 +247,7 @@ public class HoodieDropPartitionsTool implements Serializable {
&& Objects.equals(hivePassWord, config.hivePassWord)
&& Objects.equals(hiveURL, config.hiveURL)
&& Objects.equals(hivePartitionsField, config.hivePartitionsField)
- && Objects.equals(hiveSyncMode, config.hiveSyncMode)
+ && Objects.equals(hiveUseJdbc, config.hiveUseJdbc)
&& Objects.equals(hiveHMSUris, config.hiveHMSUris)
&& Objects.equals(partitionValueExtractorClass, config.partitionValueExtractorClass)
&& Objects.equals(sparkMaster, config.sparkMaster)
@@ -259,7 +261,7 @@ public class HoodieDropPartitionsTool implements Serializable {
public int hashCode() {
return Objects.hash(basePath, runningMode, tableName, partitions, instantTime,
syncToHive, hiveDataBase, hiveTableName, hiveUserName, hivePassWord, hiveURL,
- hivePartitionsField, hiveSyncMode, hiveHMSUris, partitionValueExtractorClass,
+ hivePartitionsField, hiveUseJdbc, hiveHMSUris, partitionValueExtractorClass,
sparkMaster, sparkMemory, propsFilePath, configs, hiveSyncIgnoreException, help);
}
}
@@ -348,6 +350,7 @@ public class HoodieDropPartitionsTool implements Serializable {
props.put(DataSourceWriteOptions.HIVE_PASS().key(), cfg.hivePassWord);
props.put(DataSourceWriteOptions.HIVE_URL().key(), cfg.hiveURL);
props.put(DataSourceWriteOptions.HIVE_PARTITION_FIELDS().key(), cfg.hivePartitionsField);
+ props.put(DataSourceWriteOptions.HIVE_USE_JDBC().key(), cfg.hiveUseJdbc);
props.put(DataSourceWriteOptions.HIVE_SYNC_MODE().key(), cfg.hiveSyncMode);
props.put(DataSourceWriteOptions.HIVE_IGNORE_EXCEPTIONS().key(), cfg.hiveSyncIgnoreException);
props.put(DataSourceWriteOptions.HIVE_PASS().key(), cfg.hivePassWord);