You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by xx...@apache.org on 2020/09/08 10:23:04 UTC
[kylin] 01/01: KYLIN-4660 Refine kylin-default.properties
This is an automated email from the ASF dual-hosted git repository.
xxyu pushed a commit to branch kylin-on-parquet-v2
in repository https://gitbox.apache.org/repos/asf/kylin.git
commit bc94f4978bc3009a5ad7853c2c6f86c8f2f1c39d
Author: XiaoxiangYu <xx...@apache.org>
AuthorDate: Tue Sep 8 18:19:49 2020 +0800
KYLIN-4660 Refine kylin-default.properties
---
build/bin/download-spark.sh | 2 +-
.../src/main/resources/kylin-defaults.properties | 31 +++++++++++++++++-----
2 files changed, 25 insertions(+), 8 deletions(-)
diff --git a/build/bin/download-spark.sh b/build/bin/download-spark.sh
index 906a23a..03107c4 100755
--- a/build/bin/download-spark.sh
+++ b/build/bin/download-spark.sh
@@ -57,7 +57,7 @@ unalias md5cmd
echo "Start to decompress package"
tar -zxvf spark-${spark_version}-bin-hadoop2.7.tgz || { exit 1; }
-mv spark-${spark_version}-bin-hadoop2.7.tgz spark
+mv spark-${spark_version}-bin-hadoop2.7 spark
# Remove unused components in Spark
rm -rf spark/lib/spark-examples-*
diff --git a/core-common/src/main/resources/kylin-defaults.properties b/core-common/src/main/resources/kylin-defaults.properties
index 4eb2f17..2dfc5f6 100644
--- a/core-common/src/main/resources/kylin-defaults.properties
+++ b/core-common/src/main/resources/kylin-defaults.properties
@@ -17,7 +17,8 @@
### METADATA | ENV ###
-# The metadata store has two implementations(RDBMS/HBase), while RDBMS is recommended
+# The metadata store has two implementations (RDBMS/HBase); RDBMS is recommended in Kylin 4.X
+# Please refer to https://cwiki.apache.org/confluence/display/KYLIN/How+to+use+HBase+metastore+in+Kylin+4.0 if you prefer HBase
kylin.metadata.url=kylin_metadata@jdbc,url=jdbc:mysql://localhost:3306/kylin,username=XXXX,password=XXXXXX,maxActive=10,maxIdle=10
# metadata cache sync retry times
@@ -91,6 +92,15 @@ kylin.web.default-time-filter=1
# When users deploy Kylin on AWS EMR and Glue is used as the external metadata store, use gluecatalog instead
kylin.source.hive.metadata-type=hcatalog
+# Hive client, valid value [cli, beeline]
+kylin.source.hive.client=cli
+
+# Absolute path to beeline shell, can be set to spark beeline instead of the default hive beeline on PATH
+kylin.source.hive.beeline-shell=beeline
+
+# Hive database name for putting the intermediate flat tables
+kylin.source.hive.database-for-flat-table=default
+
### STORAGE ###
# The storage for final cube file in hbase
@@ -225,7 +235,7 @@ kylin.security.saml.context-server-name=hostname
kylin.security.saml.context-server-port=443
kylin.security.saml.context-path=/kylin
-### SPARK BUILD/MERGE ENGINE CONFIGS ###
+### SPARK BUILD ENGINE CONFIGS ###
# Hadoop conf folder, will export this as "HADOOP_CONF_DIR" to run spark-submit
# This must contain site xmls of core, yarn, hive, and hbase in one folder
@@ -257,20 +267,27 @@ kylin.engine.spark-conf.spark.hadoop.yarn.timeline-service.enabled=false
#kylin.engine.spark-conf.spark.yarn.am.extraJavaOptions=-Dhdp.version=current
#kylin.engine.spark-conf.spark.executor.extraJavaOptions=-Dhdp.version=current
-### SPARK QUERY ENGINE CONFIGS ###
+### SPARK QUERY ENGINE CONFIGS (a.k.a. Sparder Context) ###
+# Enlarge cores and memory to improve query performance in a production env; please check https://cwiki.apache.org/confluence/display/KYLIN/User+Manual+4.X
+
kylin.query.spark-conf.spark.master=yarn
#kylin.query.spark-conf.spark.submit.deployMode=client
kylin.query.spark-conf.spark.driver.cores=1
kylin.query.spark-conf.spark.driver.memory=4G
kylin.query.spark-conf.spark.driver.memoryOverhead=1G
-kylin.query.spark-conf.spark.executor.cores=5
-kylin.query.spark-conf.spark.executor.instances=4
-kylin.query.spark-conf.spark.executor.memory=20G
-kylin.query.spark-conf.spark.executor.memoryOverhead=2G
+kylin.query.spark-conf.spark.executor.cores=1
+kylin.query.spark-conf.spark.executor.instances=1
+kylin.query.spark-conf.spark.executor.memory=4G
+kylin.query.spark-conf.spark.executor.memoryOverhead=1G
kylin.query.spark-conf.spark.serializer=org.apache.spark.serializer.JavaSerializer
#kylin.query.spark-conf.spark.sql.shuffle.partitions=40
#kylin.query.spark-conf.spark.yarn.jars=hdfs://localhost:9000/spark2_jars/*
+# uncomment for HDP
+#kylin.query.spark-conf.spark.driver.extraJavaOptions=-Dhdp.version=current
+#kylin.query.spark-conf.spark.yarn.am.extraJavaOptions=-Dhdp.version=current
+#kylin.query.spark-conf.spark.executor.extraJavaOptions=-Dhdp.version=current
+
### QUERY PUSH DOWN ###
#kylin.query.pushdown.runner-class-name=org.apache.kylin.query.pushdown.PushDownRunnerSparkImpl