You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by xx...@apache.org on 2020/09/08 10:23:04 UTC
[kylin] 01/01: KYLIN-4660 Refine kylin-default.properties

This is an automated email from the ASF dual-hosted git repository.

xxyu pushed a commit to branch kylin-on-parquet-v2
in repository https://gitbox.apache.org/repos/asf/kylin.git

commit bc94f4978bc3009a5ad7853c2c6f86c8f2f1c39d
Author: XiaoxiangYu <xx...@apache.org>
AuthorDate: Tue Sep 8 18:19:49 2020 +0800

    KYLIN-4660 Refine kylin-default.properties
---
 build/bin/download-spark.sh                        |  2 +-
 .../src/main/resources/kylin-defaults.properties   | 31 +++++++++++++++++-----
 2 files changed, 25 insertions(+), 8 deletions(-)

diff --git a/build/bin/download-spark.sh b/build/bin/download-spark.sh
index 906a23a..03107c4 100755
--- a/build/bin/download-spark.sh
+++ b/build/bin/download-spark.sh
@@ -57,7 +57,7 @@ unalias md5cmd
 
 echo "Start to decompress package"
 tar -zxvf spark-${spark_version}-bin-hadoop2.7.tgz  || { exit 1; }
-mv spark-${spark_version}-bin-hadoop2.7.tgz spark
+mv spark-${spark_version}-bin-hadoop2.7 spark
 
 # Remove unused components in Spark
 rm -rf spark/lib/spark-examples-*
diff --git a/core-common/src/main/resources/kylin-defaults.properties b/core-common/src/main/resources/kylin-defaults.properties
index 4eb2f17..2dfc5f6 100644
--- a/core-common/src/main/resources/kylin-defaults.properties
+++ b/core-common/src/main/resources/kylin-defaults.properties
@@ -17,7 +17,8 @@
 
 ### METADATA | ENV ###
 
-# The metadata store has two implementations(RDBMS/HBase), while RDBMS is recommended
+# The metadata store has two implementations (RDBMS and HBase); RDBMS is the recommended one in Kylin 4.X.
+# Please refer to https://cwiki.apache.org/confluence/display/KYLIN/How+to+use+HBase+metastore+in+Kylin+4.0 if you prefer HBase
 kylin.metadata.url=kylin_metadata@jdbc,url=jdbc:mysql://localhost:3306/kylin,username=XXXX,password=XXXXXX,maxActive=10,maxIdle=10
 
 # metadata cache sync retry times
@@ -91,6 +92,15 @@ kylin.web.default-time-filter=1
 # When user deploy kylin on AWS EMR and Glue is used as external metadata, use gluecatalog instead
 kylin.source.hive.metadata-type=hcatalog
 
+# Hive client, valid values: [cli, beeline]
+kylin.source.hive.client=cli
+
+# Absolute path to beeline shell, can be set to spark beeline instead of the default hive beeline on PATH
+kylin.source.hive.beeline-shell=beeline
+
+# Hive database name for putting the intermediate flat tables
+kylin.source.hive.database-for-flat-table=default
+
 ### STORAGE ###
 
 # The storage for final cube file in hbase
@@ -225,7 +235,7 @@ kylin.security.saml.context-server-name=hostname
 kylin.security.saml.context-server-port=443
 kylin.security.saml.context-path=/kylin
 
-### SPARK BUILD/MERGE ENGINE CONFIGS ###
+### SPARK BUILD ENGINE CONFIGS ###
 
 # Hadoop conf folder, will export this as "HADOOP_CONF_DIR" to run spark-submit
 # This must contain site xmls of core, yarn, hive, and hbase in one folder
@@ -257,20 +267,27 @@ kylin.engine.spark-conf.spark.hadoop.yarn.timeline-service.enabled=false
 #kylin.engine.spark-conf.spark.yarn.am.extraJavaOptions=-Dhdp.version=current
 #kylin.engine.spark-conf.spark.executor.extraJavaOptions=-Dhdp.version=current
 
-### SPARK QUERY ENGINE CONFIGS ###
+### SPARK QUERY ENGINE CONFIGS (a.k.a. Sparder Context) ###
+# Enlarge cores and memory to improve query performance in a production environment. For details, please check https://cwiki.apache.org/confluence/display/KYLIN/User+Manual+4.X
+
 kylin.query.spark-conf.spark.master=yarn
 #kylin.query.spark-conf.spark.submit.deployMode=client
 kylin.query.spark-conf.spark.driver.cores=1
 kylin.query.spark-conf.spark.driver.memory=4G
 kylin.query.spark-conf.spark.driver.memoryOverhead=1G
-kylin.query.spark-conf.spark.executor.cores=5
-kylin.query.spark-conf.spark.executor.instances=4
-kylin.query.spark-conf.spark.executor.memory=20G
-kylin.query.spark-conf.spark.executor.memoryOverhead=2G
+kylin.query.spark-conf.spark.executor.cores=1
+kylin.query.spark-conf.spark.executor.instances=1
+kylin.query.spark-conf.spark.executor.memory=4G
+kylin.query.spark-conf.spark.executor.memoryOverhead=1G
 kylin.query.spark-conf.spark.serializer=org.apache.spark.serializer.JavaSerializer
 #kylin.query.spark-conf.spark.sql.shuffle.partitions=40
 #kylin.query.spark-conf.spark.yarn.jars=hdfs://localhost:9000/spark2_jars/*
 
+# uncomment for HDP
+#kylin.query.spark-conf.spark.driver.extraJavaOptions=-Dhdp.version=current
+#kylin.query.spark-conf.spark.yarn.am.extraJavaOptions=-Dhdp.version=current
+#kylin.query.spark-conf.spark.executor.extraJavaOptions=-Dhdp.version=current
+
 ### QUERY PUSH DOWN ###
 
 #kylin.query.pushdown.runner-class-name=org.apache.kylin.query.pushdown.PushDownRunnerSparkImpl