You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by xx...@apache.org on 2022/03/14 03:48:29 UTC

[kylin] branch kylin4_on_cloud updated: Fix 0314 (#1833)

This is an automated email from the ASF dual-hosted git repository.

xxyu pushed a commit to branch kylin4_on_cloud
in repository https://gitbox.apache.org/repos/asf/kylin.git


The following commit(s) were added to refs/heads/kylin4_on_cloud by this push:
     new 85e9f5a  Fix 0314 (#1833)
85e9f5a is described below

commit 85e9f5a7970465ba15ad578b8ac693770f45c34b
Author: Tengting Xu <34...@users.noreply.github.com>
AuthorDate: Mon Mar 14 11:47:30 2022 +0800

    Fix 0314 (#1833)
    
    * # switch on enable MDX
    
    * # smaller default resources for query and job engine
    
    * # update document when using glue
---
 backup/properties/default/kylin.properties         |  8 ++++----
 .../properties/templates/kylin.properties.template |  8 ++++----
 backup/scripts/prepare-ec2-env-for-kylin4.sh       | 23 ++++++++++++++++++----
 .../ec2-cluster-kylin4-template.yaml               |  9 ++++++++-
 cloudformation_templates/ec2-cluster-kylin4.yaml   | 10 +++++++++-
 kylin_configs.yaml                                 |  5 +++++
 readme/prerequisites.md                            |  2 +-
 7 files changed, 50 insertions(+), 15 deletions(-)

diff --git a/backup/properties/default/kylin.properties b/backup/properties/default/kylin.properties
index ef5ef94..aecaa8f 100644
--- a/backup/properties/default/kylin.properties
+++ b/backup/properties/default/kylin.properties
@@ -27,9 +27,9 @@ kylin.engine.spark-conf.spark.history.fs.logDirectory=s3a:/{{ S3_BUCKET_PATH }}/
 kylin.engine.spark-conf.spark.master=spark://{{ SPARK_MASTER }}:7077
 
 kylin.cube.cubeplanner.enabled=false
-kylin.engine.spark-conf.spark.executor.cores=3
-kylin.engine.spark-conf.spark.executor.instances=20
-kylin.engine.spark-conf.spark.executor.memory=12GB
+kylin.engine.spark-conf.spark.executor.cores=2
+kylin.engine.spark-conf.spark.executor.instances=4
+kylin.engine.spark-conf.spark.executor.memory=7GB
 kylin.engine.spark-conf.spark.executor.memoryOverhead=1GB
 
 ### support prometheus
@@ -47,7 +47,7 @@ kylin.query.spark-conf.spark.master=spark://{{ SPARK_MASTER }}:7077
 kylin.query.spark-conf.spark.driver.cores=1
 kylin.query.spark-conf.spark.driver.memory=8GB
 kylin.query.spark-conf.spark.driver.memoryOverhead=1G
-kylin.query.spark-conf.spark.executor.instances=30
+kylin.query.spark-conf.spark.executor.instances=2
 kylin.query.spark-conf.spark.executor.cores=2
 kylin.query.spark-conf.spark.executor.memory=7G
 kylin.query.spark-conf.spark.executor.memoryOverhead=1G
diff --git a/backup/properties/templates/kylin.properties.template b/backup/properties/templates/kylin.properties.template
index ef5ef94..aecaa8f 100644
--- a/backup/properties/templates/kylin.properties.template
+++ b/backup/properties/templates/kylin.properties.template
@@ -27,9 +27,9 @@ kylin.engine.spark-conf.spark.history.fs.logDirectory=s3a:/{{ S3_BUCKET_PATH }}/
 kylin.engine.spark-conf.spark.master=spark://{{ SPARK_MASTER }}:7077
 
 kylin.cube.cubeplanner.enabled=false
-kylin.engine.spark-conf.spark.executor.cores=3
-kylin.engine.spark-conf.spark.executor.instances=20
-kylin.engine.spark-conf.spark.executor.memory=12GB
+kylin.engine.spark-conf.spark.executor.cores=2
+kylin.engine.spark-conf.spark.executor.instances=4
+kylin.engine.spark-conf.spark.executor.memory=7GB
 kylin.engine.spark-conf.spark.executor.memoryOverhead=1GB
 
 ### support prometheus
@@ -47,7 +47,7 @@ kylin.query.spark-conf.spark.master=spark://{{ SPARK_MASTER }}:7077
 kylin.query.spark-conf.spark.driver.cores=1
 kylin.query.spark-conf.spark.driver.memory=8GB
 kylin.query.spark-conf.spark.driver.memoryOverhead=1G
-kylin.query.spark-conf.spark.executor.instances=30
+kylin.query.spark-conf.spark.executor.instances=2
 kylin.query.spark-conf.spark.executor.cores=2
 kylin.query.spark-conf.spark.executor.memory=7G
 kylin.query.spark-conf.spark.executor.memoryOverhead=1G
diff --git a/backup/scripts/prepare-ec2-env-for-kylin4.sh b/backup/scripts/prepare-ec2-env-for-kylin4.sh
index cdad91d..bfdfdfa 100644
--- a/backup/scripts/prepare-ec2-env-for-kylin4.sh
+++ b/backup/scripts/prepare-ec2-env-for-kylin4.sh
@@ -114,6 +114,8 @@ while [[ $# != 0 ]]; do
     MDX_VERSION=$2
   elif [[ $1 == "--support-glue" ]]; then
     SUPPORT_GLUE=$2
+  elif [[ $1 == "--enable-mdx" ]]; then
+    ENABLE_MDX=$2
   else
     help
   fi
@@ -153,6 +155,10 @@ if [[ -z "$MDX_DATABASE" ]]; then
   MDX_DATABASE=kylin_mdx
 fi
 
+if [[ -z "$ENABLE_MDX" ]]; then
+  ENABLE_MDX=false
+fi
+
 LOCAL_CACHE_DIR=/home/ec2-user/ssd
 
 ### File name
@@ -426,7 +432,7 @@ function init_hive() {
   fi
 
   if [[ $SUPPORT_GLUE == "true" ]]; then
-      cat <<EOF >${HIVE_HOME}/conf/hive-site.xml
+    cat <<EOF >${HIVE_HOME}/conf/hive-site.xml
 <?xml version="1.0" encoding="UTF-8" standalone="no"?>
 <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
 <configuration>
@@ -661,7 +667,7 @@ function init_kylin() {
   aws s3 cp ${PATH_TO_BUCKET}/properties/${CLUSTER_NUM}/kylin.properties ${KYLIN_HOME}/conf/kylin.properties --region ${CURRENT_REGION}
 
   if [[ ${LOCAL_CACHE_SOFT_AFFINITY} == "true" ]]; then
-    cat <<EOF >> ${KYLIN_HOME}/conf/kylin.properties
+    cat <<EOF >>${KYLIN_HOME}/conf/kylin.properties
 kylin.query.spark-conf.spark.executor.extraJavaOptions=-Dhdp.version=current -Dlog4j.configuration=spark-executor-log4j.properties -Dlog4j.debug -Dkylin.hdfs.working.dir=\${kylin.env.hdfs-working-dir} -Dkylin.metadata.identifier=\${kylin.metadata.url.identifier} -Dkylin.spark.category=sparder -Dkylin.spark.identifier={{APP_ID}} -Dalluxio.user.client.cache.dir=${LOCAL_CACHE_DIR}/alluxio-cache-{{APP_ID}}-{{EXECUTOR_ID}}
 
 kylin.query.spark-conf.spark.driver.extraJavaOptions=-Dhdp.version=current -Dalluxio.user.client.cache.dir=${LOCAL_CACHE_DIR}/alluxio-cache-driver
@@ -702,6 +708,9 @@ EOF
 }
 
 function prepare_mdx() {
+  if [[ ${ENABLE_MDX} == "false" ]]; then
+    return
+  fi
   logging info "Preparing MDX ..."
 
   if [[ -f ${HOME_DIR}/.prepared_mdx ]]; then
@@ -736,6 +745,9 @@ function prepare_mdx() {
 }
 
 function init_mdx() {
+  if [[ ${ENABLE_MDX} == "false" ]]; then
+    return
+  fi
   if [[ -f ${HOME_DIR}/.inited_mdx ]]; then
     logging warn "MDX already inited ..."
     return
@@ -745,7 +757,7 @@ function init_mdx() {
     aws s3 cp ${PATH_TO_BUCKET}/jars/mysql-connector-java-8.0.24.jar $MDX_HOME/semantic-mdx/lib/ --region ${CURRENT_REGION}
   fi
 
-  if [[ ! -f  $MDX_HOME/semantic-mdx/lib/kylin-jdbc-4.0.0-SNAPSHOT.jar ]]; then
+  if [[ ! -f $MDX_HOME/semantic-mdx/lib/kylin-jdbc-4.0.0-SNAPSHOT.jar ]]; then
     logging info "Copy jdbc driver from $KYLIN_HOME to $MDX_HOME/semantic-mdx/lib/ ..."
     cp -f $KYLIN_HOME/lib/kylin-jdbc-*.jar $MDX_HOME/semantic-mdx/lib/
   fi
@@ -812,7 +824,7 @@ function start_kylin() {
 
 function sample_for_kylin() {
   if [[ $SUPPORT_GLUE == "true" ]]; then
-      return
+    return
   fi
 
   if [[ ${IS_SCALED} == "false" ]]; then
@@ -832,6 +844,9 @@ function restart_kylin() {
 }
 
 function start_mdx() {
+  if [[ ${ENABLE_MDX} == "false" ]]; then
+    return
+  fi
   ${MDX_HOME}/bin/mdx.sh start
 }
 
diff --git a/cloudformation_templates/ec2-cluster-kylin4-template.yaml b/cloudformation_templates/ec2-cluster-kylin4-template.yaml
index 57cced5..5b14a18 100644
--- a/cloudformation_templates/ec2-cluster-kylin4-template.yaml
+++ b/cloudformation_templates/ec2-cluster-kylin4-template.yaml
@@ -153,6 +153,9 @@ Parameters:
   SupportGlue:
     Type: String
     Default: false
+  EnableMDX:
+    Type: String
+    Default: false
 
 Mappings:
   AWSRegionArch2AMI:
@@ -260,7 +263,7 @@ Resources:
               #!/bin/bash -xe
               cd /home/ec2-user
               aws s3 cp ${PrivateBucketFullPath}/scripts/${PrivateKylin4ScriptFileName} . --region ${PrivateRegion}
-              bash ${PrivateKylin4ScriptFileName} --bucket-url ${PrivateBucketPath} --region ${PrivateRegion} --db-host ${PrivateDbHost} --db-password ${PrivateDbPass} --db-user ${PrivateDbUser} --db-port ${PrivateDbPort} --local-soft ${PrivateLocalCacheSoftAffinity} --cluster-num ${PrivateClusterNum} --is-scaled ${PrivateIsScaled} --hadoop-version ${PrivateHadoopVersion} --spark-version ${PrivateSparkVersion} --hive-version ${PrivateHiveVersion} --kylin-version ${PrivateKylinVersion} -- [...]
+              bash ${PrivateKylin4ScriptFileName} --bucket-url ${PrivateBucketPath} --region ${PrivateRegion} --db-host ${PrivateDbHost} --db-password ${PrivateDbPass} --db-user ${PrivateDbUser} --db-port ${PrivateDbPort} --local-soft ${PrivateLocalCacheSoftAffinity} --cluster-num ${PrivateClusterNum} --is-scaled ${PrivateIsScaled} --hadoop-version ${PrivateHadoopVersion} --spark-version ${PrivateSparkVersion} --hive-version ${PrivateHiveVersion} --kylin-version ${PrivateKylinVersion} -- [...]
               echo " Kylin4 is ready ..."
             - PrivateBucketFullPath: !Ref BucketFullPath
               PrivateKylin4ScriptFileName: !Ref Kylin4ScriptFileName
@@ -279,6 +282,7 @@ Resources:
               PrivateHadoopVersion: !Ref HadoopVersion
               PrivateMdxVersion: !Ref MdxVersion
               PrivateSupportGlue: !Ref SupportGlue
+              PrivateEnableMDX: !Ref EnableMDX
 
 Outputs:
   IdOfInstance:
@@ -305,3 +309,6 @@ Outputs:
   SupportGlue:
     Description: is supported glue ?
     Value: !Ref SupportGlue
+  EnableMDX:
+    Description: mdx is enabled?
+    Value: !Ref EnableMDX
diff --git a/cloudformation_templates/ec2-cluster-kylin4.yaml b/cloudformation_templates/ec2-cluster-kylin4.yaml
index a4eb58b..e02fadf 100644
--- a/cloudformation_templates/ec2-cluster-kylin4.yaml
+++ b/cloudformation_templates/ec2-cluster-kylin4.yaml
@@ -154,6 +154,10 @@ Parameters:
     Type: String
     Default: false
 
+  EnableMDX:
+    Type: String
+    Default: false
+
 Mappings:
   AWSRegionArch2AMI:
     cn-north-1:
@@ -260,7 +264,7 @@ Resources:
               #!/bin/bash -xe
               cd /home/ec2-user
               aws s3 cp ${PrivateBucketFullPath}/scripts/${PrivateKylin4ScriptFileName} . --region ${PrivateRegion}
-              bash ${PrivateKylin4ScriptFileName} --bucket-url ${PrivateBucketPath} --region ${PrivateRegion} --db-host ${PrivateDbHost} --db-password ${PrivateDbPass} --db-user ${PrivateDbUser} --db-port ${PrivateDbPort} --local-soft ${PrivateLocalCacheSoftAffinity} --cluster-num ${PrivateClusterNum} --is-scaled ${PrivateIsScaled} --hadoop-version ${PrivateHadoopVersion} --spark-version ${PrivateSparkVersion} --hive-version ${PrivateHiveVersion} --kylin-version ${PrivateKylinVersion} -- [...]
+              bash ${PrivateKylin4ScriptFileName} --bucket-url ${PrivateBucketPath} --region ${PrivateRegion} --db-host ${PrivateDbHost} --db-password ${PrivateDbPass} --db-user ${PrivateDbUser} --db-port ${PrivateDbPort} --local-soft ${PrivateLocalCacheSoftAffinity} --cluster-num ${PrivateClusterNum} --is-scaled ${PrivateIsScaled} --hadoop-version ${PrivateHadoopVersion} --spark-version ${PrivateSparkVersion} --hive-version ${PrivateHiveVersion} --kylin-version ${PrivateKylinVersion} -- [...]
               echo " Kylin4 is ready ..."
             - PrivateBucketFullPath: !Ref BucketFullPath
               PrivateKylin4ScriptFileName: !Ref Kylin4ScriptFileName
@@ -279,6 +283,7 @@ Resources:
               PrivateHadoopVersion: !Ref HadoopVersion
               PrivateMdxVersion: !Ref MdxVersion
               PrivateSupportGlue: !Ref SupportGlue
+              PrivateEnableMDX: !Ref EnableMDX
 
 Outputs:
   IdOfInstance:
@@ -305,3 +310,6 @@ Outputs:
   SupportGlue:
     Description: is supported glue ?
     Value: !Ref SupportGlue
+  EnableMDX:
+    Description: mdx is enabled?
+    Value: !Ref EnableMDX
diff --git a/kylin_configs.yaml b/kylin_configs.yaml
index 658bcec..d924459 100644
--- a/kylin_configs.yaml
+++ b/kylin_configs.yaml
@@ -56,6 +56,9 @@ CIDR_IP: ${Cidr Ip}
 #   If you set `All` mode for Kylin, there will be a error when you query any sql.
 SUPPORT_GLUE: &SUPPORT_GLUE 'false'
 
+# Whether to enable MDX. When 'false', the EC2 init script skips preparing, initializing and starting MDX.
+ENABLE_MDX: &ENABLE_MDX 'false'
+
 # ============ AWS Configs End ============
 
 # ============ Related Version of Services ============
@@ -246,6 +249,7 @@ EC2_KYLIN4_PARAMS:
   HiveVersion: *HIVE_VERSION
   MdxVersion: *MDX_VERSION
   SupportGlue: *SUPPORT_GLUE
+  EnableMDX: *ENABLE_MDX
 
   AssociatedPublicIp: *associated_public_ip
 
@@ -301,6 +305,7 @@ EC2_KYLIN4_SCALE_PARAMS:
   HiveVersion: *HIVE_VERSION
   MdxVersion: *MDX_VERSION
   SupportGlue: *SUPPORT_GLUE
+  EnableMDX: *ENABLE_MDX
 
   AssociatedPublicIp: *associated_public_ip
 
diff --git a/readme/prerequisites.md b/readme/prerequisites.md
index 361077d..2a6497d 100644
--- a/readme/prerequisites.md
+++ b/readme/prerequisites.md
@@ -14,7 +14,7 @@ git clone https://github.com/apache/kylin.git && cd kylin && git checkout kylin4
 
 > Note: 
 >
-> ​	`IAM` role must have the access which contains `AmazonEC2RoleforSSM`, `AmazonSSMFullAccess,` and `AmazonSSMManagedInstanceCore`.
+> ​	The `IAM` role must have access that includes `AmazonEC2RoleforSSM`, `AmazonSSMFullAccess`, `AmazonSSMManagedInstanceCore`, and `AWSGlueConsoleFullAccess` (this last policy is only needed if you want to use Glue).
 >
 > ​	This `IAM` Role will be used to initialize every ec2 instances which is for creating a kylin4 cluster on AWS. And it will configure in `Initialize Env of Local Machine` part.