You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by xx...@apache.org on 2020/05/05 17:35:32 UTC

[kylin] 01/04: KYLIN-4181 Schedule Kylin using Kubernetes

This is an automated email from the ASF dual-hosted git repository.

xxyu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kylin.git

commit 0fdf7764a2dc3c5f6d025580a16d397b32fffc7c
Author: Temple Zhou <db...@gmail.com>
AuthorDate: Sun Apr 26 13:04:32 2020 +0800

    KYLIN-4181 Schedule Kylin using Kubernetes
---
 .gitignore                              |   4 +-
 kubernetes/Dockerfile                   |  78 +++++++++++++++++++++++
 kubernetes/README.md                    | 109 ++++++++++++++++++++++++++++++++
 kubernetes/kylin-configmap.sh           |  17 +++++
 kubernetes/kylin-job-statefulset.yaml   |  95 ++++++++++++++++++++++++++++
 kubernetes/kylin-query-statefulset.yaml |  95 ++++++++++++++++++++++++++++
 kubernetes/kylin-secret.sh              |   3 +
 kubernetes/kylin-service.yaml           |  44 +++++++++++++
 8 files changed, 444 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 98da29c..69d61d0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -93,4 +93,6 @@ dependency-reduced-pom.xml
 webapp/package-lock.json
 
 # stream_index
-stream-receiver/stream_index
\ No newline at end of file
+stream-receiver/stream_index
+# configuration files
+kubernetes/conf/*
diff --git a/kubernetes/Dockerfile b/kubernetes/Dockerfile
new file mode 100644
index 0000000..6454417
--- /dev/null
+++ b/kubernetes/Dockerfile
@@ -0,0 +1,78 @@
+FROM centos:6.9
+# Mirror that serves the Kafka/Spark/Kylin tarballs; override with --build-arg APACHE_MIRRORS=<url>.
+ARG APACHE_MIRRORS=http://mirrors.aliyun.com
+ENV APACHE_MIRRORS=${APACHE_MIRRORS}
+# Component versions; they are interpolated into the *_HOME paths below and into the download steps.
+ENV JAVA_VERSION=1.8.0
+ENV SPARK_VERSION=2.3.4
+ENV KAFKA_VERSION=2.1.1
+ENV KYLIN_VERSION=3.0.0
+# Home directories: /usr/lib/* for Java/Hadoop/Hive/HBase, /opt/* for the Spark, Kafka and Kylin tarballs.
+ENV JAVA_HOME=/usr/lib/jvm/java-${JAVA_VERSION}
+ENV HADOOP_HOME=/usr/lib/hadoop
+ENV HIVE_HOME=/usr/lib/hive
+ENV HCAT_HOME=/usr/lib/hive-hcatalog
+ENV HBASE_HOME=/usr/lib/hbase
+ENV SPARK_HOME=/opt/spark-${SPARK_VERSION}-bin-hadoop2.6
+ENV KAFKA_HOME=/opt/kafka_2.11-${KAFKA_VERSION}
+ENV KYLIN_HOME=/opt/apache-kylin-${KYLIN_VERSION}-bin-cdh57
+# Append the Spark, Kafka and Kylin bin directories to PATH.
+ENV PATH=$PATH:\
+$SPARK_HOME/bin:\
+$KAFKA_HOME/bin:\
+$KYLIN_HOME/bin
+# Configuration directories; the Kubernetes manifests mount ConfigMaps at the three /etc/*/conf paths.
+ENV HADOOP_CONF_DIR=/etc/hadoop/conf
+ENV HIVE_CONF_DIR=/etc/hive/conf
+ENV HBASE_CONF_DIR=/etc/hbase/conf
+ENV HIVE_CONF=${HIVE_CONF_DIR}
+ENV HIVE_LIB=${HIVE_HOME}/lib
+# Write the yum repo definition for Cloudera CDH 5.7.6 (gpgcheck enabled). $'...' is bash ANSI-C quoting, so this assumes the image's /bin/sh is bash — TODO confirm.
+RUN echo $'[cloudera-cdh5] \n\
+# Packages for Cloudera\'s Distribution for Hadoop, Version 5, on RedHat or CentOS 6 x86_64 \n\
+name=Cloudera\'s Distribution for Hadoop, Version 5 \n\
+baseurl=https://archive.cloudera.com/cdh5/redhat/6/x86_64/cdh/5.7.6/ \n\
+gpgkey =https://archive.cloudera.com/cdh5/redhat/6/x86_64/cdh/RPM-GPG-KEY-cloudera \n\
+gpgcheck = 1' > /etc/yum.repos.d/cloudera-cdh5.repo
+
+WORKDIR /opt
+# The tarballs below are unpacked under /opt, matching KAFKA_HOME, SPARK_HOME and KYLIN_HOME.
+# Download and unpack Kafka from APACHE_MIRRORS (-f: fail on HTTP errors, -L: follow mirror redirects)
+RUN set -xeu && \
+    curl -fL -o kafka_2.11-${KAFKA_VERSION}.tgz \
+    ${APACHE_MIRRORS}/apache/kafka/${KAFKA_VERSION}/kafka_2.11-${KAFKA_VERSION}.tgz && \
+    tar -zxf kafka_2.11-${KAFKA_VERSION}.tgz && rm kafka_2.11-${KAFKA_VERSION}.tgz
+
+# Download and unpack Spark from APACHE_MIRRORS (same curl flags as above)
+RUN set -xeu && \
+    curl -fL -o spark-${SPARK_VERSION}-bin-hadoop2.6.tgz \
+    ${APACHE_MIRRORS}/apache/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop2.6.tgz && \
+    tar -zxf spark-${SPARK_VERSION}-bin-hadoop2.6.tgz && rm spark-${SPARK_VERSION}-bin-hadoop2.6.tgz
+
+# Download and unpack Kylin from APACHE_MIRRORS (same curl flags as above)
+RUN set -xeu && \
+    curl -fL -o apache-kylin-${KYLIN_VERSION}-bin-cdh57.tar.gz \
+    ${APACHE_MIRRORS}/apache/kylin/apache-kylin-${KYLIN_VERSION}/apache-kylin-${KYLIN_VERSION}-bin-cdh57.tar.gz && \
+    tar -zxf apache-kylin-${KYLIN_VERSION}-bin-cdh57.tar.gz && rm apache-kylin-${KYLIN_VERSION}-bin-cdh57.tar.gz
+# Install the JDK and Kerberos client plus Hadoop/Hive/HBase (CDH repository), add two jars to Hive's lib directory, then create the kylin user (uid/gid 1000) that owns KYLIN_HOME.
+# PS: The libhadoop.so provided by CDH is compiled with snappy. curl runs with -fL (follow redirects, fail on HTTP errors) so an error page is never saved as a .jar.
+RUN set -xeu && \
+    yum -y -q install java-1.8.0-openjdk-devel && \
+    yum -y -q install krb5-workstation && \
+    yum -y -q install hadoop-client && \
+    yum -y -q install hive hive-hcatalog && \
+    yum -y -q install hbase && \
+    curl -fL -o ${HIVE_HOME}/lib/hadoop-lzo-0.4.15.jar \
+    https://clojars.org/repo/hadoop-lzo/hadoop-lzo/0.4.15/hadoop-lzo-0.4.15.jar && \
+    curl -fL -o ${HIVE_HOME}/lib/mysql-connector-java-5.1.24.jar \
+    https://repo1.maven.org/maven2/mysql/mysql-connector-java/5.1.24/mysql-connector-java-5.1.24.jar && \
+    yum -q clean all && \
+    rm -rf /var/cache/yum && \
+    rm -rf /tmp/* /var/tmp/* && \
+    groupadd kylin --gid 1000 && \
+    useradd kylin --uid 1000 --gid 1000 && \
+    chown -R "kylin:kylin" ${KYLIN_HOME}
+# Document port 7070 and run as the unprivileged kylin user; JSON-form CMD with an explicit exec so kylin.sh replaces the wrapper shell and receives container signals directly.
+EXPOSE 7070
+USER kylin:kylin
+CMD ["/bin/sh", "-c", "exec ${KYLIN_HOME}/bin/kylin.sh run"]
\ No newline at end of file
diff --git a/kubernetes/README.md b/kubernetes/README.md
new file mode 100644
index 0000000..205e3f3
--- /dev/null
+++ b/kubernetes/README.md
@@ -0,0 +1,109 @@
+# Kubernetes QuickStart
+
+This guide shows how to run a Kylin cluster using the Kubernetes StatefulSet controller. The following figure depicts a typical scenario for Kylin cluster mode deployment:
+
+![image_name](http://kylin.apache.org/images/install/kylin_server_modes.png)
+
+## Build or Pull Docker Image
+
+You can pull the image from Docker Hub directly if you do not want to build the image locally:
+
+```bash
+docker pull apachekylin/apache-kylin:3.0.0-cdh57
+```
+
+TIPS: If you are working in an air-gapped network or with slow internet speeds, we suggest you prepare the binary packages yourself and then run:
+
+```bash
+docker build -t "apache-kylin:${KYLIN_VERSION}-cdh57" --build-arg APACHE_MIRRORS=http://127.0.0.1:8000 .
+```
+
+## Prepare your Hadoop Configuration
+
+Put all of the configuration files under the "conf" directory.
+
+```bash
+kylin.properties
+applicationContext.xml  # If you need to set cacheManager to Memcached
+hbase-site.xml
+hive-site.xml
+hdfs-site.xml
+core-site.xml
+mapred-site.xml
+yarn-site.xml
+```
+
+If you work with a Kerberized Hadoop cluster, do not forget to prepare the following files:
+
+```bash
+krb5.conf
+kylin.keytab
+```
+
+## Create ConfigMaps and Secret
+
+We recommend creating a separate Kubernetes namespace for Kylin.
+
+```bash
+kubectl create namespace kylin
+```
+
+Execute the following shell scripts to create the required ConfigMaps and Secret:
+
+```bash
+./kylin-configmap.sh
+./kylin-secret.sh
+```
+
+## Create Service and StatefulSet
+
+Make sure the following resources exist in your namespace:
+
+```bash
+kubectl get configmaps,secret -n kylin
+
+NAME                      DATA   AGE
+configmap/hadoop-config   4      89d
+configmap/hbase-config    1      89d
+configmap/hive-config     1      89d
+configmap/krb5-config     1      89d
+configmap/kylin-config    1      89d
+configmap/kylin-context   1      45d
+
+NAME                         TYPE                                  DATA   AGE
+secret/kylin-keytab          Opaque                                1      89d
+
+```
+
+Then, you need to create a headless service that provides stable DNS entries (kylin-job-0.kylin, kylin-query-0.kylin, kylin-query-1.kylin...) for the StatefulSet members.
+
+```bash
+kubectl apply -f kylin-service.yaml
+```
+
+Finally, create the StatefulSet and try to use it:
+
+```bash
+kubectl apply -f kylin-job-statefulset.yaml
+kubectl apply -f kylin-query-statefulset.yaml
+```
+
+If everything goes smoothly, you should see all of the Pods become Running:
+
+```bash
+kubectl get statefulset,pod,service -n kylin
+
+NAME                           READY   AGE
+statefulset.apps/kylin-job     1/1     36d
+statefulset.apps/kylin-query   3/3     36d
+
+NAME                READY   STATUS    RESTARTS   AGE
+pod/kylin-job-0     1/1     Running   0          13m
+pod/kylin-query-0   1/1     Running   0          40h
+pod/kylin-query-1   1/1     Running   0          40h
+
+NAME                  TYPE        CLUSTER-IP       EXTERNAL-IP   PORT(S)    AGE
+service/kylin         ClusterIP   None             <none>        7070/TCP   58d
+service/kylin-job     ClusterIP   xx.xxx.xx.xx     <none>        7070/TCP   89d
+service/kylin-query   ClusterIP   xx.xxx.xxx.xxx   <none>        7070/TCP   89d
+```
diff --git a/kubernetes/kylin-configmap.sh b/kubernetes/kylin-configmap.sh
new file mode 100755
index 0000000..b8ec1b9
--- /dev/null
+++ b/kubernetes/kylin-configmap.sh
@@ -0,0 +1,17 @@
+#!/usr/bin/env bash
+
+kubectl create configmap -n kylin hadoop-config --from-file=conf/core-site.xml \
+                                                --from-file=conf/hdfs-site.xml \
+                                                --from-file=conf/yarn-site.xml \
+                                                --from-file=conf/mapred-site.xml \
+                                                --dry-run -o yaml | kubectl apply -f -
+kubectl create configmap -n kylin hive-config   --from-file=conf/hive-site.xml \
+                                                --dry-run -o yaml | kubectl apply -f -
+kubectl create configmap -n kylin hbase-config  --from-file=conf/hbase-site.xml \
+                                                --dry-run -o yaml | kubectl apply -f -
+kubectl create configmap -n kylin kylin-config  --from-file=conf/kylin.properties \
+                                                --dry-run -o yaml | kubectl apply -f -
+kubectl create configmap -n kylin krb5-config   --from-file=conf/krb5.conf \
+                                                --dry-run -o yaml | kubectl apply -f -
+kubectl create configmap -n kylin kylin-context --from-file=conf/applicationContext.xml \
+                                                --dry-run -o yaml | kubectl apply -f -
diff --git a/kubernetes/kylin-job-statefulset.yaml b/kubernetes/kylin-job-statefulset.yaml
new file mode 100644
index 0000000..2a0f9fe
--- /dev/null
+++ b/kubernetes/kylin-job-statefulset.yaml
@@ -0,0 +1,95 @@
+apiVersion: apps/v1
+kind: StatefulSet  # Kylin "job" node; the postStart hook below sets kylin.server.mode=all
+metadata:
+  annotations: {}
+  name: kylin-job
+  namespace: kylin
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: kylin
+      type: job
+  serviceName: kylin  # headless Service "kylin" (kylin-service.yaml); the hook advertises <hostname>.kylin:7070
+  template:
+    metadata:
+      labels:  # same labels as spec.selector.matchLabels; "type: job" is what the kylin-job Service selects
+        app: kylin
+        type: job
+    spec:
+      containers:
+        - image: 'apachekylin/apache-kylin:3.0.0-cdh57'
+          imagePullPolicy: Always
+          lifecycle:
+            postStart:  # NOTE(review): Kubernetes does not guarantee postStart completes before the container's CMD starts — confirm Kylin reads the files written here
+              exec:
+                command:
+                  - bash
+                  - '-c'
+                  - |
+                    set -ex
+                    # initialize the keytab
+                    kinit -kt /home/kylin/kylin.keytab kylin
+                    # set the kylin.server.mode
+                    sed "s/kylin\.server\.mode.*/kylin\.server\.mode=all/g" /mnt/kylin-config/kylin.properties > ${KYLIN_HOME}/conf/kylin.properties
+                    sed -i "s/kylin\.server\.host-address.*/kylin\.server\.host-address=`hostname`\.kylin:7070/g" ${KYLIN_HOME}/conf/kylin.properties
+                    sed -i "s/export KYLIN_JVM_SETTINGS.*/export KYLIN_JVM_SETTINGS=\"-Xms40g -Xmx40g -XX:NewSize=10g -XX:MaxNewSize=10g -XX:SurvivorRatio=3 -XX:+CMSClassUnloadingEnabled -XX:+CMSParallelRemarkEnabled -XX:+UseConcMarkSweepGC -XX:+CMSIncrementalMode -XX:CMSInitiatingOccupancyFraction=70 -XX:+DisableExplicitGC -XX:+HeapDumpOnOutOfMemoryError\"/g" ${KYLIN_HOME}/conf/setenv.sh
+                    # unarchive the war file and replace the applicationContext if needed
+                    mkdir ${KYLIN_HOME}/tomcat/webapps/kylin
+                    cd ${KYLIN_HOME}/tomcat/webapps/kylin
+                    jar -xvf ${KYLIN_HOME}/tomcat/webapps/kylin.war
+                    cp /mnt/kylin-context/applicationContext.xml ${KYLIN_HOME}/tomcat/webapps/kylin/WEB-INF/classes
+          name: kylin
+          ports:
+            - containerPort: 7070
+          readinessProbe:  # Ready once an HTTP GET of /kylin on port 7070 succeeds
+            httpGet:
+              path: /kylin
+              port: 7070
+          resources:
+            limits:
+              cpu: 16
+              memory: 50G  # the postStart hook configures a 40g JVM heap (-Xms40g -Xmx40g)
+            requests:
+              cpu: 8
+              memory: 50G
+          volumeMounts:
+            - mountPath: /etc/hadoop/conf  # HADOOP_CONF_DIR in the image
+              name: hadoop-config
+            - mountPath: /etc/hive/conf  # HIVE_CONF_DIR in the image
+              name: hive-config
+            - mountPath: /etc/hbase/conf  # HBASE_CONF_DIR in the image
+              name: hbase-config
+            - mountPath: /home/kylin  # holds kylin.keytab, read by kinit in the postStart hook
+              name: kylin-keytab
+            - mountPath: /etc/krb5.conf
+              name: krb5-config
+              subPath: krb5.conf  # mount only this key, as a single file
+            - mountPath: /mnt/kylin-context  # applicationContext.xml, copied into the exploded webapp by the hook
+              name: kylin-context
+            - mountPath: /mnt/kylin-config  # kylin.properties template that the hook rewrites into ${KYLIN_HOME}/conf
+              name: kylin-config
+      volumes:
+        - configMap:
+            name: hadoop-config
+          name: hadoop-config
+        - configMap:
+            name: hive-config
+          name: hive-config
+        - configMap:
+            name: hbase-config
+          name: hbase-config
+        - configMap:
+            name: kylin-config
+          name: kylin-config
+        - configMap:
+            name: krb5-config
+          name: krb5-config
+        - configMap:
+            name: kylin-context
+          name: kylin-context
+        - name: kylin-keytab
+          secret:
+            secretName: kylin-keytab
+  updateStrategy:
+    type: RollingUpdate
diff --git a/kubernetes/kylin-query-statefulset.yaml b/kubernetes/kylin-query-statefulset.yaml
new file mode 100644
index 0000000..f504a58
--- /dev/null
+++ b/kubernetes/kylin-query-statefulset.yaml
@@ -0,0 +1,95 @@
+apiVersion: apps/v1
+kind: StatefulSet  # Kylin "query" nodes; the postStart hook below sets kylin.server.mode=query
+metadata:
+  annotations: {}
+  name: kylin-query
+  namespace: kylin
+spec:
+  replicas: 3
+  selector:
+    matchLabels:
+      app: kylin
+      type: query
+  serviceName: kylin  # headless Service "kylin" (kylin-service.yaml); the hook advertises <hostname>.kylin:7070
+  template:
+    metadata:
+      labels:  # same labels as spec.selector.matchLabels; "type: query" is what the kylin-query Service selects
+        app: kylin
+        type: query
+    spec:
+      containers:
+        - image: 'apachekylin/apache-kylin:3.0.0-cdh57'
+          imagePullPolicy: Always
+          lifecycle:
+            postStart:  # NOTE(review): Kubernetes does not guarantee postStart completes before the container's CMD starts — confirm Kylin reads the files written here
+              exec:
+                command:
+                  - bash
+                  - '-c'
+                  - |
+                    set -ex
+                    # initialize the keytab
+                    kinit -kt /home/kylin/kylin.keytab kylin
+                    # set the kylin.server.mode
+                    sed "s/kylin\.server\.mode.*/kylin\.server\.mode=query/g" /mnt/kylin-config/kylin.properties > ${KYLIN_HOME}/conf/kylin.properties
+                    sed -i "s/kylin\.server\.host-address.*/kylin\.server\.host-address=`hostname`\.kylin:7070/g" ${KYLIN_HOME}/conf/kylin.properties
+                    sed -i "s/export KYLIN_JVM_SETTINGS.*/export KYLIN_JVM_SETTINGS=\"-Xms16g -Xmx16g -XX:NewSize=3g -XX:MaxNewSize=3g -XX:SurvivorRatio=4 -XX:+CMSClassUnloadingEnabled -XX:+CMSParallelRemarkEnabled -XX:+UseConcMarkSweepGC -XX:+CMSIncrementalMode -XX:CMSInitiatingOccupancyFraction=70 -XX:+DisableExplicitGC -XX:+HeapDumpOnOutOfMemoryError\"/g" ${KYLIN_HOME}/conf/setenv.sh
+                    # unarchive the war file and replace the applicationContext if needed
+                    mkdir ${KYLIN_HOME}/tomcat/webapps/kylin
+                    cd ${KYLIN_HOME}/tomcat/webapps/kylin
+                    jar -xvf ${KYLIN_HOME}/tomcat/webapps/kylin.war
+                    cp /mnt/kylin-context/applicationContext.xml ${KYLIN_HOME}/tomcat/webapps/kylin/WEB-INF/classes
+          name: kylin
+          ports:
+            - containerPort: 7070
+          readinessProbe:  # Ready once an HTTP GET of /kylin on port 7070 succeeds
+            httpGet:
+              path: /kylin
+              port: 7070
+          resources:
+            limits:
+              cpu: 8
+              memory: 20G  # the postStart hook configures a 16g JVM heap (-Xms16g -Xmx16g)
+            requests:
+              cpu: 8
+              memory: 20G
+          volumeMounts:
+            - mountPath: /etc/hadoop/conf  # HADOOP_CONF_DIR in the image
+              name: hadoop-config
+            - mountPath: /etc/hive/conf  # HIVE_CONF_DIR in the image
+              name: hive-config
+            - mountPath: /etc/hbase/conf  # HBASE_CONF_DIR in the image
+              name: hbase-config
+            - mountPath: /home/kylin  # holds kylin.keytab, read by kinit in the postStart hook
+              name: kylin-keytab
+            - mountPath: /etc/krb5.conf
+              name: krb5-config
+              subPath: krb5.conf  # mount only this key, as a single file
+            - mountPath: /mnt/kylin-context  # applicationContext.xml, copied into the exploded webapp by the hook
+              name: kylin-context
+            - mountPath: /mnt/kylin-config  # kylin.properties template that the hook rewrites into ${KYLIN_HOME}/conf
+              name: kylin-config
+      volumes:
+        - configMap:
+            name: hadoop-config
+          name: hadoop-config
+        - configMap:
+            name: hive-config
+          name: hive-config
+        - configMap:
+            name: hbase-config
+          name: hbase-config
+        - configMap:
+            name: kylin-config
+          name: kylin-config
+        - configMap:
+            name: krb5-config
+          name: krb5-config
+        - configMap:
+            name: kylin-context
+          name: kylin-context
+        - name: kylin-keytab
+          secret:
+            secretName: kylin-keytab
+  updateStrategy:
+    type: RollingUpdate
diff --git a/kubernetes/kylin-secret.sh b/kubernetes/kylin-secret.sh
new file mode 100755
index 0000000..87ab71e
--- /dev/null
+++ b/kubernetes/kylin-secret.sh
@@ -0,0 +1,3 @@
+#!/usr/bin/env bash
+
+kubectl create secret -n kylin generic kylin-keytab --from-file=conf/kylin.keytab
\ No newline at end of file
diff --git a/kubernetes/kylin-service.yaml b/kubernetes/kylin-service.yaml
new file mode 100644
index 0000000..50c3206
--- /dev/null
+++ b/kubernetes/kylin-service.yaml
@@ -0,0 +1,44 @@
+apiVersion: v1
+kind: Service  # Headless service for stable DNS entries of StatefulSet members.
+metadata:
+  name: kylin  # referenced as serviceName by the kylin-job and kylin-query StatefulSets
+  namespace: kylin  # pinned to the namespace the StatefulSets are declared in
+  labels:
+    app: kylin
+spec:
+  ports:
+    - name: kylin
+      port: 7070
+  clusterIP: None  # None makes the Service headless
+  selector:
+    app: kylin  # matches both job and query Pods
+---
+apiVersion: v1
+kind: Service  # Cluster-internal entry point for the job instances.
+metadata:
+  name: kylin-job
+  namespace: kylin  # pinned to the namespace the kylin-job StatefulSet is declared in
+spec:
+  type: ClusterIP
+  selector:  # same labels as the kylin-job StatefulSet's Pod template
+    app: kylin
+    type: job
+  ports:
+    - protocol: TCP
+      port: 7070
+      targetPort: 7070
+---
+apiVersion: v1
+kind: Service  # Cluster-internal entry point for the query instances.
+metadata:
+  name: kylin-query
+  namespace: kylin  # pinned to the namespace the kylin-query StatefulSet is declared in
+spec:
+  type: ClusterIP
+  selector:  # same labels as the kylin-query StatefulSet's Pod template
+    app: kylin
+    type: query
+  ports:
+    - protocol: TCP
+      port: 7070
+      targetPort: 7070
\ No newline at end of file