You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@bigtop.apache.org by yw...@apache.org on 2019/10/18 04:08:26 UTC
[bigtop] branch cnb updated: BIGTOP-3255: Add Spark operator into
distribution
This is an automated email from the ASF dual-hosted git repository.
ywkim pushed a commit to branch cnb
in repository https://gitbox.apache.org/repos/asf/bigtop.git
The following commit(s) were added to refs/heads/cnb by this push:
new 472c525 BIGTOP-3255: Add Spark operator into distribution
472c525 is described below
commit 472c525c701a53c815d5cff1240eee21a9f5e897
Author: Youngwoo Kim <yw...@apache.org>
AuthorDate: Fri Oct 18 11:59:38 2019 +0900
BIGTOP-3255: Add Spark operator into distribution
---
README.md | 4 ++
bigtop-tests/smoke-tests/spark/TestSpark.groovy | 53 +++++++---------------
bigtop-tests/smoke-tests/spark/build.gradle | 2 +-
bigtop.bom | 13 +++++-
kubespray/vagrant/Vagrantfile | 2 +-
spark/README.md | 50 +++++++++++++++++++++
spark/examples/spark-pi-batch.yaml | 40 +++++++++++++++++
spark/examples/spark-pi.yaml | 39 +++++++++++++++++
spark/spark-rbac.yaml | 32 ++++++++++++++
spark/values.yaml | 58 +++++++++++++++++++++++++
10 files changed, 253 insertions(+), 40 deletions(-)
diff --git a/README.md b/README.md
index 53c288b..02fba6e 100755
--- a/README.md
+++ b/README.md
@@ -234,6 +234,10 @@ incubator/schema-registry
```
+## Spark
+
+See spark/README.md
+
# Getting Started
Below are some recipes for getting started with using Apache Bigtop. As Apache Bigtop has different subprojects, these recipes will continue to evolve.
diff --git a/bigtop-tests/smoke-tests/spark/TestSpark.groovy b/bigtop-tests/smoke-tests/spark/TestSpark.groovy
index 6d13ebf..29e0957 100644
--- a/bigtop-tests/smoke-tests/spark/TestSpark.groovy
+++ b/bigtop-tests/smoke-tests/spark/TestSpark.groovy
@@ -39,54 +39,33 @@ class TestSpark {
static private Log LOG = LogFactory.getLog(Object.class)
static Shell sh = new Shell("/bin/bash -s")
- static final String SPARK_HOME = System.getenv("SPARK_HOME")
- static final String SPARK_MASTER_IP = System.getenv("SPARK_MASTER_IP")
- static final String SPARK_MASTER_PORT = System.getenv("SPARK_MASTER_PORT")
- static final String TEST_SPARKSQL_LOG = "/tmp/TestSpark_testSparkSQL.log"
+ static final String BIGTOP_HOME = System.getenv("BIGTOP_HOME")
+ static final String BIGTOP_K8S_NS = "bigtop";
@BeforeClass
static void setUp() {
- sh.exec("rm -f " + TEST_SPARKSQL_LOG)
- // create HDFS examples/src/main/resources
- sh.exec("hdfs dfs -mkdir -p examples/src/main/resources")
- // extract people.txt file into it
- String examplesJar = JarContent.getJarName("$SPARK_HOME/examples/jars", 'spark-examples.*jar')
- assertNotNull(examplesJar, "spark-examples.jar file wasn't found")
- ZipInputStream zipInputStream = new ZipInputStream(new FileInputStream("$SPARK_HOME/examples/jars/$examplesJar"))
- File examplesDir = new File('examples')
- examplesDir.mkdirs()
- zipInputStream.unzip(examplesDir.getName(), 'people')
- sh.exec("hdfs dfs -put examples/* examples/src/main/resources")
- logError(sh)
}
@AfterClass
public static void tearDown() {
- sh.exec("hdfs dfs -ls")
- logError(sh)
- sh.exec("hdfs dfs -rm -r examples")
+ sh.exec("kubectl delete sparkapplication " + "-n " + BIGTOP_K8S_NS + " spark-pi")
logError(sh)
}
@Test
- void testSparkSQL() {
- // Let's figure out the proper mode for the submission
- // If SPARK_MASTER_IP nor SPARK_MASTER_PORT are set, we'll assume
- // 'yarn-client' mode
- String masterMode = 'yarn-client'
- if (SPARK_MASTER_IP != null && SPARK_MASTER_PORT != null)
- masterMode = "spark://$SPARK_MASTER_IP:$SPARK_MASTER_PORT"
- else
- println("SPARK_MASTER isn't set. yarn-client submission will be used. " +
- "Refer to smoke-tests/README If this isn't what you you expect.")
-
- final String SPARK_SHELL = SPARK_HOME + "/bin/spark-shell --master $masterMode"
- // Let's use time, 'cause the test has one job
- sh.exec("timeout 120 " + SPARK_SHELL +
- " --class org.apache.spark.examples.sql.JavaSparkSQLExample " +
- " --jars " + SPARK_HOME + "/examples/jars/spark-examples*.jar > " +
- TEST_SPARKSQL_LOG + " 2>&1")
- logError(sh)
+ void testSparkPi() {
+ sh.exec("kubectl apply -f " + BIGTOP_HOME + "/spark/examples/spark-pi.yaml");
+ logError(sh);
assertTrue("Failed ...", sh.getRet() == 0);
+
+ // sleep 20s
+ sleep(20000);
+
+ sh.exec("kubectl logs " + "-n " + BIGTOP_K8S_NS + " spark-pi-driver");
+ logError(sh);
+ String out = sh.getOut().toString();
+ LOG.info("Output of Spark application driver:\n" + out);
+ assertTrue(out.contains("Pi is roughly"));
}
}
+
diff --git a/bigtop-tests/smoke-tests/spark/build.gradle b/bigtop-tests/smoke-tests/spark/build.gradle
index 1ed3e14..81fa007 100644
--- a/bigtop-tests/smoke-tests/spark/build.gradle
+++ b/bigtop-tests/smoke-tests/spark/build.gradle
@@ -29,5 +29,5 @@ sourceSets {
}
test.doFirst {
- checkEnv(["SPARK_HOME"])
+ checkEnv(["BIGTOP_HOME"])
}
diff --git a/bigtop.bom b/bigtop.bom
index af45d12..20f2b1c 100644
--- a/bigtop.bom
+++ b/bigtop.bom
@@ -90,7 +90,7 @@
bigtop {
/** Base Configuration of the mirror and archives */
- version = "1.5.0-SNAPSHOT"
+ version = "2.0.0-SNAPSHOT"
stack {
'jdk' { version = "1." + ( System.getenv('BIGTOP_JDK') ?: "8" ); version_base = version }
'scala' { version = '2.11.8'; version_base = version }
@@ -144,6 +144,17 @@ bigtop {
url { site = "https://github.com/rook/rook/archive"
archive = site }
}
+ 'volcano' {
+ name = "volcano"
+ pkg = "volcano"
+ relNotes = "Volcano is system for runnning high performance workloads on Kubernetes"
+ website = "https://volcano.sh"
+ version { base = '0.2'; pkg = base; release = 1 }
+ tarball { destination = "$name-${version.base}.tar.gz"
+ source = "v${version.base}.tar.gz" }
+ url { site = "https://github.com/volcano-sh/volcano/archive"
+ archive = site }
+ }
'zookeeper' {
name = 'zookeeper'
pkg = name
diff --git a/kubespray/vagrant/Vagrantfile b/kubespray/vagrant/Vagrantfile
index 99131d5..dff6145 100644
--- a/kubespray/vagrant/Vagrantfile
+++ b/kubespray/vagrant/Vagrantfile
@@ -178,7 +178,7 @@ Vagrant.configure("2") do |config|
# Disable swap for each vm
node.vm.provision "shell", inline: "swapoff -a"
- node.vm.provision "shell", inline: "yum install -y lvm2"
+ node.vm.provision "shell", inline: "yum install -y lvm2 java-1.8.0-openjdk-devel"
node.vm.provision "shell", inline: "sudo cp /bigtop/kubectl/plugin/kubectl-bigtop /usr/local/bin/ ; sudo chmod +x /usr/local/bin/kubectl-bigtop"
host_vars[vm_name] = {
diff --git a/spark/README.md b/spark/README.md
new file mode 100644
index 0000000..c209826
--- /dev/null
+++ b/spark/README.md
@@ -0,0 +1,50 @@
+# Spark on Kubernetes
+
+Volcano[1]:
+```
+$ cd $VOLCANO_HOME
+$ kubectl apply -f installer/volcano-development.yaml
+
+```
+*TODO*
+- Install Volcano via Helm
+
+Spark operator[2]:
+You can install the Spark operator in the 'bigtop' namespace:
+```
+$ cd $BIGTOP_HOME
+
+$ kubectl create -f spark/spark-rbac.yaml
+$ helm repo add incubator http://storage.googleapis.com/kubernetes-charts-incubator
+$ helm install incubator/sparkoperator --namespace bigtop -f spark/values.yaml \
+--set enableBatchScheduler=true \
+--set enableWebhook=true
+
+$ kubectl get po -n bigtop
+
+```
+
+Running the Spark Pi example:
+```
+$ kubectl apply -f spark/examples/spark-pi.yaml
+$ kubectl describe sparkapplication -n bigtop spark-pi
+......
+$ kubectl logs -n bigtop spark-pi-driver
+......
+
+Pi is roughly 3.1405357026785135
+
+```
+
+Running Spark Pi example with Volcano scheduler:
+```
+$ kubectl apply -f spark/examples/spark-pi-batch.yaml
+$ kubectl logs -n bigtop spark-pi-batch-driver
+
+```
+----
+
+1. https://volcano.sh
+
+2. https://github.com/GoogleCloudPlatform/spark-on-k8s-operator
+
diff --git a/spark/examples/spark-pi-batch.yaml b/spark/examples/spark-pi-batch.yaml
new file mode 100644
index 0000000..6d328ad
--- /dev/null
+++ b/spark/examples/spark-pi-batch.yaml
@@ -0,0 +1,40 @@
+apiVersion: "sparkoperator.k8s.io/v1beta2"
+kind: SparkApplication
+metadata:
+ name: spark-pi-batch
+ namespace: bigtop
+spec:
+ type: Scala
+ mode: cluster
+ image: "gcr.io/spark-operator/spark:v2.4.4"
+ imagePullPolicy: Always
+ mainClass: org.apache.spark.examples.SparkPi
+ mainApplicationFile: "local:///opt/spark/examples/jars/spark-examples_2.11-2.4.4.jar"
+ sparkVersion: "2.4.4"
+ batchScheduler: "volcano"
+ restartPolicy:
+ type: Never
+ volumes:
+ - name: "test-volume"
+ hostPath:
+ path: "/tmp"
+ type: Directory
+ driver:
+ cores: 1
+ coreLimit: "1200m"
+ memory: "512m"
+ labels:
+ version: 2.4.4
+ serviceAccount: spark
+ volumeMounts:
+ - name: "test-volume"
+ mountPath: "/tmp"
+ executor:
+ cores: 1
+ instances: 1
+ memory: "512m"
+ labels:
+ version: 2.4.4
+ volumeMounts:
+ - name: "test-volume"
+ mountPath: "/tmp"
diff --git a/spark/examples/spark-pi.yaml b/spark/examples/spark-pi.yaml
new file mode 100644
index 0000000..7738fac
--- /dev/null
+++ b/spark/examples/spark-pi.yaml
@@ -0,0 +1,39 @@
+apiVersion: "sparkoperator.k8s.io/v1beta2"
+kind: SparkApplication
+metadata:
+ name: spark-pi
+ namespace: bigtop
+spec:
+ type: Scala
+ mode: cluster
+ image: "gcr.io/spark-operator/spark:v2.4.4"
+ imagePullPolicy: Always
+ mainClass: org.apache.spark.examples.SparkPi
+ mainApplicationFile: "local:///opt/spark/examples/jars/spark-examples_2.11-2.4.4.jar"
+ sparkVersion: "2.4.4"
+ restartPolicy:
+ type: Never
+ volumes:
+ - name: "test-volume"
+ hostPath:
+ path: "/tmp"
+ type: Directory
+ driver:
+ cores: 1
+ coreLimit: "1200m"
+ memory: "512m"
+ labels:
+ version: 2.4.4
+ serviceAccount: spark
+ volumeMounts:
+ - name: "test-volume"
+ mountPath: "/tmp"
+ executor:
+ cores: 1
+ instances: 1
+ memory: "512m"
+ labels:
+ version: 2.4.4
+ volumeMounts:
+ - name: "test-volume"
+ mountPath: "/tmp"
diff --git a/spark/spark-rbac.yaml b/spark/spark-rbac.yaml
new file mode 100644
index 0000000..779d5cf
--- /dev/null
+++ b/spark/spark-rbac.yaml
@@ -0,0 +1,32 @@
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+ name: spark
+ namespace: bigtop
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+ namespace: bigtop
+ name: spark-role
+rules:
+- apiGroups: [""]
+ resources: ["pods"]
+ verbs: ["*"]
+- apiGroups: [""]
+ resources: ["services"]
+ verbs: ["*"]
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+ name: spark-role-binding
+ namespace: bigtop
+subjects:
+- kind: ServiceAccount
+ name: spark
+ namespace: bigtop
+roleRef:
+ kind: Role
+ name: spark-role
+ apiGroup: rbac.authorization.k8s.io
diff --git a/spark/values.yaml b/spark/values.yaml
new file mode 100644
index 0000000..77f2dcf
--- /dev/null
+++ b/spark/values.yaml
@@ -0,0 +1,58 @@
+operatorImageName: gcr.io/spark-operator/spark-operator
+operatorVersion: v1beta2-1.0.1-2.4.4
+imagePullPolicy: IfNotPresent
+replicas: 1
+
+rbac:
+ create: true
+
+serviceAccounts:
+ spark:
+ create: true
+ name: spark
+ sparkoperator:
+ create: true
+ name:
+
+sparkJobNamespace: ""
+installCrds: true
+controllerThreads: 10
+resyncInterval: 30
+ingressUrlFormat: ""
+logLevel: 2
+
+securityContext: {}
+
+enableWebhook: true
+webhookPort: 8080
+
+enableMetrics: true
+metricsPort: 10254
+metricsEndpoint: "/metrics"
+metricsPrefix: ""
+
+## Node labels for pod assignment
+## Ref: https://kubernetes.io/docs/user-guide/node-selection/
+##
+nodeSelector: {}
+
+podAnnotations: {}
+
+## Resources for the sparkoperator deployment
+## Ref: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/
+##
+resources: {}
+
+## Whether to enable batch scheduler for pod scheduling,
+## if enabled, end user can specify batch scheduler name in spark application.
+enableBatchScheduler: false
+
+## Whether to enable the ResourceQuota enforcement for SparkApplication resources.
+## Requires the webhook to be enabled by setting enableWebhook to true.
+## Ref: https://github.com/GoogleCloudPlatform/spark-on-k8s-operator/blob/master/docs/user-guide.md#enabling-resource-quota-enforcement.
+enableResourceQuotaEnforcement: false
+
+## Whether to enable leader election when the operator Deployment has more than one replica.
+## Only applicable when `replicas` is set to a value greater than 1.
+## Ref: https://github.com/GoogleCloudPlatform/spark-on-k8s-operator/blob/master/docs/user-guide.md#enabling-leader-election-for-high-availability.
+enableLeaderElection: false