You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@gobblin.apache.org by su...@apache.org on 2019/11/07 18:24:09 UTC
[incubator-gobblin] branch master updated: [GOBBLIN-913] Add MySQL
and configurations to cluster
This is an automated email from the ASF dual-hosted git repository.
suvasude pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-gobblin.git
The following commit(s) were added to refs/heads/master by this push:
new e785b02 [GOBBLIN-913] Add MySQL and configurations to cluster
e785b02 is described below
commit e785b02d9fd59dbeb4282387d1956778df4f6fca
Author: William Lo <wl...@linkedin.com>
AuthorDate: Thu Nov 7 10:24:02 2019 -0800
[GOBBLIN-913] Add MySQL and configurations to cluster
Closes #2781 from Will-Lo/mysql-k8s-init
---
.../alpine-gaas-latest/entrypoint.sh | 3 +-
.../gobblin-service/base-cluster/README.md | 4 ++
.../application.yaml | 2 +-
.../{basic-cluster => base-cluster}/ingress.yaml | 0
.../{basic-cluster => base-cluster}/storage.yaml | 0
.../gobblin-service/mysql-cluster/README.md | 4 ++
.../application.yaml | 50 +++++++++++----
.../mysql-cluster/gaas-application.conf | 73 +++++++++++++++++++++
.../mysql-cluster/kustomization.yaml | 17 +++++
.../mysql-cluster/mysql-deployment.yaml | 55 ++++++++++++++++
.../gobblin-service/mysql-cluster/mysql-pv.yaml | 26 ++++++++
.../mysql-cluster/standalone-application.conf | 75 ++++++++++++++++++++++
12 files changed, 294 insertions(+), 15 deletions(-)
diff --git a/gobblin-docker/gobblin-service/alpine-gaas-latest/entrypoint.sh b/gobblin-docker/gobblin-service/alpine-gaas-latest/entrypoint.sh
index 86960cc..8bbd2c8 100755
--- a/gobblin-docker/gobblin-service/alpine-gaas-latest/entrypoint.sh
+++ b/gobblin-docker/gobblin-service/alpine-gaas-latest/entrypoint.sh
@@ -17,5 +17,4 @@
#
GOBBLIN_HOME="$(cd `dirname $0`/..; pwd)"
-./bin/gobblin.sh service gobblin-as-service start --log-to-stdout $@
-
+./bin/gobblin.sh service gobblin-as-service start --log-to-stdout "$@"
diff --git a/gobblin-kubernetes/gobblin-service/base-cluster/README.md b/gobblin-kubernetes/gobblin-service/base-cluster/README.md
new file mode 100644
index 0000000..5f0b544
--- /dev/null
+++ b/gobblin-kubernetes/gobblin-service/base-cluster/README.md
@@ -0,0 +1,4 @@
+# Kubernetes Cluster For GaaS
+
+This folder includes the files needed to create and run a GaaS instance, using the FS as the communication mechanism between GaaS and Gobblin Standalone.
+
diff --git a/gobblin-kubernetes/gobblin-service/basic-cluster/application.yaml b/gobblin-kubernetes/gobblin-service/base-cluster/application.yaml
similarity index 97%
copy from gobblin-kubernetes/gobblin-service/basic-cluster/application.yaml
copy to gobblin-kubernetes/gobblin-service/base-cluster/application.yaml
index 65ed8c1..c50a4b7 100644
--- a/gobblin-kubernetes/gobblin-service/basic-cluster/application.yaml
+++ b/gobblin-kubernetes/gobblin-service/base-cluster/application.yaml
@@ -30,7 +30,7 @@ spec:
image: will97/gobblin-as-a-service:latest
volumeMounts:
- name: shared-jobs
- mountPath: /tmp/gobblin-service/jobs
+ mountPath: /tmp/gobblin-as-service/jobs
- name: shared-template-catalogs
mountPath: /tmp/templateCatalog
diff --git a/gobblin-kubernetes/gobblin-service/basic-cluster/ingress.yaml b/gobblin-kubernetes/gobblin-service/base-cluster/ingress.yaml
similarity index 100%
rename from gobblin-kubernetes/gobblin-service/basic-cluster/ingress.yaml
rename to gobblin-kubernetes/gobblin-service/base-cluster/ingress.yaml
diff --git a/gobblin-kubernetes/gobblin-service/basic-cluster/storage.yaml b/gobblin-kubernetes/gobblin-service/base-cluster/storage.yaml
similarity index 100%
rename from gobblin-kubernetes/gobblin-service/basic-cluster/storage.yaml
rename to gobblin-kubernetes/gobblin-service/base-cluster/storage.yaml
diff --git a/gobblin-kubernetes/gobblin-service/mysql-cluster/README.md b/gobblin-kubernetes/gobblin-service/mysql-cluster/README.md
new file mode 100644
index 0000000..137970f
--- /dev/null
+++ b/gobblin-kubernetes/gobblin-service/mysql-cluster/README.md
@@ -0,0 +1,4 @@
+# MySQL K8s Cluster
+
+- Adds MySQL as the SpecStore
+- Adds configuration files to be loaded as configMaps, allowing configuration changes to GaaS without rebuilding a new image.
diff --git a/gobblin-kubernetes/gobblin-service/basic-cluster/application.yaml b/gobblin-kubernetes/gobblin-service/mysql-cluster/application.yaml
similarity index 50%
rename from gobblin-kubernetes/gobblin-service/basic-cluster/application.yaml
rename to gobblin-kubernetes/gobblin-service/mysql-cluster/application.yaml
index 65ed8c1..20a3226 100644
--- a/gobblin-kubernetes/gobblin-service/basic-cluster/application.yaml
+++ b/gobblin-kubernetes/gobblin-service/mysql-cluster/application.yaml
@@ -19,26 +19,46 @@ spec:
app: gaas
spec:
volumes:
- - name: 'shared-jobs'
+ - name: shared-jobs
persistentVolumeClaim:
claimName: shared-jobs-claim
- - name: 'shared-template-catalogs'
- persistentVolumeClaim:
- claimName: shared-template-catalogs-claim
+ - name: gaas-config
+ configMap:
+ name: gaas-config
containers:
- name: gobblin-service
image: will97/gobblin-as-a-service:latest
+ command: ["./bin/entrypoint.sh"]
+ args: ["--jvmopts", "-DmysqlCredentials.user=$(MYSQL_USERNAME) -DmysqlCredentials.password=$(MYSQL_PASSWORD)"]
+ env:
+ - name: MYSQL_USERNAME
+ valueFrom:
+ secretKeyRef:
+ name: mysql-credentials
+ key: username
+ - name: MYSQL_PASSWORD
+ valueFrom:
+ secretKeyRef:
+ name: mysql-credentials
+ key: password
volumeMounts:
- name: shared-jobs
- mountPath: /tmp/gobblin-service/jobs
- - name: shared-template-catalogs
- mountPath: /tmp/templateCatalog
+ mountPath: /tmp/gobblin-as-service/jobs
+ - name: gaas-config
+ mountPath: /home/gobblin/conf/gobblin-as-service/application.conf
+ subPath: gaas-application.conf
+ # dependency on mysql to be initialized before gaas can be initialized
+ initContainers:
+ - name: init-mysql
+ image: busybox:1.28
+ command: ["sh", "-c", "until nslookup mysql; do echo waiting for mysql; sleep 2; done;"]
+
---
apiVersion: apps/v1
kind: Deployment
metadata:
- name: 'gobblin-standalone-deployment'
+ name: gobblin-standalone-deployment
labels:
app: gobblin-standalone-deployment
spec:
@@ -53,15 +73,21 @@ spec:
app: gobblin-standalone
spec:
volumes:
- - name: 'shared-jobs'
+ - name: shared-jobs
persistentVolumeClaim:
claimName: shared-jobs-claim
+ - name: standalone-config
+ configMap:
+ name: standalone-config
containers:
- name: gobblin-standalone
image: will97/gobblin-standalone:latest
volumeMounts:
- name: shared-jobs
mountPath: /tmp/gobblin-standalone/jobs
+ - name: standalone-config
+ mountPath: /home/gobblin/conf/standalone/application.conf
+ subPath: standalone-application.conf
---
apiVersion: v1
kind: Service
@@ -70,10 +96,10 @@ metadata:
labels:
app: gobblin-service
spec:
- type: ClusterIP
+ type: NodePort
ports:
- - protocol: TCP
- port: 6956
+ - port: 6956
+ protocol: TCP
targetPort: 6956
selector:
app: gaas
diff --git a/gobblin-kubernetes/gobblin-service/mysql-cluster/gaas-application.conf b/gobblin-kubernetes/gobblin-service/mysql-cluster/gaas-application.conf
new file mode 100644
index 0000000..b2f9366
--- /dev/null
+++ b/gobblin-kubernetes/gobblin-service/mysql-cluster/gaas-application.conf
@@ -0,0 +1,73 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Sample configuration properties for the Gobblin Service
+
+# Topology Catalog and Store
+gobblin.service.work.dir=/tmp/gobblin-as-service
+
+# TopologySpec Factory
+topologySpec.store.dir=${gobblin.service.work.dir}/topologySpecStore
+topologySpecFactory.topologyNames=localGobblinCluster
+topologySpecFactory.localGobblinCluster.description="StandaloneClusterTopology"
+topologySpecFactory.localGobblinCluster.version="1"
+topologySpecFactory.localGobblinCluster.uri="gobblinCluster"
+topologySpecFactory.localGobblinCluster.specExecutorInstance.class="org.apache.gobblin.runtime.spec_executorInstance.LocalFsSpecExecutor"
+topologySpecFactory.localGobblinCluster.specExecInstance.capabilities="source:dest"
+topologySpecFactory.localGobblinCluster.gobblin.cluster.localSpecProducer.dir=${gobblin.service.work.dir}/jobs
+
+# Flow Catalog and Store
+flowSpec.store.dir=${gobblin.service.work.dir}/flowSpecStore
+
+# Template Catalog
+gobblin.service.templateCatalogs.fullyQualifiedPath="file://"
+
+# JobStatusMonitor
+gobblin.service.jobStatusMonitor.enabled=false
+
+# FsJobStatusRetriever
+fsJobStatusRetriever.state.store.dir=${gobblin.service.work.dir}/state-store
+
+# DagManager
+gobblin.service.dagManager.enabled=true
+gobblin.service.dagManager.jobStatusRetriever.class="org.apache.gobblin.service.monitoring.FsJobStatusRetriever"
+gobblin.service.dagManager.dagStateStoreClass="org.apache.gobblin.service.modules.orchestration.FSDagStateStore"
+gobblin.service.dagManager.dagStateStoreDir=${gobblin.service.work.dir}/dagStateStoreDir
+
+# RestLI
+gobblin.service.port=6956
+
+# MySQL State Store
+flowSpec.store.class="org.apache.gobblin.runtime.spec_store.MysqlSpecStore"
+flowSpec.serde.class="org.apache.gobblin.runtime.spec_serde.GsonFlowSpecSerDe"
+state.store.factory.class="org.apache.gobblin.metastore.MysqlJobStatusStateStoreFactory"
+
+mysqlSpecStore.state.store.db.table="flow_spec_store"
+
+# Assuming default namespace. URL of the service takes the form of <service>.<namespace>.svc.cluster.local, see https://github.com/kubernetes/dns/blob/master/docs/specification.md
+mysqlSpecStore.state.store.db.url="jdbc:mysql://mysql.default.svc.cluster.local:3306/gaas_db"
+mysqlSpecStore.state.store.db.user=${mysqlCredentials.user}
+mysqlSpecStore.state.store.db.password=${mysqlCredentials.password}
+
+# MySQL Job Status Retriever
+jobStatusRetriever.class="org.apache.gobblin.service.monitoring.MysqlJobStatusRetriever"
+mysqlJobStatusRetriever.state.store.db.table="gaas_job_status"
+
+# Assuming default namespace. URL of the service takes the form of <service>.<namespace>.svc.cluster.local
+mysqlJobStatusRetriever.state.store.db.url="jdbc:mysql://mysql.default.svc.cluster.local:3306/gaas_db"
+mysqlJobStatusRetriever.state.store.db.user=${mysqlCredentials.user}
+mysqlJobStatusRetriever.state.store.db.password=${mysqlCredentials.password}
diff --git a/gobblin-kubernetes/gobblin-service/mysql-cluster/kustomization.yaml b/gobblin-kubernetes/gobblin-service/mysql-cluster/kustomization.yaml
new file mode 100644
index 0000000..9899123
--- /dev/null
+++ b/gobblin-kubernetes/gobblin-service/mysql-cluster/kustomization.yaml
@@ -0,0 +1,17 @@
+resources:
+ - application.yaml
+ - mysql-deployment.yaml
+ - mysql-pv.yaml
+configMapGenerator:
+ - name: gaas-config
+ files:
+ - ./gaas-application.conf
+ - name: standalone-config
+ files:
+ - ./standalone-application.conf
+secretGenerator:
+ # this should be replaced with references to files/vars stored securely
+ - name: mysql-credentials
+ literals:
+ - username=default-user
+ - password=default-password
diff --git a/gobblin-kubernetes/gobblin-service/mysql-cluster/mysql-deployment.yaml b/gobblin-kubernetes/gobblin-service/mysql-cluster/mysql-deployment.yaml
new file mode 100644
index 0000000..a949979
--- /dev/null
+++ b/gobblin-kubernetes/gobblin-service/mysql-cluster/mysql-deployment.yaml
@@ -0,0 +1,55 @@
+apiVersion: v1
+kind: Service
+metadata:
+ name: mysql
+spec:
+ ports:
+ - protocol: TCP
+ port: 3306
+ targetPort: 3306
+ selector:
+ app: mysql
+---
+apiVersion: apps/v1 # for versions before 1.9.0 use apps/v1beta2
+kind: Deployment
+metadata:
+ name: mysql
+spec:
+ selector:
+ matchLabels:
+ app: mysql
+ strategy:
+ type: Recreate
+ template:
+ metadata:
+ labels:
+ app: mysql
+ spec:
+ volumes:
+ - name: mysql-persistent-storage
+ persistentVolumeClaim:
+ claimName: mysql-pv-claim
+ containers:
+ - image: mysql:5.6
+ name: mysql
+ env:
+ - name: MYSQL_RANDOM_ROOT_PASSWORD
+ value: "yes"
+ - name: MYSQL_DATABASE
+ value: "gaas_db"
+ - name: MYSQL_USER
+ valueFrom:
+ secretKeyRef:
+ name: mysql-credentials
+ key: username
+ - name: MYSQL_PASSWORD
+ valueFrom:
+ secretKeyRef:
+ name: mysql-credentials
+ key: password
+ ports:
+ - containerPort: 3306
+ name: mysql
+ volumeMounts:
+ - name: mysql-persistent-storage
+ mountPath: /var/lib/mysql
diff --git a/gobblin-kubernetes/gobblin-service/mysql-cluster/mysql-pv.yaml b/gobblin-kubernetes/gobblin-service/mysql-cluster/mysql-pv.yaml
new file mode 100644
index 0000000..77d58d9
--- /dev/null
+++ b/gobblin-kubernetes/gobblin-service/mysql-cluster/mysql-pv.yaml
@@ -0,0 +1,26 @@
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+ name: mysql-pv-volume
+ labels:
+ type: local
+spec:
+ storageClassName: manual
+ capacity:
+ storage: 1Gi
+ accessModes:
+ - ReadWriteOnce
+ hostPath:
+ path: "/mnt/data"
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+ name: mysql-pv-claim
+spec:
+ storageClassName: manual
+ accessModes:
+ - ReadWriteOnce
+ resources:
+ requests:
+ storage: 1Gi
diff --git a/gobblin-kubernetes/gobblin-service/mysql-cluster/standalone-application.conf b/gobblin-kubernetes/gobblin-service/mysql-cluster/standalone-application.conf
new file mode 100644
index 0000000..77e1182
--- /dev/null
+++ b/gobblin-kubernetes/gobblin-service/mysql-cluster/standalone-application.conf
@@ -0,0 +1,75 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Thread pool settings for the task executor
+taskexecutor.threadpool.size=2
+taskretry.threadpool.coresize=1
+taskretry.threadpool.maxsize=2
+
+# File system URIs
+fs.uri=file:///
+writer.fs.uri=${fs.uri}
+state.store.fs.uri=${fs.uri}
+
+# Writer related configuration properties
+writer.output.format=AVRO
+writer.staging.dir=${env:GOBBLIN_WORK_DIR}/task-staging
+writer.output.dir=${env:GOBBLIN_WORK_DIR}/task-output
+
+# Data publisher related configuration properties
+data.publisher.type=org.apache.gobblin.publisher.BaseDataPublisher
+data.publisher.final.dir=${env:GOBBLIN_WORK_DIR}/job-output
+data.publisher.replace.final.dir=false
+
+# Directory where job configuration files are stored
+jobconf.dir=${env:GOBBLIN_JOB_CONFIG_DIR}
+jobconf.fullyQualifiedPath=file://${env:GOBBLIN_JOB_CONFIG_DIR}
+
+# Directory where job/task state files are stored
+state.store.dir=${env:GOBBLIN_WORK_DIR}/state-store
+
+# Directory where commit sequences are stored
+gobblin.runtime.commit.sequence.store.dir=${env:GOBBLIN_WORK_DIR}/commit-sequence-store
+
+# Directory where error files from the quality checkers are stored
+qualitychecker.row.err.file=${env:GOBBLIN_WORK_DIR}/err
+
+# Directory where job locks are stored
+job.lock.dir=${env:GOBBLIN_WORK_DIR}/locks
+
+# Directory where metrics log files are stored
+metrics.log.dir=${env:GOBBLIN_WORK_DIR}/metrics
+
+# Enable metrics / events
+metrics.enabled=true
+
+# UI
+#admin.server.enabled=false
+admin.server.enabled=true
+admin.server.port=9000
+
+rest.server.host=localhost
+rest.server.port=9090
+
+# job history store ( WARN [GobblinYarnAppLauncher] NOT starting the admin UI because the job execution info server is NOT enabled )
+job.execinfo.server.enabled=false
+job.history.store.enabled=false
+task.status.reportintervalinms=5000
+
+# The time gap for Job Detector to detect modification/deletion/creation of jobconfig.
+# Unit in milliseconds, configurable.
+jobconf.monitor.interval=30000