You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@gobblin.apache.org by su...@apache.org on 2019/11/07 18:24:09 UTC

[incubator-gobblin] branch master updated: [GOBBLIN-913] Add MySQL and configurations to cluster

This is an automated email from the ASF dual-hosted git repository.

suvasude pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-gobblin.git


The following commit(s) were added to refs/heads/master by this push:
     new e785b02  [GOBBLIN-913] Add MySQL and configurations to cluster
e785b02 is described below

commit e785b02d9fd59dbeb4282387d1956778df4f6fca
Author: William Lo <wl...@linkedin.com>
AuthorDate: Thu Nov 7 10:24:02 2019 -0800

    [GOBBLIN-913] Add MySQL and configurations to cluster
    
    Closes #2781 from Will-Lo/mysql-k8s-init
---
 .../alpine-gaas-latest/entrypoint.sh               |  3 +-
 .../gobblin-service/base-cluster/README.md         |  4 ++
 .../application.yaml                               |  2 +-
 .../{basic-cluster => base-cluster}/ingress.yaml   |  0
 .../{basic-cluster => base-cluster}/storage.yaml   |  0
 .../gobblin-service/mysql-cluster/README.md        |  4 ++
 .../application.yaml                               | 50 +++++++++++----
 .../mysql-cluster/gaas-application.conf            | 73 +++++++++++++++++++++
 .../mysql-cluster/kustomization.yaml               | 17 +++++
 .../mysql-cluster/mysql-deployment.yaml            | 55 ++++++++++++++++
 .../gobblin-service/mysql-cluster/mysql-pv.yaml    | 26 ++++++++
 .../mysql-cluster/standalone-application.conf      | 75 ++++++++++++++++++++++
 12 files changed, 294 insertions(+), 15 deletions(-)

diff --git a/gobblin-docker/gobblin-service/alpine-gaas-latest/entrypoint.sh b/gobblin-docker/gobblin-service/alpine-gaas-latest/entrypoint.sh
index 86960cc..8bbd2c8 100755
--- a/gobblin-docker/gobblin-service/alpine-gaas-latest/entrypoint.sh
+++ b/gobblin-docker/gobblin-service/alpine-gaas-latest/entrypoint.sh
@@ -17,5 +17,4 @@
 #
 GOBBLIN_HOME="$(cd `dirname $0`/..; pwd)"
 
-./bin/gobblin.sh service gobblin-as-service start --log-to-stdout $@
-
+./bin/gobblin.sh service gobblin-as-service start --log-to-stdout "$@"
diff --git a/gobblin-kubernetes/gobblin-service/base-cluster/README.md b/gobblin-kubernetes/gobblin-service/base-cluster/README.md
new file mode 100644
index 0000000..5f0b544
--- /dev/null
+++ b/gobblin-kubernetes/gobblin-service/base-cluster/README.md
@@ -0,0 +1,4 @@
+# Kubernetes Cluster For GaaS
+
+This folder includes the files needed to create and run a GaaS instance, using the FS as the communication mechanism between GaaS and Gobblin Standalone.
+
diff --git a/gobblin-kubernetes/gobblin-service/basic-cluster/application.yaml b/gobblin-kubernetes/gobblin-service/base-cluster/application.yaml
similarity index 97%
copy from gobblin-kubernetes/gobblin-service/basic-cluster/application.yaml
copy to gobblin-kubernetes/gobblin-service/base-cluster/application.yaml
index 65ed8c1..c50a4b7 100644
--- a/gobblin-kubernetes/gobblin-service/basic-cluster/application.yaml
+++ b/gobblin-kubernetes/gobblin-service/base-cluster/application.yaml
@@ -30,7 +30,7 @@ spec:
           image: will97/gobblin-as-a-service:latest
           volumeMounts:
             - name: shared-jobs
-              mountPath: /tmp/gobblin-service/jobs
+              mountPath: /tmp/gobblin-as-service/jobs
             - name: shared-template-catalogs
               mountPath: /tmp/templateCatalog
 
diff --git a/gobblin-kubernetes/gobblin-service/basic-cluster/ingress.yaml b/gobblin-kubernetes/gobblin-service/base-cluster/ingress.yaml
similarity index 100%
rename from gobblin-kubernetes/gobblin-service/basic-cluster/ingress.yaml
rename to gobblin-kubernetes/gobblin-service/base-cluster/ingress.yaml
diff --git a/gobblin-kubernetes/gobblin-service/basic-cluster/storage.yaml b/gobblin-kubernetes/gobblin-service/base-cluster/storage.yaml
similarity index 100%
rename from gobblin-kubernetes/gobblin-service/basic-cluster/storage.yaml
rename to gobblin-kubernetes/gobblin-service/base-cluster/storage.yaml
diff --git a/gobblin-kubernetes/gobblin-service/mysql-cluster/README.md b/gobblin-kubernetes/gobblin-service/mysql-cluster/README.md
new file mode 100644
index 0000000..137970f
--- /dev/null
+++ b/gobblin-kubernetes/gobblin-service/mysql-cluster/README.md
@@ -0,0 +1,4 @@
+# MySQL K8s Cluster
+
+- Adds MySQL as the SpecStore
+- Adds configuration files to be loaded as configMaps, allowing configuration changes to GaaS without rebuilding a new image.
diff --git a/gobblin-kubernetes/gobblin-service/basic-cluster/application.yaml b/gobblin-kubernetes/gobblin-service/mysql-cluster/application.yaml
similarity index 50%
rename from gobblin-kubernetes/gobblin-service/basic-cluster/application.yaml
rename to gobblin-kubernetes/gobblin-service/mysql-cluster/application.yaml
index 65ed8c1..20a3226 100644
--- a/gobblin-kubernetes/gobblin-service/basic-cluster/application.yaml
+++ b/gobblin-kubernetes/gobblin-service/mysql-cluster/application.yaml
@@ -19,26 +19,46 @@ spec:
         app: gaas
     spec:
       volumes:
-        - name: 'shared-jobs'
+        - name: shared-jobs
           persistentVolumeClaim:
             claimName: shared-jobs-claim
-        - name: 'shared-template-catalogs'
-          persistentVolumeClaim:
-            claimName: shared-template-catalogs-claim
+        - name: gaas-config
+          configMap:
+            name: gaas-config
       containers:
         - name: gobblin-service
           image: will97/gobblin-as-a-service:latest
+          command: ["./bin/entrypoint.sh"]
+          args: ["--jvmopts", "-DmysqlCredentials.user=$(MYSQL_USERNAME) -DmysqlCredentials.password=$(MYSQL_PASSWORD)"]
+          env:
+            - name: MYSQL_USERNAME
+              valueFrom:
+                secretKeyRef:
+                  name: mysql-credentials
+                  key: username
+            - name: MYSQL_PASSWORD
+              valueFrom:
+                secretKeyRef:
+                  name: mysql-credentials
+                  key: password
           volumeMounts:
             - name: shared-jobs
-              mountPath: /tmp/gobblin-service/jobs
-            - name: shared-template-catalogs
-              mountPath: /tmp/templateCatalog
+              mountPath: /tmp/gobblin-as-service/jobs
+            - name: gaas-config
+              mountPath: /home/gobblin/conf/gobblin-as-service/application.conf
+              subPath: gaas-application.conf
+      # dependency on mysql to be initialized before gaas can be initialized
+      initContainers:
+        - name: init-mysql
+          image: busybox:1.28
+          command: ["sh", "-c", "until nslookup mysql; do echo waiting for mysql; sleep 2; done;"]
+
 
 ---
 apiVersion: apps/v1
 kind: Deployment
 metadata:
-  name: 'gobblin-standalone-deployment'
+  name: gobblin-standalone-deployment
   labels:
     app: gobblin-standalone-deployment
 spec:
@@ -53,15 +73,21 @@ spec:
         app: gobblin-standalone
     spec:
       volumes:
-        - name: 'shared-jobs'
+        - name: shared-jobs
           persistentVolumeClaim:
             claimName: shared-jobs-claim
+        - name: standalone-config
+          configMap:
+            name: standalone-config
       containers:
         - name: gobblin-standalone
           image: will97/gobblin-standalone:latest
           volumeMounts:
             - name: shared-jobs
               mountPath: /tmp/gobblin-standalone/jobs
+            - name: standalone-config
+              mountPath: /home/gobblin/conf/standalone/application.conf
+              subPath: standalone-application.conf
 ---
 apiVersion: v1
 kind: Service
@@ -70,10 +96,10 @@ metadata:
   labels:
     app: gobblin-service
 spec:
-  type: ClusterIP
+  type: NodePort
   ports:
-    - protocol: TCP
-      port: 6956
+    - port: 6956
+      protocol: TCP
       targetPort: 6956
   selector:
     app: gaas
diff --git a/gobblin-kubernetes/gobblin-service/mysql-cluster/gaas-application.conf b/gobblin-kubernetes/gobblin-service/mysql-cluster/gaas-application.conf
new file mode 100644
index 0000000..b2f9366
--- /dev/null
+++ b/gobblin-kubernetes/gobblin-service/mysql-cluster/gaas-application.conf
@@ -0,0 +1,73 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Sample configuration properties for the Gobblin Service
+
+# Topology Catalog and Store
+gobblin.service.work.dir=/tmp/gobblin-as-service
+
+# TopologySpec Factory
+topologySpec.store.dir=${gobblin.service.work.dir}/topologySpecStore
+topologySpecFactory.topologyNames=localGobblinCluster
+topologySpecFactory.localGobblinCluster.description="StandaloneClusterTopology"
+topologySpecFactory.localGobblinCluster.version="1"
+topologySpecFactory.localGobblinCluster.uri="gobblinCluster"
+topologySpecFactory.localGobblinCluster.specExecutorInstance.class="org.apache.gobblin.runtime.spec_executorInstance.LocalFsSpecExecutor"
+topologySpecFactory.localGobblinCluster.specExecInstance.capabilities="source:dest"
+topologySpecFactory.localGobblinCluster.gobblin.cluster.localSpecProducer.dir=${gobblin.service.work.dir}/jobs
+
+# Flow Catalog and Store
+flowSpec.store.dir=${gobblin.service.work.dir}/flowSpecStore
+
+# Template Catalog
+gobblin.service.templateCatalogs.fullyQualifiedPath="file://"
+
+# JobStatusMonitor
+gobblin.service.jobStatusMonitor.enabled=false
+
+# FsJobStatusRetriever
+fsJobStatusRetriever.state.store.dir=${gobblin.service.work.dir}/state-store
+
+# DagManager
+gobblin.service.dagManager.enabled=true
+gobblin.service.dagManager.jobStatusRetriever.class="org.apache.gobblin.service.monitoring.FsJobStatusRetriever"
+gobblin.service.dagManager.dagStateStoreClass="org.apache.gobblin.service.modules.orchestration.FSDagStateStore"
+gobblin.service.dagManager.dagStateStoreDir=${gobblin.service.work.dir}/dagStateStoreDir
+
+# RestLI
+gobblin.service.port=6956
+
+# MySQL State Store
+flowSpec.store.class="org.apache.gobblin.runtime.spec_store.MysqlSpecStore"
+flowSpec.serde.class="org.apache.gobblin.runtime.spec_serde.GsonFlowSpecSerDe"
+state.store.factory.class="org.apache.gobblin.metastore.MysqlJobStatusStateStoreFactory"
+
+mysqlSpecStore.state.store.db.table="flow_spec_store"
+
+# Assuming default namespace. URL of the service takes the form of <service>.<namespace>.svc.cluster.local, see https://github.com/kubernetes/dns/blob/master/docs/specification.md
+mysqlSpecStore.state.store.db.url="jdbc:mysql://mysql.default.svc.cluster.local:3306/gaas_db"
+mysqlSpecStore.state.store.db.user=${mysqlCredentials.user}
+mysqlSpecStore.state.store.db.password=${mysqlCredentials.password}
+
+# MySQL Job Status Retriever
+jobStatusRetriever.class="org.apache.gobblin.service.monitoring.MysqlJobStatusRetriever"
+mysqlJobStatusRetriever.state.store.db.table="gaas_job_status"
+
+# Assuming default namespace. URL of the service takes the form of <service>.<namespace>.svc.cluster.local
+mysqlJobStatusRetriever.state.store.db.url="jdbc:mysql://mysql.default.svc.cluster.local:3306/gaas_db"
+mysqlJobStatusRetriever.state.store.db.user=${mysqlCredentials.user}
+mysqlJobStatusRetriever.state.store.db.password=${mysqlCredentials.password}
diff --git a/gobblin-kubernetes/gobblin-service/mysql-cluster/kustomization.yaml b/gobblin-kubernetes/gobblin-service/mysql-cluster/kustomization.yaml
new file mode 100644
index 0000000..9899123
--- /dev/null
+++ b/gobblin-kubernetes/gobblin-service/mysql-cluster/kustomization.yaml
@@ -0,0 +1,17 @@
+resources:
+  - application.yaml
+  - mysql-deployment.yaml
+  - mysql-pv.yaml
+configMapGenerator:
+  - name: gaas-config
+    files:
+      - ./gaas-application.conf
+  - name: standalone-config
+    files:
+      - ./standalone-application.conf
+secretGenerator:
+  # this should be replaced with references to files/vars stored securely
+  - name: mysql-credentials
+    literals:
+      - username=default-user
+      - password=default-password
diff --git a/gobblin-kubernetes/gobblin-service/mysql-cluster/mysql-deployment.yaml b/gobblin-kubernetes/gobblin-service/mysql-cluster/mysql-deployment.yaml
new file mode 100644
index 0000000..a949979
--- /dev/null
+++ b/gobblin-kubernetes/gobblin-service/mysql-cluster/mysql-deployment.yaml
@@ -0,0 +1,55 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: mysql
+spec:
+  ports:
+    - protocol: TCP
+      port: 3306
+      targetPort: 3306
+  selector:
+    app: mysql
+---
+apiVersion: apps/v1 # for versions before 1.9.0 use apps/v1beta2
+kind: Deployment
+metadata:
+  name: mysql
+spec:
+  selector:
+    matchLabels:
+      app: mysql
+  strategy:
+    type: Recreate
+  template:
+    metadata:
+      labels:
+        app: mysql
+    spec:
+      volumes:
+        - name: mysql-persistent-storage
+          persistentVolumeClaim:
+            claimName: mysql-pv-claim
+      containers:
+        - image: mysql:5.6
+          name: mysql
+          env:
+          - name: MYSQL_RANDOM_ROOT_PASSWORD
+            value: "yes"
+          - name: MYSQL_DATABASE
+            value: "gaas_db"
+          - name: MYSQL_USER
+            valueFrom:
+              secretKeyRef:
+                name: mysql-credentials
+                key: username
+          - name: MYSQL_PASSWORD
+            valueFrom:
+              secretKeyRef:
+                name: mysql-credentials
+                key: password
+          ports:
+            - containerPort: 3306
+              name: mysql
+          volumeMounts:
+            - name: mysql-persistent-storage
+              mountPath: /var/lib/mysql
diff --git a/gobblin-kubernetes/gobblin-service/mysql-cluster/mysql-pv.yaml b/gobblin-kubernetes/gobblin-service/mysql-cluster/mysql-pv.yaml
new file mode 100644
index 0000000..77d58d9
--- /dev/null
+++ b/gobblin-kubernetes/gobblin-service/mysql-cluster/mysql-pv.yaml
@@ -0,0 +1,26 @@
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+  name: mysql-pv-volume
+  labels:
+    type: local
+spec:
+  storageClassName: manual
+  capacity:
+    storage: 1Gi
+  accessModes:
+    - ReadWriteOnce
+  hostPath:
+    path: "/mnt/data"
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: mysql-pv-claim
+spec:
+  storageClassName: manual
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: 1Gi
diff --git a/gobblin-kubernetes/gobblin-service/mysql-cluster/standalone-application.conf b/gobblin-kubernetes/gobblin-service/mysql-cluster/standalone-application.conf
new file mode 100644
index 0000000..77e1182
--- /dev/null
+++ b/gobblin-kubernetes/gobblin-service/mysql-cluster/standalone-application.conf
@@ -0,0 +1,75 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Thread pool settings for the task executor
+taskexecutor.threadpool.size=2
+taskretry.threadpool.coresize=1
+taskretry.threadpool.maxsize=2
+
+# File system URIs
+fs.uri=file:///
+writer.fs.uri=${fs.uri}
+state.store.fs.uri=${fs.uri}
+
+# Writer related configuration properties
+writer.output.format=AVRO
+writer.staging.dir=${env:GOBBLIN_WORK_DIR}/task-staging
+writer.output.dir=${env:GOBBLIN_WORK_DIR}/task-output
+
+# Data publisher related configuration properties
+data.publisher.type=org.apache.gobblin.publisher.BaseDataPublisher
+data.publisher.final.dir=${env:GOBBLIN_WORK_DIR}/job-output
+data.publisher.replace.final.dir=false
+
+# Directory where job configuration files are stored
+jobconf.dir=${env:GOBBLIN_JOB_CONFIG_DIR}
+jobconf.fullyQualifiedPath=file://${env:GOBBLIN_JOB_CONFIG_DIR}
+
+# Directory where job/task state files are stored
+state.store.dir=${env:GOBBLIN_WORK_DIR}/state-store
+
+# Directory where commit sequences are stored
+gobblin.runtime.commit.sequence.store.dir=${env:GOBBLIN_WORK_DIR}/commit-sequence-store
+
+# Directory where error files from the quality checkers are stored
+qualitychecker.row.err.file=${env:GOBBLIN_WORK_DIR}/err
+
+# Directory where job locks are stored
+job.lock.dir=${env:GOBBLIN_WORK_DIR}/locks
+
+# Directory where metrics log files are stored
+metrics.log.dir=${env:GOBBLIN_WORK_DIR}/metrics
+
+# Enable metrics / events
+metrics.enabled=true
+
+# UI
+#admin.server.enabled=false
+admin.server.enabled=true
+admin.server.port=9000
+
+rest.server.host=localhost
+rest.server.port=9090
+
+# job history store ( WARN [GobblinYarnAppLauncher] NOT starting the admin UI because the job execution info server is NOT enabled )
+job.execinfo.server.enabled=false
+job.history.store.enabled=false
+task.status.reportintervalinms=5000
+
+# The time gap for Job Detector to detect modification/deletion/creation of jobconfig.
+# Unit in milliseconds, configurable.
+jobconf.monitor.interval=30000