Posted to commits@bigtop.apache.org by yw...@apache.org on 2019/10/14 01:32:06 UTC

[bigtop] branch cnb updated: BIGTOP-3230: Import the groundwork for CNB

This is an automated email from the ASF dual-hosted git repository.

ywkim pushed a commit to branch cnb
in repository https://gitbox.apache.org/repos/asf/bigtop.git


The following commit(s) were added to refs/heads/cnb by this push:
     new e76ae87  BIGTOP-3230: Import the groundwork for CNB
e76ae87 is described below

commit e76ae872de0c51f6ed405d34dcff7db1a7411339
Author: Youngwoo Kim <yw...@apache.org>
AuthorDate: Mon Oct 7 12:09:43 2019 +0900

    BIGTOP-3230: Import the groundwork for CNB
---
 POSTINSTALL_EXAMPLE.md                             |  26 ++
 README.md                                          | 354 +++++++--------------
 README.md => README.md.BAK                         |   0
 README_STORAGE.md                                  |  14 +
 data/a.txt                                         |   9 +
 data/b.txt                                         |  15 +
 get_helm.sh                                        | 245 ++++++++++++++
 hbase-helm/.helmignore                             |  21 ++
 hbase-helm/Chart.yaml                              |  13 +
 hbase-helm/README.md                               |   1 +
 hbase-helm/templates/NOTES.txt                     |  26 ++
 hbase-helm/templates/_helpers.yaml                 |  14 +
 hbase-helm/templates/hadoop-configmap.yaml         | 296 +++++++++++++++++
 hbase-helm/templates/hbase-configmap.yaml          |  67 ++++
 hbase-helm/templates/hbase-master-pdb.yaml         |  17 +
 hbase-helm/templates/hbase-master-statefulset.yaml |  59 ++++
 hbase-helm/templates/hbase-master-svc.yaml         |  26 ++
 hbase-helm/templates/hbase-rs-pdb.yaml             |  17 +
 hbase-helm/templates/hbase-rs-statefulset.yaml     |  59 ++++
 hbase-helm/templates/hbase-rs-svc.yaml             |  22 ++
 hbase-helm/templates/hdfs-dn-pdb.yaml              |  17 +
 hbase-helm/templates/hdfs-dn-pvc.yaml              |  25 ++
 hbase-helm/templates/hdfs-dn-statefulset.yaml      |  82 +++++
 hbase-helm/templates/hdfs-dn-svc.yaml              |  23 ++
 hbase-helm/templates/hdfs-nn-pdb.yaml              |  18 ++
 hbase-helm/templates/hdfs-nn-pvc.yaml              |  25 ++
 hbase-helm/templates/hdfs-nn-statefulset.yaml      |  82 +++++
 hbase-helm/templates/hdfs-nn-svc.yaml              |  23 ++
 hbase-helm/tools/calc_resources.sh                 |  75 +++++
 hbase-helm/values.yaml                             |  55 ++++
 kafka/README.md                                    |  25 ++
 kafka/kafka.yaml                                   |   7 +
 minio/object-store.yaml                            |  63 ++++
 minio/operator.yaml                                |  99 ++++++
 ms-spark/README.md                                 |  21 ++
 ms-spark/core-site.xml                             |  49 +++
 ms-spark/log4j.properties                          |  12 +
 ms-spark/spark-defaults.conf                       |   1 +
 ms-spark/spark-deployment.yaml                     |  94 ++++++
 ms-spark/spark-env.sh                              |   1 +
 nifi/README.md                                     |  21 ++
 nifi/volume.yaml                                   |  31 ++
 presto3-minio/README.md                            |  32 ++
 presto3-minio/coordinator/Dockerfile               |   6 +
 presto3-minio/coordinator/README.md                |   8 +
 presto3-minio/coordinator/config.properties        |   8 +
 presto3-minio/coordinator/json                     |   2 +
 presto3-minio/coordinator/minio.properties         |  14 +
 presto3-minio/presto-deployment-master.yaml        |  26 ++
 presto3-minio/presto-deployment-worker.yaml        |  26 ++
 presto3-minio/presto-deployment.yaml               |  23 ++
 presto3-minio/service.yaml                         |   9 +
 presto3-minio/test.sh                              |  19 ++
 presto3-minio/worker/Dockerfile                    |   5 +
 presto3-minio/worker/README.md                     |   6 +
 presto3-minio/worker/config.properties             |   7 +
 presto3-minio/worker/json                          |   2 +
 presto3-minio/worker/minio.properties              |  14 +
 storage-provisioner/README.md                      |  28 ++
 59 files changed, 2151 insertions(+), 234 deletions(-)

diff --git a/POSTINSTALL_EXAMPLE.md b/POSTINSTALL_EXAMPLE.md
new file mode 100755
index 0000000..67b7823
--- /dev/null
+++ b/POSTINSTALL_EXAMPLE.md
@@ -0,0 +1,26 @@
+
+
+# Example of what needs to be done...
+
+Minio can be accessed via port 9000 on the following DNS name from within your cluster:
+minio.bigdata.svc.cluster.local
+
+To access Minio from localhost, run the below commands:
+
+  1. export POD_NAME=$(kubectl get pods --namespace bigdata -l "release=minio" -o jsonpath="{.items[0].metadata.name}")
+
+  2. kubectl port-forward $POD_NAME 9000 --namespace bigdata
+
+Read more about port forwarding here: http://kubernetes.io/docs/user-guide/kubectl/kubectl_port-forward/
+
+You can now access Minio server on http://localhost:9000. Follow the below steps to connect to Minio server with mc client:
+
+  1. Download the Minio mc client - https://docs.minio.io/docs/minio-client-quickstart-guide
+
+  2. mc config host add minio-local http://localhost:9000 AKIAIOSFODNN7EXAMPLE wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY S3v4
+
+  3. mc ls minio-local
+
+Alternately, you can use your browser or the Minio SDK to access the server - https://docs.minio.io/categories/17
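+
+For example (a minimal sketch; the bucket name below is a placeholder), once the `minio-local` alias from step 2 exists you can create a bucket, upload one of the sample files from this repo, and list it back:
+
+```
+mc mb minio-local/example-bucket                # create a bucket
+mc cp data/a.txt minio-local/example-bucket/    # upload a sample file from this repo
+mc ls minio-local/example-bucket                # verify the upload
+```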
+
+
diff --git a/README.md b/README.md
old mode 100644
new mode 100755
index b0c516c..5bec287
--- a/README.md
+++ b/README.md
@@ -1,261 +1,147 @@
-[![Travis CI](https://img.shields.io/travis/apache/bigtop.svg?branch=master)](https://travis-ci.org/apache/bigtop)
+This is the content for the talk given by Jay Vyas and Sid Mani at ApacheCon 2019 in Las Vegas; you can watch it here: https://www.youtube.com/watch?v=LUCE63q
 
-Licensed to the Apache Software Foundation (ASF) under one or more
-contributor license agreements. See the NOTICE file distributed with
-this work for additional information regarding copyright ownership.
-The ASF licenses this file to You under the Apache License, Version 2.0
-(the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
+# TL;DR, here's how you create an analytics distro on K8s...
 
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-
-
-[Apache Bigtop](http://bigtop.apache.org/)
-==========================================
-
-...is a project for the development of packaging and tests of the [Apache Hadoop](http://hadoop.apache.org/) ecosystem.
-
-The primary goal of Apache Bigtop is to build a community around the packaging and interoperability testing of Apache Hadoop-related projects. This includes testing at various levels (packaging, platform, runtime, upgrade, etc...) developed by a community with a focus on the system as a whole, rather than individual projects.
-
-Immediately Get Started with Deployment and Smoke Testing of BigTop
-===================================================================
-
-The simplest way to get a feel for how bigtop works, is to just cd into `provisioner` and try out the recipes under vagrant or docker.  Each one rapidly spins up, and runs the bigtop smoke tests on, a local bigtop based big data distribution. Once you get the gist, you can hack around with the recipes to learn how the puppet/rpm/smoke-tests all work together, going deeper into the components you are interested in as described below.
-
-Quick overview of source code directories
-=========================================
-
-* __bigtop-deploy__ : deployment scripts and puppet stuff for Apache Bigtop.
-* __bigtop-packages__ : RPM/DEB specifications for Apache Bigtop subcomponents.
-* __bigtop-test-framework__ : The source code for the iTest utilities (framework used by smoke tests).
-* __bigtop-tests__ :
-* __test-artifacts__ : source for tests.
-* __test-execution__ : maven pom drivers for running the integration tests found in test-artifacts.
-* __bigtop-toolchain__ : puppet scripts for setting up an instance which can build Apache Bigtop, sets up utils like jdk/maven/protobufs/...
-* __provisioner__ : Vagrant and Docker Provisioner that automatically spin up Hadoop environment with one click.
-* __docker__ : Dockerfiles and Docker Sandbox build scripts.
-
-Also, there is a new project underway, Apache Bigtop blueprints, which aims to create templates/examples that demonstrate/compare various Apache Hadoop ecosystem components with one another.
-
-Contributing
-============
-
-There are lots of ways to contribute.  People with different expertise can help with various subprojects:
-
-* __puppet__ : Much of the Apache Bigtop deploy and packaging tools use puppet to bootstrap and set up a cluster. But recipes for other tools are also welcome (ie. Chef, Ansible, etc.)
-* __groovy__ : Primary language used to write the Apache Bigtop smokes and itest framework.
-* __maven__ : Used to build Apache Bigtop smokes and also to define the high level Apache Bigtop project.
-* __RPM/DEB__ : Used to package Apache Hadoop ecosystem related projects into GNU/Linux installable packages for most popular GNU/Linux distributions. So one could add a new project or improve existing packages.
-* __hadoop__ : Apache Hadoop users can also contribute by using the Apache Bigtop smokes, improving them, and evaluating their breadth.
-* __contributing your workloads__ : Contributing your workloads enable us to tests projects against real use cases and enable you to have people verifying the use cases you care about are always working.
-* __documentation__ : We are always in need of a better documentation!
-* __giving feedback__ : Tell us how you use Apache Bigtop, what was great and what was not so great. Also, what are you expecting from it and what would you like to see in the future?
-
-Also, opening [JIRA's](https://issues.apache.org/jira/browse/BIGTOP) and getting started by posting on the mailing list is helpful.
-
-What do people use Apache Bigtop for?
-==============================
-
-You can go to the [Apache Bigtop website](http://bigtop.apache.org/) for notes on how to do "common" tasks like:
-
-  * Apache Hadoop App developers: Download an Apache Bigtop built Apache Hadoop 2.0 VM from the website, so you can have a running psuedodistributed Apache Hadoop cluster to test your code on.
-  * Cluster administers or deployment gurus: Run the Apache Bigtop smoke tests to ensure that your cluster is working.
-  * Vendors: Build your own Apache Hadoop distribution, customized from Apache Bigtop bits.
-
-Getting Started
-===============
-
-Below are some recipes for getting started with using Apache Bigtop. As Apache Bigtop has different subprojects, these recipes will continue to evolve.
-For specific questions it's always a good idea to ping the mailing list at dev-subscribe@bigtop.apache.org to get some immediate feedback, or [open a JIRA](https://issues.apache.org/jira/browse/BIGTOP).
-
-For Users: Running the smoke tests
------------------------------------
-
-The simplest way to test bigtop is described in bigtop-tests/smoke-tests/README file
-
-For integration (API level) testing with maven, read on.
-
-For Users: Running the integration tests
------------------------------------------
-
-WARNING: since testing packages requires installing them on a live system it is highly recommended to use VMs for that. Testing Apache Bigtop is done using iTest framework. The tests are organized in maven submodules, with one submodule per Apache Bigtop component.  The bigtop-tests/test-execution/smokes/pom.xml defines all submodules to be tested, and each submodule is in its own directory under smokes/, for example:
-
-*smokes/hadoop/pom.xml*
-*smokes/hive/pom.xml*
-*... and so on.*
-
-* New way (with Gradle build in place)
-  * Step 1: install smoke tests for one or more components
-    * Example 1:
-
-        gradle installTestArtifacts
-
-    * Example 2: Installing just Hadoop-specific smoke tests
-
-        gradle install-hadoop
-
-  * Step 2: Run the the smoke tests on your cluster (see Step 3 and/or Step 4 below)
-
-  We are on the route of migrating subprojects under top-level gradle build. Currently
-  converted projects could be listed by running
-
-        gradle projects
-
-  To see the list of tasks in a subproject, ie itest-common, you can run
-
-        gradle itest-common:tasks
-
-* Old Way
-  * Step 1: Build the smokes with snapshots.  This ensures that all transitive dependencies etc.. are in your repo
-
-        mvn clean install -DskipTests -DskipITs -DperformRelease -f ./bigtop-test-framework/pom.xml
-        mvn clean install -DskipTests -DskipITs -DperformRelease -f ./test-artifacts/pom.xml
-
-  * Step 2: Now, rebuild in "offline" mode.  This will make sure that your local changes to bigtop are embeded in the changes.
-
-        mvn clean install -DskipTests -DskipITs -DperformRelease -o -nsu -f ./bigtop-test-framework/pom.xml
-        mvn clean install -DskipTests -DskipITs -DperformRelease -o -nsu -f ./bigtop-tests/test-artifacts/pom.xml
-
-  * Step 3: Now, you can run the smoke tests on your cluster.
-    * Example 1: Running all the smoke tests with TRACE level logging (shows std out from each mr job).
-
-            mvn clean verify -Dorg.apache.bigtop.itest.log4j.level=TRACE -f ./bigtop/bigtop-tests/test-execution/smokes/pom.xml
-
-    * Just running hadoop examples, nothing else.
-
-            mvn clean verify -D'org.apache.maven-failsafe-plugin.testInclude=**/*TestHadoopExamples*' -f bigtop-tests/test-execution/smokes/hadoop/pom.xml
-
-    Note: A minor bug/issue: you need the "testInclude" regular expression above, even if you don't want to customize the tests,
-    since existing test names don't follow the maven integration test naming convention of IT*, but instead, follow the surefire (unit test) convention of Test*.
-
-For Users: Creating Your Own Apache Hadoop Environment
------------------------------------------------
-
-Another common use case for Apache Bigtop is creating / setting up your own Apache Hadoop distribution.  
-For details on this, check out the bigtop-deploy/README.md file, which describes how to use the puppet repos
-to create and setup your VMs.
-You can also try out provisioner to quickly get the idea how it works.
-
-For Developers: Building the entire distribution from scratch
--------------------------------------------------------------
-
-Packages have been built for CentOS, Fedora, OpenSUSE, Ubuntu, and Debian. They can probably be built for other platforms as well. Some of the binary artifacts might be compatible with other closely related distributions.
-
-__On all systems, Building Apache Bigtop requires certain set of tools__
-
-  To bootstrap the development environment from scratch execute
-
-    ./gradlew toolchain
-
-  This build task expected Puppet to be installed; user has to have sudo permissions. The task will pull down and install
-  all development dependencies, frameworks and SDKs, required to build the stack on your platform.
-
-  To immediately set environment after running toolchain, run
-
-    . /etc/profile.d/bigtop.sh
-
-* __Building packages__ : `gradle [component-name]-pkg`
-
-  If -Dbuildwithdeps=true is set, the Gradle will follow the order of the build specified in
-  the "dependencies" section of bigtop.bom file. Otherwise just a single component will get build (original behavior).
-
-  To use an alternative definition of a stack composition (aka BOM), specify its
-  name with -Dbomfile=<filename> system property in the build time.
-
-  You can visualize all tasks dependencies by running `gradle tasks --all`
-* __Building local YUM/APT repositories__ : `gradle [yum|apt]`
-
-* __Recommended build environments__
-
-  Bigtop provides "development in the can" environments, using Docker containers.
-  These have the build tools set by the toolchain, as well as the user and build
-  environment configured and cached. All currently supported OSes could be pulled
-  from official Bigtop repository at https://hub.docker.com/r/bigtop/slaves/tags/
+```
+helm install stable/nfs-server-provisioner ; kubectl patch storageclass nfs -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"true"}}}'
+Minio:  kubectl -n minio create secret generic my-minio-secret --from-literal=accesskey=minio --from-literal=secretkey=minio123
+helm install --set existingSecret=my-minio-secret stable/minio --namespace=minio --name=minio
+Nifi: helm repo add cetic https://cetic.github.io/helm-charts ; helm install nifi --namespace=minio
+Kafka:  helm repo add incubator http://storage.googleapis.com/kubernetes-charts-incubator ; helm install --name my-kafka incubator/kafka ; kubectl edit statefulset kafka
+ envFrom:
+        - configMapRef:
+            name: kafka-cm
+Spark: kubectl create configmap spark-conf --from-file=core-site.xml --from-file=log4j.properties --from-file=spark-defaults.conf --from-file=spark-env.sh -n bigdata ; helm install microsoft/spark --version 1.0.0 --namespace=minio
+Presto: cd ./presto3-minio/ ; kubectl create -f - -n minio
 
-  To build a component (bigtop-groovy) for a particular OS (ubuntu-16.04) you can
-  run the following from a clone of Bigtop workspace (assuming your system has
-  Docker engine setup and working)
-  ```docker run --rm -u jenkins:jenkins -v `pwd`:/ws --workdir /ws bigtop/slaves:trunk-ubuntu-16.04
-  bash -l -c './gradlew allclean ; ./gradlew bigtop-groovy-pkg'```
+```
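+
+If the quick-install above goes through, a couple of kubectl checks (a minimal sketch; adjust the namespaces to wherever you installed each chart) confirm that the NFS StorageClass is now the default and that the workloads are coming up:
+
+```
+# The patched nfs StorageClass should show up as (default)
+kubectl get storageclass
+# Watch the workloads come up in the namespaces used above
+kubectl get pods -n minio
+kubectl get pods -n bigdata
+```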
 
-For Developers: Building and modifying the web site
----------------------------------------------------
+# Problem
 
-The website can be built by running `mvn site:site` from the root directory of the
-project.  The main page can be accessed from "project_root/target/site/index.html".
+Installing individual tools has been commoditized by containers and K8s.  The more important
+problems nowadays are the interoperation, learning curve, and integration of the different
+tools for different problems in the analytics space.
 
-The source for the website is located in "project_root/src/site/".
+Modern data scientists need 'batteries included' frameworks that can be used to model and
+address different types of analytics problems over time, which can replicate the integrated
+functionality of AWS, GCP, and so on.
 
+# Current Status
 
-For Developers: Building a component from Git repository
---------------------------------------------------------
+This repository currently integrates the installation of a full, batteries-included analytics
+stack for Kubernetes, including storage.
 
-To fetch source from a Git repository, there're two ways to achieve this:
-a). modify `bigtop.bom` and add JSON snippets to your component/package, or
-b). specify properties at command line
+```
+                       +----------------+
+                       |                |    XXX           XXX          XXXXXX
+                       |    NIFI        |XXXXX  XXX       XX  XXX     XXX    XX
+                       |                |         XX    XXX     XX    X       XX
+                       |                |          XXXXXX        XXXXXX        X
+                       +-----+----------+                                     X
++-------------+              |                                                X
+|             |              |                                                XXXXXX
+|    Kafka    |              |                                                      XXXX
+|             |              |                         +----------------+           XXXX
++-----+-------+              |                         |                |     XXXXXXX
+      |                      |                         |  Zeppelin      |    XXXXXX
+      |               +------v------+                  |                |    XXXXXX
+      +-------------->+             |                  |                |         X
+                      |    Zookeeper+-------+          +-----------+----+         X
+                      |             |       |                      |           X  X  XX
+                      +-------------+       |                      |           XX X XX
+                                            |                      |            XXXXX
+                                            |                      |
+                                            |                      |  +--------v------+
+                                            v                      +> | Spark         |
+                                    +-------+----------+---+          |               |
+                                    |                  |   |          |               |
+                                    |    Volume Provisioner|
+                                    |    (NFS or hostpath) |
+                                    |                  |   |
+                                    +-------------^----+---+              (Presto)
+                                                  ^                          |
+                                                  |                          |
+                                                  |                          V
+                                                  |                +---------------+
+                                                  |                |               |
+                                                  |                |               |
+                                                  +----------------+   Minio       |
+                                                                   |               |
+                                                                   +---------------+
+```
 
-* __bigtop.bom__
+If all services are deployed successfully, you will ultimately have an inventory that looks like this:
 
-Add following JSON snippets to the desired component/package:
 
 ```
-git     { repo = ""; ref = ""; dir = ""; commit_hash = "" }
+$> kubectl get pods -n bigdata
+NAME                                          READY   STATUS    RESTARTS   AGE
+coordinator-56956c8d84-hgxvc                  1/1     Running   0          34s
+fantastic-chipmunk-livy-5856779cf8-w8wlr      1/1     Running   0          3d1h
+fantastic-chipmunk-master-55f5945997-mbvbm    1/1     Running   0          3d
+fantastic-chipmunk-worker-5f7f468b8f-mwnmg    1/1     Running   1          3d1h
+fantastic-chipmunk-worker-5f7f468b8f-zkbrw    1/1     Running   0          3d1h
+fantastic-chipmunk-zeppelin-7958b9477-vv25d   1/1     Running   0          3d1h
+hbase-hbase-master-0                          1/1     Running   0          4h4m
+hbase-hbase-rs-0                              1/1     Running   2          4h7m
+hbase-hbase-rs-1                              1/1     Running   1          4h5m
+hbase-hbase-rs-2                              1/1     Running   0          4h4m
+hbase-hdfs-dn-0                               1/1     Running   1          4h7m
+hbase-hdfs-dn-1                               1/1     Running   0          4h5m
+hbase-hdfs-dn-2                               1/1     Running   0          4h5m
+hbase-hdfs-nn-0                               1/1     Running   0          4h7m
+minio-7bf4678799-cd8qz                        1/1     Running   0          3d22h
+my-kafka-0                                    1/1     Running   0          27h
+my-kafka-1                                    1/1     Running   0          27h
+my-kafka-2                                    1/1     Running   0          27h
+nifi-0                                        4/4     Running   0          2d3h
+nifi-zookeeper-0                              1/1     Running   0          2d3h
+nifi-zookeeper-1                              1/1     Running   0          2d3h
+nifi-zookeeper-2                              1/1     Running   0          2d3h
+worker-565c7c858-pjlpg                        1/1     Running   0          34s
 ```
 
-  * `repo` - SSH, HTTP or local path to Git repo.
-  * `ref` - branch, tag or commit hash to check out.
-  * `dir` - [OPTIONAL] directory name to write source into.
-  * `commit_hash` - [OPTIONAL] a commit hash to reset to.
-
-Some packages have different names for source directory and source tarball
-(`hbase-0.98.5-src.tar.gz` contains `hbase-0.98.5` directory).
-By default source will be fetched in a directory named by `tarball { source = TARBALL_SRC }`
-without `.t*` extension.
-To explicitly set directory name use the `dir` option.
+# Modifications from generic charts or recipes
 
-When `commit_hash` specified, the repo to build the package will be reset to the commit hash.
+Configuration isn't really externalized very well in most off-the-shelf Helm charts.  The other obvious missing link is that storage isn't provided for you, which is a problem for folks who don't know how to do things in K8s.  We've externalized configuration for all files into ConfigMaps (see Spark as a canonical example of this) and unified the ZooKeeper instances into a single instance for ease of deployment here.  Also, this repo has *tested* different Helm repos / YAML files to se [...]
+the way it should.
 
-Example for HBase:
+For example, the stable Helm charts don't properly configure Zeppelin, don't allow for empty storage on ZK, and don't inject config into Kafka the way you'd want to in certain scenarios.  In this repo, everything should *just work*, provided you create things in *the right order*.
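+
+As a concrete illustration of the externalized-configuration pattern, here is a minimal sketch that builds the spark-conf ConfigMap from the quick-install above (the file names come from the ms-spark/ directory in this repo; the deployment in ms-spark/spark-deployment.yaml is expected to mount it over Spark's conf directory):
+
+```
+# Build the Spark configuration ConfigMap from the files under ms-spark/
+cd ms-spark/
+kubectl create configmap spark-conf -n bigdata \
+  --from-file=core-site.xml --from-file=log4j.properties \
+  --from-file=spark-defaults.conf --from-file=spark-env.sh
+# Inspect what was captured
+kubectl describe configmap spark-conf -n bigdata
+```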
 
-```
-      name    = 'hbase'
-      version { base = '1.3.2'; pkg = base; release = 1 }
-      git     { repo = "https://github.com/apache/hbase.git"
-                ref  = "branch-1.3"
-                dir  = "${name}-${version.base}"
-                commit_hash = "1bedb5bfbb5a99067e7bc54718c3124f632b6e17"
-              }
-```
+# Instructions
 
-* __command line__
+1. First, install an NFS volume provisioner using the instructions in the storage/ directory.
+2. Then follow the other instructions in the storage README.
+3. Now, install the components one by one from the README.md files in the processing/ directory.
 
+This will yield the following analytics distro, all running in the bigdata namespace (make sure to use
+`--namespace=bigdata` or similar on all `helm install` or `kubectl create` directives).  If you mess anything up,
+run `helm list` (to find your installation, e.g. XYZ), followed by `helm delete XYZ`, to clear out your components, as sketched below.
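+
+A minimal cleanup sketch (XYZ is a placeholder for whatever release name Helm generated):
+
+```
+helm list                      # find the release you installed
+helm delete XYZ                # remove its Kubernetes objects
+kubectl get pods -n bigdata    # confirm the pods are gone
+```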
 
-```
-./gradlew COMPONENT-pkg -Pgit_repo="" -Pgit_ref="" -Pgit_dir="" -Pgit_commit_hash="" -Pbase_version=""
-```
+In particular, this repo modifies the stock Helm charts in a variety of ways to make things work together.
 
-Where `git_repo`, `git_ref`, `git_dir`, and `git_commit_hash` are exactly the same with what we set in JSON.
-And `base_version` is to overwrite:
-```
-      version { base = ''}
-```
-
-Example for Kafka:
+1. We don't use stable/spark because it's *old*.  Instead we use Microsoft's Spark chart, which comes properly
+integrated with Zeppelin.
+2. We use ConfigMaps for the configuration of *Spark*.  This allows us to inject
+different kinds of configuration from the Kubernetes level, rather than baking them into the image (note that
+you can't just inject a single file from a ConfigMap, because it overwrites the whole directory).  This allows us
+to inject Minio access properties into Spark itself, while also injecting other config (see the sketch after this list).
+3. For Kafka, we put the environment variables in a ConfigMap so that we can use the same ZooKeeper instance as
+NiFi.
+4. For Presto, the configuration parameters for workers/masters are also all injected via ConfigMap.  We use
+a fork of https://github.com/dharmeshkakadia/presto-kubernetes for this change (PRs have been submitted to make this upstream).
+5. For Minio there aren't any major changes needed out of the box, except using emptyDir for storage if you don't have a volume provisioner.
+6. For HBase, we also reuse the same ZooKeeper instance that is used by NiFi and Kafka.  For now we use the NiFi ZK deployment, but at some point we will make ZK a first-class citizen.
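+
+As a sketch of the kind of S3A properties the injected core-site.xml carries so Spark can reach Minio (item 2 above), the snippet below writes a placeholder version of that file; the endpoint and credentials are assumptions for illustration, and the authoritative copy lives in ms-spark/core-site.xml:
+
+```
+# Placeholder core-site.xml; real values live in ms-spark/core-site.xml
+cat > core-site.xml <<'EOF'
+<configuration>
+  <!-- Point the S3A filesystem at the in-cluster Minio service -->
+  <property><name>fs.s3a.endpoint</name><value>http://minio.minio.svc.cluster.local:9000</value></property>
+  <property><name>fs.s3a.access.key</name><value>minio</value></property>
+  <property><name>fs.s3a.secret.key</name><value>minio123</value></property>
+  <property><name>fs.s3a.path.style.access</name><value>true</value></property>
+  <property><name>fs.s3a.impl</name><value>org.apache.hadoop.fs.s3a.S3AFileSystem</value></property>
+</configuration>
+EOF
+# The file is then bundled into the spark-conf ConfigMap shown earlier
+```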
 
-```
-./gradlew kafka-pkg-ind -Pgit_repo=https://github.com/apache/kafka.git -Pgit_ref=trunk -Pgit_commit_hash=dc0601a1c604bea3f426ed25b6c20176ff444079 -Pbase_version=2.2.0
-```
+============================================ 
 
-You can mix both ways to build from Git, but command line always overwrites `bigtop.bom`.
+Notes and Ideas
+ 
+# Inspiration 
 
+We recently saw https://github.com/dacort/damons-data-lake.
+- A problem set that is increasingly relevant: lots of sources, real time, an unstructured warehouse/lake.
+- No upstream plug-and-play alternative to the cloud-native services stack.
+- Infrastructure, storage, and networking are the hardest part.
 
-Contact us
-----------
 
-You can get in touch with us on [the Apache Bigtop mailing lists](http://bigtop.apache.org/mail-lists.html).
diff --git a/README.md b/README.md.BAK
similarity index 100%
copy from README.md
copy to README.md.BAK
diff --git a/README_STORAGE.md b/README_STORAGE.md
new file mode 100755
index 0000000..431261a
--- /dev/null
+++ b/README_STORAGE.md
@@ -0,0 +1,14 @@
+This includes the various storage recipes curated for
+use in a big data distro that would run on a cloud-native platform.
+
+
+
+- Minio: global object store to support Spark/Kafka/etc.
+  You can install it from the YAMLs in this repo, or else run
+  `helm install --name minio stable/minio --namespace=bigdata` directly.
+
+- HBase: for use by tools like PredictionIO.
+  For installation (see the sketch below):
+   - git clone https://github.com/warp-poke/hbase-helm
+   - cd into hbase-helm
+   - modify the ConfigMap to use nifi-zookeeper as the zk.quorum field.
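+
+A minimal install sketch for the HBase piece, assuming the chart bundled in this repo's hbase-helm/ directory and the value path used by hbase-helm/templates/hbase-configmap.yaml (hbase.zookeeper.quorum):
+
+```
+# Point HBase at the ZooKeeper ensemble that ships with the NiFi chart
+helm install ./hbase-helm --name hbase --namespace=bigdata \
+  --set hbase.zookeeper.quorum=nifi-zookeeper
+```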
diff --git a/data/a.txt b/data/a.txt
new file mode 100755
index 0000000..31c8b06
--- /dev/null
+++ b/data/a.txt
@@ -0,0 +1,9 @@
+as
+df
+sc
+c
+sd
+cs
+s
+s
+ss
diff --git a/data/b.txt b/data/b.txt
new file mode 100755
index 0000000..12a5688
--- /dev/null
+++ b/data/b.txt
@@ -0,0 +1,15 @@
+
+sc
+c
+c
+c
+d
+d
+e
+3
+c
+wkckw
+k2e
+k2
+k2
+kc
diff --git a/get_helm.sh b/get_helm.sh
new file mode 100755
index 0000000..ea2056c
--- /dev/null
+++ b/get_helm.sh
@@ -0,0 +1,245 @@
+#!/usr/bin/env bash
+
+# Copyright The Helm Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# The install script is based off of the MIT-licensed script from glide,
+# the package manager for Go: https://github.com/Masterminds/glide.sh/blob/master/get
+
+PROJECT_NAME="helm"
+TILLER_NAME="tiller"
+
+: ${USE_SUDO:="true"}
+: ${HELM_INSTALL_DIR:="/usr/local/bin"}
+
+# initArch discovers the architecture for this system.
+initArch() {
+  ARCH=$(uname -m)
+  case $ARCH in
+    armv5*) ARCH="armv5";;
+    armv6*) ARCH="armv6";;
+    armv7*) ARCH="arm";;
+    aarch64) ARCH="arm64";;
+    x86) ARCH="386";;
+    x86_64) ARCH="amd64";;
+    i686) ARCH="386";;
+    i386) ARCH="386";;
+  esac
+}
+
+# initOS discovers the operating system for this system.
+initOS() {
+  OS=$(echo `uname`|tr '[:upper:]' '[:lower:]')
+
+  case "$OS" in
+    # Minimalist GNU for Windows
+    mingw*) OS='windows';;
+  esac
+}
+
+# runs the given command as root (detects if we are root already)
+runAsRoot() {
+  local CMD="$*"
+
+  if [ $EUID -ne 0 -a $USE_SUDO = "true" ]; then
+    CMD="sudo $CMD"
+  fi
+
+  $CMD
+}
+
+# verifySupported checks that the os/arch combination is supported for
+# binary builds.
+verifySupported() {
+  local supported="darwin-386\ndarwin-amd64\nlinux-386\nlinux-amd64\nlinux-arm\nlinux-arm64\nlinux-ppc64le\nwindows-386\nwindows-amd64"
+  if ! echo "${supported}" | grep -q "${OS}-${ARCH}"; then
+    echo "No prebuilt binary for ${OS}-${ARCH}."
+    echo "To build from source, go to https://github.com/helm/helm"
+    exit 1
+  fi
+
+  if ! type "curl" > /dev/null && ! type "wget" > /dev/null; then
+    echo "Either curl or wget is required"
+    exit 1
+  fi
+}
+
+# checkDesiredVersion checks if the desired version is available.
+checkDesiredVersion() {
+  if [ "x$DESIRED_VERSION" == "x" ]; then
+    # Get tag from release URL
+    local latest_release_url="https://github.com/helm/helm/releases/latest"
+    if type "curl" > /dev/null; then
+      TAG=$(curl -Ls -o /dev/null -w %{url_effective} $latest_release_url | grep -oE "[^/]+$" )
+    elif type "wget" > /dev/null; then
+      TAG=$(wget $latest_release_url --server-response -O /dev/null 2>&1 | awk '/^  Location: /{DEST=$2} END{ print DEST}' | grep -oE "[^/]+$")
+    fi
+  else
+    TAG=$DESIRED_VERSION
+  fi
+}
+
+# checkHelmInstalledVersion checks which version of helm is installed and
+# if it needs to be changed.
+checkHelmInstalledVersion() {
+  if [[ -f "${HELM_INSTALL_DIR}/${PROJECT_NAME}" ]]; then
+    local version=$(helm version -c | grep '^Client' | cut -d'"' -f2)
+    if [[ "$version" == "$TAG" ]]; then
+      echo "Helm ${version} is already ${DESIRED_VERSION:-latest}"
+      return 0
+    else
+      echo "Helm ${TAG} is available. Changing from version ${version}."
+      return 1
+    fi
+  else
+    return 1
+  fi
+}
+
+# downloadFile downloads the latest binary package and also the checksum
+# for that binary.
+downloadFile() {
+  HELM_DIST="helm-$TAG-$OS-$ARCH.tar.gz"
+  DOWNLOAD_URL="https://get.helm.sh/$HELM_DIST"
+  CHECKSUM_URL="$DOWNLOAD_URL.sha256"
+  HELM_TMP_ROOT="$(mktemp -dt helm-installer-XXXXXX)"
+  HELM_TMP_FILE="$HELM_TMP_ROOT/$HELM_DIST"
+  HELM_SUM_FILE="$HELM_TMP_ROOT/$HELM_DIST.sha256"
+  echo "Downloading $DOWNLOAD_URL"
+  if type "curl" > /dev/null; then
+    curl -SsL "$CHECKSUM_URL" -o "$HELM_SUM_FILE"
+  elif type "wget" > /dev/null; then
+    wget -q -O "$HELM_SUM_FILE" "$CHECKSUM_URL"
+  fi
+  if type "curl" > /dev/null; then
+    curl -SsL "$DOWNLOAD_URL" -o "$HELM_TMP_FILE"
+  elif type "wget" > /dev/null; then
+    wget -q -O "$HELM_TMP_FILE" "$DOWNLOAD_URL"
+  fi
+}
+
+# installFile verifies the SHA256 for the file, then unpacks and
+# installs it.
+installFile() {
+  HELM_TMP="$HELM_TMP_ROOT/$PROJECT_NAME"
+  local sum=$(openssl sha1 -sha256 ${HELM_TMP_FILE} | awk '{print $2}')
+  local expected_sum=$(cat ${HELM_SUM_FILE})
+  if [ "$sum" != "$expected_sum" ]; then
+    echo "SHA sum of ${HELM_TMP_FILE} does not match. Aborting."
+    exit 1
+  fi
+
+  mkdir -p "$HELM_TMP"
+  tar xf "$HELM_TMP_FILE" -C "$HELM_TMP"
+  HELM_TMP_BIN="$HELM_TMP/$OS-$ARCH/$PROJECT_NAME"
+  TILLER_TMP_BIN="$HELM_TMP/$OS-$ARCH/$TILLER_NAME"
+  echo "Preparing to install $PROJECT_NAME and $TILLER_NAME into ${HELM_INSTALL_DIR}"
+  runAsRoot cp "$HELM_TMP_BIN" "$HELM_INSTALL_DIR"
+  echo "$PROJECT_NAME installed into $HELM_INSTALL_DIR/$PROJECT_NAME"
+  if [ -x "$TILLER_TMP_BIN" ]; then
+    runAsRoot cp "$TILLER_TMP_BIN" "$HELM_INSTALL_DIR"
+    echo "$TILLER_NAME installed into $HELM_INSTALL_DIR/$TILLER_NAME"
+  else
+    echo "info: $TILLER_NAME binary was not found in this release; skipping $TILLER_NAME installation"
+  fi
+}
+
+# fail_trap is executed if an error occurs.
+fail_trap() {
+  result=$?
+  if [ "$result" != "0" ]; then
+    if [[ -n "$INPUT_ARGUMENTS" ]]; then
+      echo "Failed to install $PROJECT_NAME with the arguments provided: $INPUT_ARGUMENTS"
+      help
+    else
+      echo "Failed to install $PROJECT_NAME"
+    fi
+    echo -e "\tFor support, go to https://github.com/helm/helm."
+  fi
+  cleanup
+  exit $result
+}
+
+# testVersion tests the installed client to make sure it is working.
+testVersion() {
+  set +e
+  HELM="$(which $PROJECT_NAME)"
+  if [ "$?" = "1" ]; then
+    echo "$PROJECT_NAME not found. Is $HELM_INSTALL_DIR on your "'$PATH?'
+    exit 1
+  fi
+  set -e
+  echo "Run '$PROJECT_NAME init' to configure $PROJECT_NAME."
+}
+
+# help provides possible cli installation arguments
+help () {
+  echo "Accepted cli arguments are:"
+  echo -e "\t[--help|-h ] ->> prints this help"
+  echo -e "\t[--version|-v <desired_version>] . When not defined it defaults to latest"
+  echo -e "\te.g. --version v2.4.0  or -v latest"
+  echo -e "\t[--no-sudo]  ->> install without sudo"
+}
+
+# cleanup temporary files to avoid https://github.com/helm/helm/issues/2977
+cleanup() {
+  if [[ -d "${HELM_TMP_ROOT:-}" ]]; then
+    rm -rf "$HELM_TMP_ROOT"
+  fi
+}
+
+# Execution
+
+#Stop execution on any error
+trap "fail_trap" EXIT
+set -e
+
+# Parsing input arguments (if any)
+export INPUT_ARGUMENTS="${@}"
+set -u
+while [[ $# -gt 0 ]]; do
+  case $1 in
+    '--version'|-v)
+       shift
+       if [[ $# -ne 0 ]]; then
+           export DESIRED_VERSION="${1}"
+       else
+           echo -e "Please provide the desired version. e.g. --version v2.4.0 or -v latest"
+           exit 0
+       fi
+       ;;
+    '--no-sudo')
+       USE_SUDO="false"
+       ;;
+    '--help'|-h)
+       help
+       exit 0
+       ;;
+    *) exit 1
+       ;;
+  esac
+  shift
+done
+set +u
+
+initArch
+initOS
+verifySupported
+checkDesiredVersion
+if ! checkHelmInstalledVersion; then
+  downloadFile
+  installFile
+fi
+testVersion
+cleanup
diff --git a/hbase-helm/.helmignore b/hbase-helm/.helmignore
new file mode 100755
index 0000000..f0c1319
--- /dev/null
+++ b/hbase-helm/.helmignore
@@ -0,0 +1,21 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
diff --git a/hbase-helm/Chart.yaml b/hbase-helm/Chart.yaml
new file mode 100755
index 0000000..2ec13cf
--- /dev/null
+++ b/hbase-helm/Chart.yaml
@@ -0,0 +1,13 @@
+apiVersion: v1
+description: Hbase Helm to deploy the distributed versions of warp10
+name: hbase
+version: 1.0.4
+appVersion: 2.7.3
+hbase.version: 1.4.2
+home: https://hadoop.apache.org/
+sources:
+- https://github.com/apache/hadoop
+icon: http://hadoop.apache.org/images/hadoop-logo.jpg
+maintainers:
+- name: Pierre Zemb
+  email: pierre.zemb.isen@gmnail.com
diff --git a/hbase-helm/README.md b/hbase-helm/README.md
new file mode 100755
index 0000000..8341bc7
--- /dev/null
+++ b/hbase-helm/README.md
@@ -0,0 +1 @@
+# forked from the upstream helm charts.
diff --git a/hbase-helm/templates/NOTES.txt b/hbase-helm/templates/NOTES.txt
new file mode 100755
index 0000000..85c632a
--- /dev/null
+++ b/hbase-helm/templates/NOTES.txt
@@ -0,0 +1,26 @@
+1. You can check the status of HDFS by running this command:
+   kubectl exec -n {{ .Release.Namespace }} -it {{ template "hbase.name" . }}-hdfs-nn-0 -- /usr/local/hadoop/bin/hdfs dfsadmin -report
+
+2. You can list the yarn nodes by running this command:
+   kubectl exec -n {{ .Release.Namespace }} -it {{ template "hbase.name" . }}-yarn-rm-0 -- /usr/local/hadoop/bin/yarn node -list
+
+3. Create a port-forward to the yarn resource manager UI:
+   kubectl port-forward -n {{ .Release.Namespace }} {{ template "hbase.name" . }}-yarn-rm-0 8088:8088 
+
+   Then open the ui in your browser:
+   
+   open http://localhost:8088
+
+4. You can run included hadoop tests like this:
+   kubectl exec -n {{ .Release.Namespace }} -it {{ template "hbase.name" . }}-yarn-nm-0 -- /usr/local/hadoop/bin/hadoop jar /usr/local/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-client-jobclient-{{ .Values.hadoopVersion }}-tests.jar TestDFSIO -write -nrFiles 5 -fileSize 128MB -resFile /tmp/TestDFSIOwrite.txt
+
+5. You can list the mapreduce jobs like this:
+   kubectl exec -n {{ .Release.Namespace }} -it {{ template "hbase.name" . }}-yarn-rm-0 -- /usr/local/hadoop/bin/mapred job -list
+
+6. This chart can also be used with the zeppelin chart
+    helm install --namespace {{ .Release.Namespace }} --set hadoop.useConfigMap=true,hadoop.configMapName={{ template "hbase.name" . }} stable/zeppelin
+
+7. You can scale the number of yarn nodes like this:
+   helm upgrade {{ .Release.Name }} --set yarn.nodeManager.replicas=4 stable/hadoop
+
+   Make sure to update the values.yaml if you want to make this permanent.
diff --git a/hbase-helm/templates/_helpers.yaml b/hbase-helm/templates/_helpers.yaml
new file mode 100755
index 0000000..31528af
--- /dev/null
+++ b/hbase-helm/templates/_helpers.yaml
@@ -0,0 +1,14 @@
+{{/* vim: set filetype=mustache: */}}
+{{/*
+Expand the name of the chart.
+*/}}
+{{- define "hadoop.name" -}}
+{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+
+{{/*
+Expand the name of the chart.
+*/}}
+{{- define "hbase.name" -}}
+{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
\ No newline at end of file
diff --git a/hbase-helm/templates/hadoop-configmap.yaml b/hbase-helm/templates/hadoop-configmap.yaml
new file mode 100755
index 0000000..b982ee3
--- /dev/null
+++ b/hbase-helm/templates/hadoop-configmap.yaml
@@ -0,0 +1,296 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: hadoop-configmap
+  labels:
+    app: {{ template "hadoop.name" . }}
+    chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }}
+    release: {{ .Release.Name }}
+    heritage: {{ .Release.Service }}
+data:
+  bootstrap.sh: |
+    #!/bin/bash
+
+    : ${HADOOP_PREFIX:=/usr/local/hadoop}
+
+    . $HADOOP_PREFIX/etc/hadoop/hadoop-env.sh
+
+    # Directory to find config artifacts
+    CONFIG_DIR="/tmp/hadoop-config"
+
+    # Copy config files from volume mount
+
+    for f in slaves core-site.xml hdfs-site.xml mapred-site.xml yarn-site.xml; do
+      if [[ -e ${CONFIG_DIR}/$f ]]; then
+        cp ${CONFIG_DIR}/$f $HADOOP_PREFIX/etc/hadoop/$f
+      else
+        echo "ERROR: Could not find $f in $CONFIG_DIR"
+        exit 1
+      fi
+    done
+
+    # installing libraries if any - (resource urls added comma separated to the ACP system variable)
+    cd $HADOOP_PREFIX/share/hadoop/common ; for cp in ${ACP//,/ }; do  echo == $cp; curl -LO $cp ; done; cd -
+
+    if [[ "${HOSTNAME}" =~ "hdfs-nn" ]]; then
+      mkdir -p /root/hdfs/namenode
+      $HADOOP_PREFIX/bin/hdfs namenode -format -force -nonInteractive
+      sed -i s/{{ template "hbase.name" . }}-hdfs-nn/0.0.0.0/ /usr/local/hadoop/etc/hadoop/core-site.xml
+      $HADOOP_PREFIX/sbin/hadoop-daemon.sh start namenode
+    fi
+
+    if [[ "${HOSTNAME}" =~ "hdfs-dn" ]]; then
+      mkdir -p /root/hdfs/datanode
+
+      #  wait up to 30 seconds for namenode 
+      (while [[ $count -lt 15 && -z `curl -sf http://{{ template "hbase.name" . }}-hdfs-nn:50070` ]]; do ((count=count+1)) ; echo "Waiting for {{ template "hbase.name" . }}-hdfs-nn" ; sleep 2; done && [[ $count -lt 15 ]])
+      [[ $? -ne 0 ]] && echo "Timeout waiting for hdfs-nn, exiting." && exit 1
+
+      $HADOOP_PREFIX/sbin/hadoop-daemon.sh start datanode
+    fi
+
+    if [[ "${HOSTNAME}" =~ "yarn-rm" ]]; then
+      sed -i s/{{ template "hbase.name" . }}-yarn-rm/0.0.0.0/ $HADOOP_PREFIX/etc/hadoop/yarn-site.xml
+      cp ${CONFIG_DIR}/start-yarn-rm.sh $HADOOP_PREFIX/sbin/
+      cd $HADOOP_PREFIX/sbin
+      chmod +x start-yarn-rm.sh
+      ./start-yarn-rm.sh
+    fi
+
+    if [[ "${HOSTNAME}" =~ "yarn-nm" ]]; then
+      sed -i '/<\/configuration>/d' $HADOOP_PREFIX/etc/hadoop/yarn-site.xml
+      cat >> $HADOOP_PREFIX/etc/hadoop/yarn-site.xml <<- EOM
+      <property>
+        <name>yarn.nodemanager.resource.memory-mb</name>
+        <value>${MY_MEM_LIMIT:-2048}</value>
+      </property>
+
+      <property>
+        <name>yarn.nodemanager.resource.cpu-vcores</name>
+        <value>${MY_CPU_LIMIT:-2}</value>
+      </property>
+    EOM
+      echo '</configuration>' >> $HADOOP_PREFIX/etc/hadoop/yarn-site.xml
+      cp ${CONFIG_DIR}/start-yarn-nm.sh $HADOOP_PREFIX/sbin/
+      cd $HADOOP_PREFIX/sbin
+      chmod +x start-yarn-nm.sh
+
+      #  wait up to 30 seconds for resourcemanager
+      (while [[ $count -lt 15 && -z `curl -sf http://{{ template "hbase.name" . }}-yarn-rm:8088/ws/v1/cluster/info` ]]; do ((count=count+1)) ; echo "Waiting for {{ template "hbase.name" . }}-yarn-rm" ; sleep 2; done && [[ $count -lt 15 ]])
+      [[ $? -ne 0 ]] && echo "Timeout waiting for yarn-rm, exiting." && exit 1
+
+      ./start-yarn-nm.sh
+    fi
+
+    if [[ $1 == "-d" ]]; then
+      until find ${HADOOP_PREFIX}/logs -mmin -1 | egrep -q '.*'; echo "`date`: Waiting for logs..." ; do sleep 2 ; done
+      tail -F ${HADOOP_PREFIX}/logs/* &
+      while true; do sleep 1000; done
+    fi
+
+    if [[ $1 == "-bash" ]]; then
+      /bin/bash
+    fi
+
+  core-site.xml: |
+    <?xml version="1.0"?>
+    <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+    <configuration>
+      <property>
+            <name>fs.defaultFS</name>
+            <value>hdfs://{{ template "hbase.name" . }}-hdfs-nn:9000/</value>
+            <description>NameNode URI</description>
+        </property>
+    </configuration>
+
+  hdfs-site.xml: |
+    <?xml version="1.0"?>
+    <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+    <configuration>
+      <property>
+        <name>dfs.datanode.use.datanode.hostname</name>
+        <value>false</value>
+      </property>
+
+      <property>
+        <name>dfs.client.use.datanode.hostname</name>
+        <value>false</value>
+      </property>
+
+      <property>
+        <name>dfs.replication</name>
+          <value>3</value>
+      </property>
+
+      <property>
+        <name>dfs.datanode.data.dir</name>
+        <value>file:///root/hdfs/datanode</value>
+        <description>DataNode directory</description>
+      </property>
+
+      <property>
+        <name>dfs.namenode.name.dir</name>
+        <value>file:///root/hdfs/namenode</value>
+        <description>NameNode directory for namespace and transaction logs storage.</description>
+      </property>
+
+      <property>
+        <name>dfs.namenode.datanode.registration.ip-hostname-check</name>
+        <value>false</value>
+      </property>
+    </configuration>
+
+  mapred-site.xml: |
+    <?xml version="1.0"?>
+    <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+    <configuration>
+      <property>
+        <name>mapreduce.framework.name</name>
+        <value>yarn</value>
+      </property>
+      <property>
+        <name>mapreduce.jobhistory.address</name>
+        <value>{{ template "hbase.name" . }}-yarn-rm-0.{{ template "hbase.name" . }}-yarn-rm.{{ .Release.Namespace }}.svc.cluster.local:10020</value>
+      </property>
+      <property>
+        <name>mapreduce.jobhistory.webapp.address</name>
+        <value>{{ template "hbase.name" . }}-yarn-rm-0.{{ template "hbase.name" . }}-yarn-rm.{{ .Release.Namespace }}.svc.cluster.local:19888</value>
+      </property>
+    </configuration>
+
+  slaves: |
+    localhost
+
+  start-yarn-nm.sh: |
+    #!/usr/bin/env bash
+
+    # Licensed to the Apache Software Foundation (ASF) under one or more
+    # contributor license agreements.  See the NOTICE file distributed with
+    # this work for additional information regarding copyright ownership.
+    # The ASF licenses this file to You under the Apache License, Version 2.0
+    # (the "License"); you may not use this file except in compliance with
+    # the License.  You may obtain a copy of the License at
+    #
+    #     http://www.apache.org/licenses/LICENSE-2.0
+    #
+    # Unless required by applicable law or agreed to in writing, software
+    # distributed under the License is distributed on an "AS IS" BASIS,
+    # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    # See the License for the specific language governing permissions and
+    # limitations under the License.
+
+
+    # Start all yarn daemons.  Run this on master node.
+
+    echo "starting yarn daemons"
+
+    bin=`dirname "${BASH_SOURCE-$0}"`
+    bin=`cd "$bin"; pwd`
+
+    DEFAULT_LIBEXEC_DIR="$bin"/../libexec
+    HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
+    . $HADOOP_LIBEXEC_DIR/yarn-config.sh
+
+    # start resourceManager
+    # "$bin"/yarn-daemon.sh --config $YARN_CONF_DIR  start resourcemanager
+    # start nodeManager
+    "$bin"/yarn-daemon.sh --config $YARN_CONF_DIR  start nodemanager
+    # start proxyserver
+    #"$bin"/yarn-daemon.sh --config $YARN_CONF_DIR  start proxyserver
+
+  start-yarn-rm.sh: |
+    #!/usr/bin/env bash
+
+    # Licensed to the Apache Software Foundation (ASF) under one or more
+    # contributor license agreements.  See the NOTICE file distributed with
+    # this work for additional information regarding copyright ownership.
+    # The ASF licenses this file to You under the Apache License, Version 2.0
+    # (the "License"); you may not use this file except in compliance with
+    # the License.  You may obtain a copy of the License at
+    #
+    #     http://www.apache.org/licenses/LICENSE-2.0
+    #
+    # Unless required by applicable law or agreed to in writing, software
+    # distributed under the License is distributed on an "AS IS" BASIS,
+    # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    # See the License for the specific language governing permissions and
+    # limitations under the License.
+
+
+    # Start all yarn daemons.  Run this on master node.
+
+    echo "starting yarn daemons"
+
+    bin=`dirname "${BASH_SOURCE-$0}"`
+    bin=`cd "$bin"; pwd`
+
+    DEFAULT_LIBEXEC_DIR="$bin"/../libexec
+    HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
+    . $HADOOP_LIBEXEC_DIR/yarn-config.sh
+
+    # start resourceManager
+    "$bin"/yarn-daemon.sh --config $YARN_CONF_DIR  start resourcemanager
+    # start nodeManager
+    # "$bin"/yarn-daemons.sh --config $YARN_CONF_DIR  start nodemanager
+    # start proxyserver
+    "$bin"/yarn-daemon.sh --config $YARN_CONF_DIR  start proxyserver
+
+  yarn-site.xml: |
+    <?xml version="1.0"?>
+    <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+    <configuration>
+      <property>
+        <name>yarn.resourcemanager.hostname</name>
+        <value>{{ template "hbase.name" . }}-yarn-rm</value>
+      </property>
+
+      <property>
+        <name>yarn.nodemanager.vmem-check-enabled</name>
+        <value>false</value>
+      </property>
+
+      <property>
+        <name>yarn.nodemanager.aux-services</name>
+        <value>mapreduce_shuffle</value>
+      </property>
+
+      <property>
+        <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
+        <value>org.apache.hadoop.mapred.ShuffleHandler</value>
+      </property>
+
+      <property>
+        <description>List of directories to store localized files in.</description>
+        <name>yarn.nodemanager.local-dirs</name>
+        <value>/var/lib/hadoop-yarn/cache/${user.name}/nm-local-dir</value>
+      </property>
+
+      <property>
+        <description>Where to store container logs.</description>
+        <name>yarn.nodemanager.log-dirs</name>
+        <value>/var/log/hadoop-yarn/containers</value>
+      </property>
+
+      <property>
+        <description>Where to aggregate logs to.</description>
+        <name>yarn.nodemanager.remote-app-log-dir</name>
+        <value>/var/log/hadoop-yarn/apps</value>
+      </property>
+
+      <property>
+        <name>yarn.application.classpath</name>
+        <value>
+          /usr/local/hadoop/etc/hadoop,
+          /usr/local/hadoop/share/hadoop/common/*,
+          /usr/local/hadoop/share/hadoop/common/lib/*,
+          /usr/local/hadoop/share/hadoop/hdfs/*,
+          /usr/local/hadoop/share/hadoop/hdfs/lib/*,
+          /usr/local/hadoop/share/hadoop/mapreduce/*,
+          /usr/local/hadoop/share/hadoop/mapreduce/lib/*,
+          /usr/local/hadoop/share/hadoop/yarn/*,
+          /usr/local/hadoop/share/hadoop/yarn/lib/*
+        </value>
+      </property>
+    </configuration>
diff --git a/hbase-helm/templates/hbase-configmap.yaml b/hbase-helm/templates/hbase-configmap.yaml
new file mode 100755
index 0000000..8fdaf67
--- /dev/null
+++ b/hbase-helm/templates/hbase-configmap.yaml
@@ -0,0 +1,67 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: hbase-configmap
+  labels:
+    app: {{ template "hbase.name" . }}
+    chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }}
+    release: {{ .Release.Name }}
+    heritage: {{ .Release.Service }}
+data:
+  hbase-site.xml: |
+    <configuration>
+      <property>
+        <name>hbase.master</name>
+        <value>{{ template "hbase.name" . }}-hbase-master:16010</value>
+      </property>
+      <property>
+        <name>zookeeper.znode.parent</name>
+        <value>/hbase</value>
+      </property>
+      <property>
+        <name>hbase.rootdir</name>
+        <value>hdfs://{{ template "hbase.name" . }}-hdfs-nn:9000/hbase</value>
+      </property>
+      <property>
+        <name>hbase.cluster.distributed</name>
+        <value>true</value>
+      </property>
+      <property>
+        <name>hbase.zookeeper.quorum</name>
+        <value>{{ .Values.hbase.zookeeper.quorum }}</value>
+      </property>
+    </configuration>
+
+  bootstrap.sh: |
+    #!/bin/bash
+
+    : ${HADOOP_PREFIX:=/usr/local/hadoop}
+
+    . $HADOOP_PREFIX/etc/hadoop/hadoop-env.sh
+
+    # Directory to find config artifacts
+    CONFIG_DIR="/tmp/hbase-config"
+
+    # Copy config files from volume mount
+
+    for f in hbase-site.xml; do
+      if [[ -e ${CONFIG_DIR}/$f ]]; then
+        cp ${CONFIG_DIR}/$f /opt/hbase/conf/$f
+      else
+        echo "ERROR: Could not find $f in $CONFIG_DIR"
+        exit 1
+      fi
+    done
+
+    # installing libraries if any - (resource urls added comma separated to the ACP system variable)
+    cd $HADOOP_PREFIX/share/hadoop/common ; for cp in ${ACP//,/ }; do  echo == $cp; curl -LO $cp ; done; cd -
+
+    if [[ "${HOSTNAME}" =~ "hbase-master" ]]; then
+      /opt/hbase/bin/hbase-daemon.sh --config /opt/hbase/conf start master
+    fi
+
+    if [[ "${HOSTNAME}" =~ "hbase-rs" ]]; then
+      /opt/hbase/bin/hbase-daemon.sh --config /opt/hbase/conf start regionserver
+    fi
+
+    tail -f /opt/hbase/logs/*
\ No newline at end of file
diff --git a/hbase-helm/templates/hbase-master-pdb.yaml b/hbase-helm/templates/hbase-master-pdb.yaml
new file mode 100755
index 0000000..9104fab
--- /dev/null
+++ b/hbase-helm/templates/hbase-master-pdb.yaml
@@ -0,0 +1,17 @@
+apiVersion: policy/v1beta1
+kind: PodDisruptionBudget
+metadata:
+  name: {{ template "hbase.name" . }}-hbase-master
+  labels:
+    app: {{ template "hadoop.name" . }}
+    chart: {{ .Chart.Name }}-{{ .Chart.Version }}
+    release: {{ .Release.Name }}
+    heritage: {{ .Release.Service }}
+    component: hbase-master
+spec:
+  selector:
+    matchLabels:
+      app: {{ template "hadoop.name" . }}
+      release: {{ .Release.Name }}
+      component: hbase-master
+  minAvailable: 1 # FIXME: HA?
\ No newline at end of file
diff --git a/hbase-helm/templates/hbase-master-statefulset.yaml b/hbase-helm/templates/hbase-master-statefulset.yaml
new file mode 100755
index 0000000..a265c86
--- /dev/null
+++ b/hbase-helm/templates/hbase-master-statefulset.yaml
@@ -0,0 +1,59 @@
+apiVersion: apps/v1beta1
+kind: StatefulSet
+metadata:
+  name: {{ template "hbase.name" . }}-hbase-master
+  annotations:
+    checksum/config: {{ include (print $.Template.BasePath "/hadoop-configmap.yaml") . | sha256sum }}
+  labels:
+    app: {{ template "hadoop.name" . }}
+    chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }}
+    release: {{ .Release.Name }}
+    heritage: {{ .Release.Service }}
+    component: hbase-master
+spec:
+  serviceName: {{ template "hbase.name" . }}-hbase-master
+  replicas: 1
+  template:
+    metadata:
+      labels:
+        app: {{ template "hadoop.name" . }}
+        release: {{ .Release.Name }}
+        component: hbase-master
+    spec:
+      affinity:
+      requiredDuringSchedulingIgnoredDuringExecution:
+          - topologyKey: "kubernetes.io/hostname"
+            labelSelector:
+              matchLabels:
+                app:  {{ template "hadoop.name" . }}
+                release: {{ .Release.Name | quote }}
+                component: hdfs-nn
+      terminationGracePeriodSeconds: 0
+      containers:
+      - name: hbase-master
+        image: {{ .Values.hbaseImage }}
+        imagePullPolicy: {{ .Values.imagePullPolicy }}
+        command:
+        - "/bin/bash"
+        - "/tmp/hbase-config/bootstrap.sh"
+        resources:
+{{ toYaml .Values.hdfs.nameNode.resources | indent 10 }}
+        readinessProbe:
+          httpGet:
+            path: /
+            port: 16010
+          initialDelaySeconds: 5
+          timeoutSeconds: 2
+        livenessProbe:
+          httpGet:
+            path: /
+            port: 16010
+          initialDelaySeconds: 10
+          timeoutSeconds: 2
+        volumeMounts:
+        - name: hbase-config
+          mountPath: /tmp/hbase-config
+      volumes:
+      - name: hbase-config
+        configMap:
+          name: hbase-configmap
\ No newline at end of file
diff --git a/hbase-helm/templates/hbase-master-svc.yaml b/hbase-helm/templates/hbase-master-svc.yaml
new file mode 100755
index 0000000..6e79406
--- /dev/null
+++ b/hbase-helm/templates/hbase-master-svc.yaml
@@ -0,0 +1,26 @@
+# A headless service to create DNS records
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ template "hbase.name" . }}-hbase-master
+  labels:
+    app: {{ template "hbase.name" . }}
+    chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }}
+    release: {{ .Release.Name }}
+    heritage: {{ .Release.Service }}
+    component: hbase-master
+spec:
+  ports:
+  - port: 8080
+    name: restapi
+  - port: 9090
+    name: thriftapi
+  - port: 16000
+    name: master
+  - port: 16010
+    name: masterinfo
+  clusterIP: "None"
+  selector:
+    app: {{ template "hbase.name" . }}
+    release: {{ .Release.Name }}
+    component: hbase-master
\ No newline at end of file
diff --git a/hbase-helm/templates/hbase-rs-pdb.yaml b/hbase-helm/templates/hbase-rs-pdb.yaml
new file mode 100755
index 0000000..d3ed0e2
--- /dev/null
+++ b/hbase-helm/templates/hbase-rs-pdb.yaml
@@ -0,0 +1,17 @@
+apiVersion: policy/v1beta1
+kind: PodDisruptionBudget
+metadata:
+  name: {{ template "hbase.name" . }}-hbase-rs
+  labels:
+    app: {{ template "hadoop.name" . }}
+    chart: {{ .Chart.Name }}-{{ .Chart.Version }}
+    release: {{ .Release.Name }}
+    heritage: {{ .Release.Service }}
+    component: hbase-rs
+spec:
+  selector:
+    matchLabels:
+      app: {{ template "hadoop.name" . }}
+      release: {{ .Release.Name }}
+      component: hbase-rs
+  minAvailable: 1 # FIXME: HA?
\ No newline at end of file
diff --git a/hbase-helm/templates/hbase-rs-statefulset.yaml b/hbase-helm/templates/hbase-rs-statefulset.yaml
new file mode 100755
index 0000000..794b733
--- /dev/null
+++ b/hbase-helm/templates/hbase-rs-statefulset.yaml
@@ -0,0 +1,59 @@
+apiVersion: apps/v1beta1
+kind: StatefulSet
+metadata:
+  name: {{ template "hbase.name" . }}-hbase-rs
+  annotations:
+    checksum/config: {{ include (print $.Template.BasePath "/hadoop-configmap.yaml") . | sha256sum }}
+  labels:
+    app: {{ template "hadoop.name" . }}
+    chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }}
+    release: {{ .Release.Name }}
+    heritage: {{ .Release.Service }}
+    component: hbase-rs
+spec:
+  serviceName: {{ template "hbase.name" . }}-hbase-rs
+  replicas: {{ .Values.hdfs.dataNode.replicas }}
+  template:
+    metadata:
+      labels:
+        app: {{ template "hadoop.name" . }}
+        release: {{ .Release.Name }}
+        component: hbase-rs
+    spec:
+      affinity:
+        podAffinity:
+          requiredDuringSchedulingIgnoredDuringExecution:
+          - topologyKey: "kubernetes.io/hostname"
+            labelSelector:
+              matchLabels:
+                app:  {{ template "hadoop.name" . }}
+                release: {{ .Release.Name | quote }}
+                component: hdfs-nn
+      terminationGracePeriodSeconds: 0
+      containers:
+      - name: hbase-rs
+        image: {{ .Values.hbaseImage }}
+        imagePullPolicy: {{ .Values.imagePullPolicy }}
+        command:
+        - "/bin/bash"
+        - "/tmp/hbase-config/bootstrap.sh"
+        resources:
+{{ toYaml .Values.hdfs.nameNode.resources | indent 10 }}
+        readinessProbe:
+          httpGet:
+            path: /
+            port: 16030
+          initialDelaySeconds: 5
+          timeoutSeconds: 2
+        livenessProbe:
+          httpGet:
+            path: /
+            port: 16030
+          initialDelaySeconds: 10
+          timeoutSeconds: 2
+        volumeMounts:
+        - name: hbase-config
+          mountPath: /tmp/hbase-config
+      volumes:
+      - name: hbase-config
+        configMap:
+          name: hbase-configmap
\ No newline at end of file
diff --git a/hbase-helm/templates/hbase-rs-svc.yaml b/hbase-helm/templates/hbase-rs-svc.yaml
new file mode 100755
index 0000000..95c541b
--- /dev/null
+++ b/hbase-helm/templates/hbase-rs-svc.yaml
@@ -0,0 +1,22 @@
+# A headless service to create DNS records
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ template "hbase.name" . }}-hbase-rs
+  labels:
+    app: {{ template "hbase.name" . }}
+    chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }}
+    release: {{ .Release.Name }}
+    heritage: {{ .Release.Service }}
+    component: hbase-rs
+spec:
+  clusterIP: "None"
+  ports:
+  - port: 16020
+    name: rs
+  - port: 16030
+    name: rsinfo
+  selector:
+    app: {{ template "hbase.name" . }}
+    release: {{ .Release.Name }}
+    component: hbase-rs
\ No newline at end of file
diff --git a/hbase-helm/templates/hdfs-dn-pdb.yaml b/hbase-helm/templates/hdfs-dn-pdb.yaml
new file mode 100755
index 0000000..93b9a42
--- /dev/null
+++ b/hbase-helm/templates/hdfs-dn-pdb.yaml
@@ -0,0 +1,17 @@
+apiVersion: policy/v1beta1
+kind: PodDisruptionBudget
+metadata:
+  name: {{ template "hbase.name" . }}-hdfs-dn
+  labels:
+    app: {{ template "hadoop.name" . }}
+    chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }}
+    release: {{ .Release.Name }}
+    heritage: {{ .Release.Service }}
+    component: hdfs-dn
+spec:
+  selector:
+    matchLabels:
+      app: {{ template "hadoop.name" . }}
+      release: {{ .Release.Name }}
+      component: hdfs-dn
+  minAvailable: {{ .Values.hdfs.dataNode.pdbMinAvailable }}
\ No newline at end of file
diff --git a/hbase-helm/templates/hdfs-dn-pvc.yaml b/hbase-helm/templates/hdfs-dn-pvc.yaml
new file mode 100755
index 0000000..c2fd385
--- /dev/null
+++ b/hbase-helm/templates/hdfs-dn-pvc.yaml
@@ -0,0 +1,25 @@
+{{- if .Values.persistence.dataNode.enabled -}}
+kind: PersistentVolumeClaim
+apiVersion: v1
+metadata:
+  name: {{ template "hbase.name" . }}-hdfs-dn
+  labels:
+    app: {{ template "hadoop.name" . }}
+    chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }}
+    release: {{ .Release.Name }}
+    heritage: {{ .Release.Service }}
+    component: hdfs-dn
+spec:
+  accessModes:
+  - {{ .Values.persistence.dataNode.accessMode | quote }}
+  resources:
+    requests:
+      storage: {{ .Values.persistence.dataNode.size | quote }}
+{{- if .Values.persistence.dataNode.storageClass }}
+{{- if (eq "-" .Values.persistence.dataNode.storageClass) }}
+  storageClassName: ""
+{{- else }}
+  storageClassName: "{{ .Values.persistence.dataNode.storageClass }}"
+{{- end }}
+{{- end }}
+{{- end -}}
diff --git a/hbase-helm/templates/hdfs-dn-statefulset.yaml b/hbase-helm/templates/hdfs-dn-statefulset.yaml
new file mode 100755
index 0000000..d0d36de
--- /dev/null
+++ b/hbase-helm/templates/hdfs-dn-statefulset.yaml
@@ -0,0 +1,82 @@
+apiVersion: apps/v1beta1
+kind: StatefulSet
+metadata:
+  name: {{ template "hbase.name" . }}-hdfs-dn
+  annotations:
+    checksum/config: {{ include (print $.Template.BasePath "/hadoop-configmap.yaml") . | sha256sum }}
+  labels:
+    app: {{ template "hadoop.name" . }}
+    chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }}
+    release: {{ .Release.Name }}
+    heritage: {{ .Release.Service }}
+    component: hdfs-dn
+spec:
+  serviceName: {{ template "hbase.name" . }}-hdfs-dn
+  replicas: {{ .Values.hdfs.dataNode.replicas }}
+  template:
+    metadata:
+      labels:
+        app: {{ template "hadoop.name" . }}
+        release: {{ .Release.Name }}
+        component: hdfs-dn
+    spec:
+      affinity:
+        podAntiAffinity:
+        {{- if eq .Values.antiAffinity "hard" }}
+          requiredDuringSchedulingIgnoredDuringExecution:
+          - topologyKey: "kubernetes.io/hostname"
+            labelSelector:
+              matchLabels:
+                app:  {{ template "hadoop.name" . }}
+                release: {{ .Release.Name | quote }}
+                component: hdfs-dn
+        {{- else if eq .Values.antiAffinity "soft" }}
+          preferredDuringSchedulingIgnoredDuringExecution:
+          - weight: 5
+            podAffinityTerm:
+              topologyKey: "kubernetes.io/hostname"
+              labelSelector:
+                matchLabels:
+                  app:  {{ template "hadoop.name" . }}
+                  release: {{ .Release.Name | quote }}
+                  component: hdfs-dn
+        {{- end }}
+      terminationGracePeriodSeconds: 0
+      containers:
+      - name: hdfs-dn
+        image: {{ .Values.image }}
+        imagePullPolicy: {{ .Values.imagePullPolicy }}
+        command:
+           - "/bin/bash"
+           - "/tmp/hadoop-config/bootstrap.sh"
+           - "-d"
+        resources:       
+{{ toYaml .Values.hdfs.dataNode.resources | indent 10 }}
+        readinessProbe:
+          httpGet:
+            path: /
+            port: 50075
+          initialDelaySeconds: 5
+          timeoutSeconds: 2
+        livenessProbe:
+          httpGet:
+            path: /
+            port: 50075
+          initialDelaySeconds: 10
+          timeoutSeconds: 2
+        volumeMounts:
+        - name: hadoop-config
+          mountPath: /tmp/hadoop-config
+        - name: dfs
+          mountPath: /root/hdfs/datanode
+      volumes:
+      - name: hadoop-config
+        configMap:
+          name: hadoop-configmap
+      - name: dfs
+      {{- if .Values.persistence.dataNode.enabled }}
+        persistentVolumeClaim:
+          claimName: {{ template "hbase.name" . }}-hdfs-dn
+      {{- else }}        
+        emptyDir: {}
+      {{- end }}
diff --git a/hbase-helm/templates/hdfs-dn-svc.yaml b/hbase-helm/templates/hdfs-dn-svc.yaml
new file mode 100755
index 0000000..1b3f218
--- /dev/null
+++ b/hbase-helm/templates/hdfs-dn-svc.yaml
@@ -0,0 +1,23 @@
+# A headless service to create DNS records
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ template "hbase.name" . }}-hdfs-dn
+  labels:
+    app: {{ template "hadoop.name" . }}
+    chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }}
+    release: {{ .Release.Name }}
+    heritage: {{ .Release.Service }}
+    component: hdfs-dn
+spec:
+  clusterIP: "None"
+  ports:
+  - name: dfs
+    port: 9000
+    protocol: TCP
+  - name: webhdfs
+    port: 50075
+  selector:
+    app: {{ template "hadoop.name" . }}
+    release: {{ .Release.Name }}
+    component: hdfs-dn
\ No newline at end of file
diff --git a/hbase-helm/templates/hdfs-nn-pdb.yaml b/hbase-helm/templates/hdfs-nn-pdb.yaml
new file mode 100755
index 0000000..2563d40
--- /dev/null
+++ b/hbase-helm/templates/hdfs-nn-pdb.yaml
@@ -0,0 +1,18 @@
+apiVersion: policy/v1beta1
+kind: PodDisruptionBudget
+metadata:
+  name: {{ template "hbase.name" . }}-hdfs-nn
+  labels:
+    app: {{ template "hadoop.name" . }}
+    chart: {{ .Chart.Name }}-{{ .Chart.Version }}
+    release: {{ .Release.Name }}
+    heritage: {{ .Release.Service }}
+    component: hdfs-nn
+spec:
+  selector:
+    matchLabels:
+      app: {{ template "hadoop.name" . }}
+      release: {{ .Release.Name }}
+      component: hdfs-nn
+  minAvailable: {{ .Values.hdfs.nameNode.pdbMinAvailable }}
+  
\ No newline at end of file
diff --git a/hbase-helm/templates/hdfs-nn-pvc.yaml b/hbase-helm/templates/hdfs-nn-pvc.yaml
new file mode 100755
index 0000000..302bfc3
--- /dev/null
+++ b/hbase-helm/templates/hdfs-nn-pvc.yaml
@@ -0,0 +1,25 @@
+{{- if .Values.persistence.nameNode.enabled -}}
+kind: PersistentVolumeClaim
+apiVersion: v1
+metadata:
+  name: {{ template "hbase.name" . }}-hdfs-nn
+  labels:
+    app: {{ template "hadoop.name" . }}
+    chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }}
+    release: {{ .Release.Name }}
+    heritage: {{ .Release.Service }}
+    component: hdfs-nn
+spec:
+  accessModes:
+  - {{ .Values.persistence.nameNode.accessMode | quote }}
+  resources:
+    requests:
+      storage: {{ .Values.persistence.nameNode.size | quote }}
+{{- if .Values.persistence.nameNode.storageClass }}
+{{- if (eq "-" .Values.persistence.nameNode.storageClass) }}
+  storageClassName: ""
+{{- else }}
+  storageClassName: "{{ .Values.persistence.nameNode.storageClass }}"
+{{- end }}
+{{- end }}
+{{- end -}}
diff --git a/hbase-helm/templates/hdfs-nn-statefulset.yaml b/hbase-helm/templates/hdfs-nn-statefulset.yaml
new file mode 100755
index 0000000..e45b790
--- /dev/null
+++ b/hbase-helm/templates/hdfs-nn-statefulset.yaml
@@ -0,0 +1,82 @@
+apiVersion: apps/v1beta1
+kind: StatefulSet
+metadata:
+  name: {{ template "hbase.name" . }}-hdfs-nn
+  annotations:
+    checksum/config: {{ include (print $.Template.BasePath "/hadoop-configmap.yaml") . | sha256sum }}
+  labels:
+    app: {{ template "hadoop.name" . }}
+    chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }}
+    release: {{ .Release.Name }}
+    heritage: {{ .Release.Service }}
+    component: hdfs-nn
+spec:
+  serviceName: {{ template "hbase.name" . }}-hdfs-nn
+  replicas: 1
+  template:
+    metadata:
+      labels:
+        app: {{ template "hadoop.name" . }}
+        release: {{ .Release.Name }}
+        component: hdfs-nn
+    spec:
+      affinity:
+        podAntiAffinity:
+        {{- if eq .Values.antiAffinity "hard" }}
+          requiredDuringSchedulingIgnoredDuringExecution:
+          - topologyKey: "kubernetes.io/hostname"
+            labelSelector:
+              matchLabels:
+                app:  {{ template "hadoop.name" . }}
+                release: {{ .Release.Name | quote }}
+                component: hdfs-nn
+        {{- else if eq .Values.antiAffinity "soft" }}
+          preferredDuringSchedulingIgnoredDuringExecution:
+          - weight: 5
+            podAffinityTerm:
+              topologyKey: "kubernetes.io/hostname"
+              labelSelector:
+                matchLabels:
+                  app:  {{ template "hadoop.name" . }}
+                  release: {{ .Release.Name | quote }}
+                  component: hdfs-nn
+        {{- end }}
+      terminationGracePeriodSeconds: 0
+      containers:
+      - name: hdfs-nn
+        image: {{ .Values.image }}
+        imagePullPolicy: {{ .Values.imagePullPolicy }}
+        command:
+        - "/bin/bash"
+        - "/tmp/hadoop-config/bootstrap.sh"
+        - "-d"
+        resources:
+{{ toYaml .Values.hdfs.nameNode.resources | indent 10 }}
+        readinessProbe:
+          httpGet:
+            path: /
+            port: 50070
+          initialDelaySeconds: 5
+          timeoutSeconds: 2
+        livenessProbe:
+          httpGet:
+            path: /
+            port: 50070
+          initialDelaySeconds: 10
+          timeoutSeconds: 2
+        volumeMounts:
+        - name: hadoop-config
+          mountPath: /tmp/hadoop-config
+        - name: dfs
+          mountPath: /root/hdfs/namenode
+      volumes:
+      - name: hadoop-config
+        configMap:
+          name: hadoop-configmap
+      - name: dfs
+      {{- if .Values.persistence.nameNode.enabled }}
+        persistentVolumeClaim:
+          claimName: {{ template "hbase.name" . }}-hdfs-nn
+      {{- else }}        
+        emptyDir: {}
+      {{- end }}
diff --git a/hbase-helm/templates/hdfs-nn-svc.yaml b/hbase-helm/templates/hdfs-nn-svc.yaml
new file mode 100755
index 0000000..1f5f100
--- /dev/null
+++ b/hbase-helm/templates/hdfs-nn-svc.yaml
@@ -0,0 +1,23 @@
+# A headless service to create DNS records
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ template "hbase.name" . }}-hdfs-nn
+  labels:
+    app: {{ template "hadoop.name" . }}
+    chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }}
+    release: {{ .Release.Name }}
+    heritage: {{ .Release.Service }}
+    component: hdfs-nn
+spec:
+  clusterIP: "None"
+  ports:
+  - name: dfs
+    port: 9000
+    protocol: TCP
+  - name: webhdfs
+    port: 50070
+  selector:
+    app: {{ template "hadoop.name" . }}
+    release: {{ .Release.Name }}
+    component: hdfs-nn
\ No newline at end of file
diff --git a/hbase-helm/tools/calc_resources.sh b/hbase-helm/tools/calc_resources.sh
new file mode 100755
index 0000000..6005d31
--- /dev/null
+++ b/hbase-helm/tools/calc_resources.sh
@@ -0,0 +1,75 @@
+#!/bin/bash
+
+# Calculates cluster resources given a percentage based on what is currently allocatable.
+# Related issue to programmatic resource query: https://github.com/kubernetes/kubernetes/issues/27404
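+#
+# Usage sketch (this helper appears to come from the stable/hadoop chart; note that the yarn.*
+# values it emits are not defined in this chart's values.yaml, so adapt the --set keys as needed):
+#   helm install ./hbase-helm $(./hbase-helm/tools/calc_resources.sh 50)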
+
+TARGET_PCT=$1
+
+[[ -z "${TARGET_PCT}" ]] && echo "USAGE: $0 <target percent>" && exit 1
+
+NODES=$(kubectl get nodes -o jsonpath='{.items..metadata.name}')
+NUM_NODES=$(echo "${NODES}" | tr ' ' '\n' | wc -l | xargs echo -n)
+
+TOTAL_CPU=$(kubectl get nodes -o jsonpath='{.items[0].status.allocatable.cpu}')
+# Convert CPU to nanocores
+TOTAL_CPU=$(bc <<< "${TOTAL_CPU} * 1000000000")
+
+# Start kube proxy to get to node stats summary api
+kubectl proxy >/dev/null 2>&1 &
+export kproxy=%1
+
+# Cleanup kproxy on exit
+function finish {
+  kill $kproxy
+}
+trap finish EXIT
+
+# Wait for proxy
+(while [[ $count -lt 5 && -z "$(curl -s localhost:8001/api/v1)" ]]; do ((count=count+1)) ; sleep 2; done && [[ $count -lt 5 ]])
+[[ $? -ne 0 ]] && echo "ERROR: could not start kube proxy to fetch node stats summary" && exit 1
+
+declare -a NODE_STATS
+declare -a AVAIL_CPU
+declare -a AVAIL_MEM
+i=0
+for NODE in ${NODES}; do
+    NODE_STATS[$i]=$(curl -sf localhost:8001/api/v1/proxy/nodes/${NODE}:10255/stats/summary)
+    [[ $? -ne 0 ]] && echo "ERROR: Could not get stats summary for node: ${NODE}" && exit 1
+
+    # Get available memory
+    AVAIL_MEM[$i]=$(jq '.node.memory.availableBytes' <<< "${NODE_STATS[$i]}")
+    AVAIL_MEM[$i]=$(bc -l <<< "scale=0; ${AVAIL_MEM[$i]}/1024/1024")
+    
+    # Derive available CPU
+    USED_CPU=$(jq '.node.cpu.usageNanoCores' <<< "${NODE_STATS[$i]}")
+    AVAIL_CPU[$i]=$(bc -l <<< "scale=2; (${TOTAL_CPU} - ${USED_CPU})/1000000")
+    ((i=i+1))
+done
+
+# Optimize per the min resources on any node.
+CORES=$(echo "${AVAIL_CPU[*]}" | tr ' ' '\n' | sort -n  | head -1)
+MEMORY=$(echo "${AVAIL_MEM[*]}" | tr ' ' '\n' | sort -n | head -1)
+
+# Subtract resources used by the chart. Note these are default values.
+HADOOP_SHARE_CPU=400
+CORES=$(bc -l <<< "scale=0; (${CORES} - ${HADOOP_SHARE_CPU})")
+
+HADOOP_SHARE_MEM=1024
+MEMORY=$(bc -l <<< "scale=0; (${MEMORY} - ${HADOOP_SHARE_MEM})")
+
+CPU_PER_NODE=$(bc -l <<< "scale=2; (${CORES} * ${TARGET_PCT}/100)")
+MEM_PER_NODE=$(bc -l <<< "scale=2; (${MEMORY} * ${TARGET_PCT}/100)")
+
+# Round cpu to lower mCPU
+CPU_PER_NODE=$(bc -l <<< "scale=0; ${CPU_PER_NODE} - (${CPU_PER_NODE} % 10)")
+
+# Round mem to lower Mi
+MEM_PER_NODE=$(bc -l <<< "scale=0; ${MEM_PER_NODE} - (${MEM_PER_NODE} % 100)")
+
+[[ "${CPU_PER_NODE/%.*/}" -lt 100 ]] && echo "WARN: Insufficient available CPU for scheduling" >&2
+[[ "${MEM_PER_NODE/%.*/}" -lt 2048 ]] && MEM_PER_NODE=2048.0 && echo "WARN: Insufficient available Memory for scheduling" >&2
+
+CPU_LIMIT=${CPU_PER_NODE/%.*/m}
+MEM_LIMIT=${MEM_PER_NODE/%.*/Mi}
+
+echo -n "--set yarn.nodeManager.replicas=${NUM_NODES},yarn.nodeManager.resources.requests.cpu=${CPU_LIMIT},yarn.nodeManager.resources.requests.memory=${MEM_LIMIT},yarn.nodeManager.resources.limits.cpu=${CPU_LIMIT},yarn.nodeManager.resources.limits.memory=${MEM_LIMIT}"
diff --git a/hbase-helm/values.yaml b/hbase-helm/values.yaml
new file mode 100755
index 0000000..3881bb8
--- /dev/null
+++ b/hbase-helm/values.yaml
@@ -0,0 +1,55 @@
+# The base hadoop image to use for all components.
+# See this repo for image build details: https://github.com/Comcast/kube-yarn/tree/master/image
+image: danisla/hadoop:2.7.3
+imagePullPolicy: IfNotPresent
+
+hbaseImage: pierrezemb/hbase-docker:distributed-1.3.1-hadoop-2.7.3
+hbaseVersion: 1.4.2
+
+# The version of the hadoop libraries being used in the image.
+hadoopVersion: 2.7.3
+
+# Select antiAffinity as either hard or soft; this chart defaults to soft
+antiAffinity: "soft"
+
+hdfs:
+  nameNode:
+    pdbMinAvailable: 1
+
+    resources:
+      requests:
+        memory: "256Mi"
+        cpu: "10m"
+      limits:
+        memory: "2048Mi"
+        cpu: "1000m"
+
+  dataNode:
+    replicas: 3
+
+    pdbMinAvailable: 3
+
+    resources:
+      requests:
+        memory: "256Mi"
+        cpu: "10m"
+      limits:
+        memory: "2048Mi"
+        cpu: "1000m"
+
+persistence:
+  nameNode:
+    enabled: false
+    storageClass: "-"
+    accessMode: ReadWriteOnce
+    size: 50Gi
+
+  dataNode:
+    enabled: false
+    storageClass: "-"
+    accessMode: ReadWriteOnce
+    size: 200Gi
+
+hbase:
+  zookeeper:
+    quorum: "myzk-zookeeper"
\ No newline at end of file
diff --git a/kafka/README.md b/kafka/README.md
new file mode 100755
index 0000000..5499851
--- /dev/null
+++ b/kafka/README.md
@@ -0,0 +1,25 @@
+# This will use PVCs from a volume provisioner, similar to NiFi.
+
+Check the existing NiFi deployment for how to set up the volume provisioner.
+
+`helm repo add incubator http://storage.googleapis.com/kubernetes-charts-incubator`
+ 
+`helm install --name my-kafka incubator/kafka`
+ 
+`helm delete --purge my-kafka`
+
+# Reuse Zookeeper
+
+
+To reuse ZooKeeper from the other examples, this Helm chart needs to be modified
+to inject the ZooKeeper connection. For an example of how to do that, first create the
+ConfigMap in this directory (kafka.yaml).
+
+From there, modify the StatefulSet with `kubectl edit statefulset my-kafka`, adding this stanza
+to the end of the env declarations so that Kafka reuses the ZooKeeper URL specified in the
+ConfigMap.
+```
+        envFrom:
+        - configMapRef:
+            name: kafka-cm
+```
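+
+Putting it together (a sketch; `my-kafka` matches the release name used above, and whether the
+brokers restart automatically depends on the chart's updateStrategy):
+
+```
+kubectl apply -f kafka.yaml
+kubectl edit statefulset my-kafka   # add the envFrom stanza shown above
+kubectl delete pod my-kafka-0       # repeat per broker if the pods do not roll on their own
+```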
diff --git a/kafka/kafka.yaml b/kafka/kafka.yaml
new file mode 100755
index 0000000..88c9049
--- /dev/null
+++ b/kafka/kafka.yaml
@@ -0,0 +1,7 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  creationTimestamp: 2017-12-27T18:36:28Z
+  name: kafka-cm
+data:
+ KAFKA_ZOOKEEPER_CONNECT: "nifi-zookeeper:2181"
diff --git a/minio/object-store.yaml b/minio/object-store.yaml
new file mode 100755
index 0000000..2afc9ce
--- /dev/null
+++ b/minio/object-store.yaml
@@ -0,0 +1,63 @@
+apiVersion: v1
+kind: Secret
+metadata:
+  name: minio-my-store-access-keys
+type: Opaque
+data:
+  # Base64 encoded string: "minio" (demo access key)
+  username: bWluaW8K
+  # Base64 encoded string: "minio123" (demo secret key)
+  password: bWluaW8xMjMK
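+  # To regenerate these (a sketch): `echo -n minio | base64` and `echo -n minio123 | base64`.
+  # The values above were produced with plain `echo`, so they carry a trailing newline.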
+---
+apiVersion: minio.rook.io/v1alpha1
+kind: ObjectStore
+metadata:
+  name: my-store
+spec:
+  scope:
+    nodeCount: 4
+    # You can have multiple PersistentVolumeClaims in the volumeClaimTemplates list.
+    # Be aware though that all PersistentVolumeClaim templates will be used for each instance (see nodeCount).
+    volumeClaimTemplates:
+    - metadata:
+        name: rook-minio-data1
+      spec:
+        accessModes: [ "ReadWriteOnce" ]
+        # Set the storage class that will be used, otherwise Kubernetes' default storage class will be used.
+        #storageClassName: "my-storage-class"
+        resources:
+          requests:
+            storage: "1M"
+    #- metadata:
+    #    name: rook-minio-data2
+    #  spec:
+    #    accessModes: [ "ReadWriteOnce" ]
+    #    # Uncomment and specify your StorageClass, otherwise
+    #    # the cluster admin defined default StorageClass will be used.
+    #    #storageClassName: "your-cluster-storageclass"
+    #    resources:
+    #      requests:
+    #        storage: "8Gi"
+  # A key value list of annotations
+  annotations:
+  #  key: value
+  placement:
+    tolerations:
+    nodeAffinity:
+    podAffinity:
+    podAntiAffinity:
+  credentials:
+    name: minio-my-store-access-keys
+    namespace: rook-minio
+  clusterDomain:
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: minio-my-store
+spec:
+  type: NodePort
+  ports:
+    - port: 9000
+  selector:
+    app: minio
diff --git a/minio/operator.yaml b/minio/operator.yaml
new file mode 100755
index 0000000..3b347c5
--- /dev/null
+++ b/minio/operator.yaml
@@ -0,0 +1,99 @@
+apiVersion: apiextensions.k8s.io/v1beta1
+kind: CustomResourceDefinition
+metadata:
+  name: objectstores.minio.rook.io
+spec:
+  group: minio.rook.io
+  names:
+    kind: ObjectStore
+    listKind: ObjectStoreList
+    plural: objectstores
+    singular: objectstore
+  scope: Namespaced
+  version: v1alpha1
+---
+apiVersion: rbac.authorization.k8s.io/v1beta1
+kind: ClusterRole
+metadata:
+  name: rook-minio-operator
+rules:
+- apiGroups:
+  - ""
+  resources:
+  - namespaces
+  - secrets
+  - pods
+  - services
+  verbs:
+  - get
+  - watch
+  - create
+  - update
+- apiGroups:
+  - apps
+  resources:
+  - statefulsets
+  verbs:
+  - get
+  - create
+  - update
+- apiGroups:
+  - minio.rook.io
+  resources:
+  - "*"
+  verbs:
+  - "*"
+- apiGroups:
+  - rook.io
+  resources:
+  - "*"
+  verbs:
+  - "*"
+---
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: rook-minio-operator
+---
+kind: ClusterRoleBinding
+apiVersion: rbac.authorization.k8s.io/v1beta1
+metadata:
+  name: rook-minio-operator
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: rook-minio-operator
+subjects:
+- kind: ServiceAccount
+  name: rook-minio-operator
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: rook-minio-operator
+  labels:
+    app: rook-minio-operator
+spec:
+  selector:
+    matchLabels:
+      app: rook-minio-operator
+  replicas: 1
+  template:
+    metadata:
+      labels:
+        app: rook-minio-operator
+    spec:
+      serviceAccountName: rook-minio-operator
+      containers:
+      - name: rook-minio-operator
+        image: rook/minio:v1.0.5
+        args: ["minio", "operator"]
+        env:
+        - name: POD_NAME
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.name
+        - name: POD_NAMESPACE
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.namespace
diff --git a/ms-spark/README.md b/ms-spark/README.md
new file mode 100755
index 0000000..4dd4b37
--- /dev/null
+++ b/ms-spark/README.md
@@ -0,0 +1,21 @@
+# Steps to set up Spark with S3 interop
+
+- `kubectl create configmap spark-conf --from-file=core-site.xml --from-file=log4j.properties --from-file=spark-defaults.conf --from-file=spark-env.sh -n bigdata`. The first step creates a ConfigMap so we can mount these files directly into Spark.
+- `helm install microsoft/spark --version 1.0.0 --namespace=bigdata`
+- Now you need to *patch* the deployment so that it uses the volumeMounts shown below.
+That is, run `kubectl edit deployment spark -n bigdata` so that the */opt/spark/conf* directory is mounted over by the `spark-conf` ConfigMap created above, like so (a quick check follows the snippet):
+```
+        volumeMounts:
+        - mountPath: /opt/spark/conf/
+          name: spark-conf-vol
+      dnsPolicy: ClusterFirst
+      restartPolicy: Always
+      schedulerName: default-scheduler
+      securityContext: {}
+      terminationGracePeriodSeconds: 30
+      volumes:
+      - configMap:
+          defaultMode: 420
+          name: spark-conf
+        name: spark-conf-vol
+```
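+
+A quick way to confirm the mount (a sketch; the pod name is a placeholder for whichever Spark
+master pod the chart created):
+
+```
+kubectl -n bigdata get pods
+kubectl -n bigdata exec <spark-master-pod> -- ls /opt/spark/conf
+```
+
+You should see core-site.xml, log4j.properties, spark-defaults.conf and spark-env.sh from the ConfigMap.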
diff --git a/ms-spark/core-site.xml b/ms-spark/core-site.xml
new file mode 100755
index 0000000..9307c4e
--- /dev/null
+++ b/ms-spark/core-site.xml
@@ -0,0 +1,49 @@
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<configuration>
+  <property>
+    <name>fs.gs.impl</name>
+    <value>com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem</value>
+    <description>The FileSystem for gs: (GCS) uris.</description>
+  </property>
+  <property>
+    <name>fs.AbstractFileSystem.gs.impl</name>
+    <value>com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS</value>
+    <description>The AbstractFileSystem for gs: (GCS) uris. Only necessary for use with Hadoop 2.</description>
+  </property>
+  <property>
+    <name>fs.gs.project.id</name>
+    <value>NOT_RUNNING_INSIDE_GCE</value>
+  </property>
+  <property>
+    <name>fs.s3a.endpoint</name>
+    <description>AWS S3 endpoint to connect to. An up-to-date list is
+      provided in the AWS Documentation: regions and endpoints. Without this
+      property, the standard region (s3.amazonaws.com) is assumed.
+    </description>
+    <value>http://127.0.0.1:9000</value>
+  </property>
+  <property>
+    <name>fs.s3a.access.key</name>
+    <description>AWS access key ID.</description>
+    <value>minio</value>
+  </property>
+  <property>
+    <name>fs.s3a.secret.key</name>
+    <description>AWS secret key.</description>
+    <value>minio123</value>
+  </property>
+  <property>
+    <name>fs.s3a.path.style.access</name>
+    <value>true</value>
+    <description>Enable S3 path style access ie disabling the default virtual hosting behaviour.
+      Useful for S3A-compliant storage providers as it removes the need to set up DNS for virtual hosting.
+    </description>
+  </property>
+  <property>
+    <name>fs.s3a.impl</name>
+    <value>org.apache.hadoop.fs.s3a.S3AFileSystem</value>
+    <description>The implementation class of the S3A Filesystem</description>
+  </property>
+</configuration>
diff --git a/ms-spark/log4j.properties b/ms-spark/log4j.properties
new file mode 100755
index 0000000..3a2a882
--- /dev/null
+++ b/ms-spark/log4j.properties
@@ -0,0 +1,12 @@
+# Set everything to be logged to the console
+log4j.rootCategory=INFO, console
+log4j.appender.console=org.apache.log4j.ConsoleAppender
+log4j.appender.console.target=System.err
+log4j.appender.console.layout=org.apache.log4j.PatternLayout
+log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
+
+# Settings to quiet third party logs that are too verbose
+log4j.logger.org.spark-project.jetty=WARN
+log4j.logger.org.spark-project.jetty.util.component.AbstractLifeCycle=ERROR
+log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
+log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO
diff --git a/ms-spark/spark-defaults.conf b/ms-spark/spark-defaults.conf
new file mode 100755
index 0000000..5b3e62b
--- /dev/null
+++ b/ms-spark/spark-defaults.conf
@@ -0,0 +1 @@
+spark.app.id KubernetesSpark
diff --git a/ms-spark/spark-deployment.yaml b/ms-spark/spark-deployment.yaml
new file mode 100755
index 0000000..23567e7
--- /dev/null
+++ b/ms-spark/spark-deployment.yaml
@@ -0,0 +1,94 @@
+apiVersion: extensions/v1beta1
+kind: Deployment
+metadata:
+  annotations:
+    deployment.kubernetes.io/revision: "4"
+  creationTimestamp: "2019-08-23T15:21:00Z"
+  generation: 4
+  labels:
+    chart: spark-1.0.0
+    component: fantastic-chipmunk-spark-master
+    heritage: Tiller
+    release: fantastic-chipmunk
+  name: fantastic-chipmunk-master
+  namespace: bigdata
+  resourceVersion: "98252"
+  selfLink: /apis/extensions/v1beta1/namespaces/bigdata/deployments/fantastic-chipmunk-master
+  uid: 579b17c2-15ae-463a-8dc1-561a1fc29c94
+spec:
+  progressDeadlineSeconds: 2147483647
+  replicas: 1
+  revisionHistoryLimit: 2147483647
+  selector:
+    matchLabels:
+      component: fantastic-chipmunk-spark-master
+  strategy:
+    rollingUpdate:
+      maxSurge: 1
+      maxUnavailable: 1
+    type: RollingUpdate
+  template:
+    metadata:
+      creationTimestamp: null
+      labels:
+        chart: spark-1.0.0
+        component: fantastic-chipmunk-spark-master
+        heritage: Tiller
+        release: fantastic-chipmunk
+    spec:
+      containers:
+      - args:
+        - echo $(hostname -i) fantastic-chipmunk-master >> /etc/hosts; /opt/spark/bin/spark-class
+          org.apache.spark.deploy.master.Master
+        command:
+        - /bin/sh
+        - -c
+        env:
+        - name: SPARK_DAEMON_MEMORY
+          value: 1g
+        - name: SPARK_MASTER_HOST
+          value: fantastic-chipmunk-master
+        - name: SPARK_MASTER_PORT
+          value: "7077"
+        - name: SPARK_MASTER_WEBUI_PORT
+          value: "8080"
+        image: mcr.microsoft.com/mmlspark/spark2.4:v4_mini
+        imagePullPolicy: IfNotPresent
+        name: fantastic-chipmunk-master
+        ports:
+        - containerPort: 7077
+          protocol: TCP
+        - containerPort: 8080
+          protocol: TCP
+        resources:
+          requests:
+            cpu: 100m
+            memory: 1Gi
+        terminationMessagePath: /dev/termination-log
+        terminationMessagePolicy: File
+        volumeMounts:
+        - mountPath: /opt/spark/conf/
+          name: spark-conf-vol
+      dnsPolicy: ClusterFirst
+      restartPolicy: Always
+      schedulerName: default-scheduler
+      securityContext: {}
+      terminationGracePeriodSeconds: 30
+      volumes:
+      - configMap:
+          defaultMode: 420
+          name: spark-conf
+        name: spark-conf-vol
+status:
+  availableReplicas: 1
+  conditions:
+  - lastTransitionTime: "2019-08-23T15:21:00Z"
+    lastUpdateTime: "2019-08-23T15:21:00Z"
+    message: Deployment has minimum availability.
+    reason: MinimumReplicasAvailable
+    status: "True"
+    type: Available
+  observedGeneration: 4
+  readyReplicas: 1
+  replicas: 1
+  updatedReplicas: 1
diff --git a/ms-spark/spark-env.sh b/ms-spark/spark-env.sh
new file mode 100755
index 0000000..6d1ab27
--- /dev/null
+++ b/ms-spark/spark-env.sh
@@ -0,0 +1 @@
+SPARK_DIST_CLASSPATH=/jars:/jars/*:/opt/hadoop/etc/hadoop:/opt/hadoop/share/hadoop/common/lib/*:/opt/hadoop/share/hadoop/common/*:/opt/hadoop/share/hadoop/hdfs:/opt/hadoop/share/hadoop/hdfs/l
diff --git a/nifi/README.md b/nifi/README.md
new file mode 100755
index 0000000..5983a95
--- /dev/null
+++ b/nifi/README.md
@@ -0,0 +1,21 @@
+
+ZooKeeper requires a PersistentVolumeClaim, and the Helm chart isn't editable after install because
+most StatefulSet fields are immutable.
+
+So, install a hostpath-provisioner or NFS provisioner via Helm for this particular example.
+
+```
+helm upgrade --install hostpath-provisioner --namespace kube-system rimusz/hostpath-provisioner
+```
+
+Alternatively, you can manually provision hostPath volumes as shown in the YAML file.
+
+Great work by cetic getting NiFi helmified! Other than the StatefulSet issue for ZooKeeper, which
+needs a volume, it deploys as-is out of the box.
+ 
+```
+helm repo add cetic https://cetic.github.io/helm-charts
+helm repo update
+```
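+
+With the repo added, installing the chart is the usual Helm 2 style command (the release name is
+just an example):
+
+```
+helm install --name my-nifi cetic/nifi
+```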
+
+
diff --git a/nifi/volume.yaml b/nifi/volume.yaml
new file mode 100755
index 0000000..2d44eec
--- /dev/null
+++ b/nifi/volume.yaml
@@ -0,0 +1,31 @@
+### Example of manually provisioning hostPath PersistentVolumes (a static alternative to a dynamic provisioner)
+
+kind: PersistentVolume
+apiVersion: v1
+metadata:
+  name: hp-pv-001
+  labels:
+    type: local
+spec:
+  storageClassName: manual
+  capacity:
+    storage: 10Gi
+  accessModes:
+    - ReadWriteOnce
+  hostPath:
+    path: "/tmp/data01"
+---
+kind: PersistentVolume
+apiVersion: v1
+metadata:
+  name: hp-pv-002
+  labels:
+    type: local
+spec:
+  storageClassName: manual
+  capacity:
+    storage: 10Gi
+  accessModes:
+    - ReadWriteOnce
+  hostPath:
+    path: "/tmp/data02"
diff --git a/presto3-minio/README.md b/presto3-minio/README.md
new file mode 100644
index 0000000..c7378b7
--- /dev/null
+++ b/presto3-minio/README.md
@@ -0,0 +1,32 @@
+# Setting up a data warehouse with Minio and Presto
+
+The test.sh script in this module demonstrates how to deploy Minio alongside Presto so that you
+can then use Minio as a catalog inside Presto.
+
+After deploying the Presto coordinator (it runs in standalone mode, so it also acts as a worker and
+no separate workers are needed), you can `kubectl exec` into the Presto container and run the Presto client.
+
+Then, list the catalogs:
+
+```
+presto> show CATALOGS;
+ Catalog 
+---------
+ minio
+ mysql
+ system
+ tpch
+(4 rows)
+
+Query 20190831_191859_00000_c7y4q, FINISHED, 1 node
+Splits: 19 total, 19 done (100.00%)
+0:02 [0 rows, 0B] [0 rows/s, 0B/s]
+```
+
+The minio catalog above is a queryable SQL data store against which you can run regular Presto queries.
+
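+A minimal follow-up query (a sketch; it only lists schemas, since anything beyond that depends on
+the data you have loaded into Minio):
+
+```
+presto> SHOW SCHEMAS FROM minio;
+```
+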
+The hive metastore is started by these images, which were originally created by engineers at Minio.
+
+For future work, we should create our own Minio images which can do this in a modular way, and
+put more ConfigMap options into the containers themselves, possibly injecting most or all of the
+Presto config via a ConfigMap YAML (as is done for Spark and Kafka in this repo).
\ No newline at end of file
diff --git a/presto3-minio/coordinator/Dockerfile b/presto3-minio/coordinator/Dockerfile
new file mode 100755
index 0000000..ac9c71b
--- /dev/null
+++ b/presto3-minio/coordinator/Dockerfile
@@ -0,0 +1,6 @@
+FROM wlan0/presto-benchmark-environment:coordinator 
+COPY ./json /resources/nyc_taxi_data_3M.json
+COPY ./minio.properties /resources/presto-server-302-e.11/etc/catalog/minio.properties
+
+COPY config.properties /resources/presto-server-302-e.11/etc/config.properties 
+CMD cat   /resources/presto-server-302-e.11/etc/config.properties ;  /resources/presto-server-302-e.11/bin/launcher start && cat /resources/presto-server-302-e.11/etc/config.properties && echo "Hardcoded to s3://minio:9000 , using 'minio' and 'minio123' as the access credentials..." && sleep 60 && cat /resources/presto-server-302-e.11/etc/config.properties &&  tail -f ./resources/presto-server-302-e.11/var/log/server.log
diff --git a/presto3-minio/coordinator/README.md b/presto3-minio/coordinator/README.md
new file mode 100755
index 0000000..3423f65
--- /dev/null
+++ b/presto3-minio/coordinator/README.md
@@ -0,0 +1,8 @@
+# Use --squash!
+
+When building this image, make sure you use `--squash` so you can shrink the 12 GB image :)
+
+
+docker build --squash -t jayunit100/wlan0-presto:latest ./
+
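+
+Note that `--squash` is an experimental Docker feature, so the daemon must run with experimental
+features enabled. A sketch for a Linux host (assumes there is no existing /etc/docker/daemon.json
+to preserve):
+
+```
+echo '{"experimental": true}' | sudo tee /etc/docker/daemon.json
+sudo systemctl restart docker
+```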
diff --git a/presto3-minio/coordinator/config.properties b/presto3-minio/coordinator/config.properties
new file mode 100755
index 0000000..6a214ae
--- /dev/null
+++ b/presto3-minio/coordinator/config.properties
@@ -0,0 +1,8 @@
+coordinator=true
+node-scheduler.include-coordinator=true
+http-server.http.port=8080
+query.max-memory=1GB
+query.max-memory-per-node=1GB
+query.max-total-memory-per-node=1GB
+discovery-server.enabled=true
+discovery.uri=http://presto:8080
diff --git a/presto3-minio/coordinator/json b/presto3-minio/coordinator/json
new file mode 100755
index 0000000..6906dcd
--- /dev/null
+++ b/presto3-minio/coordinator/json
@@ -0,0 +1,2 @@
+{"id":"3418178","vendor_id":"1","pickup_datetime":"2014-03-13 20:17:16","dropoff_datetime":"2014-03-13 20:22:29","store_and_fwd_flag":"N","rate_code_id":"1","pickup_longitude":"-73.9873046875","pickup_latitude":"40.691890716552734","dropoff_longitude":"-73.975677490234375","dropoff_latitude":"40.696006774902344","passenger_count":"1","trip_distance":"0.90","fare_amount":"5.5","extra":"0.5","mta_tax":"0.5","tip_amount":"0","tolls_amount":"0","ehail_fee":"","improvement_surcharge":"","tota [...]
+
diff --git a/presto3-minio/coordinator/minio.properties b/presto3-minio/coordinator/minio.properties
new file mode 100755
index 0000000..917adc0
--- /dev/null
+++ b/presto3-minio/coordinator/minio.properties
@@ -0,0 +1,14 @@
+connector.name=hive-hadoop2
+hive.metastore=file
+hive.metastore.catalog.dir=s3://nyctaxidata/
+hive.allow-drop-table=true
+hive.s3.aws-access-key=minio
+hive.s3.aws-secret-key=minio123
+hive.s3.endpoint=http://minio:9000
+hive.s3.path-style-access=true
+hive.s3.ssl.enabled=false
+hive.s3.max-connections=4000
+hive.s3.multipart.min-file-size=512 MB
+hive.s3.multipart.min-part-size=512 MB
+hive.s3select-pushdown.enabled=true
+hive.compression-codec=SNAPPY
diff --git a/presto3-minio/presto-deployment-master.yaml b/presto3-minio/presto-deployment-master.yaml
new file mode 100755
index 0000000..332785f
--- /dev/null
+++ b/presto3-minio/presto-deployment-master.yaml
@@ -0,0 +1,26 @@
+apiVersion: extensions/v1beta1
+kind: Deployment
+metadata:
+  annotations:
+  name: presto-master
+spec:
+  replicas: 1
+  strategy:
+    type: Recreate
+  template:
+    metadata:
+      creationTimestamp: null
+      labels:
+        app: presto
+    spec:
+      containers:
+      - image: jayunit100/wlan0-presto-coordinator:latest
+        name: presto-w
+        ports:
+        - containerPort: 8080
+        resources:
+          requests:
+            memory: "6G"
+      hostname: presto
+      restartPolicy: Always
+status: {}
diff --git a/presto3-minio/presto-deployment-worker.yaml b/presto3-minio/presto-deployment-worker.yaml
new file mode 100755
index 0000000..a0d2cdd
--- /dev/null
+++ b/presto3-minio/presto-deployment-worker.yaml
@@ -0,0 +1,26 @@
+apiVersion: extensions/v1beta1
+kind: Deployment
+metadata:
+  annotations:
+  name: presto-worker
+spec:
+  replicas: 1
+  strategy:
+    type: Recreate
+  template:
+    metadata:
+      creationTimestamp: null
+      labels:
+        app: presto-worker
+    spec:
+      containers:
+      - image: jayunit100/wlan0-presto-worker:latest
+        name: presto-w
+        resources:
+          requests:
+            memory: "6G"
+        ports:
+        - containerPort: 8080
+      hostname: presto
+      restartPolicy: Always
+status: {}
diff --git a/presto3-minio/presto-deployment.yaml b/presto3-minio/presto-deployment.yaml
new file mode 100644
index 0000000..3e3c7da
--- /dev/null
+++ b/presto3-minio/presto-deployment.yaml
@@ -0,0 +1,23 @@
+apiVersion: extensions/v1beta1
+kind: Deployment
+metadata:
+  annotations:
+  name: presto-worker
+spec:
+  replicas: 1
+  strategy:
+    type: Recreate
+  template:
+    metadata:
+      creationTimestamp: null
+      labels:
+        io.kompose.service: presto
+    spec:
+      containers:
+      - image: jayunit100/wlan0-presto-worker:latest
+        name: presto-w
+        ports:
+        - containerPort: 8080
+      hostname: presto
+      restartPolicy: Always
+status: {}
diff --git a/presto3-minio/service.yaml b/presto3-minio/service.yaml
new file mode 100644
index 0000000..ba79df0
--- /dev/null
+++ b/presto3-minio/service.yaml
@@ -0,0 +1,9 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: presto
+spec:
+  selector:
+    app: presto
+  ports: 
+    - port: 8080
diff --git a/presto3-minio/test.sh b/presto3-minio/test.sh
new file mode 100755
index 0000000..be3ffff
--- /dev/null
+++ b/presto3-minio/test.sh
@@ -0,0 +1,19 @@
+echo "ok to cleanup?"
+read x
+
+kubectl delete ns minio
+helm delete minio --purge
+
+
+
+kubectl create ns minio ; 
+# Sets up minio passwords...
+kubectl -n minio create secret generic my-minio-secret --from-literal=accesskey=minio --from-literal=secretkey=minio123 
+
+# Install minio
+helm install --set existingSecret=my-minio-secret stable/minio --namespace=minio --name=minio
+
+echo "installing via helm: minio"
+
+# Install presto w/ minio configured...
+kubectl create -f ./ -n minio
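+
+# Follow-up (a sketch; the deployment name comes from presto-deployment-master.yaml and the pod
+# name below is a placeholder):
+#   kubectl get pods -n minio
+#   kubectl exec -it <presto-master-pod> -n minio -- /bin/bash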
diff --git a/presto3-minio/worker/Dockerfile b/presto3-minio/worker/Dockerfile
new file mode 100755
index 0000000..6dcf74e
--- /dev/null
+++ b/presto3-minio/worker/Dockerfile
@@ -0,0 +1,5 @@
+FROM wlan0/presto-benchmark-environment:coordinator 
+COPY ./json /resources/nyc_taxi_data_3M.json
+COPY minio.properties /resources/presto-server-302-e.11/etc/catalog/minio.properties
+COPY config.properties /resources/presto-server-302-e.11/etc/config.properties 
+CMD cat   /resources/presto-server-302-e.11/etc/config.properties ;  /resources/presto-server-302-e.11/bin/launcher start && cat /resources/presto-server-302-e.11/etc/config.properties && echo "Hardcoded to s3://minio:9000 , using 'minio' and 'minio123' as the access credentials..." && sleep 60 && cat /resources/presto-server-302-e.11/etc/config.properties &&  tail -f ./resources/presto-server-302-e.11/var/log/server.log
diff --git a/presto3-minio/worker/README.md b/presto3-minio/worker/README.md
new file mode 100755
index 0000000..d5e6e16
--- /dev/null
+++ b/presto3-minio/worker/README.md
@@ -0,0 +1,6 @@
+# Use --squash!
+
+When building this image, make sure you use `--squash` so you can shrink the 12 GB image :)
+
+
+docker build --squash -t jayunit100/wlan0-presto-worker:latest ./
diff --git a/presto3-minio/worker/config.properties b/presto3-minio/worker/config.properties
new file mode 100755
index 0000000..0a089be
--- /dev/null
+++ b/presto3-minio/worker/config.properties
@@ -0,0 +1,7 @@
+coordinator=false
+node-scheduler.include-coordinator=false
+http-server.http.port=8080
+query.max-memory=1GB
+query.max-memory-per-node=1GB
+query.max-total-memory-per-node=1GB
+discovery.uri=http://presto-master:8080
diff --git a/presto3-minio/worker/json b/presto3-minio/worker/json
new file mode 100755
index 0000000..6906dcd
--- /dev/null
+++ b/presto3-minio/worker/json
@@ -0,0 +1,2 @@
+{"id":"3418178","vendor_id":"1","pickup_datetime":"2014-03-13 20:17:16","dropoff_datetime":"2014-03-13 20:22:29","store_and_fwd_flag":"N","rate_code_id":"1","pickup_longitude":"-73.9873046875","pickup_latitude":"40.691890716552734","dropoff_longitude":"-73.975677490234375","dropoff_latitude":"40.696006774902344","passenger_count":"1","trip_distance":"0.90","fare_amount":"5.5","extra":"0.5","mta_tax":"0.5","tip_amount":"0","tolls_amount":"0","ehail_fee":"","improvement_surcharge":"","tota [...]
+
diff --git a/presto3-minio/worker/minio.properties b/presto3-minio/worker/minio.properties
new file mode 100755
index 0000000..917adc0
--- /dev/null
+++ b/presto3-minio/worker/minio.properties
@@ -0,0 +1,14 @@
+connector.name=hive-hadoop2
+hive.metastore=file
+hive.metastore.catalog.dir=s3://nyctaxidata/
+hive.allow-drop-table=true
+hive.s3.aws-access-key=minio
+hive.s3.aws-secret-key=minio123
+hive.s3.endpoint=http://minio:9000
+hive.s3.path-style-access=true
+hive.s3.ssl.enabled=false
+hive.s3.max-connections=4000
+hive.s3.multipart.min-file-size=512 MB
+hive.s3.multipart.min-part-size=512 MB
+hive.s3select-pushdown.enabled=true
+hive.compression-codec=SNAPPY
diff --git a/storage-provisioner/README.md b/storage-provisioner/README.md
new file mode 100755
index 0000000..09abdd1
--- /dev/null
+++ b/storage-provisioner/README.md
@@ -0,0 +1,28 @@
+1. The easiest way to get simple PVCs working on a one-node cluster:
+
+```
+helm upgrade --install hostpath-provisioner --namespace kube-system rimusz/hostpath-provisioner
+```
+
+2. NFS is better for a real cluster, and is almost as easy as the hostpath-provisioner.
+
+```
+ helm install stable/nfs-server-provisioner
+```
+
+Alternatively, NFS or another proper distributed filesystem can be used
+out of the box.
+
+After installing the NFS provisioner, run:
+
+```
+
+kubectl patch storageclass nfs -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"true"}}}'
+
+```
+
+This causes the dynamic volumes needed by ZooKeeper (and the other stateful components) to be
+provisioned via NFS.
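+
+A quick way to confirm the default class took effect (the `nfs` class name comes from the patch
+command above; the PVC listing is just a sanity check once charts are installed):
+
+```
+kubectl get storageclass
+kubectl get pvc --all-namespaces
+```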
+
+
+For Minio, see the presto3-minio work, which also includes Helm instructions for running Minio alongside Presto.