Posted to issues@hbase.apache.org by GitBox <gi...@apache.org> on 2018/11/02 23:24:02 UTC

[GitHub] saintstack closed pull request #4: HBASE-21430 [hbase-connectors] Move hbase-spark* modules to hbase-connectors repo

saintstack closed pull request #4: HBASE-21430 [hbase-connectors] Move hbase-spark* modules to hbase-connectors repo
URL: https://github.com/apache/hbase-connectors/pull/4
 
 
   

This is a PR merged from a forked repository.
Because GitHub hides the original diff once a foreign (forked) pull request is
merged, the diff is reproduced below for the sake of provenance:

diff --git a/README.md b/README.md
index c2b2071..b0159f9 100644
--- a/README.md
+++ b/README.md
@@ -3,3 +3,4 @@
 Connectors for [Apache HBase&trade;](https://hbase.apache.org) 
 
   * [Kafka Proxy](https://github.com/apache/hbase-connectors/tree/master/kafka)
+  * [Spark](https://github.com/apache/hbase-connectors/tree/master/spark)
diff --git a/kafka/hbase-kafka-proxy/pom.xml b/kafka/hbase-kafka-proxy/pom.xml
index 642f139..4a45ed7 100755
--- a/kafka/hbase-kafka-proxy/pom.xml
+++ b/kafka/hbase-kafka-proxy/pom.xml
@@ -33,10 +33,7 @@
   <description>Proxy that forwards HBase replication events to a Kafka broker</description>
   <properties>
     <collections.version>4.1</collections.version>
-    <commons-lang3.version>3.6</commons-lang3.version>
-    <commons-io.version>2.5</commons-io.version>
     <kafka-clients.version>2.0.0</kafka-clients.version>
-    <commons-io.version>2.5</commons-io.version>
   </properties>
   <build>
     <plugins>
@@ -80,12 +77,6 @@
       <groupId>org.apache.hbase.connectors.kafka</groupId>
       <artifactId>hbase-kafka-model</artifactId>
     </dependency>
-    <dependency>
-      <groupId>org.apache.hbase</groupId>
-      <artifactId>hbase-common</artifactId>
-      <type>test-jar</type>
-      <scope>test</scope>
-    </dependency>
     <dependency>
       <groupId>org.apache.hbase</groupId>
       <artifactId>hbase-annotations</artifactId>
@@ -106,17 +97,6 @@
     <dependency>
       <groupId>org.apache.commons</groupId>
       <artifactId>commons-lang3</artifactId>
-      <version>${commons-lang3.version}</version>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.commons</groupId>
-      <artifactId>commons-collections4</artifactId>
-      <version>${collections.version}</version>
-    </dependency>
-    <dependency>
-      <groupId>commons-io</groupId>
-      <artifactId>commons-io</artifactId>
-      <version>${commons-io.version}</version>
     </dependency>
   </dependencies>
 
diff --git a/kafka/pom.xml b/kafka/pom.xml
index c489122..5c4df9e 100644
--- a/kafka/pom.xml
+++ b/kafka/pom.xml
@@ -35,16 +35,9 @@
     <module>hbase-kafka-model</module>
     <module>hbase-kafka-proxy</module>
   </modules>
-  <properties>
-    <avro.version>1.7.7</avro.version>
-  </properties>
+  <properties />
   <dependencyManagement>
     <dependencies>
-      <dependency>
-        <groupId>org.apache.avro</groupId>
-        <artifactId>avro</artifactId>
-        <version>${avro.version}</version>
-      </dependency>
       <dependency>
         <groupId>org.apache.hbase.connectors.kafka</groupId>
         <artifactId>hbase-kafka-model</artifactId>
diff --git a/pom.xml b/pom.xml
index 0a6b39c..072a549 100755
--- a/pom.xml
+++ b/pom.xml
@@ -47,6 +47,7 @@
   </licenses>
   <modules>
     <module>kafka</module>
+    <module>spark</module>
     <module>hbase-connectors-assembly</module>
   </modules>
   <scm>
@@ -114,19 +115,61 @@
   <properties>
     <!-- See https://maven.apache.org/maven-ci-friendly.html -->
     <revision>1.0.0-SNAPSHOT</revision>
+    <os.maven.version>1.6.1</os.maven.version>
     <maven.javadoc.skip>true</maven.javadoc.skip>
     <maven.build.timestamp.format>yyyy-MM-dd'T'HH:mm</maven.build.timestamp.format>
     <buildDate>${maven.build.timestamp}</buildDate>
     <compileSource>1.8</compileSource>
     <java.min.version>${compileSource}</java.min.version>
     <maven.min.version>3.5.0</maven.min.version>
-    <hbase.version>2.1.0</hbase.version>
+    <hbase.version>3.0.0-SNAPSHOT</hbase.version>
     <maven.compiler.version>3.6.1</maven.compiler.version>
     <exec.maven.version>1.6.0</exec.maven.version>
     <audience-annotations.version>0.5.0</audience-annotations.version>
+    <avro.version>1.7.7</avro.version>
+    <junit.version>4.12</junit.version>
+    <commons-lang3.version>3.6</commons-lang3.version>
+    <slf4j.version>1.7.25</slf4j.version>
+    <commons-io.version>2.5</commons-io.version>
+    <checkstyle.version>8.11</checkstyle.version>
+    <maven.checkstyle.version>3.0.0</maven.checkstyle.version>
+    <external.protobuf.version>2.5.0</external.protobuf.version>
+    <servlet.api.version>3.1.0</servlet.api.version>
+    <!--Need profile for hadoop3. Need to do stuff like set netty
+         version in it... see how hbase/pom.xml does it.
+        <netty.hadoop.version>3.10.5.Final</netty.hadoop.version>
+        For now doing hadoop2 only.
+     -->
+    <hadoop-two.version>2.7.7</hadoop-two.version>
+    <hadoop.version>${hadoop-two.version}</hadoop.version>
+    <netty.hadoop.version>3.6.2.Final</netty.hadoop.version>
+    <!--The below compat.modules also needs to change-->
+    <compat.module>hbase-hadoop2-compat</compat.module>
   </properties>
   <dependencyManagement>
     <dependencies>
+      <dependency>
+        <groupId>org.apache.yetus</groupId>
+        <artifactId>audience-annotations</artifactId>
+        <version>${audience-annotations.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.hbase</groupId>
+        <artifactId>hbase-annotations</artifactId>
+        <version>${hbase.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.hbase</groupId>
+        <artifactId>hbase-annotations</artifactId>
+        <version>${hbase.version}</version>
+        <type>test-jar</type>
+        <scope>test</scope>
+      </dependency>
       <dependency>
         <groupId>org.apache.hbase</groupId>
         <artifactId>hbase-annotations</artifactId>
@@ -134,12 +177,29 @@
         <type>test-jar</type>
         <scope>test</scope>
       </dependency>
+      <dependency>
+        <groupId>org.apache.hbase</groupId>
+        <artifactId>hbase-common</artifactId>
+        <version>${hbase.version}</version>
+        <exclusions>
+          <exclusion>
+            <groupId>com.google.code.findbugs</groupId>
+            <artifactId>jsr305</artifactId>
+          </exclusion>
+        </exclusions>
+      </dependency>
       <dependency>
         <groupId>org.apache.hbase</groupId>
         <artifactId>hbase-common</artifactId>
         <version>${hbase.version}</version>
         <type>test-jar</type>
         <scope>test</scope>
+        <exclusions>
+          <exclusion>
+            <groupId>com.google.code.findbugs</groupId>
+            <artifactId>jsr305</artifactId>
+          </exclusion>
+        </exclusions>
       </dependency>
       <dependency>
         <groupId>org.apache.hbase</groupId>
@@ -147,9 +207,145 @@
         <version>${hbase.version}</version>
         <scope>provided</scope>
       </dependency>
+      <dependency>
+        <artifactId>hbase-server</artifactId>
+        <groupId>org.apache.hbase</groupId>
+        <version>${hbase.version}</version>
+        <type>test-jar</type>
+        <scope>test</scope>
+      </dependency>
+      <dependency>
+        <artifactId>hbase-client</artifactId>
+        <groupId>org.apache.hbase</groupId>
+        <version>${hbase.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.hbase</groupId>
+        <artifactId>hbase-protocol-shaded</artifactId>
+        <version>${hbase.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.hbase</groupId>
+        <artifactId>hbase-protocol</artifactId>
+        <version>${hbase.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.hbase</groupId>
+        <artifactId>hbase-testing-util</artifactId>
+        <version>${hbase.version}</version>
+        <scope>test</scope>
+        <exclusions>
+          <exclusion>
+            <groupId>com.google.code.findbugs</groupId>
+            <artifactId>jsr305</artifactId>
+          </exclusion>
+        </exclusions>
+      </dependency>
+      <dependency>
+        <artifactId>hbase-it</artifactId>
+        <groupId>org.apache.hbase</groupId>
+        <version>${hbase.version}</version>
+        <type>test-jar</type>
+        <scope>test</scope>
+      </dependency>
+      <dependency>
+        <artifactId>hbase-mapreduce</artifactId>
+        <groupId>org.apache.hbase</groupId>
+        <version>${hbase.version}</version>
+      </dependency>
+      <dependency>
+        <artifactId>hbase-mapreduce</artifactId>
+        <groupId>org.apache.hbase</groupId>
+        <version>${hbase.version}</version>
+        <type>test-jar</type>
+        <scope>test</scope>
+      </dependency>
+      <dependency>
+        <artifactId>hbase-zookeeper</artifactId>
+        <groupId>org.apache.hbase</groupId>
+        <version>${hbase.version}</version>
+      </dependency>
+      <dependency>
+        <artifactId>hbase-zookeeper</artifactId>
+        <groupId>org.apache.hbase</groupId>
+        <version>${hbase.version}</version>
+        <type>test-jar</type>
+        <scope>test</scope>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.hbase</groupId>
+        <artifactId>hbase-hadoop-compat</artifactId>
+        <version>${hbase.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.hbase</groupId>
+        <artifactId>hbase-hadoop-compat</artifactId>
+        <version>${hbase.version}</version>
+        <type>test-jar</type>
+        <scope>test</scope>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.hbase</groupId>
+        <artifactId>${compat.module}</artifactId>
+        <version>${hbase.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.hbase</groupId>
+        <artifactId>${compat.module}</artifactId>
+        <version>${hbase.version}</version>
+        <type>test-jar</type>
+        <scope>test</scope>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.avro</groupId>
+        <artifactId>avro</artifactId>
+        <version>${avro.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>junit</groupId>
+        <artifactId>junit</artifactId>
+        <version>${junit.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.commons</groupId>
+        <artifactId>commons-lang3</artifactId>
+        <version>${commons-lang3.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.slf4j</groupId>
+        <artifactId>slf4j-log4j12</artifactId>
+        <version>${slf4j.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.slf4j</groupId>
+        <artifactId>slf4j-api</artifactId>
+        <version>${slf4j.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>commons-io</groupId>
+        <artifactId>commons-io</artifactId>
+        <version>${commons-io.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>com.google.protobuf</groupId>
+        <artifactId>protobuf-java</artifactId>
+        <version>${external.protobuf.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>javax.servlet</groupId>
+        <artifactId>javax.servlet-api</artifactId>
+        <version>${servlet.api.version}</version>
+      </dependency>
     </dependencies>
   </dependencyManagement>
   <build>
+    <extensions>
+      <extension>
+        <groupId>kr.motd.maven</groupId>
+        <artifactId>os-maven-plugin</artifactId>
+        <version>${os.maven.version}</version>
+      </extension>
+    </extensions>
     <pluginManagement>
       <plugins>
         <!-- See https://maven.apache.org/maven-ci-friendly.html-->
@@ -254,6 +450,31 @@
             <timestampPropertyName>build.year</timestampPropertyName>
           </configuration>
         </plugin>
+        <plugin>
+          <!-- Approach followed here is roughly the same as mentioned here:
+          https://maven.apache.org/plugins/maven-checkstyle-plugin/examples/multi-module-config.html
+          -->
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-checkstyle-plugin</artifactId>
+          <version>${maven.checkstyle.version}</version>
+          <dependencies>
+            <dependency>
+              <groupId>org.apache.hbase</groupId>
+              <artifactId>hbase-checkstyle</artifactId>
+              <version>${project.version}</version>
+            </dependency>
+            <dependency>
+              <groupId>com.puppycrawl.tools</groupId>
+              <artifactId>checkstyle</artifactId>
+              <version>${checkstyle.version}</version>
+            </dependency>
+          </dependencies>
+          <configuration>
+            <configLocation>hbase/checkstyle.xml</configLocation>
+            <suppressionsLocation>hbase/checkstyle-suppressions.xml</suppressionsLocation>
+            <includeTestSourceDirectory>true</includeTestSourceDirectory>
+          </configuration>
+        </plugin>
       </plugins>
     </pluginManagement>
     <plugins>
@@ -304,6 +525,54 @@
               </rules>
             </configuration>
           </execution>
+          <execution>
+            <id>banned-jsr305</id>
+            <goals>
+              <goal>enforce</goal>
+            </goals>
+            <configuration>
+              <rules>
+                <bannedDependencies>
+                  <excludes>
+                    <exclude>com.google.code.findbugs:jsr305</exclude>
+                  </excludes>
+                  <message>We don't allow the JSR305 jar from the Findbugs project, see HBASE-16321.</message>
+                </bannedDependencies>
+              </rules>
+            </configuration>
+          </execution>
+          <execution>
+            <id>banned-scala</id>
+            <goals>
+              <goal>enforce</goal>
+            </goals>
+            <configuration>
+              <rules>
+                <bannedDependencies>
+                  <excludes>
+                    <exclude>org.scala-lang:scala-library</exclude>
+                  </excludes>
+                  <message>We don't allow Scala outside of the hbase-spark module, see HBASE-13992.</message>
+                </bannedDependencies>
+              </rules>
+            </configuration>
+          </execution>
+          <execution>
+            <id>banned-hbase-spark</id>
+            <goals>
+              <goal>enforce</goal>
+            </goals>
+            <configuration>
+              <rules>
+                <bannedDependencies>
+                  <excludes>
+                    <exclude>org.apache.hbase:hbase-spark</exclude>
+                  </excludes>
+                  <message>We don't allow other modules to depend on hbase-spark, see HBASE-13992.</message>
+                </bannedDependencies>
+              </rules>
+            </configuration>
+          </execution>
         </executions>
       </plugin>
     </plugins>
diff --git a/spark/README.md b/spark/README.md
new file mode 100755
index 0000000..dcd11c7
--- /dev/null
+++ b/spark/README.md
@@ -0,0 +1 @@
+# Apache HBase&trade; Spark Connector
diff --git a/spark/hbase-spark-it/pom.xml b/spark/hbase-spark-it/pom.xml
new file mode 100644
index 0000000..4be3daa
--- /dev/null
+++ b/spark/hbase-spark-it/pom.xml
@@ -0,0 +1,355 @@
+<?xml version="1.0"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+<!--
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+-->
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>org.apache.hbase.connectors</groupId>
+    <artifactId>spark</artifactId>
+    <version>${revision}</version>
+    <relativePath>../</relativePath>
+  </parent>
+  <groupId>org.apache.hbase.connectors.spark</groupId>
+  <artifactId>hbase-spark-it</artifactId>
+  <name>Apache HBase - Spark Integration Tests</name>
+  <description>Integration and System tests for HBase</description>
+  <properties>
+    <spark.version>2.1.1</spark.version>
+    <!-- The following version is in sync with Spark's choice
+         Please take caution when this version is modified -->
+    <scala.version>2.11.8</scala.version>
+    <scala.binary.version>2.11</scala.binary.version>
+    <!-- Test inclusion patterns used by failsafe configuration -->
+    <unittest.include>**/Test*.java</unittest.include>
+    <integrationtest.include>**/IntegrationTest*.java</integrationtest.include>
+    <!-- To Run Tests with a particular Xmx Value use -Dfailsafe.Xmx=XXXg -->
+    <failsafe.Xmx>4g</failsafe.Xmx>
+    <!-- To run a single integration test, use -Dit.test=IntegrationTestXXX -->
+  </properties>
+  <build>
+    <pluginManagement>
+      <plugins>
+        <!-- Make a jar and put the sources in the jar -->
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-source-plugin</artifactId>
+        </plugin>
+        <plugin>
+          <!--Make it so assembly:single does nothing in here-->
+          <artifactId>maven-assembly-plugin</artifactId>
+          <configuration>
+            <skipAssembly>true</skipAssembly>
+          </configuration>
+        </plugin>
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-failsafe-plugin</artifactId>
+          <version>${surefire.version}</version>
+          <dependencies>
+            <dependency>
+              <groupId>org.apache.maven.surefire</groupId>
+              <artifactId>surefire-junit4</artifactId>
+              <version>${surefire.version}</version>
+            </dependency>
+          </dependencies>
+          <configuration>
+            <includes>
+              <include>${integrationtest.include}</include>
+            </includes>
+            <excludes>
+              <exclude>${unittest.include}</exclude>
+              <exclude>**/*$*</exclude>
+            </excludes>
+            <redirectTestOutputToFile>${test.output.tofile}</redirectTestOutputToFile>
+            <failIfNoTests>false</failIfNoTests>
+            <testFailureIgnore>false</testFailureIgnore>
+          </configuration>
+          <executions>
+            <execution>
+              <id>integration-test</id>
+              <phase>integration-test</phase>
+              <goals>
+                <goal>integration-test</goal>
+              </goals>
+            </execution>
+            <execution>
+              <id>verify</id>
+              <phase>verify</phase>
+              <goals>
+                <goal>verify</goal>
+              </goals>
+            </execution>
+          </executions>
+        </plugin>
+      </plugins>
+    </pluginManagement>
+
+    <plugins>
+      <!--  Run integration tests with mvn verify -->
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-failsafe-plugin</artifactId>
+        <configuration>
+          <skip>false</skip>
+          <forkMode>always</forkMode>
+          <!-- TODO: failsafe does time out, but verify does not fail the build because of the timeout.
+               I believe it is a failsafe bug; we may consider using surefire -->
+          <forkedProcessTimeoutInSeconds>1800</forkedProcessTimeoutInSeconds>
+          <argLine>-enableassertions -Xmx${failsafe.Xmx}
+            -Djava.security.egd=file:/dev/./urandom -XX:+CMSClassUnloadingEnabled
+            -verbose:gc -XX:+PrintCommandLineFlags  -XX:+PrintFlagsFinal</argLine>
+        </configuration>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-enforcer-plugin</artifactId>
+        <executions>
+          <!-- hbase-spark is ok in this module -->
+          <execution>
+            <id>banned-hbase-spark</id>
+            <goals>
+              <goal>enforce</goal>
+            </goals>
+            <configuration>
+              <skip>true</skip>
+            </configuration>
+          </execution>
+          <execution>
+            <id>banned-scala</id>
+            <goals>
+              <goal>enforce</goal>
+            </goals>
+            <configuration>
+              <skip>true</skip>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+      <plugin>
+        <artifactId>maven-dependency-plugin</artifactId>
+        <executions>
+          <execution>
+            <id>create-mrapp-generated-classpath</id>
+            <phase>generate-test-resources</phase>
+            <goals>
+              <goal>build-classpath</goal>
+            </goals>
+            <configuration>
+              <!-- needed to run the unit test for DS to generate the
+              classpath required in the environment of the launch
+              container in the mini cluster
+              -->
+              <outputFile>${project.build.directory}/test-classes/spark-generated-classpath</outputFile>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-checkstyle-plugin</artifactId>
+        <configuration>
+          <failOnViolation>true</failOnViolation>
+        </configuration>
+      </plugin>
+      <plugin>
+        <groupId>net.revelc.code</groupId>
+        <artifactId>warbucks-maven-plugin</artifactId>
+      </plugin>
+    </plugins>
+  </build>
+
+  <dependencies>
+    <!-- Intra-project dependencies -->
+    <dependency>
+      <groupId>org.apache.hbase</groupId>
+      <artifactId>hbase-common</artifactId>
+      <type>jar</type>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hbase</groupId>
+      <artifactId>hbase-client</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hbase</groupId>
+      <artifactId>hbase-server</artifactId>
+      <type>jar</type>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hbase</groupId>
+      <artifactId>hbase-server</artifactId>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hbase.connectors.spark</groupId>
+      <artifactId>hbase-spark</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hbase</groupId>
+      <artifactId>hbase-it</artifactId>
+      <type>test-jar</type>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hbase</groupId>
+      <artifactId>${compat.module}</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hbase</groupId>
+      <artifactId>hbase-testing-util</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hbase.thirdparty</groupId>
+      <artifactId>hbase-shaded-miscellaneous</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-common</artifactId>
+      <version>${hadoop-two.version}</version>
+      <exclusions>
+        <exclusion>
+          <groupId>com.google.code.findbugs</groupId>
+          <artifactId>jsr305</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-common</artifactId>
+      <version>${hadoop-two.version}</version>
+      <type>test-jar</type>
+      <scope>test</scope>
+      <exclusions>
+        <exclusion>
+          <groupId>com.google.code.findbugs</groupId>
+          <artifactId>jsr305</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-api</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.commons</groupId>
+      <artifactId>commons-lang3</artifactId>
+    </dependency>
+    <!-- Hadoop needs Netty 3.x at test scope for the minicluster -->
+    <dependency>
+      <groupId>io.netty</groupId>
+      <artifactId>netty</artifactId>
+      <version>${netty.hadoop.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-core_${scala.binary.version}</artifactId>
+      <version>${spark.version}</version>
+      <scope>provided</scope>
+      <exclusions>
+        <exclusion>
+          <!-- make sure wrong scala version is not pulled in -->
+          <groupId>org.scala-lang</groupId>
+          <artifactId>scala-library</artifactId>
+        </exclusion>
+        <exclusion>
+          <!-- make sure wrong scala version is not pulled in -->
+          <groupId>org.scala-lang</groupId>
+          <artifactId>scalap</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>com.google.code.findbugs</groupId>
+          <artifactId>jsr305</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>org.scala-lang.modules</groupId>
+      <artifactId>scala-xml_2.11</artifactId>
+      <version>1.0.4</version>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-sql_${scala.binary.version}</artifactId>
+      <version>${spark.version}</version>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-streaming_${scala.binary.version}</artifactId>
+      <version>${spark.version}</version>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-streaming_${scala.binary.version}</artifactId>
+      <version>${spark.version}</version>
+      <type>test-jar</type>
+      <classifier>tests</classifier>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+
+  <profiles>
+    <!-- Skip the tests in this module -->
+    <profile>
+      <id>skipIntegrationTests</id>
+      <activation>
+        <property>
+          <name>skipIntegrationTests</name>
+        </property>
+      </activation>
+      <properties>
+        <skipTests>true</skipTests>
+      </properties>
+    </profile>
+  </profiles>
+
+  <reporting>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-surefire-report-plugin</artifactId>
+        <version>2.7.2</version>
+        <reportSets>
+          <reportSet>
+            <id>spark-integration-tests</id>
+            <reports>
+              <report>report-only</report>
+            </reports>
+            <configuration>
+              <outputName>failsafe-report</outputName>
+              <reportsDirectories>
+                <reportsDirectory>${project.build.directory}/failsafe-reports</reportsDirectory>
+              </reportsDirectories>
+            </configuration>
+          </reportSet>
+        </reportSets>
+      </plugin>
+    </plugins>
+  </reporting>
+
+</project>
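
With the failsafe bindings and properties defined in this pom, the integration
tests in this module would typically be driven from Maven along these lines
(the exact invocations are a sketch based on the properties above, not
documented usage):

    # run a single integration test with a larger forked-JVM heap
    mvn verify -Dit.test=IntegrationTestSparkBulkLoad -Dfailsafe.Xmx=4g

    # build while skipping this module's integration tests via the profile above
    mvn install -DskipIntegrationTests
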
diff --git a/spark/hbase-spark-it/src/test/java/org/apache/hadoop/hbase/spark/IntegrationTestSparkBulkLoad.java b/spark/hbase-spark-it/src/test/java/org/apache/hadoop/hbase/spark/IntegrationTestSparkBulkLoad.java
new file mode 100644
index 0000000..e5a8ddd
--- /dev/null
+++ b/spark/hbase-spark-it/src/test/java/org/apache/hadoop/hbase/spark/IntegrationTestSparkBulkLoad.java
@@ -0,0 +1,677 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.spark;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import java.util.Set;
+import org.apache.commons.lang3.RandomStringUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.IntegrationTestBase;
+import org.apache.hadoop.hbase.IntegrationTestingUtility;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Consistency;
+import org.apache.hadoop.hbase.client.RegionLocator;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapreduce.IntegrationTestBulkLoad;
+import org.apache.hadoop.hbase.tool.LoadIncrementalHFiles;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
+import org.apache.hadoop.hbase.util.Pair;
+import org.apache.hadoop.hbase.util.RegionSplitter;
+import org.apache.hadoop.util.StringUtils;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.spark.Partitioner;
+import org.apache.spark.SerializableWritable;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.Function;
+import org.apache.spark.api.java.function.Function2;
+import org.apache.spark.api.java.function.PairFlatMapFunction;
+import org.apache.spark.api.java.function.VoidFunction;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import scala.Tuple2;
+
+import org.apache.hbase.thirdparty.com.google.common.collect.Sets;
+import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLine;
+
+/**
+ * Test Bulk Load and Spark on a distributed cluster.
+ * It starts a Spark job that creates linked chains.
+ * This test mimics {@link IntegrationTestBulkLoad} in mapreduce.
+ *
+ * Usage on cluster:
+ *   First add the HBase related jars and hbase-spark.jar to the Spark classpath.
+ *
+ *   spark-submit --class org.apache.hadoop.hbase.spark.IntegrationTestSparkBulkLoad
+ *                HBASE_HOME/lib/hbase-spark-it-XXX-tests.jar -m slowDeterministic
+ *                -Dhbase.spark.bulkload.chainlength=300
+ */
+public class IntegrationTestSparkBulkLoad extends IntegrationTestBase {
+
+  private static final Logger LOG = LoggerFactory.getLogger(IntegrationTestSparkBulkLoad.class);
+
+  // The number of partitions for random generated data
+  private static String BULKLOAD_PARTITIONS_NUM = "hbase.spark.bulkload.partitionsnum";
+  private static int DEFAULT_BULKLOAD_PARTITIONS_NUM = 3;
+
+  private static String BULKLOAD_CHAIN_LENGTH = "hbase.spark.bulkload.chainlength";
+  private static int DEFAULT_BULKLOAD_CHAIN_LENGTH = 200000;
+
+  private static String BULKLOAD_IMPORT_ROUNDS = "hbase.spark.bulkload.importround";
+  private static int DEFAULT_BULKLOAD_IMPORT_ROUNDS  = 1;
+
+  private static String CURRENT_ROUND_NUM = "hbase.spark.bulkload.current.roundnum";
+
+  private static String NUM_REPLICA_COUNT_KEY = "hbase.spark.bulkload.replica.countkey";
+  private static int DEFAULT_NUM_REPLICA_COUNT = 1;
+
+  private static String BULKLOAD_TABLE_NAME = "hbase.spark.bulkload.tableName";
+  private static String DEFAULT_BULKLOAD_TABLE_NAME = "IntegrationTestSparkBulkLoad";
+
+  private static String BULKLOAD_OUTPUT_PATH = "hbase.spark.bulkload.output.path";
+
+  private static final String OPT_LOAD = "load";
+  private static final String OPT_CHECK = "check";
+
+  private boolean load = false;
+  private boolean check = false;
+
+  private static final byte[] CHAIN_FAM  = Bytes.toBytes("L");
+  private static final byte[] SORT_FAM = Bytes.toBytes("S");
+  private static final byte[] DATA_FAM = Bytes.toBytes("D");
+
+  /**
+   * Runs a Spark job to load data into the HBase table.
+   */
+  public void runLoad() throws Exception {
+    setupTable();
+    int numImportRounds = getConf().getInt(BULKLOAD_IMPORT_ROUNDS, DEFAULT_BULKLOAD_IMPORT_ROUNDS);
+    LOG.info("Running load with numIterations:" + numImportRounds);
+    for (int i = 0; i < numImportRounds; i++) {
+      runLinkedListSparkJob(i);
+    }
+  }
+
+  /**
+   * Runs a Spark job to create a linked list for testing.
+   * @param iteration the iteration number of this job
+   * @throws Exception if an HBase operation or getting the test directory fails
+   */
+  public void runLinkedListSparkJob(int iteration) throws Exception {
+    String jobName =  IntegrationTestSparkBulkLoad.class.getSimpleName() + " _load " +
+        EnvironmentEdgeManager.currentTime();
+
+    LOG.info("Running iteration " + iteration + " in Spark Job");
+
+    Path output = null;
+    if (conf.get(BULKLOAD_OUTPUT_PATH) == null) {
+      output = util.getDataTestDirOnTestFS(getTablename() + "-" + iteration);
+    } else {
+      output = new Path(conf.get(BULKLOAD_OUTPUT_PATH));
+    }
+
+    SparkConf sparkConf = new SparkConf().setAppName(jobName).setMaster("local");
+    Configuration hbaseConf = new Configuration(getConf());
+    hbaseConf.setInt(CURRENT_ROUND_NUM, iteration);
+    int partitionNum = hbaseConf.getInt(BULKLOAD_PARTITIONS_NUM, DEFAULT_BULKLOAD_PARTITIONS_NUM);
+
+
+    JavaSparkContext jsc = new JavaSparkContext(sparkConf);
+    JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, hbaseConf);
+
+
+    LOG.info("Partition RDD into " + partitionNum + " parts");
+    List<String> temp = new ArrayList<>();
+    JavaRDD<List<byte[]>> rdd = jsc.parallelize(temp, partitionNum).
+        mapPartitionsWithIndex(new LinkedListCreationMapper(new SerializableWritable<>(hbaseConf)),
+                false);
+
+    hbaseContext.bulkLoad(rdd, getTablename(), new ListToKeyValueFunc(), output.toUri().getPath(),
+        new HashMap<>(), false, HConstants.DEFAULT_MAX_FILE_SIZE);
+
+    try (Connection conn = ConnectionFactory.createConnection(conf);
+        Admin admin = conn.getAdmin();
+        Table table = conn.getTable(getTablename());
+        RegionLocator regionLocator = conn.getRegionLocator(getTablename())) {
+      // Create a new loader.
+      LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
+
+      // Load the HFiles into table.
+      loader.doBulkLoad(output, admin, table, regionLocator);
+    }
+
+
+    // Delete the files.
+    util.getTestFileSystem().delete(output, true);
+    jsc.close();
+  }
+
+  // See mapreduce.IntegrationTestBulkLoad#LinkedListCreationMapper
+  // Used to generate test data
+  public static class LinkedListCreationMapper implements
+      Function2<Integer, Iterator<String>, Iterator<List<byte[]>>> {
+
+    SerializableWritable swConfig = null;
+    private Random rand = new Random();
+
+    public LinkedListCreationMapper(SerializableWritable conf) {
+      this.swConfig = conf;
+    }
+
+    @Override
+    public Iterator<List<byte[]>> call(Integer v1, Iterator v2) throws Exception {
+      Configuration config = (Configuration) swConfig.value();
+      int partitionId = v1.intValue();
+      LOG.info("Starting create List in Partition " + partitionId);
+
+      int partitionNum = config.getInt(BULKLOAD_PARTITIONS_NUM, DEFAULT_BULKLOAD_PARTITIONS_NUM);
+      int chainLength = config.getInt(BULKLOAD_CHAIN_LENGTH, DEFAULT_BULKLOAD_CHAIN_LENGTH);
+      int iterationsNum = config.getInt(BULKLOAD_IMPORT_ROUNDS, DEFAULT_BULKLOAD_IMPORT_ROUNDS);
+      int iterationsCur = config.getInt(CURRENT_ROUND_NUM, 0);
+      List<List<byte[]>> res = new LinkedList<>();
+
+
+      long tempId = partitionId + iterationsCur * partitionNum;
+      long totalPartitionNum = partitionNum * iterationsNum;
+      long chainId = Math.abs(rand.nextLong());
+      chainId = chainId - (chainId % totalPartitionNum) + tempId;
+
+      byte[] chainIdArray = Bytes.toBytes(chainId);
+      long currentRow = 0;
+      long nextRow = getNextRow(0, chainLength);
+      for(long i = 0; i < chainLength; i++) {
+        byte[] rk = Bytes.toBytes(currentRow);
+        // Insert record into a list
+        List<byte[]> tmp1 = Arrays.asList(rk, CHAIN_FAM, chainIdArray, Bytes.toBytes(nextRow));
+        List<byte[]> tmp2 = Arrays.asList(rk, SORT_FAM, chainIdArray, Bytes.toBytes(i));
+        List<byte[]> tmp3 = Arrays.asList(rk, DATA_FAM, chainIdArray, Bytes.toBytes(
+            RandomStringUtils.randomAlphabetic(50)));
+        res.add(tmp1);
+        res.add(tmp2);
+        res.add(tmp3);
+
+        currentRow = nextRow;
+        nextRow = getNextRow(i+1, chainLength);
+      }
+      return res.iterator();
+    }
+
+    /** Returns a unique row id within this chain for this index */
+    private long getNextRow(long index, long chainLength) {
+      long nextRow = Math.abs(new Random().nextLong());
+      // use significant bits from the random number, but pad with index to ensure it is unique
+      // this also ensures that we do not reuse row = 0
+      // row collisions from multiple mappers are fine, since we guarantee unique chainIds
+      nextRow = nextRow - (nextRow % chainLength) + index;
+      return nextRow;
+    }
+  }
+
+
+
+  public static class ListToKeyValueFunc implements
+      Function<List<byte[]>, Pair<KeyFamilyQualifier, byte[]>> {
+    @Override
+    public Pair<KeyFamilyQualifier, byte[]> call(List<byte[]> v1) throws Exception {
+      if (v1 == null || v1.size() != 4) {
+        return null;
+      }
+      KeyFamilyQualifier kfq = new KeyFamilyQualifier(v1.get(0), v1.get(1), v1.get(2));
+
+      return new Pair<>(kfq, v1.get(3));
+    }
+  }
+
+  /**
+   * After adding data to the table, start a Spark job to check the bulk load.
+   */
+  public void runCheck() throws Exception {
+    LOG.info("Running check");
+    String jobName = IntegrationTestSparkBulkLoad.class.getSimpleName() + "_check" +
+            EnvironmentEdgeManager.currentTime();
+
+    SparkConf sparkConf = new SparkConf().setAppName(jobName).setMaster("local");
+    Configuration hbaseConf = new Configuration(getConf());
+    JavaSparkContext jsc = new JavaSparkContext(sparkConf);
+    JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, hbaseConf);
+
+    Scan scan = new Scan();
+    scan.addFamily(CHAIN_FAM);
+    scan.addFamily(SORT_FAM);
+    scan.setMaxVersions(1);
+    scan.setCacheBlocks(false);
+    scan.setBatch(1000);
+    int replicaCount = conf.getInt(NUM_REPLICA_COUNT_KEY, DEFAULT_NUM_REPLICA_COUNT);
+    if (replicaCount != DEFAULT_NUM_REPLICA_COUNT) {
+      scan.setConsistency(Consistency.TIMELINE);
+    }
+
+    // 1. Using TableInputFormat to get data from HBase table
+    // 2. Mimic LinkedListCheckingMapper in mapreduce.IntegrationTestBulkLoad
+    // 3. Sort LinkKey by its order ID
+    // 4. Group LinkKey if they have same chainId, and repartition RDD by NaturalKeyPartitioner
+    // 5. Check LinkList in each Partition using LinkedListCheckingFlatMapFunc
+    hbaseContext.hbaseRDD(getTablename(), scan).flatMapToPair(new LinkedListCheckingFlatMapFunc())
+        .sortByKey()
+        .combineByKey(new createCombinerFunc(), new mergeValueFunc(), new mergeCombinersFunc(),
+            new NaturalKeyPartitioner(new SerializableWritable<>(hbaseConf)))
+        .foreach(new LinkedListCheckingForeachFunc(new SerializableWritable<>(hbaseConf)));
+    jsc.close();
+  }
+
+  private void runCheckWithRetry() throws Exception {
+    try {
+      runCheck();
+    } catch (Throwable t) {
+      LOG.warn("Received " + StringUtils.stringifyException(t));
+      LOG.warn("Running the check MR Job again to see whether an ephemeral problem or not");
+      runCheck();
+      throw t; // we should still fail the test even if second retry succeeds
+    }
+    // everything green
+  }
+
+  /**
+   * PairFlatMapFunction used to transform {@code Tuple2<ImmutableBytesWritable, Result>}
+   * into {@code Tuple2<SparkLinkKey, SparkLinkChain>}.
+   */
+  public static class LinkedListCheckingFlatMapFunc implements
+      PairFlatMapFunction<Tuple2<ImmutableBytesWritable, Result>, SparkLinkKey, SparkLinkChain> {
+
+    @Override
+    public Iterator<Tuple2<SparkLinkKey, SparkLinkChain>> call(Tuple2<ImmutableBytesWritable,
+            Result> v) throws Exception {
+      Result value = v._2();
+      long longRk = Bytes.toLong(value.getRow());
+      List<Tuple2<SparkLinkKey, SparkLinkChain>> list = new LinkedList<>();
+
+      for (Map.Entry<byte[], byte[]> entry : value.getFamilyMap(CHAIN_FAM).entrySet()) {
+        long chainId = Bytes.toLong(entry.getKey());
+        long next = Bytes.toLong(entry.getValue());
+        Cell c = value.getColumnCells(SORT_FAM, entry.getKey()).get(0);
+        long order = Bytes.toLong(CellUtil.cloneValue(c));
+        Tuple2<SparkLinkKey, SparkLinkChain> tuple2 =
+            new Tuple2<>(new SparkLinkKey(chainId, order), new SparkLinkChain(longRk, next));
+        list.add(tuple2);
+      }
+      return list.iterator();
+    }
+  }
+
+  public static class createCombinerFunc implements
+      Function<SparkLinkChain, List<SparkLinkChain>> {
+    @Override
+    public List<SparkLinkChain> call(SparkLinkChain v1) throws Exception {
+      List<SparkLinkChain> list = new LinkedList<>();
+      list.add(v1);
+      return list;
+    }
+  }
+
+  public static class mergeValueFunc implements
+      Function2<List<SparkLinkChain>, SparkLinkChain, List<SparkLinkChain>> {
+    @Override
+    public List<SparkLinkChain> call(List<SparkLinkChain> v1, SparkLinkChain v2) throws Exception {
+      if (v1 == null) {
+        v1 = new LinkedList<>();
+      }
+
+      v1.add(v2);
+      return v1;
+    }
+  }
+
+  public static class mergeCombinersFunc implements
+      Function2<List<SparkLinkChain>, List<SparkLinkChain>, List<SparkLinkChain>> {
+    @Override
+    public List<SparkLinkChain> call(List<SparkLinkChain> v1, List<SparkLinkChain> v2)
+            throws Exception {
+      v1.addAll(v2);
+      return v1;
+    }
+  }
+
+  /**
+   * Class to figure out what partition to send a link in the chain to.  This is based upon
+   * the linkKey's ChainId.
+   */
+  public static class NaturalKeyPartitioner extends Partitioner {
+
+    private int numPartions = 0;
+    public NaturalKeyPartitioner(SerializableWritable swConf) {
+      Configuration hbaseConf = (Configuration) swConf.value();
+      numPartions = hbaseConf.getInt(BULKLOAD_PARTITIONS_NUM, DEFAULT_BULKLOAD_PARTITIONS_NUM);
+
+    }
+
+    @Override
+    public int numPartitions() {
+      return numPartions;
+    }
+
+    @Override
+    public int getPartition(Object key) {
+      if (!(key instanceof SparkLinkKey)) {
+        return -1;
+      }
+
+      int hash = ((SparkLinkKey) key).getChainId().hashCode();
+      return Math.abs(hash % numPartions);
+
+    }
+  }
+
+  /**
+   * Sort all LinkChain for one LinkKey, and test {@code List<LinkChain>}.
+   */
+  public static class LinkedListCheckingForeachFunc
+      implements VoidFunction<Tuple2<SparkLinkKey, List<SparkLinkChain>>> {
+
+    private  SerializableWritable swConf = null;
+
+    public LinkedListCheckingForeachFunc(SerializableWritable conf) {
+      swConf = conf;
+    }
+
+    @Override
+    public void call(Tuple2<SparkLinkKey, List<SparkLinkChain>> v1) throws Exception {
+      long next = -1L;
+      long prev = -1L;
+      long count = 0L;
+
+      SparkLinkKey key = v1._1();
+      List<SparkLinkChain> values = v1._2();
+
+      for (SparkLinkChain lc : values) {
+
+        if (next == -1) {
+          if (lc.getRk() != 0L) {
+            String msg = "Chains should all start at rk 0, but read rk " + lc.getRk()
+                + ". Chain:" + key.getChainId() + ", order:" + key.getOrder();
+            throw new RuntimeException(msg);
+          }
+          next = lc.getNext();
+        } else {
+          if (next != lc.getRk()) {
+            String msg = "Missing a link in the chain. Prev rk " + prev + " was, expecting "
+                + next + " but got " + lc.getRk() + ". Chain:" + key.getChainId()
+                + ", order:" + key.getOrder();
+            throw new RuntimeException(msg);
+          }
+          prev = lc.getRk();
+          next = lc.getNext();
+        }
+        count++;
+      }
+      Configuration hbaseConf = (Configuration) swConf.value();
+      int expectedChainLen = hbaseConf.getInt(BULKLOAD_CHAIN_LENGTH, DEFAULT_BULKLOAD_CHAIN_LENGTH);
+      if (count != expectedChainLen) {
+        String msg = "Chain wasn't the correct length.  Expected " + expectedChainLen + " got "
+            + count + ". Chain:" + key.getChainId() + ", order:" + key.getOrder();
+        throw new RuntimeException(msg);
+      }
+    }
+  }
+
+  /**
+   * Key used to group links in the linked list.
+   *
+   * Used as the key emitted from a pass over the table.
+   */
+  public static class SparkLinkKey implements java.io.Serializable, Comparable<SparkLinkKey> {
+
+    private Long chainId;
+    private Long order;
+
+    public Long getOrder() {
+      return order;
+    }
+
+    public Long getChainId() {
+      return chainId;
+    }
+
+    public SparkLinkKey(long chainId, long order) {
+      this.chainId = chainId;
+      this.order = order;
+    }
+
+    @Override
+    public int hashCode() {
+      return this.getChainId().hashCode();
+    }
+
+    @Override
+    public boolean equals(Object other) {
+      if (!(other instanceof SparkLinkKey)) {
+        return false;
+      }
+
+      SparkLinkKey otherKey = (SparkLinkKey) other;
+      return this.getChainId().equals(otherKey.getChainId());
+    }
+
+    @Override
+    public int compareTo(SparkLinkKey other) {
+      int res = getChainId().compareTo(other.getChainId());
+
+      if (res == 0) {
+        res = getOrder().compareTo(other.getOrder());
+      }
+
+      return res;
+    }
+  }
+
+  /**
+   * Value emitted from a pass over the HBase table.
+   */
+  public static class SparkLinkChain implements java.io.Serializable, Comparable<SparkLinkChain>{
+
+    public Long getNext() {
+      return next;
+    }
+
+    public Long getRk() {
+      return rk;
+    }
+
+
+    public SparkLinkChain(Long rk, Long next) {
+      this.rk = rk;
+      this.next = next;
+    }
+
+    private Long rk;
+    private Long next;
+
+    @Override
+    public int compareTo(SparkLinkChain linkChain) {
+      int res = getRk().compareTo(linkChain.getRk());
+      if (res == 0) {
+        res = getNext().compareTo(linkChain.getNext());
+      }
+      return res;
+    }
+
+    @Override
+    public int hashCode() {
+      return getRk().hashCode() ^ getNext().hashCode();
+    }
+
+    @Override
+    public boolean equals(Object other) {
+      if (!(other instanceof SparkLinkChain)) {
+        return false;
+      }
+
+      SparkLinkChain otherKey = (SparkLinkChain) other;
+      return this.getRk().equals(otherKey.getRk()) && this.getNext().equals(otherKey.getNext());
+    }
+  }
+
+
+  /**
+   * Allow the scan to go to a replica; this does not affect runCheck(),
+   * since the data is bulk-loaded from HFiles into the table.
+   * @throws IOException if an HBase operation fails
+   * @throws InterruptedException if modifying the table fails
+   */
+  private void installSlowingCoproc() throws IOException, InterruptedException {
+    int replicaCount = conf.getInt(NUM_REPLICA_COUNT_KEY, DEFAULT_NUM_REPLICA_COUNT);
+
+    if (replicaCount == DEFAULT_NUM_REPLICA_COUNT) {
+      return;
+    }
+
+    TableName t = getTablename();
+    Admin admin = util.getAdmin();
+    HTableDescriptor desc = admin.getTableDescriptor(t);
+    desc.addCoprocessor(IntegrationTestBulkLoad.SlowMeCoproScanOperations.class.getName());
+    HBaseTestingUtility.modifyTableSync(admin, desc);
+  }
+
+  @Test
+  public void testBulkLoad() throws Exception {
+    runLoad();
+    installSlowingCoproc();
+    runCheckWithRetry();
+  }
+
+
+  private byte[][] getSplits(int numRegions) {
+    RegionSplitter.UniformSplit split = new RegionSplitter.UniformSplit();
+    split.setFirstRow(Bytes.toBytes(0L));
+    split.setLastRow(Bytes.toBytes(Long.MAX_VALUE));
+    return split.split(numRegions);
+  }
+
+  private void setupTable() throws IOException, InterruptedException {
+    if (util.getAdmin().tableExists(getTablename())) {
+      util.deleteTable(getTablename());
+    }
+
+    util.createTable(
+        getTablename(),
+        new byte[][]{CHAIN_FAM, SORT_FAM, DATA_FAM},
+        getSplits(16)
+    );
+
+    int replicaCount = conf.getInt(NUM_REPLICA_COUNT_KEY, DEFAULT_NUM_REPLICA_COUNT);
+
+    if (replicaCount == DEFAULT_NUM_REPLICA_COUNT) {
+      return;
+    }
+
+    TableName t = getTablename();
+    HBaseTestingUtility.setReplicas(util.getAdmin(), t, replicaCount);
+  }
+
+  @Override
+  public void setUpCluster() throws Exception {
+    util = getTestingUtil(getConf());
+    util.initializeCluster(1);
+    int replicaCount = getConf().getInt(NUM_REPLICA_COUNT_KEY, DEFAULT_NUM_REPLICA_COUNT);
+    if (LOG.isDebugEnabled() && replicaCount != DEFAULT_NUM_REPLICA_COUNT) {
+      LOG.debug("Region Replicas enabled: " + replicaCount);
+    }
+
+    // Scale this up on a real cluster
+    if (util.isDistributedCluster()) {
+      util.getConfiguration().setIfUnset(BULKLOAD_PARTITIONS_NUM,
+              String.valueOf(DEFAULT_BULKLOAD_PARTITIONS_NUM));
+      util.getConfiguration().setIfUnset(BULKLOAD_IMPORT_ROUNDS, "1");
+    } else {
+      util.startMiniMapReduceCluster();
+    }
+  }
+
+  @Override
+  protected void addOptions() {
+    super.addOptions();
+    super.addOptNoArg(OPT_CHECK, "Run check only");
+    super.addOptNoArg(OPT_LOAD, "Run load only");
+  }
+
+  @Override
+  protected void processOptions(CommandLine cmd) {
+    super.processOptions(cmd);
+    check = cmd.hasOption(OPT_CHECK);
+    load = cmd.hasOption(OPT_LOAD);
+  }
+
+  @Override
+  public int runTestFromCommandLine() throws Exception {
+    if (load) {
+      runLoad();
+    } else if (check) {
+      installSlowingCoproc();
+      runCheckWithRetry();
+    } else {
+      testBulkLoad();
+    }
+    return 0;
+  }
+
+  @Override
+  public TableName getTablename() {
+    return getTableName(getConf());
+  }
+
+  public static TableName getTableName(Configuration conf) {
+    return TableName.valueOf(conf.get(BULKLOAD_TABLE_NAME, DEFAULT_BULKLOAD_TABLE_NAME));
+  }
+
+  @Override
+  protected Set<String> getColumnFamilies() {
+    return Sets.newHashSet(Bytes.toString(CHAIN_FAM) , Bytes.toString(DATA_FAM),
+        Bytes.toString(SORT_FAM));
+  }
+
+  public static void main(String[] args) throws Exception {
+    Configuration conf = HBaseConfiguration.create();
+    IntegrationTestingUtility.setUseDistributedCluster(conf);
+    int status =  ToolRunner.run(conf, new IntegrationTestSparkBulkLoad(), args);
+    System.exit(status);
+  }
+}
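
Stripped of the chain-generation and verification logic, the core bulk-load
pattern the test above exercises is: build an RDD of (rowkey, family,
qualifier, value) records, have JavaHBaseContext write them out as HFiles,
then hand the HFiles to the region servers. Below is a minimal sketch of that
flow, assuming a pre-created table "t1" with column family "cf"; the table
name, staging path, and class name are illustrative only:

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.spark.JavaHBaseContext;
import org.apache.hadoop.hbase.spark.KeyFamilyQualifier;
import org.apache.hadoop.hbase.tool.LoadIncrementalHFiles;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class SparkBulkLoadSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    JavaSparkContext jsc = new JavaSparkContext(new SparkConf().setAppName("bulkload-sketch"));
    JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf);
    TableName tableName = TableName.valueOf("t1");     // illustrative table name
    Path staging = new Path("/tmp/bulkload-staging");  // illustrative staging dir

    // Each record is (rowkey, family, qualifier, value), as in ListToKeyValueFunc above.
    JavaRDD<List<byte[]>> rdd = jsc.parallelize(Arrays.asList(
        Arrays.asList(Bytes.toBytes("row1"), Bytes.toBytes("cf"),
            Bytes.toBytes("q1"), Bytes.toBytes("v1"))));

    // Write the records out as HFiles under the staging directory.
    hbaseContext.bulkLoad(rdd, tableName,
        (List<byte[]> v) ->
            new Pair<>(new KeyFamilyQualifier(v.get(0), v.get(1), v.get(2)), v.get(3)),
        staging.toUri().getPath(), new HashMap<>(), false, HConstants.DEFAULT_MAX_FILE_SIZE);

    // Hand the generated HFiles to the region servers.
    try (Connection conn = ConnectionFactory.createConnection(conf);
        Admin admin = conn.getAdmin();
        Table table = conn.getTable(tableName);
        RegionLocator locator = conn.getRegionLocator(tableName)) {
      new LoadIncrementalHFiles(conf).doBulkLoad(staging, admin, table, locator);
    }
    jsc.close();
  }
}
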
diff --git a/spark/hbase-spark-it/src/test/resources/hbase-site.xml b/spark/hbase-spark-it/src/test/resources/hbase-site.xml
new file mode 100644
index 0000000..99d2ab8
--- /dev/null
+++ b/spark/hbase-spark-it/src/test/resources/hbase-site.xml
@@ -0,0 +1,32 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+-->
+<configuration>
+  <property>
+    <name>hbase.defaults.for.version.skip</name>
+    <value>true</value>
+  </property>
+  <property>
+    <name>hbase.hconnection.threads.keepalivetime</name>
+    <value>3</value>
+  </property>
+</configuration>
diff --git a/spark/hbase-spark/README.txt b/spark/hbase-spark/README.txt
new file mode 100644
index 0000000..7fad811
--- /dev/null
+++ b/spark/hbase-spark/README.txt
@@ -0,0 +1,6 @@
+ON PROTOBUFS
+This Maven module has the core protobuf definition files ('.proto') used by
+the HBase Spark connector, including its tests.
+
+Generation of Java files from the protobuf .proto files included here is done
+as part of the build.
diff --git a/spark/hbase-spark/pom.xml b/spark/hbase-spark/pom.xml
new file mode 100644
index 0000000..cda6c7b
--- /dev/null
+++ b/spark/hbase-spark/pom.xml
@@ -0,0 +1,764 @@
+<?xml version="1.0"?>
+<!--
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements.  See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership.  The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied.  See the License for the
+ specific language governing permissions and limitations
+ under the License.
+
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>org.apache.hbase.connectors</groupId>
+    <artifactId>spark</artifactId>
+    <version>${revision}</version>
+    <relativePath>../</relativePath>
+  </parent>
+  <groupId>org.apache.hbase.connectors.spark</groupId>
+  <artifactId>hbase-spark</artifactId>
+  <name>Apache HBase - Spark</name>
+  <properties>
+    <spark.version>2.1.1</spark.version>
+    <!-- The following version is in sync with Spark's choice
+         Please take caution when this version is modified -->
+    <scala.version>2.11.8</scala.version>
+    <scala.binary.version>2.11</scala.binary.version>
+    <top.dir>${project.basedir}/..</top.dir>
+  </properties>
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.hbase.thirdparty</groupId>
+      <artifactId>hbase-shaded-miscellaneous</artifactId>
+    </dependency>
+    <!-- Force import of Spark's servlet API for unit tests -->
+    <dependency>
+      <groupId>javax.servlet</groupId>
+      <artifactId>javax.servlet-api</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <!-- Mark Spark / Scala as provided -->
+    <dependency>
+      <groupId>org.scala-lang</groupId>
+      <artifactId>scala-library</artifactId>
+      <version>${scala.version}</version>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-core_${scala.binary.version}</artifactId>
+      <version>${spark.version}</version>
+      <scope>provided</scope>
+      <exclusions>
+        <exclusion>
+          <!-- make sure wrong scala version is not pulled in -->
+          <groupId>org.scala-lang</groupId>
+          <artifactId>scala-library</artifactId>
+        </exclusion>
+        <exclusion>
+          <!-- make sure wrong scala version is not pulled in -->
+          <groupId>org.scala-lang</groupId>
+          <artifactId>scalap</artifactId>
+        </exclusion>
+        <exclusion>
+           <groupId>com.google.code.findbugs</groupId>
+           <artifactId>jsr305</artifactId>
+        </exclusion>
+        <exclusion>
+          <!-- exclude the wrong snappy-java version in spark-1.6 -->
+          <groupId>org.xerial.snappy</groupId>
+          <artifactId>snappy-java</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>xerces</groupId>
+          <artifactId>xercesImpl</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>org.xerial.snappy</groupId>
+      <artifactId>snappy-java</artifactId>
+      <version>1.1.4</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-sql_${scala.binary.version}</artifactId>
+      <version>${spark.version}</version>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-streaming_${scala.binary.version}</artifactId>
+      <version>${spark.version}</version>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-streaming_${scala.binary.version}</artifactId>
+      <version>${spark.version}</version>
+      <type>test-jar</type>
+      <classifier>tests</classifier>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.scalatest</groupId>
+      <artifactId>scalatest_${scala.binary.version}</artifactId>
+      <version>2.2.4</version>
+      <scope>test</scope>
+      <exclusions>
+       <exclusion>
+         <groupId>org.scala-lang</groupId>
+         <artifactId>scala-library</artifactId>
+       </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>org.scalamock</groupId>
+      <artifactId>scalamock-scalatest-support_${scala.binary.version}</artifactId>
+      <version>3.1.4</version>
+      <scope>test</scope>
+      <exclusions>
+       <exclusion>
+         <groupId>org.scala-lang</groupId>
+         <artifactId>scala-library</artifactId>
+       </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>com.fasterxml.jackson.module</groupId>
+      <artifactId>jackson-module-scala_${scala.binary.version}</artifactId>
+      <version>${jackson.version}</version>
+      <exclusions>
+        <exclusion>
+          <groupId>org.scala-lang</groupId>
+          <artifactId>scala-library</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.scala-lang</groupId>
+          <artifactId>scala-reflect</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hbase</groupId>
+      <artifactId>hbase-client</artifactId>
+      <exclusions>
+        <exclusion>
+          <groupId>log4j</groupId>
+          <artifactId>log4j</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.apache.thrift</groupId>
+          <artifactId>thrift</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.slf4j</groupId>
+          <artifactId>slf4j-log4j12</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.mortbay.jetty</groupId>
+          <artifactId>jsp-2.1</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.mortbay.jetty</groupId>
+          <artifactId>jsp-api-2.1</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.mortbay.jetty</groupId>
+          <artifactId>servlet-api-2.5</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>com.sun.jersey</groupId>
+          <artifactId>jersey-core</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>com.sun.jersey</groupId>
+          <artifactId>jersey-json</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>com.sun.jersey</groupId>
+          <artifactId>jersey-server</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.mortbay.jetty</groupId>
+          <artifactId>jetty</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.mortbay.jetty</groupId>
+          <artifactId>jetty-util</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>tomcat</groupId>
+          <artifactId>jasper-runtime</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>tomcat</groupId>
+          <artifactId>jasper-compiler</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.jboss.netty</groupId>
+          <artifactId>netty</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>io.netty</groupId>
+          <artifactId>netty</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hbase</groupId>
+      <artifactId>hbase-protocol</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hbase</groupId>
+      <artifactId>hbase-annotations</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.yetus</groupId>
+      <artifactId>audience-annotations</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hbase</groupId>
+      <artifactId>hbase-common</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hbase</groupId>
+      <artifactId>hbase-common</artifactId>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hbase</groupId>
+      <artifactId>hbase-annotations</artifactId>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hbase</groupId>
+      <artifactId>hbase-hadoop-compat</artifactId>
+      <scope>test</scope>
+      <type>test-jar</type>
+      <exclusions>
+        <exclusion>
+          <groupId>log4j</groupId>
+          <artifactId>log4j</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.apache.thrift</groupId>
+          <artifactId>thrift</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.slf4j</groupId>
+          <artifactId>slf4j-log4j12</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.slf4j</groupId>
+          <artifactId>slf4j-api</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.mortbay.jetty</groupId>
+          <artifactId>jsp-2.1</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.mortbay.jetty</groupId>
+          <artifactId>jsp-api-2.1</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.mortbay.jetty</groupId>
+          <artifactId>servlet-api-2.5</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>com.sun.jersey</groupId>
+          <artifactId>jersey-core</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>com.sun.jersey</groupId>
+          <artifactId>jersey-json</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>com.sun.jersey</groupId>
+          <artifactId>jersey-server</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.mortbay.jetty</groupId>
+          <artifactId>jetty</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.mortbay.jetty</groupId>
+          <artifactId>jetty-util</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>tomcat</groupId>
+          <artifactId>jasper-runtime</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>tomcat</groupId>
+          <artifactId>jasper-compiler</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.jboss.netty</groupId>
+          <artifactId>netty</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>io.netty</groupId>
+          <artifactId>netty</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hbase</groupId>
+      <artifactId>hbase-hadoop2-compat</artifactId>
+      <scope>test</scope>
+      <type>test-jar</type>
+      <exclusions>
+        <exclusion>
+          <groupId>log4j</groupId>
+          <artifactId>log4j</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.apache.thrift</groupId>
+          <artifactId>thrift</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.slf4j</groupId>
+          <artifactId>slf4j-log4j12</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.mortbay.jetty</groupId>
+          <artifactId>jsp-2.1</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.mortbay.jetty</groupId>
+          <artifactId>jsp-api-2.1</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.mortbay.jetty</groupId>
+          <artifactId>servlet-api-2.5</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>com.sun.jersey</groupId>
+          <artifactId>jersey-core</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>com.sun.jersey</groupId>
+          <artifactId>jersey-json</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>com.sun.jersey</groupId>
+          <artifactId>jersey-server</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.mortbay.jetty</groupId>
+          <artifactId>jetty</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.mortbay.jetty</groupId>
+          <artifactId>jetty-util</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>tomcat</groupId>
+          <artifactId>jasper-runtime</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>tomcat</groupId>
+          <artifactId>jasper-compiler</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.jboss.netty</groupId>
+          <artifactId>netty</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>io.netty</groupId>
+          <artifactId>netty</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hbase</groupId>
+      <artifactId>hbase-zookeeper</artifactId>
+      <scope>test</scope>
+      <type>test-jar</type>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hbase</groupId>
+      <artifactId>hbase-server</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hbase</groupId>
+      <artifactId>hbase-server</artifactId>
+      <scope>test</scope>
+      <type>test-jar</type>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hbase</groupId>
+      <artifactId>hbase-mapreduce</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>com.google.protobuf</groupId>
+      <artifactId>protobuf-java</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>commons-io</groupId>
+      <artifactId>commons-io</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.avro</groupId>
+      <artifactId>avro</artifactId>
+    </dependency>
+  </dependencies>
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-compiler-plugin</artifactId>
+      </plugin>
+      <!-- Clover fails due to the Scala/Java cross-compile. This guarantees that the Scala sources
+           are compiled before the Java sources that will be evaluated by code coverage (the Scala
+           sources will not be).
+           https://confluence.atlassian.com/display/CLOVERKB/Java-+Scala+cross-compilation+error+-+cannot+find+symbol
+           -->
+      <plugin>
+        <groupId>org.codehaus.mojo</groupId>
+        <artifactId>build-helper-maven-plugin</artifactId>
+        <executions>
+          <execution>
+            <id>add-source</id>
+            <phase>validate</phase>
+            <goals>
+              <goal>add-source</goal>
+            </goals>
+            <configuration>
+              <sources>
+                <source>src/main/scala</source>
+              </sources>
+            </configuration>
+          </execution>
+          <execution>
+            <id>add-test-source</id>
+            <phase>validate</phase>
+            <goals>
+              <goal>add-test-source</goal>
+            </goals>
+            <configuration>
+              <sources>
+                <source>src/test/scala</source>
+              </sources>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+      <plugin>
+        <groupId>org.xolstice.maven.plugins</groupId>
+        <artifactId>protobuf-maven-plugin</artifactId>
+        <executions>
+          <execution>
+            <id>compile-protoc</id>
+            <phase>generate-sources</phase>
+            <goals>
+              <goal>compile</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-enforcer-plugin</artifactId>
+        <executions>
+          <!-- scala is ok in the spark modules -->
+          <execution>
+            <id>banned-scala</id>
+            <goals>
+              <goal>enforce</goal>
+            </goals>
+            <configuration>
+              <skip>true</skip>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-checkstyle-plugin</artifactId>
+        <configuration>
+          <failOnViolation>true</failOnViolation>
+        </configuration>
+      </plugin>
+      <plugin>
+        <groupId>net.revelc.code</groupId>
+        <artifactId>warbucks-maven-plugin</artifactId>
+        <version>1.1.0</version>
+        <!-- TODO: remove the following config if https://issues.scala-lang.org/browse/SI-3600 is resolved -->
+        <!-- override the root config to add more filters -->
+        <configuration>
+          <ignoreRuleFailures>true</ignoreRuleFailures>
+          <rules>
+            <rule>
+              <!-- exclude the generated java files and package object-->
+              <classPattern>(?!.*(.generated.|.tmpl.|\$|org.apache.hadoop.hbase.spark.hbase.package)).*</classPattern>
+              <includeTestClasses>false</includeTestClasses>
+              <includePublicClasses>true</includePublicClasses>
+              <includePackagePrivateClasses>false</includePackagePrivateClasses>
+              <includeProtectedClasses>false</includeProtectedClasses>
+              <includePrivateClasses>false</includePrivateClasses>
+              <classAnnotationPattern>org[.]apache[.]yetus[.]audience[.]InterfaceAudience.*</classAnnotationPattern>
+            </rule>
+          </rules>
+        </configuration>
+      </plugin>
+    </plugins>
+  </build>
+  <profiles>
+    <!-- Skip the tests in this module -->
+    <profile>
+      <id>skipSparkTests</id>
+      <activation>
+        <property>
+          <name>skipSparkTests</name>
+        </property>
+      </activation>
+      <properties>
+        <surefire.skipFirstPart>true</surefire.skipFirstPart>
+        <surefire.skipSecondPart>true</surefire.skipSecondPart>
+        <skipTests>true</skipTests>
+      </properties>
+    </profile>
+    <!-- profile against Hadoop 2.x: This is the default. -->
+    <profile>
+      <id>hadoop-2.0</id>
+      <activation>
+        <property>
+          <!--Below formatting for dev-support/generate-hadoopX-poms.sh-->
+          <!--h2-->
+          <name>!hadoop.profile</name>
+        </property>
+      </activation>
+      <dependencies>
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-client</artifactId>
+          <version>${hadoop-two.version}</version>
+        </dependency>
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-common</artifactId>
+          <version>${hadoop-two.version}</version>
+          <exclusions>
+            <exclusion>
+              <groupId>com.google.code.findbugs</groupId>
+              <artifactId>jsr305</artifactId>
+            </exclusion>
+          </exclusions>
+        </dependency>
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-common</artifactId>
+          <version>${hadoop-two.version}</version>
+          <type>test-jar</type>
+          <scope>test</scope>
+          <exclusions>
+            <exclusion>
+              <groupId>com.google.code.findbugs</groupId>
+              <artifactId>jsr305</artifactId>
+            </exclusion>
+          </exclusions>
+        </dependency>
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-hdfs</artifactId>
+          <version>${hadoop-two.version}</version>
+          <type>test-jar</type>
+          <scope>test</scope>
+          <exclusions>
+            <exclusion>
+              <groupId>com.google.code.findbugs</groupId>
+              <artifactId>jsr305</artifactId>
+            </exclusion>
+            <exclusion>
+              <groupId>xerces</groupId>
+              <artifactId>xercesImpl</artifactId>
+            </exclusion>
+          </exclusions>
+        </dependency>
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-minikdc</artifactId>
+          <version>${hadoop-two.version}</version>
+          <scope>test</scope>
+        </dependency>
+      </dependencies>
+    </profile>
+    <!--
+      profile for building against Hadoop 3.0.x. Activate using:
+       mvn -Dhadoop.profile=3.0
+    -->
+    <profile>
+      <id>hadoop-3.0</id>
+      <activation>
+        <property>
+          <name>hadoop.profile</name>
+          <value>3.0</value>
+        </property>
+      </activation>
+      <properties>
+        <hadoop.version>3.0</hadoop.version>
+      </properties>
+      <dependencies>
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-client</artifactId>
+          <version>${hadoop-three.version}</version>
+        </dependency>
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-common</artifactId>
+          <version>${hadoop-three.version}</version>
+          <exclusions>
+            <exclusion>
+              <groupId>com.google.code.findbugs</groupId>
+              <artifactId>jsr305</artifactId>
+            </exclusion>
+          </exclusions>
+        </dependency>
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-common</artifactId>
+          <version>${hadoop-three.version}</version>
+          <type>test-jar</type>
+          <scope>test</scope>
+          <exclusions>
+            <exclusion>
+              <groupId>com.google.code.findbugs</groupId>
+              <artifactId>jsr305</artifactId>
+            </exclusion>
+          </exclusions>
+        </dependency>
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-hdfs</artifactId>
+          <version>${hadoop-three.version}</version>
+          <type>test-jar</type>
+          <scope>test</scope>
+          <exclusions>
+            <exclusion>
+              <groupId>com.google.code.findbugs</groupId>
+              <artifactId>jsr305</artifactId>
+            </exclusion>
+          </exclusions>
+        </dependency>
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-minikdc</artifactId>
+          <version>${hadoop-three.version}</version>
+          <scope>test</scope>
+        </dependency>
+      </dependencies>
+    </profile>
+    <!-- Attempt to skip scala-maven-plugin work, see
+      https://github.com/davidB/scala-maven-plugin/issues/198
+      -->
+    <!-- 'scala.skip' is used by the website generation script on jenkins to
+         mitigate the impact of unneeded build forks while building our javadocs.
+      -->
+    <profile>
+      <id>build-scala-sources</id>
+      <activation>
+        <property>
+          <name>scala.skip</name>
+          <value>!true</value>
+        </property>
+      </activation>
+      <build>
+        <plugins>
+          <plugin>
+            <groupId>net.alchim31.maven</groupId>
+            <artifactId>scala-maven-plugin</artifactId>
+            <version>3.2.0</version>
+            <configuration>
+              <charset>${project.build.sourceEncoding}</charset>
+              <scalaVersion>${scala.version}</scalaVersion>
+              <args>
+                <arg>-feature</arg>
+              </args>
+            </configuration>
+            <executions>
+              <execution>
+                <id>scala-compile-first</id>
+                <phase>process-resources</phase>
+                <goals>
+                  <goal>add-source</goal>
+                  <goal>compile</goal>
+                </goals>
+              </execution>
+              <execution>
+                <id>scala-test-compile</id>
+                <phase>process-test-resources</phase>
+                <goals>
+                  <goal>testCompile</goal>
+                </goals>
+              </execution>
+            </executions>
+          </plugin>
+          <plugin>
+            <groupId>org.scalatest</groupId>
+            <artifactId>scalatest-maven-plugin</artifactId>
+            <version>1.0</version>
+            <configuration>
+              <reportsDirectory>${project.build.directory}/surefire-reports</reportsDirectory>
+              <junitxml>.</junitxml>
+              <filereports>WDF TestSuite.txt</filereports>
+              <parallel>false</parallel>
+            </configuration>
+            <executions>
+              <execution>
+                <id>test</id>
+                <phase>test</phase>
+                <goals>
+                  <goal>test</goal>
+                </goals>
+                <configuration>
+                  <argLine>-Xmx1536m -XX:ReservedCodeCacheSize=512m</argLine>
+                  <parallel>false</parallel>
+                </configuration>
+              </execution>
+            </executions>
+          </plugin>
+        </plugins>
+      </build>
+    </profile>
+  </profiles>
+</project>
diff --git a/spark/hbase-spark/src/main/java/org/apache/hadoop/hbase/spark/SparkSQLPushDownFilter.java b/spark/hbase-spark/src/main/java/org/apache/hadoop/hbase/spark/SparkSQLPushDownFilter.java
new file mode 100644
index 0000000..a17d2e6
--- /dev/null
+++ b/spark/hbase-spark/src/main/java/org/apache/hadoop/hbase/spark/SparkSQLPushDownFilter.java
@@ -0,0 +1,309 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.spark;
+
+import com.google.protobuf.ByteString;
+import com.google.protobuf.InvalidProtocolBufferException;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.exceptions.DeserializationException;
+import org.apache.hadoop.hbase.filter.Filter.ReturnCode;
+import org.apache.hadoop.hbase.filter.FilterBase;
+import org.apache.hadoop.hbase.spark.datasources.BytesEncoder;
+import org.apache.hadoop.hbase.spark.datasources.Field;
+import org.apache.hadoop.hbase.spark.datasources.JavaBytesEncoder;
+import org.apache.hadoop.hbase.spark.protobuf.generated.SparkFilterProtos;
+import org.apache.hadoop.hbase.util.ByteStringer;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.yetus.audience.InterfaceAudience;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import scala.collection.mutable.MutableList;
+
+/**
+ * This filter will push down all qualifier logic given to us
+ * by Spark SQL so that the filters are applied at the region server level,
+ * avoiding sending the data back to the client to be filtered.
+ */
+@InterfaceAudience.Private
+public class SparkSQLPushDownFilter extends FilterBase {
+  protected static final Logger log = LoggerFactory.getLogger(SparkSQLPushDownFilter.class);
+
+  //The following values are populated via protobuf
+  DynamicLogicExpression dynamicLogicExpression;
+  byte[][] valueFromQueryArray;
+  HashMap<ByteArrayComparable, HashMap<ByteArrayComparable, String>>
+          currentCellToColumnIndexMap;
+
+  //The following values are transient
+  HashMap<String, ByteArrayComparable> columnToCurrentRowValueMap = null;
+
+  static final byte[] rowKeyFamily = new byte[0];
+  static final byte[] rowKeyQualifier = Bytes.toBytes("key");
+
+  String encoderClassName;
+
+  public SparkSQLPushDownFilter(DynamicLogicExpression dynamicLogicExpression,
+                                byte[][] valueFromQueryArray,
+                                HashMap<ByteArrayComparable,
+                                        HashMap<ByteArrayComparable, String>>
+                                        currentCellToColumnIndexMap, String encoderClassName) {
+    this.dynamicLogicExpression = dynamicLogicExpression;
+    this.valueFromQueryArray = valueFromQueryArray;
+    this.currentCellToColumnIndexMap = currentCellToColumnIndexMap;
+    this.encoderClassName = encoderClassName;
+  }
+
+  public SparkSQLPushDownFilter(DynamicLogicExpression dynamicLogicExpression,
+                                byte[][] valueFromQueryArray,
+                                MutableList<Field> fields, String encoderClassName) {
+    this.dynamicLogicExpression = dynamicLogicExpression;
+    this.valueFromQueryArray = valueFromQueryArray;
+    this.encoderClassName = encoderClassName;
+
+    //generate family qualifier to index mapping
+    this.currentCellToColumnIndexMap =
+            new HashMap<>();
+
+    for (int i = 0; i < fields.size(); i++) {
+      Field field = fields.apply(i);
+
+      byte[] cfBytes = field.cfBytes();
+      ByteArrayComparable familyByteComparable =
+          new ByteArrayComparable(cfBytes, 0, cfBytes.length);
+
+      HashMap<ByteArrayComparable, String> qualifierIndexMap =
+              currentCellToColumnIndexMap.get(familyByteComparable);
+
+      if (qualifierIndexMap == null) {
+        qualifierIndexMap = new HashMap<>();
+        currentCellToColumnIndexMap.put(familyByteComparable, qualifierIndexMap);
+      }
+      byte[] qBytes = field.colBytes();
+      ByteArrayComparable qualifierByteComparable =
+          new ByteArrayComparable(qBytes, 0, qBytes.length);
+
+      qualifierIndexMap.put(qualifierByteComparable, field.colName());
+    }
+  }
+
+  @Override
+  public ReturnCode filterCell(final Cell c) throws IOException {
+
+    //If the row value map has not been populated yet, we need to populate
+    // it with the row key
+    if (columnToCurrentRowValueMap == null) {
+      columnToCurrentRowValueMap = new HashMap<>();
+      HashMap<ByteArrayComparable, String> qualifierColumnMap =
+              currentCellToColumnIndexMap.get(
+                      new ByteArrayComparable(rowKeyFamily, 0, rowKeyFamily.length));
+
+      if (qualifierColumnMap != null) {
+        String rowKeyColumnName =
+                qualifierColumnMap.get(
+                        new ByteArrayComparable(rowKeyQualifier, 0,
+                                rowKeyQualifier.length));
+        //Make sure that the rowKey is part of the where clause
+        if (rowKeyColumnName != null) {
+          columnToCurrentRowValueMap.put(rowKeyColumnName,
+                  new ByteArrayComparable(c.getRowArray(),
+                          c.getRowOffset(), c.getRowLength()));
+        }
+      }
+    }
+
+    //Always populate the column value into the RowValueMap
+    ByteArrayComparable currentFamilyByteComparable =
+            new ByteArrayComparable(c.getFamilyArray(),
+            c.getFamilyOffset(),
+            c.getFamilyLength());
+
+    HashMap<ByteArrayComparable, String> qualifierColumnMap =
+            currentCellToColumnIndexMap.get(
+                    currentFamilyByteComparable);
+
+    if (qualifierColumnMap != null) {
+
+      String columnName =
+              qualifierColumnMap.get(
+                      new ByteArrayComparable(c.getQualifierArray(),
+                              c.getQualifierOffset(),
+                              c.getQualifierLength()));
+
+      if (columnName != null) {
+        columnToCurrentRowValueMap.put(columnName,
+                new ByteArrayComparable(c.getValueArray(),
+                        c.getValueOffset(), c.getValueLength()));
+      }
+    }
+
+    return ReturnCode.INCLUDE;
+  }
+
+
+  @Override
+  public boolean filterRow() throws IOException {
+
+    try {
+      boolean result =
+              dynamicLogicExpression.execute(columnToCurrentRowValueMap,
+                      valueFromQueryArray);
+      columnToCurrentRowValueMap = null;
+      return !result;
+    } catch (Throwable e) {
+      log.error("Error running dynamic logic on row", e);
+    }
+    return false;
+  }
+
+
+  /**
+   * @param pbBytes A pb serialized instance
+   * @return An instance of SparkSQLPushDownFilter
+   * @throws DeserializationException if the filter cannot be parsed from the given bytes
+   */
+  @SuppressWarnings("unused")
+  public static SparkSQLPushDownFilter parseFrom(final byte[] pbBytes)
+          throws DeserializationException {
+
+    SparkFilterProtos.SQLPredicatePushDownFilter proto;
+    try {
+      proto = SparkFilterProtos.SQLPredicatePushDownFilter.parseFrom(pbBytes);
+    } catch (InvalidProtocolBufferException e) {
+      throw new DeserializationException(e);
+    }
+
+    String encoder = proto.getEncoderClassName();
+    BytesEncoder enc = JavaBytesEncoder.create(encoder);
+
+    //Load DynamicLogicExpression
+    DynamicLogicExpression dynamicLogicExpression =
+            DynamicLogicExpressionBuilder.build(proto.getDynamicLogicExpression(), enc);
+
+    //Load valuesFromQuery
+    final List<ByteString> valueFromQueryArrayList = proto.getValueFromQueryArrayList();
+    byte[][] valueFromQueryArray = new byte[valueFromQueryArrayList.size()][];
+    for (int i = 0; i < valueFromQueryArrayList.size(); i++) {
+      valueFromQueryArray[i] = valueFromQueryArrayList.get(i).toByteArray();
+    }
+
+    //Load mapping from HBase family/qualifier to Spark SQL columnName
+    HashMap<ByteArrayComparable, HashMap<ByteArrayComparable, String>>
+            currentCellToColumnIndexMap = new HashMap<>();
+
+    for (SparkFilterProtos.SQLPredicatePushDownCellToColumnMapping
+            sqlPredicatePushDownCellToColumnMapping :
+            proto.getCellToColumnMappingList()) {
+
+      byte[] familyArray =
+              sqlPredicatePushDownCellToColumnMapping.getColumnFamily().toByteArray();
+      ByteArrayComparable familyByteComparable =
+              new ByteArrayComparable(familyArray, 0, familyArray.length);
+      HashMap<ByteArrayComparable, String> qualifierMap =
+              currentCellToColumnIndexMap.get(familyByteComparable);
+
+      if (qualifierMap == null) {
+        qualifierMap = new HashMap<>();
+        currentCellToColumnIndexMap.put(familyByteComparable, qualifierMap);
+      }
+      byte[] qualifierArray =
+              sqlPredicatePushDownCellToColumnMapping.getQualifier().toByteArray();
+
+      ByteArrayComparable qualifierByteComparable =
+              new ByteArrayComparable(qualifierArray, 0, qualifierArray.length);
+
+      qualifierMap.put(qualifierByteComparable,
+              sqlPredicatePushDownCellToColumnMapping.getColumnName());
+    }
+
+    return new SparkSQLPushDownFilter(dynamicLogicExpression,
+            valueFromQueryArray, currentCellToColumnIndexMap, encoder);
+  }
+
+  /**
+   * @return The filter serialized using pb
+   */
+  public byte[] toByteArray() {
+
+    SparkFilterProtos.SQLPredicatePushDownFilter.Builder builder =
+            SparkFilterProtos.SQLPredicatePushDownFilter.newBuilder();
+
+    SparkFilterProtos.SQLPredicatePushDownCellToColumnMapping.Builder columnMappingBuilder =
+            SparkFilterProtos.SQLPredicatePushDownCellToColumnMapping.newBuilder();
+
+    builder.setDynamicLogicExpression(dynamicLogicExpression.toExpressionString());
+    for (byte[] valueFromQuery: valueFromQueryArray) {
+      builder.addValueFromQueryArray(ByteStringer.wrap(valueFromQuery));
+    }
+
+    for (Map.Entry<ByteArrayComparable, HashMap<ByteArrayComparable, String>>
+            familyEntry : currentCellToColumnIndexMap.entrySet()) {
+      for (Map.Entry<ByteArrayComparable, String> qualifierEntry :
+              familyEntry.getValue().entrySet()) {
+        columnMappingBuilder.setColumnFamily(
+                ByteStringer.wrap(familyEntry.getKey().bytes()));
+        columnMappingBuilder.setQualifier(
+                ByteStringer.wrap(qualifierEntry.getKey().bytes()));
+        columnMappingBuilder.setColumnName(qualifierEntry.getValue());
+        builder.addCellToColumnMapping(columnMappingBuilder.build());
+      }
+    }
+    builder.setEncoderClassName(encoderClassName);
+
+
+    return builder.build().toByteArray();
+  }
+
+  @Override
+  public boolean equals(Object obj) {
+    if (!(obj instanceof SparkSQLPushDownFilter)) {
+      return false;
+    }
+    if (this == obj) {
+      return true;
+    }
+    SparkSQLPushDownFilter f = (SparkSQLPushDownFilter) obj;
+    if (this.valueFromQueryArray.length != f.valueFromQueryArray.length) {
+      return false;
+    }
+    int i = 0;
+    for (byte[] val : this.valueFromQueryArray) {
+      if (!Bytes.equals(val, f.valueFromQueryArray[i])) {
+        return false;
+      }
+      i++;
+    }
+    return this.dynamicLogicExpression.equals(f.dynamicLogicExpression) &&
+      this.currentCellToColumnIndexMap.equals(f.currentCellToColumnIndexMap) &&
+      this.encoderClassName.equals(f.encoderClassName);
+  }
+
+  @Override
+  public int hashCode() {
+    return Objects.hash(this.dynamicLogicExpression, Arrays.hashCode(this.valueFromQueryArray),
+      this.currentCellToColumnIndexMap, this.encoderClassName);
+  }
+}
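
For orientation, a minimal sketch (not part of the patch above) of the protobuf round trip this filter supports from plain Java. The helper class and its attach method are hypothetical; toByteArray and parseFrom come from the filter itself, and Scan.setFilter is the standard HBase client call:

  import org.apache.hadoop.hbase.client.Scan;
  import org.apache.hadoop.hbase.exceptions.DeserializationException;
  import org.apache.hadoop.hbase.spark.SparkSQLPushDownFilter;

  public class PushDownFilterRoundTrip {
    // Illustrative helper: serialize the filter with protobuf, rebuild it as the
    // region server would, and attach it to a Scan so filtering runs server-side.
    public static Scan attach(Scan scan, SparkSQLPushDownFilter filter)
        throws DeserializationException {
      byte[] pb = filter.toByteArray();
      SparkSQLPushDownFilter copy = SparkSQLPushDownFilter.parseFrom(pb);
      scan.setFilter(copy);
      return scan;
    }
  }

In the connector itself the Scala relation builds and attaches the filter to the Scan it issues; the sketch only shows the serialization contract that parseFrom/toByteArray implement.
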
diff --git a/spark/hbase-spark/src/main/java/org/apache/hadoop/hbase/spark/example/hbasecontext/JavaHBaseBulkDeleteExample.java b/spark/hbase-spark/src/main/java/org/apache/hadoop/hbase/spark/example/hbasecontext/JavaHBaseBulkDeleteExample.java
new file mode 100644
index 0000000..8cf2c7f
--- /dev/null
+++ b/spark/hbase-spark/src/main/java/org/apache/hadoop/hbase/spark/example/hbasecontext/JavaHBaseBulkDeleteExample.java
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.spark.example.hbasecontext;
+
+import java.util.ArrayList;
+import java.util.List;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.spark.JavaHBaseContext;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.Function;
+import org.apache.yetus.audience.InterfaceAudience;
+
+/**
+ * This is a simple example of deleting records in HBase
+ * with the bulkDelete function.
+ */
+@InterfaceAudience.Private
+final public class JavaHBaseBulkDeleteExample {
+
+  private JavaHBaseBulkDeleteExample() {}
+
+  public static void main(String[] args) {
+    if (args.length < 1) {
+      System.out.println("JavaHBaseBulkDeleteExample  {tableName}");
+      return;
+    }
+
+    String tableName = args[0];
+
+    SparkConf sparkConf = new SparkConf().setAppName("JavaHBaseBulkDeleteExample " + tableName);
+    JavaSparkContext jsc = new JavaSparkContext(sparkConf);
+
+    try {
+      List<byte[]> list = new ArrayList<>(5);
+      list.add(Bytes.toBytes("1"));
+      list.add(Bytes.toBytes("2"));
+      list.add(Bytes.toBytes("3"));
+      list.add(Bytes.toBytes("4"));
+      list.add(Bytes.toBytes("5"));
+
+      JavaRDD<byte[]> rdd = jsc.parallelize(list);
+
+      Configuration conf = HBaseConfiguration.create();
+
+      JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf);
+
+      hbaseContext.bulkDelete(rdd,
+              TableName.valueOf(tableName), new DeleteFunction(), 4);
+    } finally {
+      jsc.stop();
+    }
+
+  }
+
+  public static class DeleteFunction implements Function<byte[], Delete> {
+    private static final long serialVersionUID = 1L;
+    public Delete call(byte[] v) throws Exception {
+      return new Delete(v);
+    }
+  }
+}
diff --git a/spark/hbase-spark/src/main/java/org/apache/hadoop/hbase/spark/example/hbasecontext/JavaHBaseBulkGetExample.java b/spark/hbase-spark/src/main/java/org/apache/hadoop/hbase/spark/example/hbasecontext/JavaHBaseBulkGetExample.java
new file mode 100644
index 0000000..b5143de
--- /dev/null
+++ b/spark/hbase-spark/src/main/java/org/apache/hadoop/hbase/spark/example/hbasecontext/JavaHBaseBulkGetExample.java
@@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.spark.example.hbasecontext;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Get;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.spark.JavaHBaseContext;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.Function;
+import org.apache.yetus.audience.InterfaceAudience;
+
+/**
+ * This is a simple example of getting records in HBase
+ * with the bulkGet function.
+ */
+@InterfaceAudience.Private
+final public class JavaHBaseBulkGetExample {
+
+  private JavaHBaseBulkGetExample() {}
+
+  public static void main(String[] args) {
+    if (args.length < 1) {
+      System.out.println("JavaHBaseBulkGetExample  {tableName}");
+      return;
+    }
+
+    String tableName = args[0];
+
+    SparkConf sparkConf = new SparkConf().setAppName("JavaHBaseBulkGetExample " + tableName);
+    JavaSparkContext jsc = new JavaSparkContext(sparkConf);
+
+    try {
+      List<byte[]> list = new ArrayList<>(5);
+      list.add(Bytes.toBytes("1"));
+      list.add(Bytes.toBytes("2"));
+      list.add(Bytes.toBytes("3"));
+      list.add(Bytes.toBytes("4"));
+      list.add(Bytes.toBytes("5"));
+
+      JavaRDD<byte[]> rdd = jsc.parallelize(list);
+
+      Configuration conf = HBaseConfiguration.create();
+
+      JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf);
+
+      hbaseContext.bulkGet(TableName.valueOf(tableName), 2, rdd, new GetFunction(),
+              new ResultFunction());
+    } finally {
+      jsc.stop();
+    }
+  }
+
+  public static class GetFunction implements Function<byte[], Get> {
+
+    private static final long serialVersionUID = 1L;
+
+    public Get call(byte[] v) throws Exception {
+      return new Get(v);
+    }
+  }
+
+  public static class ResultFunction implements Function<Result, String> {
+
+    private static final long serialVersionUID = 1L;
+
+    public String call(Result result) throws Exception {
+      Iterator<Cell> it = result.listCells().iterator();
+      StringBuilder b = new StringBuilder();
+
+      b.append(Bytes.toString(result.getRow())).append(":");
+
+      while (it.hasNext()) {
+        Cell cell = it.next();
+        // Use the offset/length-aware conversions: the cell's backing arrays
+        // may contain more than just this cell.
+        String q = Bytes.toString(cell.getQualifierArray(), cell.getQualifierOffset(),
+                cell.getQualifierLength());
+        if (q.equals("counter")) {
+          b.append("(")
+                  .append(q)
+                  .append(",")
+                  .append(Bytes.toLong(cell.getValueArray(), cell.getValueOffset()))
+                  .append(")");
+        } else {
+          b.append("(")
+                  .append(q)
+                  .append(",")
+                  .append(Bytes.toString(cell.getValueArray(), cell.getValueOffset(),
+                          cell.getValueLength()))
+                  .append(")");
+        }
+      }
+      return b.toString();
+    }
+  }
+}
diff --git a/spark/hbase-spark/src/main/java/org/apache/hadoop/hbase/spark/example/hbasecontext/JavaHBaseBulkLoadExample.java b/spark/hbase-spark/src/main/java/org/apache/hadoop/hbase/spark/example/hbasecontext/JavaHBaseBulkLoadExample.java
new file mode 100644
index 0000000..6738059
--- /dev/null
+++ b/spark/hbase-spark/src/main/java/org/apache/hadoop/hbase/spark/example/hbasecontext/JavaHBaseBulkLoadExample.java
@@ -0,0 +1,109 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.spark.example.hbasecontext;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.spark.FamilyHFileWriteOptions;
+import org.apache.hadoop.hbase.spark.JavaHBaseContext;
+import org.apache.hadoop.hbase.spark.KeyFamilyQualifier;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.Pair;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.Function;
+import org.apache.yetus.audience.InterfaceAudience;
+
+/**
+ * Run this example using command below:
+ *
+ *  SPARK_HOME/bin/spark-submit --master local[2]
+ *  --class org.apache.hadoop.hbase.spark.example.hbasecontext.JavaHBaseBulkLoadExample
+ *  path/to/hbase-spark.jar {path/to/output/HFiles}
+ *
+ * This example will output HFiles in {path/to/output/HFiles}, and the user can run
+ * 'hbase org.apache.hadoop.hbase.tool.LoadIncrementalHFiles' to load the HFiles into the table
+ * to verify this example.
+ */
+@InterfaceAudience.Private
+final public class JavaHBaseBulkLoadExample {
+  private JavaHBaseBulkLoadExample() {}
+
+  public static void main(String[] args) {
+    if (args.length < 1) {
+      System.out.println("JavaHBaseBulkLoadExample  " + "{outputPath}");
+      return;
+    }
+
+    String tableName = "bulkload-table-test";
+    String columnFamily1 = "f1";
+    String columnFamily2 = "f2";
+
+    SparkConf sparkConf = new SparkConf().setAppName("JavaHBaseBulkLoadExample " + tableName);
+    JavaSparkContext jsc = new JavaSparkContext(sparkConf);
+
+    try {
+      List<String> list = new ArrayList<>();
+      // row1
+      list.add("1," + columnFamily1 + ",b,1");
+      // row3
+      list.add("3," + columnFamily1 + ",a,2");
+      list.add("3," + columnFamily1 + ",b,1");
+      list.add("3," + columnFamily2 + ",a,1");
+      // row2
+      list.add("2," + columnFamily2 + ",a,3");
+      list.add("2," + columnFamily2 + ",b,3");
+
+      JavaRDD<String> rdd = jsc.parallelize(list);
+
+      Configuration conf = HBaseConfiguration.create();
+      JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf);
+
+      hbaseContext.bulkLoad(rdd, TableName.valueOf(tableName), new BulkLoadFunction(), args[0],
+          new HashMap<byte[], FamilyHFileWriteOptions>(), false, HConstants.DEFAULT_MAX_FILE_SIZE);
+    } finally {
+      jsc.stop();
+    }
+  }
+
+  public static class BulkLoadFunction
+          implements Function<String, Pair<KeyFamilyQualifier, byte[]>> {
+    @Override
+    public Pair<KeyFamilyQualifier, byte[]> call(String v1) throws Exception {
+      if (v1 == null) {
+        return null;
+      }
+
+      String[] strs = v1.split(",");
+      if (strs.length != 4) {
+        return null;
+      }
+
+      KeyFamilyQualifier kfq = new KeyFamilyQualifier(Bytes.toBytes(strs[0]),
+              Bytes.toBytes(strs[1]), Bytes.toBytes(strs[2]));
+      return new Pair<>(kfq, Bytes.toBytes(strs[3]));
+    }
+  }
+}
diff --git a/spark/hbase-spark/src/main/java/org/apache/hadoop/hbase/spark/example/hbasecontext/JavaHBaseBulkPutExample.java b/spark/hbase-spark/src/main/java/org/apache/hadoop/hbase/spark/example/hbasecontext/JavaHBaseBulkPutExample.java
new file mode 100644
index 0000000..4a80b96
--- /dev/null
+++ b/spark/hbase-spark/src/main/java/org/apache/hadoop/hbase/spark/example/hbasecontext/JavaHBaseBulkPutExample.java
@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.spark.example.hbasecontext;
+
+import java.util.ArrayList;
+import java.util.List;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.spark.JavaHBaseContext;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.Function;
+import org.apache.yetus.audience.InterfaceAudience;
+
+/**
+ * This is a simple example of putting records in HBase
+ * with the bulkPut function.
+ */
+@InterfaceAudience.Private
+final public class JavaHBaseBulkPutExample {
+
+  private JavaHBaseBulkPutExample() {}
+
+  public static void main(String[] args) {
+    if (args.length < 2) {
+      System.out.println("JavaHBaseBulkPutExample  " +
+              "{tableName} {columnFamily}");
+      return;
+    }
+
+    String tableName = args[0];
+    String columnFamily = args[1];
+
+    SparkConf sparkConf = new SparkConf().setAppName("JavaHBaseBulkPutExample " + tableName);
+    JavaSparkContext jsc = new JavaSparkContext(sparkConf);
+
+    try {
+      List<String> list = new ArrayList<>(5);
+      list.add("1," + columnFamily + ",a,1");
+      list.add("2," + columnFamily + ",a,2");
+      list.add("3," + columnFamily + ",a,3");
+      list.add("4," + columnFamily + ",a,4");
+      list.add("5," + columnFamily + ",a,5");
+
+      JavaRDD<String> rdd = jsc.parallelize(list);
+
+      Configuration conf = HBaseConfiguration.create();
+
+      JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf);
+
+      hbaseContext.bulkPut(rdd,
+              TableName.valueOf(tableName),
+              new PutFunction());
+    } finally {
+      jsc.stop();
+    }
+  }
+
+  public static class PutFunction implements Function<String, Put> {
+
+    private static final long serialVersionUID = 1L;
+
+    public Put call(String v) throws Exception {
+      String[] cells = v.split(",");
+      Put put = new Put(Bytes.toBytes(cells[0]));
+
+      put.addColumn(Bytes.toBytes(cells[1]), Bytes.toBytes(cells[2]),
+              Bytes.toBytes(cells[3]));
+      return put;
+    }
+
+  }
+}
diff --git a/spark/hbase-spark/src/main/java/org/apache/hadoop/hbase/spark/example/hbasecontext/JavaHBaseDistributedScan.java b/spark/hbase-spark/src/main/java/org/apache/hadoop/hbase/spark/example/hbasecontext/JavaHBaseDistributedScan.java
new file mode 100644
index 0000000..0d4f680
--- /dev/null
+++ b/spark/hbase-spark/src/main/java/org/apache/hadoop/hbase/spark/example/hbasecontext/JavaHBaseDistributedScan.java
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.spark.example.hbasecontext;
+
+import java.util.List;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.spark.JavaHBaseContext;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.Function;
+import org.apache.yetus.audience.InterfaceAudience;
+import scala.Tuple2;
+
+/**
+ * This is a simple example of scanning records from HBase
+ * with the hbaseRDD function.
+ */
+@InterfaceAudience.Private
+final public class JavaHBaseDistributedScan {
+
+  private JavaHBaseDistributedScan() {}
+
+  public static void main(String[] args) {
+    if (args.length < 1) {
+      System.out.println("JavaHBaseDistributedScan {tableName}");
+      return;
+    }
+
+    String tableName = args[0];
+
+    SparkConf sparkConf = new SparkConf().setAppName("JavaHBaseDistributedScan " + tableName);
+    JavaSparkContext jsc = new JavaSparkContext(sparkConf);
+
+    try {
+      Configuration conf = HBaseConfiguration.create();
+
+      JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf);
+
+      Scan scan = new Scan();
+      scan.setCaching(100);
+
+      JavaRDD<Tuple2<ImmutableBytesWritable, Result>> javaRdd =
+              hbaseContext.hbaseRDD(TableName.valueOf(tableName), scan);
+
+      List<String> results = javaRdd.map(new ScanConvertFunction()).collect();
+
+      System.out.println("Result Size: " + results.size());
+    } finally {
+      jsc.stop();
+    }
+  }
+
+  private static class ScanConvertFunction implements
+          Function<Tuple2<ImmutableBytesWritable, Result>, String> {
+    @Override
+    public String call(Tuple2<ImmutableBytesWritable, Result> v1) throws Exception {
+      return Bytes.toString(v1._1().copyBytes());
+    }
+  }
+}
diff --git a/spark/hbase-spark/src/main/java/org/apache/hadoop/hbase/spark/example/hbasecontext/JavaHBaseMapGetPutExample.java b/spark/hbase-spark/src/main/java/org/apache/hadoop/hbase/spark/example/hbasecontext/JavaHBaseMapGetPutExample.java
new file mode 100644
index 0000000..a55d853
--- /dev/null
+++ b/spark/hbase-spark/src/main/java/org/apache/hadoop/hbase/spark/example/hbasecontext/JavaHBaseMapGetPutExample.java
@@ -0,0 +1,105 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.spark.example.hbasecontext;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.BufferedMutator;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.Get;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.spark.JavaHBaseContext;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.Function;
+import org.apache.spark.api.java.function.VoidFunction;
+import org.apache.yetus.audience.InterfaceAudience;
+import scala.Tuple2;
+
+/**
+ * This is a simple example of using the foreachPartition
+ * method with an HBase connection
+ */
+@InterfaceAudience.Private
+final public class JavaHBaseMapGetPutExample {
+
+  private JavaHBaseMapGetPutExample() {}
+
+  public static void main(String[] args) {
+    if (args.length < 1) {
+      System.out.println("JavaHBaseBulkGetExample {tableName}");
+      return;
+    }
+
+    final String tableName = args[0];
+
+    SparkConf sparkConf = new SparkConf().setAppName("JavaHBaseBulkGetExample " + tableName);
+    JavaSparkContext jsc = new JavaSparkContext(sparkConf);
+
+    try {
+      List<byte[]> list = new ArrayList<>(5);
+      list.add(Bytes.toBytes("1"));
+      list.add(Bytes.toBytes("2"));
+      list.add(Bytes.toBytes("3"));
+      list.add(Bytes.toBytes("4"));
+      list.add(Bytes.toBytes("5"));
+
+      JavaRDD<byte[]> rdd = jsc.parallelize(list);
+      Configuration conf = HBaseConfiguration.create();
+
+      JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf);
+
+      hbaseContext.foreachPartition(rdd,
+              new VoidFunction<Tuple2<Iterator<byte[]>, Connection>>() {
+          public void call(Tuple2<Iterator<byte[]>, Connection> t)
+                  throws Exception {
+            Table table = t._2().getTable(TableName.valueOf(tableName));
+            BufferedMutator mutator = t._2().getBufferedMutator(TableName.valueOf(tableName));
+
+            while (t._1().hasNext()) {
+              byte[] b = t._1().next();
+              Result r = table.get(new Get(b));
+              if (r.getExists()) {
+                mutator.mutate(new Put(b));
+              }
+            }
+
+            mutator.flush();
+            mutator.close();
+            table.close();
+          }
+        });
+    } finally {
+      jsc.stop();
+    }
+  }
+
+  public static class GetFunction implements Function<byte[], Get> {
+    private static final long serialVersionUID = 1L;
+    public Get call(byte[] v) throws Exception {
+      return new Get(v);
+    }
+  }
+}
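
A Scala counterpart of this pattern uses the two-argument foreachPartition on HBaseContext, where each partition shares one Connection. This is a minimal sketch under assumed names: "someTable", column family "cf", qualifier "q", and value "v" are placeholders.

    import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
    import org.apache.hadoop.hbase.client.{Connection, Get, Put}
    import org.apache.hadoop.hbase.spark.HBaseContext
    import org.apache.hadoop.hbase.util.Bytes
    import org.apache.spark.{SparkConf, SparkContext}

    object MapGetPutSketch {
      def main(args: Array[String]): Unit = {
        val sc = new SparkContext(new SparkConf().setAppName("MapGetPutSketch"))
        try {
          val hbaseContext = new HBaseContext(sc, HBaseConfiguration.create())
          val rdd = sc.parallelize(Seq("1", "2", "3", "4", "5").map(s => Bytes.toBytes(s)))
          hbaseContext.foreachPartition(rdd, (it: Iterator[Array[Byte]], conn: Connection) => {
            // One Table and one BufferedMutator per partition, shared by all of its records
            val table = conn.getTable(TableName.valueOf("someTable"))
            val mutator = conn.getBufferedMutator(TableName.valueOf("someTable"))
            it.foreach { rowKey =>
              // Only write back rows that already exist
              if (!table.get(new Get(rowKey)).isEmpty) {
                mutator.mutate(new Put(rowKey).addColumn(
                  Bytes.toBytes("cf"), Bytes.toBytes("q"), Bytes.toBytes("v")))
              }
            }
            mutator.close()
            table.close()
          })
        } finally {
          sc.stop()
        }
      }
    }
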
diff --git a/spark/hbase-spark/src/main/java/org/apache/hadoop/hbase/spark/example/hbasecontext/JavaHBaseStreamingBulkPutExample.java b/spark/hbase-spark/src/main/java/org/apache/hadoop/hbase/spark/example/hbasecontext/JavaHBaseStreamingBulkPutExample.java
new file mode 100644
index 0000000..74fadc6
--- /dev/null
+++ b/spark/hbase-spark/src/main/java/org/apache/hadoop/hbase/spark/example/hbasecontext/JavaHBaseStreamingBulkPutExample.java
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.spark.example.hbasecontext;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.spark.JavaHBaseContext;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.Function;
+import org.apache.spark.streaming.Duration;
+import org.apache.spark.streaming.api.java.JavaReceiverInputDStream;
+import org.apache.spark.streaming.api.java.JavaStreamingContext;
+import org.apache.yetus.audience.InterfaceAudience;
+
+/**
+ * This is a simple example of BulkPut with Spark Streaming
+ */
+@InterfaceAudience.Private
+final public class JavaHBaseStreamingBulkPutExample {
+
+  private JavaHBaseStreamingBulkPutExample() {}
+
+  public static void main(String[] args) {
+    if (args.length < 4) {
+      System.out.println("JavaHBaseBulkPutExample  " +
+              "{host} {port} {tableName}");
+      return;
+    }
+
+    String host = args[0];
+    String port = args[1];
+    String tableName = args[2];
+
+    SparkConf sparkConf =
+            new SparkConf().setAppName("JavaHBaseStreamingBulkPutExample " +
+                    host + ":" + port + ":" + tableName);
+
+    JavaSparkContext jsc = new JavaSparkContext(sparkConf);
+
+    try {
+      JavaStreamingContext jssc =
+              new JavaStreamingContext(jsc, new Duration(1000));
+
+      JavaReceiverInputDStream<String> javaDstream =
+              jssc.socketTextStream(host, Integer.parseInt(port));
+
+      Configuration conf = HBaseConfiguration.create();
+
+      JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf);
+
+      hbaseContext.streamBulkPut(javaDstream,
+              TableName.valueOf(tableName),
+              new PutFunction());
+    } finally {
+      jsc.stop();
+    }
+  }
+
+  public static class PutFunction implements Function<String, Put> {
+
+    private static final long serialVersionUID = 1L;
+
+    public Put call(String v) throws Exception {
+      String[] part = v.split(",");
+      Put put = new Put(Bytes.toBytes(part[0]));
+
+      put.addColumn(Bytes.toBytes(part[1]),
+              Bytes.toBytes(part[2]),
+              Bytes.toBytes(part[3]));
+      return put;
+    }
+
+  }
+}
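
In Scala, the same streaming bulk put goes through HBaseContext.streamBulkPut. A minimal sketch, assuming input lines of the form "rowKey,columnFamily,qualifier,value" arriving on a socket; "localhost", port 9999, and "someTable" are placeholders.

    import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
    import org.apache.hadoop.hbase.client.Put
    import org.apache.hadoop.hbase.spark.HBaseContext
    import org.apache.hadoop.hbase.util.Bytes
    import org.apache.spark.{SparkConf, SparkContext}
    import org.apache.spark.streaming.{Seconds, StreamingContext}

    object StreamingBulkPutSketch {
      def main(args: Array[String]): Unit = {
        val sc = new SparkContext(new SparkConf().setAppName("StreamingBulkPutSketch"))
        val ssc = new StreamingContext(sc, Seconds(1))
        val hbaseContext = new HBaseContext(sc, HBaseConfiguration.create())

        val lines = ssc.socketTextStream("localhost", 9999)
        hbaseContext.streamBulkPut[String](lines, TableName.valueOf("someTable"), (line: String) => {
          val part = line.split(",")
          // rowKey first, then family, qualifier, and value from the remaining fields
          new Put(Bytes.toBytes(part(0)))
            .addColumn(Bytes.toBytes(part(1)), Bytes.toBytes(part(2)), Bytes.toBytes(part(3)))
        })

        ssc.start()
        ssc.awaitTermination()
      }
    }
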
diff --git a/spark/hbase-spark/src/main/protobuf/SparkFilter.proto b/spark/hbase-spark/src/main/protobuf/SparkFilter.proto
new file mode 100644
index 0000000..e16c551
--- /dev/null
+++ b/spark/hbase-spark/src/main/protobuf/SparkFilter.proto
@@ -0,0 +1,40 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// This file contains protocol buffers that are used for Spark filters
+// over in the hbase-spark module
+package hbase.pb;
+
+option java_package = "org.apache.hadoop.hbase.spark.protobuf.generated";
+option java_outer_classname = "SparkFilterProtos";
+option java_generic_services = true;
+option java_generate_equals_and_hash = true;
+option optimize_for = SPEED;
+
+message SQLPredicatePushDownCellToColumnMapping {
+  required bytes column_family = 1;
+  required bytes qualifier = 2;
+  required string column_name = 3;
+}
+
+message SQLPredicatePushDownFilter {
+  required string dynamic_logic_expression = 1;
+  repeated bytes value_from_query_array = 2;
+  repeated SQLPredicatePushDownCellToColumnMapping cell_to_column_mapping = 3;
+  optional string encoderClassName = 4;
+}
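
These messages are compiled with protoc into the SparkFilterProtos outer class named above. As a rough sketch of the standard protobuf-java builders generated for them: the family, qualifier, column name, values, and expression string below are placeholders, and the ByteString import may need to point at a shaded protobuf package depending on how the module is built.

    import com.google.protobuf.ByteString
    import org.apache.hadoop.hbase.spark.protobuf.generated.SparkFilterProtos

    // One cell-to-column mapping: an HBase family/qualifier mapped to a SQL column name
    val mapping = SparkFilterProtos.SQLPredicatePushDownCellToColumnMapping.newBuilder()
      .setColumnFamily(ByteString.copyFromUtf8("cf"))
      .setQualifier(ByteString.copyFromUtf8("q"))
      .setColumnName("col1")
      .build()

    // The filter itself: a serialized dynamic logic expression plus the query values it references
    val filter = SparkFilterProtos.SQLPredicatePushDownFilter.newBuilder()
      .setDynamicLogicExpression("( col1 == 0 )")   // placeholder expression string
      .addValueFromQueryArray(ByteString.copyFromUtf8("someValue"))
      .addCellToColumnMapping(mapping)
      .build()
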
diff --git a/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/BulkLoadPartitioner.scala b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/BulkLoadPartitioner.scala
new file mode 100644
index 0000000..9442c50
--- /dev/null
+++ b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/BulkLoadPartitioner.scala
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.spark
+
+import java.util
+import java.util.Comparator
+
+import org.apache.yetus.audience.InterfaceAudience;
+import org.apache.hadoop.hbase.util.Bytes
+import org.apache.spark.Partitioner
+
+/**
+ * A Partitioner implementation that will separate records to different
+ * HBase Regions based on region splits
+ *
+ * @param startKeys   The start keys for the given table
+ */
+@InterfaceAudience.Public
+class BulkLoadPartitioner(startKeys:Array[Array[Byte]])
+  extends Partitioner {
+  // when the table does not exist, startKeys is an empty array
+  override def numPartitions: Int = if (startKeys.length == 0) 1 else startKeys.length
+
+  override def getPartition(key: Any): Int = {
+
+    val comparator: Comparator[Array[Byte]] = new Comparator[Array[Byte]] {
+      override def compare(o1: Array[Byte], o2: Array[Byte]): Int = {
+        Bytes.compareTo(o1, o2)
+      }
+    }
+
+    val rowKey:Array[Byte] =
+      key match {
+        case qualifier: KeyFamilyQualifier =>
+          qualifier.rowKey
+        case wrapper: ByteArrayWrapper =>
+          wrapper.value
+        case _ =>
+          key.asInstanceOf[Array[Byte]]
+      }
+    var partition = util.Arrays.binarySearch(startKeys, rowKey, comparator)
+    if (partition < 0)
+      partition = partition * -1 + -2
+    if (partition < 0)
+      partition = 0
+    partition
+  }
+}
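
As a quick illustration of the binary-search mapping above, here is a small sketch with hard-coded start keys standing in for what would normally come from RegionLocator.getStartKeys; the expected partition indexes are noted in the comments.

    import org.apache.hadoop.hbase.spark.BulkLoadPartitioner
    import org.apache.hadoop.hbase.util.Bytes

    // Three regions: (-inf, "m"), ["m", "t"), ["t", +inf) -- the first start key is always empty
    val startKeys: Array[Array[Byte]] = Array(
      Array.empty[Byte],
      Bytes.toBytes("m"),
      Bytes.toBytes("t"))

    val partitioner = new BulkLoadPartitioner(startKeys)
    partitioner.numPartitions                           // 3
    partitioner.getPartition(Bytes.toBytes("apple"))    // 0, sorts before "m"
    partitioner.getPartition(Bytes.toBytes("mango"))    // 1, between "m" and "t"
    partitioner.getPartition(Bytes.toBytes("zebra"))    // 2, at or after "t"
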
diff --git a/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/ByteArrayComparable.scala b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/ByteArrayComparable.scala
new file mode 100644
index 0000000..2d0be38
--- /dev/null
+++ b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/ByteArrayComparable.scala
@@ -0,0 +1,49 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.spark
+
+import org.apache.yetus.audience.InterfaceAudience;
+import org.apache.hadoop.hbase.util.Bytes
+
+@InterfaceAudience.Public
+class ByteArrayComparable(val bytes:Array[Byte], val offset:Int = 0, var length:Int = -1)
+  extends Comparable[ByteArrayComparable] {
+
+  if (length == -1) {
+    length = bytes.length
+  }
+
+  override def compareTo(o: ByteArrayComparable): Int = {
+    Bytes.compareTo(bytes, offset, length, o.bytes, o.offset, o.length)
+  }
+
+  override def hashCode(): Int = {
+    Bytes.hashCode(bytes, offset, length)
+  }
+
+  override def equals (obj: Any): Boolean = {
+    obj match {
+      case b: ByteArrayComparable =>
+        Bytes.equals(bytes, offset, length, b.bytes, b.offset, b.length)
+      case _ =>
+        false
+    }
+  }
+}
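
A tiny sketch of how this comparable behaves: compareTo delegates to Bytes.compareTo over the wrapped (bytes, offset, length) window, so instances order by HBase's lexicographic byte order.

    import org.apache.hadoop.hbase.spark.ByteArrayComparable
    import org.apache.hadoop.hbase.util.Bytes

    val a = new ByteArrayComparable(Bytes.toBytes("alpha"))
    val b = new ByteArrayComparable(Bytes.toBytes("beta"))
    assert(a.compareTo(b) < 0)                                    // "alpha" sorts before "beta"
    assert(a == new ByteArrayComparable(Bytes.toBytes("alpha")))  // equals compares byte content
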
diff --git a/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/ByteArrayWrapper.scala b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/ByteArrayWrapper.scala
new file mode 100644
index 0000000..738fa45
--- /dev/null
+++ b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/ByteArrayWrapper.scala
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.spark
+
+import java.io.Serializable
+
+import org.apache.yetus.audience.InterfaceAudience;
+import org.apache.hadoop.hbase.util.Bytes
+
+/**
+ * This is a wrapper over a byte array so it can work as
+ * a key in a hashMap
+ *
+ * @param value The Byte Array value
+ */
+@InterfaceAudience.Public
+class ByteArrayWrapper (var value:Array[Byte])
+  extends Comparable[ByteArrayWrapper] with Serializable {
+  override def compareTo(valueOther: ByteArrayWrapper): Int = {
+    Bytes.compareTo(value,valueOther.value)
+  }
+  override def equals(o2: Any): Boolean = {
+    o2 match {
+      case wrapper: ByteArrayWrapper =>
+        Bytes.equals(value, wrapper.value)
+      case _ =>
+        false
+    }
+  }
+  override def hashCode():Int = {
+    Bytes.hashCode(value)
+  }
+}
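
Because equals and hashCode are defined over the wrapped bytes, two wrappers built from equal byte arrays hit the same map entry, which raw Array[Byte] keys would not. A small sketch:

    import scala.collection.mutable
    import org.apache.hadoop.hbase.spark.ByteArrayWrapper
    import org.apache.hadoop.hbase.util.Bytes

    val counts = new mutable.HashMap[ByteArrayWrapper, Int]()
    counts(new ByteArrayWrapper(Bytes.toBytes("row1"))) = 1
    counts.get(new ByteArrayWrapper(Bytes.toBytes("row1")))   // Some(1), equal bytes find the entry
    counts.get(new ByteArrayWrapper(Bytes.toBytes("row2")))   // None
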
diff --git a/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/ColumnFamilyQualifierMapKeyWrapper.scala b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/ColumnFamilyQualifierMapKeyWrapper.scala
new file mode 100644
index 0000000..3037001
--- /dev/null
+++ b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/ColumnFamilyQualifierMapKeyWrapper.scala
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.spark
+
+import org.apache.yetus.audience.InterfaceAudience;
+import org.apache.hadoop.hbase.util.Bytes
+
+/**
+ * A wrapper class that will allow both columnFamily and qualifier to
+ * be the key of a hashMap.  Also allows for finding the value in a hashMap
+ * without cloning the HBase value from the HBase Cell object
+ * @param columnFamily       ColumnFamily byte array
+ * @param columnFamilyOffSet Offset of columnFamily value in the array
+ * @param columnFamilyLength Length of the columnFamily value in the columnFamily array
+ * @param qualifier          Qualifier byte array
+ * @param qualifierOffSet    Offset of qualifier value in the array
+ * @param qualifierLength    Length of the qualifier value with in the array
+ */
+@InterfaceAudience.Public
+class ColumnFamilyQualifierMapKeyWrapper(val columnFamily:Array[Byte],
+                                         val columnFamilyOffSet:Int,
+                                         val columnFamilyLength:Int,
+                                         val qualifier:Array[Byte],
+                                         val qualifierOffSet:Int,
+                                         val qualifierLength:Int)
+  extends Serializable{
+
+  override def equals(other:Any): Boolean = {
+    val otherWrapper = other.asInstanceOf[ColumnFamilyQualifierMapKeyWrapper]
+
+    Bytes.compareTo(columnFamily,
+      columnFamilyOffSet,
+      columnFamilyLength,
+      otherWrapper.columnFamily,
+      otherWrapper.columnFamilyOffSet,
+      otherWrapper.columnFamilyLength) == 0 && Bytes.compareTo(qualifier,
+        qualifierOffSet,
+        qualifierLength,
+        otherWrapper.qualifier,
+        otherWrapper.qualifierOffSet,
+        otherWrapper.qualifierLength) == 0
+  }
+
+  override def hashCode():Int = {
+    Bytes.hashCode(columnFamily, columnFamilyOffSet, columnFamilyLength) +
+      Bytes.hashCode(qualifier, qualifierOffSet, qualifierLength)
+  }
+
+  def cloneColumnFamily():Array[Byte] = {
+    val resultArray = new Array[Byte](columnFamilyLength)
+    System.arraycopy(columnFamily, columnFamilyOffSet, resultArray, 0, columnFamilyLength)
+    resultArray
+  }
+
+  def cloneQualifier():Array[Byte] = {
+    val resultArray = new Array[Byte](qualifierLength)
+    System.arraycopy(qualifier, qualifierOffSet, resultArray, 0, qualifierLength)
+    resultArray
+  }
+}
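
The wrapper keys a map on a (family, qualifier) window without copying bytes out of the backing array; cloneColumnFamily and cloneQualifier copy only when a standalone array is actually needed. A minimal sketch with placeholder family "cf" and qualifier "q":

    import org.apache.hadoop.hbase.spark.ColumnFamilyQualifierMapKeyWrapper
    import org.apache.hadoop.hbase.util.Bytes

    val cf = Bytes.toBytes("cf")
    val q = Bytes.toBytes("q")

    val key = new ColumnFamilyQualifierMapKeyWrapper(cf, 0, cf.length, q, 0, q.length)
    val sameKey = new ColumnFamilyQualifierMapKeyWrapper(cf, 0, cf.length, q, 0, q.length)
    assert(key == sameKey)                         // equals compares the two byte windows
    assert(Bytes.equals(key.cloneQualifier(), q))  // clone copies out just the qualifier window
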
diff --git a/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/DefaultSource.scala b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/DefaultSource.scala
new file mode 100644
index 0000000..4e05695
--- /dev/null
+++ b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/DefaultSource.scala
@@ -0,0 +1,1222 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.spark
+
+import java.util
+import java.util.concurrent.ConcurrentLinkedQueue
+
+import org.apache.yetus.audience.InterfaceAudience;
+import org.apache.hadoop.hbase.client._
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable
+import org.apache.hadoop.hbase.mapred.TableOutputFormat
+import org.apache.hadoop.hbase.spark.datasources._
+import org.apache.hadoop.hbase.types._
+import org.apache.hadoop.hbase.util.{Bytes, PositionedByteRange, SimplePositionedMutableByteRange}
+import org.apache.hadoop.hbase.HBaseConfiguration
+import org.apache.hadoop.hbase.HTableDescriptor
+import org.apache.hadoop.hbase.HColumnDescriptor
+import org.apache.hadoop.hbase.TableName
+import org.apache.hadoop.hbase.CellUtil
+import org.apache.hadoop.mapred.JobConf
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.{DataFrame, SaveMode, Row, SQLContext}
+import org.apache.spark.sql.sources._
+import org.apache.spark.sql.types._
+
+import scala.collection.mutable
+
+/**
+ * DefaultSource for integration with Spark's dataframe datasources.
+ * This class will produce a relationProvider based on input given to it from spark
+ *
+ * This class needs to stay in the current package 'org.apache.hadoop.hbase.spark'
+ * for Spark to match the hbase data source name.
+ *
+ * In all, this DefaultSource supports the following datasource functionality:
+ * - Scan range pruning through filter push down logic based on rowKeys
+ * - Filter push down logic on HBase Cells
+ * - Qualifier filtering based on columns used in the SparkSQL statement
+ * - Type conversions of basic SQL types.  All conversions are done
+ *   through the HBase Bytes object commands.
+ */
+@InterfaceAudience.Private
+class DefaultSource extends RelationProvider  with CreatableRelationProvider with Logging {
+  /**
+   * Takes input from SparkSQL and constructs a BaseRelation
+   *
+   * @param sqlContext SparkSQL context
+   * @param parameters Parameters given to us from SparkSQL
+   * @return           A BaseRelation Object
+   */
+  override def createRelation(sqlContext: SQLContext,
+                              parameters: Map[String, String]):
+  BaseRelation = {
+    new HBaseRelation(parameters, None)(sqlContext)
+  }
+
+
+  override def createRelation(
+      sqlContext: SQLContext,
+      mode: SaveMode,
+      parameters: Map[String, String],
+      data: DataFrame): BaseRelation = {
+    val relation = HBaseRelation(parameters, Some(data.schema))(sqlContext)
+    relation.createTable()
+    relation.insert(data, false)
+    relation
+  }
+}
+
+/**
+ * Implementation of Spark BaseRelation that will build up our scan logic,
+ * do the scan pruning, filter push down, and value conversions
+ *
+ * @param sqlContext              SparkSQL context
+ */
+@InterfaceAudience.Private
+case class HBaseRelation (
+    @transient parameters: Map[String, String],
+    userSpecifiedSchema: Option[StructType]
+  )(@transient val sqlContext: SQLContext)
+  extends BaseRelation with PrunedFilteredScan  with InsertableRelation  with Logging {
+  val timestamp = parameters.get(HBaseSparkConf.TIMESTAMP).map(_.toLong)
+  val minTimestamp = parameters.get(HBaseSparkConf.TIMERANGE_START).map(_.toLong)
+  val maxTimestamp = parameters.get(HBaseSparkConf.TIMERANGE_END).map(_.toLong)
+  val maxVersions = parameters.get(HBaseSparkConf.MAX_VERSIONS).map(_.toInt)
+  val encoderClsName = parameters.get(HBaseSparkConf.QUERY_ENCODER).getOrElse(HBaseSparkConf.DEFAULT_QUERY_ENCODER)
+
+  @transient val encoder = JavaBytesEncoder.create(encoderClsName)
+
+  val catalog = HBaseTableCatalog(parameters)
+  def tableName = catalog.name
+  val configResources = parameters.get(HBaseSparkConf.HBASE_CONFIG_LOCATION)
+  val useHBaseContext =  parameters.get(HBaseSparkConf.USE_HBASECONTEXT).map(_.toBoolean).getOrElse(HBaseSparkConf.DEFAULT_USE_HBASECONTEXT)
+  val usePushDownColumnFilter = parameters.get(HBaseSparkConf.PUSHDOWN_COLUMN_FILTER)
+    .map(_.toBoolean).getOrElse(HBaseSparkConf.DEFAULT_PUSHDOWN_COLUMN_FILTER)
+
+  // The user supplied per table parameter will overwrite global ones in SparkConf
+  val blockCacheEnable = parameters.get(HBaseSparkConf.QUERY_CACHEBLOCKS).map(_.toBoolean)
+    .getOrElse(
+      sqlContext.sparkContext.getConf.getBoolean(
+        HBaseSparkConf.QUERY_CACHEBLOCKS, HBaseSparkConf.DEFAULT_QUERY_CACHEBLOCKS))
+  val cacheSize = parameters.get(HBaseSparkConf.QUERY_CACHEDROWS).map(_.toInt)
+    .getOrElse(
+      sqlContext.sparkContext.getConf.getInt(
+      HBaseSparkConf.QUERY_CACHEDROWS, -1))
+  val batchNum = parameters.get(HBaseSparkConf.QUERY_BATCHSIZE).map(_.toInt)
+    .getOrElse(sqlContext.sparkContext.getConf.getInt(
+    HBaseSparkConf.QUERY_BATCHSIZE,  -1))
+
+  val bulkGetSize =  parameters.get(HBaseSparkConf.BULKGET_SIZE).map(_.toInt)
+    .getOrElse(sqlContext.sparkContext.getConf.getInt(
+    HBaseSparkConf.BULKGET_SIZE,  HBaseSparkConf.DEFAULT_BULKGET_SIZE))
+
+  //create or get latest HBaseContext
+  val hbaseContext:HBaseContext = if (useHBaseContext) {
+    LatestHBaseContextCache.latest
+  } else {
+    val config = HBaseConfiguration.create()
+    configResources.map(resource => resource.split(",").foreach(r => config.addResource(r)))
+    new HBaseContext(sqlContext.sparkContext, config)
+  }
+
+  val wrappedConf = new SerializableConfiguration(hbaseContext.config)
+  def hbaseConf = wrappedConf.value
+
+  /**
+   * Generates a Spark SQL schema object so Spark SQL knows what is being
+   * provided by this BaseRelation
+   *
+   * @return schema generated from the SCHEMA_COLUMNS_MAPPING_KEY value
+   */
+  override val schema: StructType = userSpecifiedSchema.getOrElse(catalog.toDataType)
+
+
+
+  def createTable() {
+    val numReg = parameters.get(HBaseTableCatalog.newTable).map(x => x.toInt).getOrElse(0)
+    val startKey =  Bytes.toBytes(
+      parameters.get(HBaseTableCatalog.regionStart)
+        .getOrElse(HBaseTableCatalog.defaultRegionStart))
+    val endKey = Bytes.toBytes(
+      parameters.get(HBaseTableCatalog.regionEnd)
+        .getOrElse(HBaseTableCatalog.defaultRegionEnd))
+    if (numReg > 3) {
+      val tName = TableName.valueOf(catalog.name)
+      val cfs = catalog.getColumnFamilies
+
+      val connection = HBaseConnectionCache.getConnection(hbaseConf)
+      // Initialize hBase table if necessary
+      val admin = connection.getAdmin
+      try {
+        if (!admin.isTableAvailable(tName)) {
+          val tableDesc = new HTableDescriptor(tName)
+          cfs.foreach { x =>
+            val cf = new HColumnDescriptor(x.getBytes())
+            logDebug(s"add family $x to ${catalog.name}")
+            tableDesc.addFamily(cf)
+          }
+          val splitKeys = Bytes.split(startKey, endKey, numReg);
+          admin.createTable(tableDesc, splitKeys)
+
+        }
+      }finally {
+        admin.close()
+        connection.close()
+      }
+    } else {
+      logInfo(
+        s"""${HBaseTableCatalog.newTable}
+           |is not defined or no larger than 3, skip the create table""".stripMargin)
+    }
+  }
+
+  /**
+   * Writes the given DataFrame out to HBase as Puts
+   *
+   * @param data      the DataFrame to write out
+   * @param overwrite currently not used by this relation; data is always written as Puts
+   */
+  override def insert(data: DataFrame, overwrite: Boolean): Unit = {
+    val jobConfig: JobConf = new JobConf(hbaseConf, this.getClass)
+    jobConfig.setOutputFormat(classOf[TableOutputFormat])
+    jobConfig.set(TableOutputFormat.OUTPUT_TABLE, catalog.name)
+    var count = 0
+    val rkFields = catalog.getRowKey
+    val rkIdxedFields = rkFields.map{ case x =>
+      (schema.fieldIndex(x.colName), x)
+    }
+    val colsIdxedFields = schema
+      .fieldNames
+      .partition( x => rkFields.map(_.colName).contains(x))
+      ._2.map(x => (schema.fieldIndex(x), catalog.getField(x)))
+    val rdd = data.rdd
+    def convertToPut(row: Row) = {
+      // construct bytes for row key
+      val rowBytes = rkIdxedFields.map { case (x, y) =>
+        Utils.toBytes(row(x), y)
+      }
+      val rLen = rowBytes.foldLeft(0) { case (x, y) =>
+        x + y.length
+      }
+      val rBytes = new Array[Byte](rLen)
+      var offset = 0
+      rowBytes.foreach { x =>
+        System.arraycopy(x, 0, rBytes, offset, x.length)
+        offset += x.length
+      }
+      val put = timestamp.fold(new Put(rBytes))(new Put(rBytes, _))
+
+      colsIdxedFields.foreach { case (x, y) =>
+        val b = Utils.toBytes(row(x), y)
+        put.addColumn(Bytes.toBytes(y.cf), Bytes.toBytes(y.col), b)
+      }
+      count += 1
+      (new ImmutableBytesWritable, put)
+    }
+    rdd.map(convertToPut(_)).saveAsHadoopDataset(jobConfig)
+  }
+
+  def getIndexedProjections(requiredColumns: Array[String]): Seq[(Field, Int)] = {
+    requiredColumns.map(catalog.sMap.getField(_)).zipWithIndex
+  }
+
+
+  /**
+   * Takes an HBase Row object and parses all of the fields from it.
+   * This is independent of which fields were requested from the key;
+   * because we have all the data, it's less complex to parse everything.
+   *
+   * @param row the retrieved row from hbase.
+   * @param keyFields all of the fields in the row key, ORDERED by their order in the row key.
+   */
+  def parseRowKey(row: Array[Byte], keyFields: Seq[Field]): Map[Field, Any] = {
+    keyFields.foldLeft((0, Seq[(Field, Any)]()))((state, field) => {
+      val idx = state._1
+      val parsed = state._2
+      if (field.length != -1) {
+        val value = Utils.hbaseFieldToScalaType(field, row, idx, field.length)
+        // Return the new index and appended value
+        (idx + field.length, parsed ++ Seq((field, value)))
+      } else {
+        field.dt match {
+          case StringType =>
+            val pos = row.indexOf(HBaseTableCatalog.delimiter, idx)
+            if (pos == -1 || pos > row.length) {
+              // this is at the last dimension
+              val value = Utils.hbaseFieldToScalaType(field, row, idx, row.length)
+              (row.length + 1, parsed ++ Seq((field, value)))
+            } else {
+              val value = Utils.hbaseFieldToScalaType(field, row, idx, pos - idx)
+              (pos, parsed ++ Seq((field, value)))
+            }
+          // We don't know the length, assume it extends to the end of the rowkey.
+          case _ => (row.length + 1, parsed ++ Seq((field, Utils.hbaseFieldToScalaType(field, row, idx, row.length))))
+        }
+      }
+    })._2.toMap
+  }
+
+  def buildRow(fields: Seq[Field], result: Result): Row = {
+    val r = result.getRow
+    val keySeq = parseRowKey(r, catalog.getRowKey)
+    val valueSeq = fields.filter(!_.isRowKey).map { x =>
+      val kv = result.getColumnLatestCell(Bytes.toBytes(x.cf), Bytes.toBytes(x.col))
+      if (kv == null || kv.getValueLength == 0) {
+        (x, null)
+      } else {
+        val v = CellUtil.cloneValue(kv)
+        (x, x.dt match {
+          // Here, to avoid arraycopy, return v directly instead of calling hbaseFieldToScalaType
+          case BinaryType => v
+          case _ => Utils.hbaseFieldToScalaType(x, v, 0, v.length)
+        })
+      }
+    }.toMap
+    val unionedRow = keySeq ++ valueSeq
+    // Return the row ordered by the requested order
+    Row.fromSeq(fields.map(unionedRow.get(_).getOrElse(null)))
+  }
+
+  /**
+   * Here we are building the functionality to populate the resulting RDD[Row]
+   * Here is where we will do the following:
+   * - Filter push down
+   * - Scan or GetList pruning
+   * - Executing our scan(s) or/and GetList to generate result
+   *
+   * @param requiredColumns The columns that are being requested by the requesting query
+   * @param filters         The filters that are being applied by the requesting query
+   * @return                RDD with all the results from HBase needed for SparkSQL to
+   *                        execute the query on
+   */
+  override def buildScan(requiredColumns: Array[String], filters: Array[Filter]): RDD[Row] = {
+
+    val pushDownTuple = buildPushDownPredicatesResource(filters)
+    val pushDownRowKeyFilter = pushDownTuple._1
+    var pushDownDynamicLogicExpression = pushDownTuple._2
+    val valueArray = pushDownTuple._3
+
+    if (!usePushDownColumnFilter) {
+      pushDownDynamicLogicExpression = null
+    }
+
+    logDebug("pushDownRowKeyFilter:           " + pushDownRowKeyFilter.ranges)
+    if (pushDownDynamicLogicExpression != null) {
+      logDebug("pushDownDynamicLogicExpression: " +
+        pushDownDynamicLogicExpression.toExpressionString)
+    }
+    logDebug("valueArray:                     " + valueArray.length)
+
+    val requiredQualifierDefinitionList =
+      new mutable.MutableList[Field]
+
+    requiredColumns.foreach( c => {
+      val field = catalog.getField(c)
+      requiredQualifierDefinitionList += field
+    })
+
+    //retain the information for unit testing checks
+    DefaultSourceStaticUtils.populateLatestExecutionRules(pushDownRowKeyFilter,
+      pushDownDynamicLogicExpression)
+
+    val getList = new util.ArrayList[Get]()
+    val rddList = new util.ArrayList[RDD[Row]]()
+
+    //add points to getList
+    pushDownRowKeyFilter.points.foreach(p => {
+      val get = new Get(p)
+      requiredQualifierDefinitionList.foreach( d => {
+        if (d.isRowKey)
+          get.addColumn(d.cfBytes, d.colBytes)
+      })
+      getList.add(get)
+    })
+
+    val pushDownFilterJava = if (usePushDownColumnFilter && pushDownDynamicLogicExpression != null) {
+        Some(new SparkSQLPushDownFilter(pushDownDynamicLogicExpression,
+          valueArray, requiredQualifierDefinitionList, encoderClsName))
+    } else {
+      None
+    }
+    val hRdd = new HBaseTableScanRDD(this, hbaseContext, pushDownFilterJava, requiredQualifierDefinitionList.seq)
+    pushDownRowKeyFilter.points.foreach(hRdd.addPoint(_))
+    pushDownRowKeyFilter.ranges.foreach(hRdd.addRange(_))
+
+    var resultRDD: RDD[Row] = {
+      val tmp = hRdd.map{ r =>
+        val indexedFields = getIndexedProjections(requiredColumns).map(_._1)
+        buildRow(indexedFields, r)
+
+      }
+      if (tmp.partitions.size > 0) {
+        tmp
+      } else {
+        null
+      }
+    }
+
+    if (resultRDD == null) {
+      val scan = new Scan()
+      scan.setCacheBlocks(blockCacheEnable)
+      scan.setBatch(batchNum)
+      scan.setCaching(cacheSize)
+      requiredQualifierDefinitionList.foreach( d =>
+        scan.addColumn(d.cfBytes, d.colBytes))
+
+      val rdd = hbaseContext.hbaseRDD(TableName.valueOf(tableName), scan).map(r => {
+        val indexedFields = getIndexedProjections(requiredColumns).map(_._1)
+        buildRow(indexedFields, r._2)
+      })
+      resultRDD=rdd
+    }
+    resultRDD
+  }
+
+  def buildPushDownPredicatesResource(filters: Array[Filter]):
+  (RowKeyFilter, DynamicLogicExpression, Array[Array[Byte]]) = {
+    var superRowKeyFilter:RowKeyFilter = null
+    val queryValueList = new mutable.MutableList[Array[Byte]]
+    var superDynamicLogicExpression: DynamicLogicExpression = null
+
+    filters.foreach( f => {
+      val rowKeyFilter = new RowKeyFilter()
+      val logicExpression = transverseFilterTree(rowKeyFilter, queryValueList, f)
+      if (superDynamicLogicExpression == null) {
+        superDynamicLogicExpression = logicExpression
+        superRowKeyFilter = rowKeyFilter
+      } else {
+        superDynamicLogicExpression =
+          new AndLogicExpression(superDynamicLogicExpression, logicExpression)
+        superRowKeyFilter.mergeIntersect(rowKeyFilter)
+      }
+
+    })
+
+    val queryValueArray = queryValueList.toArray
+
+    if (superRowKeyFilter == null) {
+      superRowKeyFilter = new RowKeyFilter
+    }
+
+    (superRowKeyFilter, superDynamicLogicExpression, queryValueArray)
+  }
+
+  /**
+   * For some codecs, the order may be inconsistent between a java primitive
+   * type and its byte array. We may have to split the predicates on some
+   * of the java primitive types into multiple predicates. The encoder takes
+   * care of this and returns the concrete ranges.
+   *
+   * For example, in the naive codec some of the java primitive types have to be split into
+   * multiple predicates, and these predicates are unioned together so the overall predicate
+   * is evaluated correctly. For example, if we have "COLUMN < 2", we will transform it into
+   * "0 <= COLUMN < 2 OR Integer.MIN_VALUE <= COLUMN <= -1"
+   */
+
+  def transverseFilterTree(parentRowKeyFilter:RowKeyFilter,
+                                  valueArray:mutable.MutableList[Array[Byte]],
+                                  filter:Filter): DynamicLogicExpression = {
+    filter match {
+      case EqualTo(attr, value) =>
+        val field = catalog.getField(attr)
+        if (field != null) {
+          if (field.isRowKey) {
+            parentRowKeyFilter.mergeIntersect(new RowKeyFilter(
+              DefaultSourceStaticUtils.getByteValue(field,
+                value.toString), null))
+          }
+          val byteValue =
+            DefaultSourceStaticUtils.getByteValue(field, value.toString)
+          valueArray += byteValue
+        }
+        new EqualLogicExpression(attr, valueArray.length - 1, false)
+
+      /**
+        * The encoder may split the predicates into multiple byte array boundaries.
+        * Each boundary is mapped into a RowKeyFilter and then unioned by the reduce
+        * operation. If the data type is not supported, b will be None, and no
+        * operation happens on the parentRowKeyFilter.
+        *
+        * Note that because LessThan is not inclusive, the first bound should be exclusive,
+        * which is controlled by inc.
+        *
+        * The other predicates, i.e., GreaterThan/LessThanOrEqual/GreaterThanOrEqual, follow
+        * similar logic.
+        */
+      case LessThan(attr, value) =>
+        val field = catalog.getField(attr)
+        if (field != null) {
+          if (field.isRowKey) {
+            val b = encoder.ranges(value)
+            var inc = false
+            b.map(_.less.map { x =>
+              val r = new RowKeyFilter(null,
+                new ScanRange(x.upper, inc, x.low, true)
+              )
+              inc = true
+              r
+            }).map { x =>
+              x.reduce { (i, j) =>
+                i.mergeUnion(j)
+              }
+            }.map(parentRowKeyFilter.mergeIntersect(_))
+          }
+          val byteValue = encoder.encode(field.dt, value)
+          valueArray += byteValue
+        }
+        new LessThanLogicExpression(attr, valueArray.length - 1)
+      case GreaterThan(attr, value) =>
+        val field = catalog.getField(attr)
+        if (field != null) {
+          if (field.isRowKey) {
+            val b = encoder.ranges(value)
+            var inc = false
+            b.map(_.greater.map{x =>
+              val r = new RowKeyFilter(null,
+                new ScanRange(x.upper, true, x.low, inc))
+              inc = true
+              r
+            }).map { x =>
+              x.reduce { (i, j) =>
+                i.mergeUnion(j)
+              }
+            }.map(parentRowKeyFilter.mergeIntersect(_))
+          }
+          val byteValue = encoder.encode(field.dt, value)
+          valueArray += byteValue
+        }
+        new GreaterThanLogicExpression(attr, valueArray.length - 1)
+      case LessThanOrEqual(attr, value) =>
+        val field = catalog.getField(attr)
+        if (field != null) {
+          if (field.isRowKey) {
+            val b = encoder.ranges(value)
+            b.map(_.less.map(x =>
+              new RowKeyFilter(null,
+                new ScanRange(x.upper, true, x.low, true))))
+              .map { x =>
+                x.reduce{ (i, j) =>
+                  i.mergeUnion(j)
+                }
+              }.map(parentRowKeyFilter.mergeIntersect(_))
+          }
+          val byteValue = encoder.encode(field.dt, value)
+          valueArray += byteValue
+        }
+        new LessThanOrEqualLogicExpression(attr, valueArray.length - 1)
+      case GreaterThanOrEqual(attr, value) =>
+        val field = catalog.getField(attr)
+        if (field != null) {
+          if (field.isRowKey) {
+            val b = encoder.ranges(value)
+            b.map(_.greater.map(x =>
+              new RowKeyFilter(null,
+                new ScanRange(x.upper, true, x.low, true))))
+              .map { x =>
+                x.reduce { (i, j) =>
+                  i.mergeUnion(j)
+                }
+              }.map(parentRowKeyFilter.mergeIntersect(_))
+          }
+          val byteValue = encoder.encode(field.dt, value)
+          valueArray += byteValue
+        }
+        new GreaterThanOrEqualLogicExpression(attr, valueArray.length - 1)
+      case Or(left, right) =>
+        val leftExpression = transverseFilterTree(parentRowKeyFilter, valueArray, left)
+        val rightSideRowKeyFilter = new RowKeyFilter
+        val rightExpression = transverseFilterTree(rightSideRowKeyFilter, valueArray, right)
+
+        parentRowKeyFilter.mergeUnion(rightSideRowKeyFilter)
+
+        new OrLogicExpression(leftExpression, rightExpression)
+      case And(left, right) =>
+
+        val leftExpression = transverseFilterTree(parentRowKeyFilter, valueArray, left)
+        val rightSideRowKeyFilter = new RowKeyFilter
+        val rightExpression = transverseFilterTree(rightSideRowKeyFilter, valueArray, right)
+        parentRowKeyFilter.mergeIntersect(rightSideRowKeyFilter)
+
+        new AndLogicExpression(leftExpression, rightExpression)
+      case IsNull(attr) =>
+        new IsNullLogicExpression(attr, false)
+      case IsNotNull(attr) =>
+        new IsNullLogicExpression(attr, true)
+      case _ =>
+        new PassThroughLogicExpression
+    }
+  }
+}
+
+/**
+ * Construct to contain a single scan range's information.  Also
+ * provides functions to merge with other scan ranges through AND
+ * or OR operators
+ *
+ * @param upperBound          Upper bound of scan
+ * @param isUpperBoundEqualTo Include upper bound value in the results
+ * @param lowerBound          Lower bound of scan
+ * @param isLowerBoundEqualTo Include lower bound value in the results
+ */
+@InterfaceAudience.Private
+class ScanRange(var upperBound:Array[Byte], var isUpperBoundEqualTo:Boolean,
+                var lowerBound:Array[Byte], var isLowerBoundEqualTo:Boolean)
+  extends Serializable {
+
+  /**
+   * Function to merge another scan object through an AND operation
+   *
+   * @param other Other scan object
+   */
+  def mergeIntersect(other:ScanRange): Unit = {
+    val upperBoundCompare = compareRange(upperBound, other.upperBound)
+    val lowerBoundCompare = compareRange(lowerBound, other.lowerBound)
+
+    upperBound = if (upperBoundCompare <0) upperBound else other.upperBound
+    lowerBound = if (lowerBoundCompare >0) lowerBound else other.lowerBound
+
+    isLowerBoundEqualTo = if (lowerBoundCompare == 0)
+      isLowerBoundEqualTo && other.isLowerBoundEqualTo
+    else isLowerBoundEqualTo
+
+    isUpperBoundEqualTo = if (upperBoundCompare == 0)
+      isUpperBoundEqualTo && other.isUpperBoundEqualTo
+    else isUpperBoundEqualTo
+  }
+
+  /**
+   * Function to merge another scan object through an OR operation
+   *
+   * @param other Other scan object
+   */
+  def mergeUnion(other:ScanRange): Unit = {
+
+    val upperBoundCompare = compareRange(upperBound, other.upperBound)
+    val lowerBoundCompare = compareRange(lowerBound, other.lowerBound)
+
+    upperBound = if (upperBoundCompare >0) upperBound else other.upperBound
+    lowerBound = if (lowerBoundCompare <0) lowerBound else other.lowerBound
+
+    isLowerBoundEqualTo = if (lowerBoundCompare == 0)
+      isLowerBoundEqualTo || other.isLowerBoundEqualTo
+    else if (lowerBoundCompare < 0) isLowerBoundEqualTo else other.isLowerBoundEqualTo
+
+    isUpperBoundEqualTo = if (upperBoundCompare == 0)
+      isUpperBoundEqualTo || other.isUpperBoundEqualTo
+    else if (upperBoundCompare < 0) other.isUpperBoundEqualTo else isUpperBoundEqualTo
+  }
+
+  /**
+   * Common function to see if this scan overlaps with another
+   *
+   * Reference Visual
+   *
+   * A                           B
+   * |---------------------------|
+   *   LL--------------LU
+   *        RL--------------RU
+   *
+   * A = lowest value is byte[0]
+   * B = highest value is null
+   * LL = Left Lower Bound
+   * LU = Left Upper Bound
+   * RL = Right Lower Bound
+   * RU = Right Upper Bound
+   *
+   * @param other Other scan object
+   * @return      True if the scans overlap, false if they do not
+   */
+  def getOverLapScanRange(other:ScanRange): ScanRange = {
+
+    var leftRange:ScanRange = null
+    var rightRange:ScanRange = null
+
+    // First identify the Left range
+    // Also lower bound can't be null
+    if (compareRange(lowerBound, other.lowerBound) < 0 ||
+      compareRange(upperBound, other.upperBound) < 0) {
+      leftRange = this
+      rightRange = other
+    } else {
+      leftRange = other
+      rightRange = this
+    }
+
+    if (hasOverlap(leftRange, rightRange)) {
+      // Find the upper bound and lower bound
+      if (compareRange(leftRange.upperBound, rightRange.upperBound) >= 0) {
+        new ScanRange(rightRange.upperBound, rightRange.isUpperBoundEqualTo,
+          rightRange.lowerBound, rightRange.isLowerBoundEqualTo)
+      } else {
+        new ScanRange(leftRange.upperBound, leftRange.isUpperBoundEqualTo,
+          rightRange.lowerBound, rightRange.isLowerBoundEqualTo)
+      }
+    } else {
+      null
+    }
+  }
+
+  /**
+    * The leftRange.upperBound has to be larger than the rightRange's lowerBound.
+    * Otherwise, there is no overlap.
+    *
+    * @param left  The range with the smaller lowerBound
+    * @param right The range with the larger lowerBound
+    * @return Whether two ranges have overlap.
+    */
+
+  def hasOverlap(left: ScanRange, right: ScanRange): Boolean = {
+    compareRange(left.upperBound, right.lowerBound) >= 0
+  }
+
+  /**
+   * Special compare logic because we can have null values
+   * for left or right bound
+   *
+   * @param left  Left byte array
+   * @param right Right byte array
+   * @return      0 if equal, 1 if left is greater, -1 if right is greater
+   */
+  def compareRange(left:Array[Byte], right:Array[Byte]): Int = {
+    if (left == null && right == null) 0
+    else if (left == null && right != null) 1
+    else if (left != null && right == null) -1
+    else Bytes.compareTo(left, right)
+  }
+
+  /**
+   * Checks whether the given point falls within this scan range,
+   * honoring whether each bound is inclusive
+   *
+   * @param point Point to check against the lower and upper bounds
+   * @return      True if the point is contained in this range
+   */
+  def containsPoint(point:Array[Byte]): Boolean = {
+    val lowerCompare = compareRange(point, lowerBound)
+    val upperCompare = compareRange(point, upperBound)
+
+    ((isLowerBoundEqualTo && lowerCompare >= 0) ||
+      (!isLowerBoundEqualTo && lowerCompare > 0)) &&
+      ((isUpperBoundEqualTo && upperCompare <= 0) ||
+        (!isUpperBoundEqualTo && upperCompare < 0))
+
+  }
+  override def toString:String = {
+    "ScanRange:(upperBound:" + Bytes.toString(upperBound) +
+      ",isUpperBoundEqualTo:" + isUpperBoundEqualTo + ",lowerBound:" +
+      Bytes.toString(lowerBound) + ",isLowerBoundEqualTo:" + isLowerBoundEqualTo + ")"
+  }
+}
+
+/**
+ * Contains information related to filters for a given column.
+ * This can contain many ranges or points.
+ *
+ * @param currentPoint the initial point when the filter is created
+ * @param currentRange the initial scanRange when the filter is created
+ */
+@InterfaceAudience.Private
+class ColumnFilter (currentPoint:Array[Byte] = null,
+                     currentRange:ScanRange = null,
+                     var points:mutable.MutableList[Array[Byte]] =
+                     new mutable.MutableList[Array[Byte]](),
+                     var ranges:mutable.MutableList[ScanRange] =
+                     new mutable.MutableList[ScanRange]() ) extends Serializable {
+  //Collection of ranges
+  if (currentRange != null ) ranges.+=(currentRange)
+
+  //Collection of points
+  if (currentPoint != null) points.+=(currentPoint)
+
+  /**
+   * This will validate a given value against the filter's points and/or ranges;
+   * the result indicates whether the value passes the filter
+   *
+   * @param value       Value to be validated
+   * @param valueOffSet The offset of the value
+   * @param valueLength The length of the value
+   * @return            True if the value passes the filter, false if not
+   */
+  def validate(value:Array[Byte], valueOffSet:Int, valueLength:Int):Boolean = {
+    var result = false
+
+    points.foreach( p => {
+      if (Bytes.equals(p, 0, p.length, value, valueOffSet, valueLength)) {
+        result = true
+      }
+    })
+
+    ranges.foreach( r => {
+      val upperBoundPass = r.upperBound == null ||
+        (r.isUpperBoundEqualTo &&
+          Bytes.compareTo(r.upperBound, 0, r.upperBound.length,
+            value, valueOffSet, valueLength) >= 0) ||
+        (!r.isUpperBoundEqualTo &&
+          Bytes.compareTo(r.upperBound, 0, r.upperBound.length,
+            value, valueOffSet, valueLength) > 0)
+
+      val lowerBoundPass = r.lowerBound == null || r.lowerBound.length == 0 ||
+        (r.isLowerBoundEqualTo &&
+          Bytes.compareTo(r.lowerBound, 0, r.lowerBound.length,
+            value, valueOffSet, valueLength) <= 0) ||
+        (!r.isLowerBoundEqualTo &&
+          Bytes.compareTo(r.lowerBound, 0, r.lowerBound.length,
+            value, valueOffSet, valueLength) < 0)
+
+      result = result || (upperBoundPass && lowerBoundPass)
+    })
+    result
+  }
+
+  /**
+   * This will allow us to merge filter logic that is joined to the existing filter
+   * through an OR operator
+   *
+   * @param other Filter to merge
+   */
+  def mergeUnion(other:ColumnFilter): Unit = {
+    other.points.foreach( p => points += p)
+
+    other.ranges.foreach( otherR => {
+      var doesOverLap = false
+      ranges.foreach{ r =>
+        if (r.getOverLapScanRange(otherR) != null) {
+          r.mergeUnion(otherR)
+          doesOverLap = true
+        }}
+      if (!doesOverLap) ranges.+=(otherR)
+    })
+  }
+
+  /**
+   * This will allow us to merge filter logic that is joined to the existing filter
+   * through an AND operator
+   *
+   * @param other Filter to merge
+   */
+  def mergeIntersect(other:ColumnFilter): Unit = {
+    val survivingPoints = new mutable.MutableList[Array[Byte]]()
+    points.foreach( p => {
+      other.points.foreach( otherP => {
+        if (Bytes.equals(p, otherP)) {
+          survivingPoints.+=(p)
+        }
+      })
+    })
+    points = survivingPoints
+
+    val survivingRanges = new mutable.MutableList[ScanRange]()
+
+    other.ranges.foreach( otherR => {
+      ranges.foreach( r => {
+        if (r.getOverLapScanRange(otherR) != null) {
+          r.mergeIntersect(otherR)
+          survivingRanges += r
+        }
+      })
+    })
+    ranges = survivingRanges
+  }
+
+  override def toString:String = {
+    val strBuilder = new StringBuilder
+    strBuilder.append("(points:(")
+    var isFirst = true
+    points.foreach( p => {
+      if (isFirst) isFirst = false
+      else strBuilder.append(",")
+      strBuilder.append(Bytes.toString(p))
+    })
+    strBuilder.append("),ranges:")
+    isFirst = true
+    ranges.foreach( r => {
+      if (isFirst) isFirst = false
+      else strBuilder.append(",")
+      strBuilder.append(r)
+    })
+    strBuilder.append("))")
+    strBuilder.toString()
+  }
+}
+
+/**
+ * A collection of ColumnFilters indexed by column names.
+ *
+ * Also contains merge commands that will consolidate the filters
+ * per column name
+ */
+@InterfaceAudience.Private
+class ColumnFilterCollection {
+  val columnFilterMap = new mutable.HashMap[String, ColumnFilter]
+
+  def clear(): Unit = {
+    columnFilterMap.clear()
+  }
+
+  /**
+   * This will allow us to merge filter logic that is joined to the existing filter
+   * through an OR operator.  This will merge a single column's filter
+   *
+   * @param column The column to be merged
+   * @param other  The other ColumnFilter object to merge
+   */
+  def mergeUnion(column:String, other:ColumnFilter): Unit = {
+    val existingFilter = columnFilterMap.get(column)
+    if (existingFilter.isEmpty) {
+      columnFilterMap.+=((column, other))
+    } else {
+      existingFilter.get.mergeUnion(other)
+    }
+  }
+
+  /**
+   * This will allow us to merge all filters in the existing collection
+   * to the filters in the other collection.  All merges are done as a result
+   * of an OR operator
+   *
+   * @param other The other Column Filter Collection to be merged
+   */
+  def mergeUnion(other:ColumnFilterCollection): Unit = {
+    other.columnFilterMap.foreach( e => {
+      mergeUnion(e._1, e._2)
+    })
+  }
+
+  /**
+   * This will allow us to merge all filters in the existing collection
+   * to the filters in the other collection.  All merges are done as a result
+   * of an AND operator
+   *
+   * @param other The column filter from the other collection
+   */
+  def mergeIntersect(other:ColumnFilterCollection): Unit = {
+    other.columnFilterMap.foreach( e => {
+      val existingColumnFilter = columnFilterMap.get(e._1)
+      if (existingColumnFilter.isEmpty) {
+        columnFilterMap += e
+      } else {
+        existingColumnFilter.get.mergeIntersect(e._2)
+      }
+    })
+  }
+
+  override def toString:String = {
+    val strBuilder = new StringBuilder
+    columnFilterMap.foreach( e => strBuilder.append(e))
+    strBuilder.toString()
+  }
+}
+
+/**
+ * Status object to store static functions but also to hold last executed
+ * information that can be used for unit testing.
+ */
+@InterfaceAudience.Private
+object DefaultSourceStaticUtils {
+
+  val rawInteger = new RawInteger
+  val rawLong = new RawLong
+  val rawFloat = new RawFloat
+  val rawDouble = new RawDouble
+  val rawString = RawString.ASCENDING
+
+  val byteRange = new ThreadLocal[PositionedByteRange] {
+    override def initialValue(): PositionedByteRange = {
+      val range = new SimplePositionedMutableByteRange()
+      range.setOffset(0)
+      range.setPosition(0)
+    }
+  }
+
+  def getFreshByteRange(bytes: Array[Byte]): PositionedByteRange = {
+    getFreshByteRange(bytes, 0, bytes.length)
+  }
+
+  def getFreshByteRange(bytes: Array[Byte], offset: Int = 0, length: Int):
+  PositionedByteRange = {
+    byteRange.get().set(bytes).setLength(length).setOffset(offset)
+  }
+
+  //This will contain the last 5 filters and required fields used in buildScan
+  // These values can be used in unit testing to make sure we are converting
+  // The Spark SQL input correctly
+  val lastFiveExecutionRules =
+    new ConcurrentLinkedQueue[ExecutionRuleForUnitTesting]()
+
+  /**
+   * This method populates the lastFiveExecutionRules for unit test purposes.
+   * This method is not thread safe.
+   *
+   * @param rowKeyFilter           The rowKey Filter logic used in the last query
+   * @param dynamicLogicExpression The dynamicLogicExpression used in the last query
+   */
+  def populateLatestExecutionRules(rowKeyFilter: RowKeyFilter,
+                                   dynamicLogicExpression: DynamicLogicExpression): Unit = {
+    lastFiveExecutionRules.add(new ExecutionRuleForUnitTesting(
+      rowKeyFilter, dynamicLogicExpression))
+    while (lastFiveExecutionRules.size() > 5) {
+      lastFiveExecutionRules.poll()
+    }
+  }
+
+  /**
+   * This method will convert the result content from HBase into the
+   * SQL value type that is requested by the Spark SQL schema definition
+   *
+   * @param field              The structure of the SparkSQL Column
+   * @param r                       The result object from HBase
+   * @return                        The converted object type
+   */
+  def getValue(field: Field,
+      r: Result): Any = {
+    if (field.isRowKey) {
+      val row = r.getRow
+
+      field.dt match {
+        case IntegerType => rawInteger.decode(getFreshByteRange(row))
+        case LongType => rawLong.decode(getFreshByteRange(row))
+        case FloatType => rawFloat.decode(getFreshByteRange(row))
+        case DoubleType => rawDouble.decode(getFreshByteRange(row))
+        case StringType => rawString.decode(getFreshByteRange(row))
+        case TimestampType => rawLong.decode(getFreshByteRange(row))
+        case _ => Bytes.toString(row)
+      }
+    } else {
+      val cellByteValue =
+        r.getColumnLatestCell(field.cfBytes, field.colBytes)
+      if (cellByteValue == null) null
+      else field.dt match {
+        case IntegerType => rawInteger.decode(getFreshByteRange(cellByteValue.getValueArray,
+          cellByteValue.getValueOffset, cellByteValue.getValueLength))
+        case LongType => rawLong.decode(getFreshByteRange(cellByteValue.getValueArray,
+          cellByteValue.getValueOffset, cellByteValue.getValueLength))
+        case FloatType => rawFloat.decode(getFreshByteRange(cellByteValue.getValueArray,
+          cellByteValue.getValueOffset, cellByteValue.getValueLength))
+        case DoubleType => rawDouble.decode(getFreshByteRange(cellByteValue.getValueArray,
+          cellByteValue.getValueOffset, cellByteValue.getValueLength))
+        case StringType => Bytes.toString(cellByteValue.getValueArray,
+          cellByteValue.getValueOffset, cellByteValue.getValueLength)
+        case TimestampType => rawLong.decode(getFreshByteRange(cellByteValue.getValueArray,
+          cellByteValue.getValueOffset, cellByteValue.getValueLength))
+        case _ => Bytes.toString(cellByteValue.getValueArray,
+          cellByteValue.getValueOffset, cellByteValue.getValueLength)
+      }
+    }
+  }
+
+  /**
+   * This will convert the value from SparkSQL to be stored into HBase using the
+   * right byte type
+   *
+   * @param field                   The structure of the SparkSQL column
+   * @param value                   String value from SparkSQL
+   * @return                        Returns the byte array to go into HBase
+   */
+  def getByteValue(field: Field,
+      value: String): Array[Byte] = {
+    field.dt match {
+      case IntegerType =>
+        val result = new Array[Byte](Bytes.SIZEOF_INT)
+        val localDataRange = getFreshByteRange(result)
+        rawInteger.encode(localDataRange, value.toInt)
+        localDataRange.getBytes
+      case LongType =>
+        val result = new Array[Byte](Bytes.SIZEOF_LONG)
+        val localDataRange = getFreshByteRange(result)
+        rawLong.encode(localDataRange, value.toLong)
+        localDataRange.getBytes
+      case FloatType =>
+        val result = new Array[Byte](Bytes.SIZEOF_FLOAT)
+        val localDataRange = getFreshByteRange(result)
+        rawFloat.encode(localDataRange, value.toFloat)
+        localDataRange.getBytes
+      case DoubleType =>
+        val result = new Array[Byte](Bytes.SIZEOF_DOUBLE)
+        val localDataRange = getFreshByteRange(result)
+        rawDouble.encode(localDataRange, value.toDouble)
+        localDataRange.getBytes
+      case StringType =>
+        Bytes.toBytes(value)
+      case TimestampType =>
+        val result = new Array[Byte](Bytes.SIZEOF_LONG)
+        val localDataRange = getFreshByteRange(result)
+        rawLong.encode(localDataRange, value.toLong)
+        localDataRange.getBytes
+
+      case _ => Bytes.toBytes(value)
+    }
+  }
+}
+
+/**
+ * Contains information related to the filters for a row key.
+ * This can contain many ranges or points.
+ *
+ * @param currentPoint the initial point when the filter is created
+ * @param currentRange the initial scanRange when the filter is created
+ */
+@InterfaceAudience.Private
+class RowKeyFilter (currentPoint:Array[Byte] = null,
+                    currentRange:ScanRange =
+                    new ScanRange(null, true, new Array[Byte](0), true),
+                    var points:mutable.MutableList[Array[Byte]] =
+                    new mutable.MutableList[Array[Byte]](),
+                    var ranges:mutable.MutableList[ScanRange] =
+                    new mutable.MutableList[ScanRange]() ) extends Serializable {
+  //Collection of ranges
+  if (currentRange != null ) ranges.+=(currentRange)
+
+  //Collection of points
+  if (currentPoint != null) points.+=(currentPoint)
+
+  /**
+   * This will validate a given value against the filter's points and/or ranges;
+   * the result indicates whether the value passes the filter
+   *
+   * @param value       Value to be validated
+   * @param valueOffSet The offset of the value
+   * @param valueLength The length of the value
+   * @return            True if the value passes the filter, false if not
+   */
+  def validate(value:Array[Byte], valueOffSet:Int, valueLength:Int):Boolean = {
+    var result = false
+
+    points.foreach( p => {
+      if (Bytes.equals(p, 0, p.length, value, valueOffSet, valueLength)) {
+        result = true
+      }
+    })
+
+    ranges.foreach( r => {
+      val upperBoundPass = r.upperBound == null ||
+        (r.isUpperBoundEqualTo &&
+          Bytes.compareTo(r.upperBound, 0, r.upperBound.length,
+            value, valueOffSet, valueLength) >= 0) ||
+        (!r.isUpperBoundEqualTo &&
+          Bytes.compareTo(r.upperBound, 0, r.upperBound.length,
+            value, valueOffSet, valueLength) > 0)
+
+      val lowerBoundPass = r.lowerBound == null || r.lowerBound.length == 0 ||
+      (r.isLowerBoundEqualTo &&
+        Bytes.compareTo(r.lowerBound, 0, r.lowerBound.length,
+          value, valueOffSet, valueLength) <= 0) ||
+        (!r.isLowerBoundEqualTo &&
+          Bytes.compareTo(r.lowerBound, 0, r.lowerBound.length,
+            value, valueOffSet, valueLength) < 0)
+
+      result = result || (upperBoundPass && lowerBoundPass)
+    })
+    result
+  }
+
+  /**
+   * This will allow us to merge filter logic that is joined to the existing filter
+   * through an OR operator
+   *
+   * @param other Filter to merge
+   */
+  def mergeUnion(other:RowKeyFilter): RowKeyFilter = {
+    other.points.foreach( p => points += p)
+
+    other.ranges.foreach( otherR => {
+      var doesOverLap = false
+      ranges.foreach{ r =>
+        if (r.getOverLapScanRange(otherR) != null) {
+          r.mergeUnion(otherR)
+          doesOverLap = true
+        }}
+      if (!doesOverLap) ranges.+=(otherR)
+    })
+    this
+  }
+
+  /**
+   * This will allow us to merge filter logic that is joined to the existing filter
+   * through an AND operator
+   *
+   * @param other Filter to merge
+   */
+  def mergeIntersect(other:RowKeyFilter): RowKeyFilter = {
+    val survivingPoints = new mutable.MutableList[Array[Byte]]()
+    val didntSurviveFirstPassPoints = new mutable.MutableList[Array[Byte]]()
+    if (points == null || points.length == 0) {
+      other.points.foreach( otherP => {
+        didntSurviveFirstPassPoints += otherP
+      })
+    } else {
+      points.foreach(p => {
+        if (other.points.length == 0) {
+          didntSurviveFirstPassPoints += p
+        } else {
+          other.points.foreach(otherP => {
+            if (Bytes.equals(p, otherP)) {
+              survivingPoints += p
+            } else {
+              didntSurviveFirstPassPoints += p
+            }
+          })
+        }
+      })
+    }
+
+    val survivingRanges = new mutable.MutableList[ScanRange]()
+
+    if (ranges.length == 0) {
+      didntSurviveFirstPassPoints.foreach(p => {
+          survivingPoints += p
+      })
+    } else {
+      ranges.foreach(r => {
+        other.ranges.foreach(otherR => {
+          val overLapScanRange = r.getOverLapScanRange(otherR)
+          if (overLapScanRange != null) {
+            survivingRanges += overLapScanRange
+          }
+        })
+        didntSurviveFirstPassPoints.foreach(p => {
+          if (r.containsPoint(p)) {
+            survivingPoints += p
+          }
+        })
+      })
+    }
+    points = survivingPoints
+    ranges = survivingRanges
+    this
+  }
+
+  override def toString:String = {
+    val strBuilder = new StringBuilder
+    strBuilder.append("(points:(")
+    var isFirst = true
+    points.foreach( p => {
+      if (isFirst) isFirst = false
+      else strBuilder.append(",")
+      strBuilder.append(Bytes.toString(p))
+    })
+    strBuilder.append("),ranges:")
+    isFirst = true
+    ranges.foreach( r => {
+      if (isFirst) isFirst = false
+      else strBuilder.append(",")
+      strBuilder.append(r)
+    })
+    strBuilder.append("))")
+    strBuilder.toString()
+  }
+}
+
+@InterfaceAudience.Private
+class ExecutionRuleForUnitTesting(val rowKeyFilter: RowKeyFilter,
+                                  val dynamicLogicExpression: DynamicLogicExpression)
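
As a quick illustration of how the RowKeyFilter above combines points and ranges, here is a minimal sketch; the row-key literals and value names are made up, and the ScanRange arguments follow the (upperBound, isUpperBoundEqualTo, lowerBound, isLowerBoundEqualTo) order used by the default range in the constructor above.

  import org.apache.hadoop.hbase.spark.{RowKeyFilter, ScanRange}
  import org.apache.hadoop.hbase.util.Bytes

  // A point filter matching exactly "row-001" and a range filter covering
  // ["row-000", "row-100"], both bounds inclusive.
  val pointFilter = new RowKeyFilter(currentPoint = Bytes.toBytes("row-001"), currentRange = null)
  val rangeFilter = new RowKeyFilter(currentPoint = null,
    currentRange = new ScanRange(Bytes.toBytes("row-100"), true, Bytes.toBytes("row-000"), true))

  // mergeUnion mutates and returns the receiver: OR semantics, so a value passes
  // if any point or any range accepts it.
  val unioned = pointFilter.mergeUnion(rangeFilter)

  val key = Bytes.toBytes("row-050")
  val passes = unioned.validate(key, 0, key.length)  // true: inside the merged range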
diff --git a/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/DynamicLogicExpression.scala b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/DynamicLogicExpression.scala
new file mode 100644
index 0000000..4c35a7b
--- /dev/null
+++ b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/DynamicLogicExpression.scala
@@ -0,0 +1,259 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.spark
+
+import java.util
+
+import org.apache.yetus.audience.InterfaceAudience;
+import org.apache.hadoop.hbase.spark.datasources.{BytesEncoder, JavaBytesEncoder}
+import org.apache.hadoop.hbase.spark.datasources.JavaBytesEncoder.JavaBytesEncoder
+import org.apache.hadoop.hbase.util.Bytes
+
+/**
+ * Dynamic logic for SQL push down.  There is an instance for most
+ * common operations and a pass-through for other operations not covered here.
+ *
+ * Logic can be nested with And or Or operators.
+ *
+ * A logic tree can be written out as a string and reconstructed from that string
+ *
+ */
+@InterfaceAudience.Private
+trait DynamicLogicExpression {
+  def execute(columnToCurrentRowValueMap: util.HashMap[String, ByteArrayComparable],
+              valueFromQueryValueArray:Array[Array[Byte]]): Boolean
+  def toExpressionString: String = {
+    val strBuilder = new StringBuilder
+    appendToExpression(strBuilder)
+    strBuilder.toString()
+  }
+  def filterOps: JavaBytesEncoder = JavaBytesEncoder.Unknown
+
+  def appendToExpression(strBuilder:StringBuilder)
+
+  var encoder: BytesEncoder = _
+
+  def setEncoder(enc: BytesEncoder): DynamicLogicExpression = {
+    encoder = enc
+    this
+  }
+}
+
+@InterfaceAudience.Private
+trait CompareTrait {
+  self: DynamicLogicExpression =>
+  def columnName: String
+  def valueFromQueryIndex: Int
+  def execute(columnToCurrentRowValueMap:
+              util.HashMap[String, ByteArrayComparable],
+              valueFromQueryValueArray:Array[Array[Byte]]): Boolean = {
+    val currentRowValue = columnToCurrentRowValueMap.get(columnName)
+    val valueFromQuery = valueFromQueryValueArray(valueFromQueryIndex)
+    currentRowValue != null &&
+      encoder.filter(currentRowValue.bytes, currentRowValue.offset, currentRowValue.length,
+        valueFromQuery, 0, valueFromQuery.length, filterOps)
+  }
+}
+
+@InterfaceAudience.Private
+class AndLogicExpression (val leftExpression:DynamicLogicExpression,
+                           val rightExpression:DynamicLogicExpression)
+  extends DynamicLogicExpression{
+  override def execute(columnToCurrentRowValueMap:
+                       util.HashMap[String, ByteArrayComparable],
+                       valueFromQueryValueArray:Array[Array[Byte]]): Boolean = {
+    leftExpression.execute(columnToCurrentRowValueMap, valueFromQueryValueArray) &&
+      rightExpression.execute(columnToCurrentRowValueMap, valueFromQueryValueArray)
+  }
+
+  override def appendToExpression(strBuilder: StringBuilder): Unit = {
+    strBuilder.append("( ")
+    strBuilder.append(leftExpression.toExpressionString)
+    strBuilder.append(" AND ")
+    strBuilder.append(rightExpression.toExpressionString)
+    strBuilder.append(" )")
+  }
+}
+
+@InterfaceAudience.Private
+class OrLogicExpression (val leftExpression:DynamicLogicExpression,
+                          val rightExpression:DynamicLogicExpression)
+  extends DynamicLogicExpression{
+  override def execute(columnToCurrentRowValueMap:
+                       util.HashMap[String, ByteArrayComparable],
+                       valueFromQueryValueArray:Array[Array[Byte]]): Boolean = {
+    leftExpression.execute(columnToCurrentRowValueMap, valueFromQueryValueArray) ||
+      rightExpression.execute(columnToCurrentRowValueMap, valueFromQueryValueArray)
+  }
+  override def appendToExpression(strBuilder: StringBuilder): Unit = {
+    strBuilder.append("( ")
+    strBuilder.append(leftExpression.toExpressionString)
+    strBuilder.append(" OR ")
+    strBuilder.append(rightExpression.toExpressionString)
+    strBuilder.append(" )")
+  }
+}
+
+@InterfaceAudience.Private
+class EqualLogicExpression (val columnName:String,
+                            val valueFromQueryIndex:Int,
+                            val isNot:Boolean) extends DynamicLogicExpression{
+  override def execute(columnToCurrentRowValueMap:
+                       util.HashMap[String, ByteArrayComparable],
+                       valueFromQueryValueArray:Array[Array[Byte]]): Boolean = {
+    val currentRowValue = columnToCurrentRowValueMap.get(columnName)
+    val valueFromQuery = valueFromQueryValueArray(valueFromQueryIndex)
+
+    currentRowValue != null &&
+      Bytes.equals(valueFromQuery,
+        0, valueFromQuery.length, currentRowValue.bytes,
+        currentRowValue.offset, currentRowValue.length) != isNot
+  }
+  override def appendToExpression(strBuilder: StringBuilder): Unit = {
+    val command = if (isNot) "!=" else "=="
+    strBuilder.append(columnName + " " + command + " " + valueFromQueryIndex)
+  }
+}
+
+@InterfaceAudience.Private
+class IsNullLogicExpression (val columnName:String,
+                             val isNot:Boolean) extends DynamicLogicExpression{
+  override def execute(columnToCurrentRowValueMap:
+                       util.HashMap[String, ByteArrayComparable],
+                       valueFromQueryValueArray:Array[Array[Byte]]): Boolean = {
+    val currentRowValue = columnToCurrentRowValueMap.get(columnName)
+
+    (currentRowValue == null) != isNot
+  }
+  override def appendToExpression(strBuilder: StringBuilder): Unit = {
+    val command = if (isNot) "isNotNull" else "isNull"
+    strBuilder.append(columnName + " " + command)
+  }
+}
+
+@InterfaceAudience.Private
+class GreaterThanLogicExpression (override val columnName:String,
+                                  override val valueFromQueryIndex:Int)
+  extends DynamicLogicExpression with CompareTrait{
+  override val filterOps = JavaBytesEncoder.Greater
+  override def appendToExpression(strBuilder: StringBuilder): Unit = {
+    strBuilder.append(columnName + " > " + valueFromQueryIndex)
+  }
+}
+
+@InterfaceAudience.Private
+class GreaterThanOrEqualLogicExpression (override val columnName:String,
+                                         override val valueFromQueryIndex:Int)
+  extends DynamicLogicExpression with CompareTrait{
+  override val filterOps = JavaBytesEncoder.GreaterEqual
+  override def appendToExpression(strBuilder: StringBuilder): Unit = {
+    strBuilder.append(columnName + " >= " + valueFromQueryIndex)
+  }
+}
+
+@InterfaceAudience.Private
+class LessThanLogicExpression (override val columnName:String,
+                               override val valueFromQueryIndex:Int)
+  extends DynamicLogicExpression with CompareTrait {
+  override val filterOps = JavaBytesEncoder.Less
+  override def appendToExpression(strBuilder: StringBuilder): Unit = {
+    strBuilder.append(columnName + " < " + valueFromQueryIndex)
+  }
+}
+
+@InterfaceAudience.Private
+class LessThanOrEqualLogicExpression (val columnName:String,
+                                      val valueFromQueryIndex:Int)
+  extends DynamicLogicExpression with CompareTrait{
+  override val filterOps = JavaBytesEncoder.LessEqual
+  override def appendToExpression(strBuilder: StringBuilder): Unit = {
+    strBuilder.append(columnName + " <= " + valueFromQueryIndex)
+  }
+}
+
+@InterfaceAudience.Private
+class PassThroughLogicExpression() extends DynamicLogicExpression {
+  override def execute(columnToCurrentRowValueMap:
+                       util.HashMap[String, ByteArrayComparable],
+                       valueFromQueryValueArray: Array[Array[Byte]]): Boolean = true
+
+  override def appendToExpression(strBuilder: StringBuilder): Unit = {
+    // Fix the offset bug by adding a dummy token to avoid crashing the region server.
+    // In the DynamicLogicExpressionBuilder.build function the command is always read from
+    // offset + 1 (val command = expressionArray(offSet + 1)), so we have to pad the
+    // expression so that `Pass` ends up at the right offset.
+    strBuilder.append("dummy Pass -1")
+  }
+}
+
+@InterfaceAudience.Private
+object DynamicLogicExpressionBuilder {
+  def build(expressionString: String, encoder: BytesEncoder): DynamicLogicExpression = {
+
+    val expressionAndOffset = build(expressionString.split(' '), 0, encoder)
+    expressionAndOffset._1
+  }
+
+  private def build(expressionArray:Array[String],
+                    offSet:Int, encoder: BytesEncoder): (DynamicLogicExpression, Int) = {
+    val expr = {
+      if (expressionArray(offSet).equals("(")) {
+        val left = build(expressionArray, offSet + 1, encoder)
+        val right = build(expressionArray, left._2 + 1, encoder)
+        if (expressionArray(left._2).equals("AND")) {
+          (new AndLogicExpression(left._1, right._1), right._2 + 1)
+        } else if (expressionArray(left._2).equals("OR")) {
+          (new OrLogicExpression(left._1, right._1), right._2 + 1)
+        } else {
+          throw new Throwable("Unknown gate:" + expressionArray(left._2))
+        }
+      } else {
+        val command = expressionArray(offSet + 1)
+        if (command.equals("<")) {
+          (new LessThanLogicExpression(expressionArray(offSet),
+            expressionArray(offSet + 2).toInt), offSet + 3)
+        } else if (command.equals("<=")) {
+          (new LessThanOrEqualLogicExpression(expressionArray(offSet),
+            expressionArray(offSet + 2).toInt), offSet + 3)
+        } else if (command.equals(">")) {
+          (new GreaterThanLogicExpression(expressionArray(offSet),
+            expressionArray(offSet + 2).toInt), offSet + 3)
+        } else if (command.equals(">=")) {
+          (new GreaterThanOrEqualLogicExpression(expressionArray(offSet),
+            expressionArray(offSet + 2).toInt), offSet + 3)
+        } else if (command.equals("==")) {
+          (new EqualLogicExpression(expressionArray(offSet),
+            expressionArray(offSet + 2).toInt, false), offSet + 3)
+        } else if (command.equals("!=")) {
+          (new EqualLogicExpression(expressionArray(offSet),
+            expressionArray(offSet + 2).toInt, true), offSet + 3)
+        } else if (command.equals("isNull")) {
+          (new IsNullLogicExpression(expressionArray(offSet), false), offSet + 2)
+        } else if (command.equals("isNotNull")) {
+          (new IsNullLogicExpression(expressionArray(offSet), true), offSet + 2)
+        } else if (command.equals("Pass")) {
+          (new PassThroughLogicExpression, offSet + 3)
+        } else {
+          throw new Throwable("Unknown logic command:" + command)
+        }
+      }
+    }
+    expr._1.setEncoder(encoder)
+    expr
+  }
+}
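
A minimal sketch of the string round-trip described above, assuming the caller already has some BytesEncoder implementation in hand; the expression string and column names are illustrative.

  import org.apache.hadoop.hbase.spark.DynamicLogicExpressionBuilder
  import org.apache.hadoop.hbase.spark.datasources.BytesEncoder

  def roundTrip(encoder: BytesEncoder): Unit = {
    // Parse the space-separated form into a logic tree...
    val expr = DynamicLogicExpressionBuilder.build("( Col0 == 0 AND Col1 < 1 )", encoder)
    // ...and write it back out; toExpressionString reproduces the same format.
    println(expr.toExpressionString)  // prints: ( Col0 == 0 AND Col1 < 1 )
  }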
diff --git a/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/FamiliesQualifiersValues.scala b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/FamiliesQualifiersValues.scala
new file mode 100644
index 0000000..7a651e1
--- /dev/null
+++ b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/FamiliesQualifiersValues.scala
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.spark
+
+import java.util
+
+import org.apache.yetus.audience.InterfaceAudience;
+
+/**
+ * This object is a clean way to store and sort all cells that will be bulk
+ * loaded into a single row
+ */
+@InterfaceAudience.Public
+class FamiliesQualifiersValues extends Serializable {
+  //Tree maps are used because we need the results to
+  // be sorted when we read them
+  val familyMap = new util.TreeMap[ByteArrayWrapper,
+    util.TreeMap[ByteArrayWrapper, Array[Byte]]]()
+
+  // Normally in a row there are more columns than
+  // column families; this wrapper is reused for column
+  // family look-ups
+  val reusableWrapper = new ByteArrayWrapper(null)
+
+  /**
+   * Adds a new cell to an existing row
+   * @param family    HBase column family
+   * @param qualifier HBase column qualifier
+   * @param value     HBase cell value
+   */
+  def += (family: Array[Byte], qualifier: Array[Byte], value: Array[Byte]): Unit = {
+
+    reusableWrapper.value = family
+
+    var qualifierValues = familyMap.get(reusableWrapper)
+
+    if (qualifierValues == null) {
+      qualifierValues = new util.TreeMap[ByteArrayWrapper, Array[Byte]]()
+      familyMap.put(new ByteArrayWrapper(family), qualifierValues)
+    }
+
+    qualifierValues.put(new ByteArrayWrapper(qualifier), value)
+  }
+
+  /**
+    * A wrapper for the "+=" method above; can be used from Java
+    * @param family    HBase column family
+    * @param qualifier HBase column qualifier
+    * @param value     HBase cell value
+    */
+  def add(family: Array[Byte], qualifier: Array[Byte], value: Array[Byte]): Unit = {
+    this += (family, qualifier, value)
+  }
+}
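
A short usage sketch for the class above; the family, qualifier, and value literals are placeholders.

  import org.apache.hadoop.hbase.spark.FamiliesQualifiersValues
  import org.apache.hadoop.hbase.util.Bytes

  val row = new FamiliesQualifiersValues
  // Scala callers can use the += operator directly...
  row += (Bytes.toBytes("cf1"), Bytes.toBytes("q1"), Bytes.toBytes("v1"))
  // ...while Java callers use the add() wrapper with the same arguments.
  row.add(Bytes.toBytes("cf1"), Bytes.toBytes("q2"), Bytes.toBytes("v2"))
  // familyMap is a TreeMap of TreeMaps, so families and qualifiers come back sorted.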
diff --git a/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/FamilyHFileWriteOptions.scala b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/FamilyHFileWriteOptions.scala
new file mode 100644
index 0000000..9ee9291
--- /dev/null
+++ b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/FamilyHFileWriteOptions.scala
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.spark
+
+import java.io.Serializable
+
+import org.apache.yetus.audience.InterfaceAudience;
+
+/**
+ * This object will hold optional data for how a given column family's
+ * writer will work
+ *
+ * @param compression       String to define the Compression to be used in the HFile
+ * @param bloomType         String to define the bloom type to be used in the HFile
+ * @param blockSize         The block size to be used in the HFile
+ * @param dataBlockEncoding String to define the data block encoding to be used
+ *                          in the HFile
+ */
+@InterfaceAudience.Public
+class FamilyHFileWriteOptions( val compression:String,
+                               val bloomType: String,
+                               val blockSize: Int,
+                               val dataBlockEncoding: String) extends Serializable
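
For example, the per-family options map consumed by the bulk load API later in this patch could be populated as below; the compression, bloom type, block size, and encoding values are illustrative, not recommendations.

  import java.util

  import org.apache.hadoop.hbase.spark.FamilyHFileWriteOptions
  import org.apache.hadoop.hbase.util.Bytes

  val familyOptions = new util.HashMap[Array[Byte], FamilyHFileWriteOptions]()
  // Arguments: compression, bloom type, block size, data block encoding.
  familyOptions.put(Bytes.toBytes("cf1"),
    new FamilyHFileWriteOptions("GZ", "ROW", 65536, "PREFIX"))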
diff --git a/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/HBaseConnectionCache.scala b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/HBaseConnectionCache.scala
new file mode 100644
index 0000000..138c224
--- /dev/null
+++ b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/HBaseConnectionCache.scala
@@ -0,0 +1,270 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.spark
+
+import java.io.IOException
+import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.hbase.client.Admin
+import org.apache.hadoop.hbase.client.Connection
+import org.apache.hadoop.hbase.client.ConnectionFactory
+import org.apache.hadoop.hbase.client.RegionLocator
+import org.apache.hadoop.hbase.client.Table
+import org.apache.hadoop.hbase.ipc.RpcControllerFactory
+import org.apache.hadoop.hbase.security.User
+import org.apache.hadoop.hbase.security.UserProvider
+import org.apache.hadoop.hbase.spark.datasources.HBaseSparkConf
+import org.apache.hadoop.hbase.HConstants
+import org.apache.hadoop.hbase.TableName
+import org.apache.yetus.audience.InterfaceAudience
+import scala.collection.mutable
+
+@InterfaceAudience.Private
+private[spark] object HBaseConnectionCache extends Logging {
+
+  // A hashmap of Spark-HBase connections. Key is HBaseConnectionKey.
+  val connectionMap = new mutable.HashMap[HBaseConnectionKey, SmartConnection]()
+
+  val cacheStat = HBaseConnectionCacheStat(0, 0, 0)
+
+  // in milliseconds
+  private final val DEFAULT_TIME_OUT: Long = HBaseSparkConf.DEFAULT_CONNECTION_CLOSE_DELAY
+  private var timeout = DEFAULT_TIME_OUT
+  private var closed: Boolean = false
+
+  var housekeepingThread = new Thread(new Runnable {
+    override def run() {
+      while (true) {
+        try {
+          Thread.sleep(timeout)
+        } catch {
+          case e: InterruptedException =>
+            // setTimeout() and close() may interrupt the sleep and it's safe
+            // to ignore the exception
+        }
+        if (closed)
+          return
+        performHousekeeping(false)
+      }
+    }
+  })
+  housekeepingThread.setDaemon(true)
+  housekeepingThread.start()
+
+  def getStat: HBaseConnectionCacheStat = {
+    connectionMap.synchronized {
+      cacheStat.numActiveConnections = connectionMap.size
+      cacheStat.copy()
+    }
+  }
+
+  def close(): Unit = {
+    try {
+      connectionMap.synchronized {
+        if (closed)
+          return
+        closed = true
+        housekeepingThread.interrupt()
+        housekeepingThread = null
+        HBaseConnectionCache.performHousekeeping(true)
+      }
+    } catch {
+      case e: Exception => logWarning("Error in finalHouseKeeping", e)
+    }
+  }
+
+  def performHousekeeping(forceClean: Boolean) = {
+    val tsNow: Long = System.currentTimeMillis()
+    connectionMap.synchronized {
+      connectionMap.foreach {
+        x => {
+          if(x._2.refCount < 0) {
+            logError(s"Bug to be fixed: negative refCount of connection ${x._2}")
+          }
+
+          if(forceClean || ((x._2.refCount <= 0) && (tsNow - x._2.timestamp > timeout))) {
+            try{
+              x._2.connection.close()
+            } catch {
+              case e: IOException => logWarning(s"Fail to close connection ${x._2}", e)
+            }
+            connectionMap.remove(x._1)
+          }
+        }
+      }
+    }
+  }
+
+  // For testing purposes only
+  def getConnection(key: HBaseConnectionKey, conn: => Connection): SmartConnection = {
+    connectionMap.synchronized {
+      if (closed)
+        return null
+      cacheStat.numTotalRequests += 1
+      val sc = connectionMap.getOrElseUpdate(key, {cacheStat.numActualConnectionsCreated += 1
+        new SmartConnection(conn)})
+      sc.refCount += 1
+      sc
+    }
+  }
+
+  def getConnection(conf: Configuration): SmartConnection =
+    getConnection(new HBaseConnectionKey(conf), ConnectionFactory.createConnection(conf))
+
+  // For testing purposes only
+  def setTimeout(to: Long): Unit  = {
+    connectionMap.synchronized {
+      if (closed)
+        return
+      timeout = to
+      housekeepingThread.interrupt()
+    }
+  }
+}
+
+@InterfaceAudience.Private
+private[hbase] case class SmartConnection (
+    connection: Connection, var refCount: Int = 0, var timestamp: Long = 0) {
+  def getTable(tableName: TableName): Table = connection.getTable(tableName)
+  def getRegionLocator(tableName: TableName): RegionLocator = connection.getRegionLocator(tableName)
+  def isClosed: Boolean = connection.isClosed
+  def getAdmin: Admin = connection.getAdmin
+  def close() = {
+    HBaseConnectionCache.connectionMap.synchronized {
+      refCount -= 1
+      if(refCount <= 0)
+        timestamp = System.currentTimeMillis()
+    }
+  }
+}
+
+/**
+ * Denotes a unique key to an HBase Connection instance.
+ * Please refer to 'org.apache.hadoop.hbase.client.HConnectionKey'.
+ *
+ * In essence, this class captures the properties in Configuration
+ * that may be used in the process of establishing a connection.
+ *
+ */
+@InterfaceAudience.Private
+class HBaseConnectionKey(c: Configuration) extends Logging {
+  val conf: Configuration = c
+  val CONNECTION_PROPERTIES: Array[String] = Array[String](
+    HConstants.ZOOKEEPER_QUORUM,
+    HConstants.ZOOKEEPER_ZNODE_PARENT,
+    HConstants.ZOOKEEPER_CLIENT_PORT,
+    HConstants.HBASE_CLIENT_PAUSE,
+    HConstants.HBASE_CLIENT_RETRIES_NUMBER,
+    HConstants.HBASE_RPC_TIMEOUT_KEY,
+    HConstants.HBASE_META_SCANNER_CACHING,
+    HConstants.HBASE_CLIENT_INSTANCE_ID,
+    HConstants.RPC_CODEC_CONF_KEY,
+    HConstants.USE_META_REPLICAS,
+    RpcControllerFactory.CUSTOM_CONTROLLER_CONF_KEY)
+
+  var username: String = _
+  var m_properties = mutable.HashMap.empty[String, String]
+  if (conf != null) {
+    for (property <- CONNECTION_PROPERTIES) {
+      val value: String = conf.get(property)
+      if (value != null) {
+        m_properties.+=((property, value))
+      }
+    }
+    try {
+      val provider: UserProvider = UserProvider.instantiate(conf)
+      val currentUser: User = provider.getCurrent
+      if (currentUser != null) {
+        username = currentUser.getName
+      }
+    }
+    catch {
+      case e: IOException => {
+        logWarning("Error obtaining current user, skipping username in HBaseConnectionKey", e)
+      }
+    }
+  }
+
+  // make 'properties' immutable
+  val properties = m_properties.toMap
+
+  override def hashCode: Int = {
+    val prime: Int = 31
+    var result: Int = 1
+    if (username != null) {
+      result = username.hashCode
+    }
+    for (property <- CONNECTION_PROPERTIES) {
+      val value: Option[String] = properties.get(property)
+      if (value.isDefined) {
+        result = prime * result + value.hashCode
+      }
+    }
+    result
+  }
+
+  override def equals(obj: Any): Boolean = {
+    if (obj == null) return false
+    if (getClass ne obj.getClass) return false
+    val that: HBaseConnectionKey = obj.asInstanceOf[HBaseConnectionKey]
+    if (this.username != null && !(this.username == that.username)) {
+      return false
+    }
+    else if (this.username == null && that.username != null) {
+      return false
+    }
+    if (this.properties == null) {
+      if (that.properties != null) {
+        return false
+      }
+    }
+    else {
+      if (that.properties == null) {
+        return false
+      }
+      var flag: Boolean = true
+      for (property <- CONNECTION_PROPERTIES) {
+        val thisValue: Option[String] = this.properties.get(property)
+        val thatValue: Option[String] = that.properties.get(property)
+        flag = true
+        if (thisValue eq thatValue) {
+          flag = false //continue, so make flag be false
+        }
+        if (flag && (thisValue == null || !(thisValue == thatValue))) {
+          return false
+        }
+      }
+    }
+    true
+  }
+
+  override def toString: String = {
+    "HBaseConnectionKey{" + "properties=" + properties + ", username='" + username + '\'' + '}'
+  }
+}
+
+/**
+ * To log the state of 'HBaseConnectionCache'
+ *
+ * @param numTotalRequests number of total connection requests to the cache
+ * @param numActualConnectionsCreated number of actual HBase connections the cache ever created
+ * @param numActiveConnections number of current alive HBase connections the cache is holding
+ */
+@InterfaceAudience.Private
+case class HBaseConnectionCacheStat(var numTotalRequests: Long,
+                                    var numActualConnectionsCreated: Long,
+                                    var numActiveConnections: Long)
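
Because HBaseConnectionCache is private[spark], only code inside the org.apache.hadoop.hbase.spark package calls it directly; a minimal sketch of that internal usage follows (the object and table names are placeholders).

  package org.apache.hadoop.hbase.spark  // required: the cache object is private[spark]

  import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
  import org.apache.hadoop.hbase.client.Get
  import org.apache.hadoop.hbase.util.Bytes

  object ConnectionCacheSketch {
    def main(args: Array[String]): Unit = {
      val conf = HBaseConfiguration.create()
      // Reuses a cached Connection for this configuration, or creates one on first use.
      val smartConn = HBaseConnectionCache.getConnection(conf)
      val table = smartConn.getTable(TableName.valueOf("my_table"))
      try {
        table.get(new Get(Bytes.toBytes("row-001")))
      } finally {
        table.close()
        // close() only decrements the reference count; the housekeeping thread closes
        // the underlying Connection once it has been idle longer than the timeout.
        smartConn.close()
      }
    }
  }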
diff --git a/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/HBaseContext.scala b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/HBaseContext.scala
new file mode 100644
index 0000000..e50a3e8
--- /dev/null
+++ b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/HBaseContext.scala
@@ -0,0 +1,1126 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.spark
+
+import java.net.InetSocketAddress
+import java.util
+import java.util.UUID
+import javax.management.openmbean.KeyAlreadyExistsException
+
+import org.apache.yetus.audience.InterfaceAudience;
+import org.apache.hadoop.hbase.fs.HFileSystem
+import org.apache.hadoop.hbase._
+import org.apache.hadoop.hbase.io.compress.Compression
+import org.apache.hadoop.hbase.io.compress.Compression.Algorithm
+import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding
+import org.apache.hadoop.hbase.io.hfile.{HFile, CacheConfig, HFileContextBuilder, HFileWriterImpl}
+import org.apache.hadoop.hbase.regionserver.{HStore, HStoreFile, StoreFileWriter, BloomType}
+import org.apache.hadoop.hbase.util.Bytes
+import org.apache.hadoop.mapred.JobConf
+import org.apache.spark.broadcast.Broadcast
+import org.apache.spark.deploy.SparkHadoopUtil
+import org.apache.spark.rdd.RDD
+import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._
+import org.apache.hadoop.hbase.client._
+import scala.reflect.ClassTag
+import org.apache.spark.{SerializableWritable, SparkContext}
+import org.apache.hadoop.hbase.mapreduce.{TableMapReduceUtil,
+TableInputFormat, IdentityTableMapper}
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable
+import org.apache.hadoop.mapreduce.Job
+import org.apache.spark.streaming.dstream.DStream
+import java.io._
+import org.apache.hadoop.security.UserGroupInformation
+import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod
+import org.apache.hadoop.fs.{Path, FileAlreadyExistsException, FileSystem}
+import scala.collection.mutable
+
+/**
+  * HBaseContext is a façade for HBase operations
+  * like bulk put, get, increment, delete, and scan
+  *
+  * HBaseContext takes on the responsibilities
+  * of disseminating the configuration information
+  * to the worker nodes and of managing the life cycle of Connections.
+ */
+@InterfaceAudience.Public
+class HBaseContext(@transient val sc: SparkContext,
+                   @transient val config: Configuration,
+                   val tmpHdfsConfgFile: String = null)
+  extends Serializable with Logging {
+
+  @transient var credentials = UserGroupInformation.getCurrentUser().getCredentials()
+  @transient var tmpHdfsConfiguration:Configuration = config
+  @transient var appliedCredentials = false
+  @transient val job = Job.getInstance(config)
+  TableMapReduceUtil.initCredentials(job)
+  val broadcastedConf = sc.broadcast(new SerializableWritable(config))
+  val credentialsConf = sc.broadcast(new SerializableWritable(job.getCredentials))
+
+  LatestHBaseContextCache.latest = this
+
+  if (tmpHdfsConfgFile != null && config != null) {
+    val fs = FileSystem.newInstance(config)
+    val tmpPath = new Path(tmpHdfsConfgFile)
+    if (!fs.exists(tmpPath)) {
+      val outputStream = fs.create(tmpPath)
+      config.write(outputStream)
+      outputStream.close()
+    } else {
+      logWarning("tmpHdfsConfigDir " + tmpHdfsConfgFile + " exist!!")
+    }
+  }
+
+  /**
+   * A simple enrichment of the traditional Spark RDD foreachPartition.
+   * This function differs from the original in that it offers the
+   * developer access to an already connected Connection object
+   *
+   * Note: Do not close the Connection object.  All Connection
+   * management is handled outside this method
+   *
+   * @param rdd  Original RDD with data to iterate over
+   * @param f    Function to be given an iterator to iterate through
+   *             the RDD values and a Connection object to interact
+   *             with HBase
+   */
+  def foreachPartition[T](rdd: RDD[T],
+                          f: (Iterator[T], Connection) => Unit):Unit = {
+    rdd.foreachPartition(
+      it => hbaseForeachPartition(broadcastedConf, it, f))
+  }
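// Usage sketch for the RDD foreachPartition above (illustrative names): assumes an
// HBaseContext created as `new HBaseContext(sc, config)` named `hbaseContext` and an
// RDD[Array[Byte]] of row keys named `rowKeyRdd`.
hbaseContext.foreachPartition(rowKeyRdd, (it: Iterator[Array[Byte]], conn: Connection) => {
  val table = conn.getTable(TableName.valueOf("my_table"))
  it.foreach(rowKey => table.get(new Get(rowKey)))
  table.close()  // close the Table, but never the shared Connection
})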
+
+  /**
+   * A simple enrichment of the traditional Spark Streaming dStream foreach
+   * This function differs from the original in that it offers the
+   * developer access to an already connected Connection object
+   *
+   * Note: Do not close the Connection object.  All Connection
+   * management is handled outside this method
+   *
+   * @param dstream  Original DStream with data to iterate over
+   * @param f        Function to be given an iterator to iterate through
+   *                 the DStream values and a Connection object to
+   *                 interact with HBase
+   */
+  def foreachPartition[T](dstream: DStream[T],
+                    f: (Iterator[T], Connection) => Unit):Unit = {
+    dstream.foreachRDD((rdd, time) => {
+      foreachPartition(rdd, f)
+    })
+  }
+
+  /**
+   * A simple enrichment of the traditional Spark RDD mapPartition.
+   * This function differs from the original in that it offers the
+   * developer access to an already connected Connection object
+   *
+   * Note: Do not close the Connection object.  All Connection
+   * management is handled outside this method
+   *
+   * @param rdd  Original RDD with data to iterate over
+   * @param mp   Function to be given an iterator to iterate through
+   *             the RDD values and a Connection object to interact
+   *             with HBase
+   * @return     Returns a new RDD generated by the user-defined
+   *             function just like normal mapPartitions
+   */
+  def mapPartitions[T, R: ClassTag](rdd: RDD[T],
+                                   mp: (Iterator[T], Connection) => Iterator[R]): RDD[R] = {
+
+    rdd.mapPartitions[R](it => hbaseMapPartition[T, R](broadcastedConf,
+      it,
+      mp))
+
+  }
+
+  /**
+   * A simple enrichment of the traditional Spark Streaming DStream
+   * foreachPartition.
+   *
+   * This function differs from the original in that it offers the
+   * developer access to an already connected Connection object
+   *
+   * Note: Do not close the Connection object.  All Connection
+   * management is handled outside this method
+   *
+   * Note: Make sure to partition correctly to avoid memory issues when
+   *       getting data from HBase
+   *
+   * @param dstream  Original DStream with data to iterate over
+   * @param f       Function to be given an iterator to iterate through
+   *                 the DStream values and a Connection object to
+   *                 interact with HBase
+   */
+  def streamForeachPartition[T](dstream: DStream[T],
+                                f: (Iterator[T], Connection) => Unit): Unit = {
+
+    dstream.foreachRDD(rdd => this.foreachPartition(rdd, f))
+  }
+
+  /**
+   * A simple enrichment of the traditional Spark Streaming DStream
+   * mapPartition.
+   *
+   * This function differs from the original in that it offers the
+   * developer access to an already connected Connection object
+   *
+   * Note: Do not close the Connection object.  All Connection
+   * management is handled outside this method
+   *
+   * Note: Make sure to partition correctly to avoid memory issues when
+   *       getting data from HBase
+   *
+   * @param dstream  Original DStream with data to iterate over
+   * @param f       Function to be given an iterator to iterate through
+   *                 the DStream values and a Connection object to
+   *                 interact with HBase
+   * @return         Returns a new DStream generated by the user-defined
+   *                 function just like normal mapPartitions
+   */
+  def streamMapPartitions[T, U: ClassTag](dstream: DStream[T],
+                                f: (Iterator[T], Connection) => Iterator[U]):
+  DStream[U] = {
+    dstream.mapPartitions(it => hbaseMapPartition[T, U](
+      broadcastedConf,
+      it,
+      f))
+  }
+
+  /**
+   * A simple abstraction over the HBaseContext.foreachPartition method.
+   *
+   * It allows a user to take an RDD,
+   * generate Puts from it, and send them to HBase.
+   * The complexity of managing the Connection is
+   * removed from the developer
+   *
+   * @param rdd       Original RDD with data to iterate over
+   * @param tableName The name of the table to put into
+   * @param f         Function to convert a value in the RDD to a HBase Put
+   */
+  def bulkPut[T](rdd: RDD[T], tableName: TableName, f: (T) => Put) {
+
+    val tName = tableName.getName
+    rdd.foreachPartition(
+      it => hbaseForeachPartition[T](
+        broadcastedConf,
+        it,
+        (iterator, connection) => {
+          val m = connection.getBufferedMutator(TableName.valueOf(tName))
+          iterator.foreach(T => m.mutate(f(T)))
+          m.flush()
+          m.close()
+        }))
+  }
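// Usage sketch for bulkPut (illustrative names): assumes a SparkContext `sc`, an HBase
// Configuration `config`, and an RDD of (rowKey, value) byte-array pairs named `putRdd`.
val hbaseContext = new HBaseContext(sc, config)
hbaseContext.bulkPut[(Array[Byte], Array[Byte])](putRdd,
  TableName.valueOf("my_table"),
  (kv) => {
    val put = new Put(kv._1)
    put.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("q1"), kv._2)
    put
  })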
+
+  def applyCreds[T] (){
+    credentials = UserGroupInformation.getCurrentUser().getCredentials()
+
+    if (log.isDebugEnabled) {
+      logDebug("appliedCredentials:" + appliedCredentials + ",credentials:" + credentials)
+    }
+
+    if (!appliedCredentials && credentials != null) {
+      appliedCredentials = true
+
+      @transient val ugi = UserGroupInformation.getCurrentUser
+      ugi.addCredentials(credentials)
+      // specify that this is a proxy user
+      ugi.setAuthenticationMethod(AuthenticationMethod.PROXY)
+
+      ugi.addCredentials(credentialsConf.value.value)
+    }
+  }
+
+  /**
+   * A simple abstraction over the HBaseContext.streamMapPartition method.
+   *
+   * It allows a user to take a DStream,
+   * generate Puts from it, and send them to HBase.
+   *
+   * The complexity of managing the Connection is
+   * removed from the developer
+   *
+   * @param dstream    Original DStream with data to iterate over
+   * @param tableName  The name of the table to put into
+   * @param f          Function to convert a value in
+   *                   the DStream to a HBase Put
+   */
+  def streamBulkPut[T](dstream: DStream[T],
+                       tableName: TableName,
+                       f: (T) => Put) = {
+    val tName = tableName.getName
+    dstream.foreachRDD((rdd, time) => {
+      bulkPut(rdd, TableName.valueOf(tName), f)
+    })
+  }
+
+  /**
+   * A simple abstraction over the HBaseContext.foreachPartition method.
+   *
+   * It allows a user to take an RDD, generate Deletes
+   * from it, and send them to HBase.  The complexity of managing the Connection is
+   * removed from the developer
+   *
+   * @param rdd       Original RDD with data to iterate over
+   * @param tableName The name of the table to delete from
+   * @param f         Function to convert a value in the RDD to a
+   *                  HBase Delete
+   * @param batchSize The number of deletes to batch before sending to HBase
+   */
+  def bulkDelete[T](rdd: RDD[T], tableName: TableName,
+                    f: (T) => Delete, batchSize: Integer) {
+    bulkMutation(rdd, tableName, f, batchSize)
+  }
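// Usage sketch for bulkDelete (illustrative names): assumes an HBaseContext
// `hbaseContext` and an RDD[Array[Byte]] of row keys to remove named `deleteRdd`.
hbaseContext.bulkDelete[Array[Byte]](deleteRdd,
  TableName.valueOf("my_table"),
  rowKey => new Delete(rowKey),
  4)  // batchSize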
+
+  /**
+   * A simple abstraction over the HBaseContext.streamBulkMutation method.
+   *
+   * It allows a user to take a DStream,
+   * generate Deletes from it, and send them to HBase.
+   *
+   * The complexity of managing the Connection is
+   * removed from the developer
+   *
+   * @param dstream    Original DStream with data to iterate over
+   * @param tableName  The name of the table to delete from
+   * @param f          function to convert a value in the DStream to a
+   *                   HBase Delete
+   * @param batchSize        The number of deletes to batch before sending to HBase
+   */
+  def streamBulkDelete[T](dstream: DStream[T],
+                          tableName: TableName,
+                          f: (T) => Delete,
+                          batchSize: Integer) = {
+    streamBulkMutation(dstream, tableName, f, batchSize)
+  }
+
+  /**
+   *  Underlying function to support all bulk mutations
+   *
+   *  May be opened up if requested
+   */
+  private def bulkMutation[T](rdd: RDD[T], tableName: TableName,
+                              f: (T) => Mutation, batchSize: Integer) {
+
+    val tName = tableName.getName
+    rdd.foreachPartition(
+      it => hbaseForeachPartition[T](
+        broadcastedConf,
+        it,
+        (iterator, connection) => {
+          val table = connection.getTable(TableName.valueOf(tName))
+          val mutationList = new java.util.ArrayList[Mutation]
+          iterator.foreach(T => {
+            mutationList.add(f(T))
+            if (mutationList.size >= batchSize) {
+              table.batch(mutationList, null)
+              mutationList.clear()
+            }
+          })
+          if (mutationList.size() > 0) {
+            table.batch(mutationList, null)
+            mutationList.clear()
+          }
+          table.close()
+        }))
+  }
+
+  /**
+   *  Underlying function to support all bulk streaming mutations
+   *
+   *  May be opened up if requested
+   */
+  private def streamBulkMutation[T](dstream: DStream[T],
+                                    tableName: TableName,
+                                    f: (T) => Mutation,
+                                    batchSize: Integer) = {
+    val tName = tableName.getName
+    dstream.foreachRDD((rdd, time) => {
+      bulkMutation(rdd, TableName.valueOf(tName), f, batchSize)
+    })
+  }
+
+  /**
+   * A simple abstraction over the HBaseContext.mapPartition method.
+   *
+   * It allows a user to take an RDD and generate a
+   * new RDD based on Gets and the results they bring back from HBase
+   *
+   * @param rdd           Original RDD with data to iterate over
+   * @param tableName     The name of the table to get from
+   * @param batchSize     The number of Gets to be sent in a single batch
+   * @param makeGet       Function to convert a value in the RDD to a
+   *                      HBase Get
+   * @param convertResult This will convert the HBase Result object to
+   *                      whatever the user wants to put in the resulting
+   *                      RDD
+   * @return              New RDD that is created by the Gets to HBase
+   */
+  def bulkGet[T, U: ClassTag](tableName: TableName,
+                    batchSize: Integer,
+                    rdd: RDD[T],
+                    makeGet: (T) => Get,
+                    convertResult: (Result) => U): RDD[U] = {
+
+    val getMapPartition = new GetMapPartition(tableName,
+      batchSize,
+      makeGet,
+      convertResult)
+
+    rdd.mapPartitions[U](it =>
+      hbaseMapPartition[T, U](
+        broadcastedConf,
+        it,
+        getMapPartition.run))
+  }
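// Usage sketch for bulkGet (illustrative names): assumes an HBaseContext `hbaseContext`
// and an RDD[Array[Byte]] of row keys named `rowKeyRdd`; each Result is assumed to be
// non-empty and is mapped to the string form of its row key.
val rowStrings = hbaseContext.bulkGet[Array[Byte], String](
  TableName.valueOf("my_table"),
  2,
  rowKeyRdd,
  rowKey => new Get(rowKey),
  (result: Result) => Bytes.toString(result.getRow))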
+
+  /**
+   * A simple abstraction over the HBaseContext.streamMap method.
+   *
+   * It allows a user to take a DStream and
+   * generate a new DStream based on Gets and the results
+   * they bring back from HBase
+   *
+   * @param tableName     The name of the table to get from
+   * @param batchSize     The number of Gets to be sent in a single batch
+   * @param dStream       Original DStream with data to iterate over
+   * @param makeGet       Function to convert a value in the DStream to a
+   *                      HBase Get
+   * @param convertResult This will convert the HBase Result object to
+   *                      whatever the user wants to put in the resulting
+   *                      DStream
+   * @return              A new DStream that is created by the Get to HBase
+   */
+  def streamBulkGet[T, U: ClassTag](tableName: TableName,
+                                    batchSize: Integer,
+                                    dStream: DStream[T],
+                                    makeGet: (T) => Get,
+                                    convertResult: (Result) => U): DStream[U] = {
+
+    val getMapPartition = new GetMapPartition(tableName,
+      batchSize,
+      makeGet,
+      convertResult)
+
+    dStream.mapPartitions[U](it => hbaseMapPartition[T, U](
+      broadcastedConf,
+      it,
+      getMapPartition.run))
+  }
+
+  /**
+   * This function will use the native HBase TableInputFormat with the
+   * given scan object to generate a new RDD
+   *
+   *  @param tableName the name of the table to scan
+   *  @param scan      the HBase scan object to use to read data from HBase
+   *  @param f         function to convert a Result object from HBase into
+   *                   what the user wants in the final generated RDD
+   *  @return          new RDD with results from scan
+   */
+  def hbaseRDD[U: ClassTag](tableName: TableName, scan: Scan,
+                            f: ((ImmutableBytesWritable, Result)) => U): RDD[U] = {
+
+    val job: Job = Job.getInstance(getConf(broadcastedConf))
+
+    TableMapReduceUtil.initCredentials(job)
+    TableMapReduceUtil.initTableMapperJob(tableName, scan,
+      classOf[IdentityTableMapper], null, null, job)
+
+    val jconf = new JobConf(job.getConfiguration)
+    SparkHadoopUtil.get.addCredentials(jconf)
+    new NewHBaseRDD(sc,
+      classOf[TableInputFormat],
+      classOf[ImmutableBytesWritable],
+      classOf[Result],
+      job.getConfiguration,
+      this).map(f)
+  }
+
+  /**
+   * An overloaded version of HBaseContext hbaseRDD that defines the
+   * type of the resulting RDD
+   *
+   *  @param tableName the name of the table to scan
+   *  @param scans     the HBase scan object to use to read data from HBase
+   *  @return          New RDD with results from scan
+   *
+   */
+  def hbaseRDD(tableName: TableName, scans: Scan):
+  RDD[(ImmutableBytesWritable, Result)] = {
+
+    hbaseRDD[(ImmutableBytesWritable, Result)](
+      tableName,
+      scans,
+      (r: (ImmutableBytesWritable, Result)) => r)
+  }
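// Usage sketch for hbaseRDD (illustrative names): assumes an HBaseContext `hbaseContext`;
// scans the whole table with a caching hint and keeps only the stringified row keys.
val scan = new Scan()
scan.setCaching(100)
val rowKeys = hbaseContext.hbaseRDD(TableName.valueOf("my_table"), scan)
  .map { case (_, result) => Bytes.toString(result.getRow) }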
+
+  /**
+   *  Underlying wrapper for all foreach functions in HBaseContext
+   */
+  private def hbaseForeachPartition[T](configBroadcast:
+                                       Broadcast[SerializableWritable[Configuration]],
+                                        it: Iterator[T],
+                                        f: (Iterator[T], Connection) => Unit) = {
+
+    val config = getConf(configBroadcast)
+
+    applyCreds
+    // specify that this is a proxy user
+    val smartConn = HBaseConnectionCache.getConnection(config)
+    f(it, smartConn.connection)
+    smartConn.close()
+  }
+
+  private def getConf(configBroadcast: Broadcast[SerializableWritable[Configuration]]):
+  Configuration = {
+
+    if (tmpHdfsConfiguration == null && tmpHdfsConfgFile != null) {
+      val fs = FileSystem.newInstance(SparkHadoopUtil.get.conf)
+      val inputStream = fs.open(new Path(tmpHdfsConfgFile))
+      tmpHdfsConfiguration = new Configuration(false)
+      tmpHdfsConfiguration.readFields(inputStream)
+      inputStream.close()
+    }
+
+    if (tmpHdfsConfiguration == null) {
+      try {
+        tmpHdfsConfiguration = configBroadcast.value.value
+      } catch {
+        case ex: Exception => logError("Unable to getConfig from broadcast", ex)
+      }
+    }
+    tmpHdfsConfiguration
+  }
+
+  /**
+   *  Underlying wrapper for all mapPartition functions in HBaseContext
+   *
+   */
+  private def hbaseMapPartition[K, U](
+                                       configBroadcast:
+                                       Broadcast[SerializableWritable[Configuration]],
+                                       it: Iterator[K],
+                                       mp: (Iterator[K], Connection) =>
+                                         Iterator[U]): Iterator[U] = {
+
+    val config = getConf(configBroadcast)
+    applyCreds
+
+    val smartConn = HBaseConnectionCache.getConnection(config)
+    val res = mp(it, smartConn.connection)
+    smartConn.close()
+    res
+  }
+
+  /**
+   *  Underlying wrapper for all get mapPartition functions in HBaseContext
+   */
+  private class GetMapPartition[T, U](tableName: TableName,
+                                      batchSize: Integer,
+                                      makeGet: (T) => Get,
+                                      convertResult: (Result) => U)
+    extends Serializable {
+
+    val tName = tableName.getName
+
+    def run(iterator: Iterator[T], connection: Connection): Iterator[U] = {
+      val table = connection.getTable(TableName.valueOf(tName))
+
+      val gets = new java.util.ArrayList[Get]()
+      var res = List[U]()
+
+      while (iterator.hasNext) {
+        gets.add(makeGet(iterator.next()))
+
+        if (gets.size() == batchSize) {
+          val results = table.get(gets)
+          res = res ++ results.map(convertResult)
+          gets.clear()
+        }
+      }
+      if (gets.size() > 0) {
+        val results = table.get(gets)
+        res = res ++ results.map(convertResult)
+        gets.clear()
+      }
+      table.close()
+      res.iterator
+    }
+  }
+
+  /**
+   * Produces a ClassTag[T], which is actually just a casted ClassTag[AnyRef].
+   *
+   * This method is used to keep ClassTags out of the external Java API, as
+   * the Java compiler cannot produce them automatically. While this
+   * ClassTag-faking does please the compiler, it can cause problems at runtime
+   * if the Scala API relies on ClassTags for correctness.
+   *
+   * Often, though, a ClassTag[AnyRef] will not lead to incorrect behavior,
+   * just worse performance or security issues.
+   * For instance, an Array of AnyRef can hold any type T, but may lose primitive
+   * specialization.
+   */
+  private[spark]
+  def fakeClassTag[T]: ClassTag[T] = ClassTag.AnyRef.asInstanceOf[ClassTag[T]]
+
+  /**
+   * Spark Implementation of HBase Bulk load for wide rows or when
+   * values are not already combined at the time of the map process
+   *
+   * This will take the content from an existing RDD then sort and shuffle
+   * it with respect to region splits.  The result of that sort and shuffle
+   * will be written to HFiles.
+   *
+   * After this function is executed the user will have to call
+   * LoadIncrementalHFiles.doBulkLoad(...) to move the files into HBase
+   *
+   * Also note this version of bulk load is different from past versions in
+   * that it includes the qualifier as part of the sort process. The
+   * reason for this is to be able to support rows with a very large number
+   * of columns.
+   *
+   * @param rdd                            The RDD we are bulk loading from
+   * @param tableName                      The HBase table we are loading into
+   * @param flatMap                        A flatMap function that will make every
+   *                                       row in the RDD
+   *                                       into N cells for the bulk load
+   * @param stagingDir                     The location on the FileSystem to bulk load into
+   * @param familyHFileWriteOptionsMap     Options that will define how the HFile for a
+   *                                       column family is written
+   * @param compactionExclude              Compaction excluded for the HFiles
+   * @param maxSize                        Max size for the HFiles before they roll
+   * @tparam T                             The Type of values in the original RDD
+   */
+  def bulkLoad[T](rdd:RDD[T],
+                  tableName: TableName,
+                  flatMap: (T) => Iterator[(KeyFamilyQualifier, Array[Byte])],
+                  stagingDir:String,
+                  familyHFileWriteOptionsMap:
+                  util.Map[Array[Byte], FamilyHFileWriteOptions] =
+                  new util.HashMap[Array[Byte], FamilyHFileWriteOptions],
+                  compactionExclude: Boolean = false,
+                  maxSize:Long = HConstants.DEFAULT_MAX_FILE_SIZE):
+  Unit = {
+    val stagingPath = new Path(stagingDir)
+    val fs = stagingPath.getFileSystem(config)
+    if (fs.exists(stagingPath)) {
+      throw new FileAlreadyExistsException("Path " + stagingDir + " already exists")
+    }
+    val conn = HBaseConnectionCache.getConnection(config)
+    try {
+      val regionLocator = conn.getRegionLocator(tableName)
+      val startKeys = regionLocator.getStartKeys
+      if (startKeys.length == 0) {
+        logInfo("Table " + tableName.toString + " was not found")
+      }
+      val defaultCompressionStr = config.get("hfile.compression",
+        Compression.Algorithm.NONE.getName)
+      val hfileCompression = HFileWriterImpl
+        .compressionByName(defaultCompressionStr)
+      val nowTimeStamp = System.currentTimeMillis()
+      val tableRawName = tableName.getName
+
+      val familyHFileWriteOptionsMapInternal =
+        new util.HashMap[ByteArrayWrapper, FamilyHFileWriteOptions]
+
+      val entrySetIt = familyHFileWriteOptionsMap.entrySet().iterator()
+
+      while (entrySetIt.hasNext) {
+        val entry = entrySetIt.next()
+        familyHFileWriteOptionsMapInternal.put(new ByteArrayWrapper(entry.getKey), entry.getValue)
+      }
+
+      val regionSplitPartitioner =
+        new BulkLoadPartitioner(startKeys)
+
+      //This is where all the magic happens
+      //Here we are going to do the following things
+      // 1. FlatMap every row in the RDD into key column value tuples
+      // 2. Then we are going to repartition, sort and shuffle
+      // 3. Finally we are going to write out our HFiles
+      rdd.flatMap( r => flatMap(r)).
+        repartitionAndSortWithinPartitions(regionSplitPartitioner).
+        hbaseForeachPartition(this, (it, conn) => {
+
+          val conf = broadcastedConf.value.value
+          val fs = FileSystem.get(conf)
+          val writerMap = new mutable.HashMap[ByteArrayWrapper, WriterLength]
+          var previousRow:Array[Byte] = HConstants.EMPTY_BYTE_ARRAY
+          var rollOverRequested = false
+          val localTableName = TableName.valueOf(tableRawName)
+
+          //Here is where we finally iterate through the data in this partition of the
+          //RDD that has been sorted and partitioned
+          it.foreach{ case (keyFamilyQualifier, cellValue:Array[Byte]) =>
+
+            val wl = writeValueToHFile(keyFamilyQualifier.rowKey,
+              keyFamilyQualifier.family,
+              keyFamilyQualifier.qualifier,
+              cellValue,
+              nowTimeStamp,
+              fs,
+              conn,
+              localTableName,
+              conf,
+              familyHFileWriteOptionsMapInternal,
+              hfileCompression,
+              writerMap,
+              stagingDir)
+
+            rollOverRequested = rollOverRequested || wl.written > maxSize
+
+            //This will only roll if we have at least one column family file that is
+            //bigger than maxSize and we have finished a given row key
+            if (rollOverRequested && Bytes.compareTo(previousRow, keyFamilyQualifier.rowKey) != 0) {
+              rollWriters(fs, writerMap,
+                regionSplitPartitioner,
+                previousRow,
+                compactionExclude)
+              rollOverRequested = false
+            }
+
+            previousRow = keyFamilyQualifier.rowKey
+          }
+          //We have finished all the data so let's close up the writers
+          rollWriters(fs, writerMap,
+            regionSplitPartitioner,
+            previousRow,
+            compactionExclude)
+          rollOverRequested = false
+        })
+    } finally {
+      if(null != conn) conn.close()
+    }
+  }
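+
+  // A minimal usage sketch for bulkLoad above.  The SparkContext sc, the Configuration
+  // config, the table "t1", the family "cf1" and the staging path are illustrative
+  // assumptions, not part of this module:
+  //
+  //   val hbaseContext = new HBaseContext(sc, config)
+  //   val cellRDD = sc.parallelize(Seq(
+  //     (Bytes.toBytes("row1"), Bytes.toBytes("v1")),
+  //     (Bytes.toBytes("row2"), Bytes.toBytes("v2"))))
+  //   hbaseContext.bulkLoad[(Array[Byte], Array[Byte])](cellRDD, TableName.valueOf("t1"),
+  //     { case (rowKey, value) =>
+  //       Seq((new KeyFamilyQualifier(rowKey, Bytes.toBytes("cf1"), Bytes.toBytes("q1")),
+  //         value)).iterator
+  //     },
+  //     "/tmp/bulkLoadStaging")
+  //
+  // Once the HFiles are written, LoadIncrementalHFiles.doBulkLoad(...) still has to be
+  // run against the staging directory to move them into the table.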
+
+  /**
+   * Spark Implementation of HBase Bulk load for short rows, somewhere less than
+   * 1000 columns.  This bulk load should be faster for tables with thinner
+   * rows than the other Spark implementation of bulk load, which puts only one
+   * value into a record going into a shuffle.
+   *
+   * This will take the content from an existing RDD then sort and shuffle
+   * it with respect to region splits.  The result of that sort and shuffle
+   * will be written to HFiles.
+   *
+   * After this function is executed the user will have to call
+   * LoadIncrementalHFiles.doBulkLoad(...) to move the files into HBase
+   *
+   * In this implementation, only the rowKey is given to the shuffle as the key
+   * and all the columns are already linked to the RowKey before the shuffle
+   * stage.  The sorting of the qualifier is done in memory outside of the
+   * shuffle stage.
+   *
+   * Also make sure that incoming RDDs only have one record for every row key.
+   *
+   * @param rdd                            The RDD we are bulk loading from
+   * @param tableName                      The HBase table we are loading into
+   * @param mapFunction                    A function that will convert the RDD records to
+   *                                       the key value format used for the shuffle to prep
+   *                                       for writing to the bulk loaded HFiles
+   * @param stagingDir                     The location on the FileSystem to bulk load into
+   * @param familyHFileWriteOptionsMap     Options that will define how the HFile for a
+   *                                       column family is written
+   * @param compactionExclude              Compaction excluded for the HFiles
+   * @param maxSize                        Max size for the HFiles before they roll
+   * @tparam T                             The Type of values in the original RDD
+   */
+  def bulkLoadThinRows[T](rdd:RDD[T],
+                  tableName: TableName,
+                  mapFunction: (T) =>
+                    (ByteArrayWrapper, FamiliesQualifiersValues),
+                  stagingDir:String,
+                  familyHFileWriteOptionsMap:
+                  util.Map[Array[Byte], FamilyHFileWriteOptions] =
+                  new util.HashMap[Array[Byte], FamilyHFileWriteOptions],
+                  compactionExclude: Boolean = false,
+                  maxSize:Long = HConstants.DEFAULT_MAX_FILE_SIZE):
+  Unit = {
+    val stagingPath = new Path(stagingDir)
+    val fs = stagingPath.getFileSystem(config)
+    if (fs.exists(stagingPath)) {
+      throw new FileAlreadyExistsException("Path " + stagingDir + " already exists")
+    }
+    val conn = HBaseConnectionCache.getConnection(config)
+    try {
+      val regionLocator = conn.getRegionLocator(tableName)
+      val startKeys = regionLocator.getStartKeys
+      if (startKeys.length == 0) {
+        logInfo("Table " + tableName.toString + " was not found")
+      }
+      val defaultCompressionStr = config.get("hfile.compression",
+        Compression.Algorithm.NONE.getName)
+      val defaultCompression = HFileWriterImpl
+        .compressionByName(defaultCompressionStr)
+      val nowTimeStamp = System.currentTimeMillis()
+      val tableRawName = tableName.getName
+
+      val familyHFileWriteOptionsMapInternal =
+        new util.HashMap[ByteArrayWrapper, FamilyHFileWriteOptions]
+
+      val entrySetIt = familyHFileWriteOptionsMap.entrySet().iterator()
+
+      while (entrySetIt.hasNext) {
+        val entry = entrySetIt.next()
+        familyHFileWriteOptionsMapInternal.put(new ByteArrayWrapper(entry.getKey), entry.getValue)
+      }
+
+      val regionSplitPartitioner =
+        new BulkLoadPartitioner(startKeys)
+
+      //This is where all the magic happens
+      //Here we are going to do the following things
+      // 1. Map every record in the RDD into (rowKey, family qualifier values) tuples
+      // 2. Then we are going to repartition, sort and shuffle
+      // 3. Finally we are going to write out our HFiles
+      rdd.map( r => mapFunction(r)).
+        repartitionAndSortWithinPartitions(regionSplitPartitioner).
+        hbaseForeachPartition(this, (it, conn) => {
+
+          val conf = broadcastedConf.value.value
+          val fs = FileSystem.get(conf)
+          val writerMap = new mutable.HashMap[ByteArrayWrapper, WriterLength]
+          var previousRow:Array[Byte] = HConstants.EMPTY_BYTE_ARRAY
+          var rollOverRequested = false
+          val localTableName = TableName.valueOf(tableRawName)
+
+          //Here is where we finally iterate through the data in this partition of the
+          //RDD that has been sorted and partitioned
+          it.foreach{ case (rowKey:ByteArrayWrapper,
+          familiesQualifiersValues:FamiliesQualifiersValues) =>
+
+
+            if (Bytes.compareTo(previousRow, rowKey.value) == 0) {
+              throw new KeyAlreadyExistsException("The following key was sent to the " +
+                "HFile load more then one: " + Bytes.toString(previousRow))
+            }
+
+            //The family map is a tree map so the families will be sorted
+            val familyIt = familiesQualifiersValues.familyMap.entrySet().iterator()
+            while (familyIt.hasNext) {
+              val familyEntry = familyIt.next()
+
+              val family = familyEntry.getKey.value
+
+              val qualifierIt = familyEntry.getValue.entrySet().iterator()
+
+              //The qualifier map is a tree map so the qualifiers will be sorted
+              while (qualifierIt.hasNext) {
+
+                val qualifierEntry = qualifierIt.next()
+                val qualifier = qualifierEntry.getKey
+                val cellValue = qualifierEntry.getValue
+
+                writeValueToHFile(rowKey.value,
+                  family,
+                  qualifier.value, // qualifier
+                  cellValue, // value
+                  nowTimeStamp,
+                  fs,
+                  conn,
+                  localTableName,
+                  conf,
+                  familyHFileWriteOptionsMapInternal,
+                  defaultCompression,
+                  writerMap,
+                  stagingDir)
+
+                previousRow = rowKey.value
+              }
+
+              writerMap.values.foreach( wl => {
+                rollOverRequested = rollOverRequested || wl.written > maxSize
+
+                //This will only roll if we have at least one column family file that is
+                //bigger than maxSize and we have finished a given row key
+                if (rollOverRequested) {
+                  rollWriters(fs, writerMap,
+                    regionSplitPartitioner,
+                    previousRow,
+                    compactionExclude)
+                  rollOverRequested = false
+                }
+              })
+            }
+          }
+
+          //We have finished all the data so let's close up the writers
+          rollWriters(fs, writerMap,
+            regionSplitPartitioner,
+            previousRow,
+            compactionExclude)
+          rollOverRequested = false
+        })
+    } finally {
+      if(null != conn) conn.close()
+    }
+  }
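+
+  // A minimal usage sketch for bulkLoadThinRows above, assuming (illustratively) a
+  // SparkContext sc, a Configuration config, a table "t1" with family "cf1", and that
+  // each input record already carries every column for one row key.  It uses the
+  // FamiliesQualifiersValues += helper from this module to collect the columns:
+  //
+  //   val hbaseContext = new HBaseContext(sc, config)
+  //   val rowRDD = sc.parallelize(Seq(
+  //     ("row1", Seq(("q1", "v1"), ("q2", "v2")))))
+  //   hbaseContext.bulkLoadThinRows[(String, Seq[(String, String)])](rowRDD,
+  //     TableName.valueOf("t1"),
+  //     { case (rowKey, columns) =>
+  //       val familyQualifiersValues = new FamiliesQualifiersValues
+  //       columns.foreach { case (qualifier, value) =>
+  //         familyQualifiersValues += (Bytes.toBytes("cf1"),
+  //           Bytes.toBytes(qualifier), Bytes.toBytes(value))
+  //       }
+  //       (new ByteArrayWrapper(Bytes.toBytes(rowKey)), familyQualifiersValues)
+  //     },
+  //     "/tmp/bulkLoadThinRowsStaging")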
+
+  /**
+   *  This will return a new HFile writer when requested
+   *
+   * @param family       column family
+   * @param conf         configuration to connect to HBase
+   * @param favoredNodes nodes that we would like to write to
+   * @param fs           FileSystem object where we will be writing the HFiles to
+   * @return WriterLength object
+   */
+  private def getNewHFileWriter(family: Array[Byte], conf: Configuration,
+                   favoredNodes: Array[InetSocketAddress],
+                   fs:FileSystem,
+                   familydir:Path,
+                   familyHFileWriteOptionsMapInternal:
+                   util.HashMap[ByteArrayWrapper, FamilyHFileWriteOptions],
+                   defaultCompression:Compression.Algorithm): WriterLength = {
+
+
+    var familyOptions = familyHFileWriteOptionsMapInternal.get(new ByteArrayWrapper(family))
+
+    if (familyOptions == null) {
+      familyOptions = new FamilyHFileWriteOptions(defaultCompression.toString,
+        BloomType.NONE.toString, HConstants.DEFAULT_BLOCKSIZE, DataBlockEncoding.NONE.toString)
+      familyHFileWriteOptionsMapInternal.put(new ByteArrayWrapper(family), familyOptions)
+    }
+
+    val tempConf = new Configuration(conf)
+    tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f)
+    val contextBuilder = new HFileContextBuilder()
+      .withCompression(Algorithm.valueOf(familyOptions.compression))
+      .withChecksumType(HStore.getChecksumType(conf))
+      .withBytesPerCheckSum(HStore.getBytesPerChecksum(conf))
+      .withBlockSize(familyOptions.blockSize)
+
+    if (HFile.getFormatVersion(conf) >= HFile.MIN_FORMAT_VERSION_WITH_TAGS) {
+      contextBuilder.withIncludesTags(true)
+    }
+
+    contextBuilder.withDataBlockEncoding(DataBlockEncoding.
+      valueOf(familyOptions.dataBlockEncoding))
+    val hFileContext = contextBuilder.build()
+
+    //Add a '_' to the file name because this is an unfinished file.  A rename will happen
+    // to remove the '_' when the file is closed.
+    new WriterLength(0,
+      new StoreFileWriter.Builder(conf, new CacheConfig(tempConf), new HFileSystem(fs))
+        .withBloomType(BloomType.valueOf(familyOptions.bloomType))
+        .withComparator(CellComparator.getInstance()).withFileContext(hFileContext)
+        .withFilePath(new Path(familydir, "_" + UUID.randomUUID.toString.replaceAll("-", "")))
+        .withFavoredNodes(favoredNodes).build())
+
+  }
+
+  /**
+   * Encompasses the logic to write a value to an HFile
+   *
+   * @param rowKey                             The RowKey for the record
+   * @param family                             HBase column family for the record
+   * @param qualifier                          HBase column qualifier for the record
+   * @param cellValue                          HBase cell value
+   * @param nowTimeStamp                       The cell time stamp
+   * @param fs                                 Connection to the FileSystem for the HFile
+   * @param conn                               Connection to HBase
+   * @param tableName                          HBase TableName object
+   * @param conf                               Configuration to be used when making a new HFile
+   * @param familyHFileWriteOptionsMapInternal Extra configs for the HFile
+   * @param hfileCompression                   The compression codec for the new HFile
+   * @param writerMap                          HashMap of existing writers and their offsets
+   * @param stagingDir                         The staging directory on the FileSystem to store
+   *                                           the HFiles
+   * @return                                   The writer for the given HFile that was written
+   *                                           to
+   */
+  private def writeValueToHFile(rowKey: Array[Byte],
+                        family: Array[Byte],
+                        qualifier: Array[Byte],
+                        cellValue:Array[Byte],
+                        nowTimeStamp: Long,
+                        fs: FileSystem,
+                        conn: Connection,
+                        tableName: TableName,
+                        conf: Configuration,
+                        familyHFileWriteOptionsMapInternal:
+                        util.HashMap[ByteArrayWrapper, FamilyHFileWriteOptions],
+                        hfileCompression:Compression.Algorithm,
+                        writerMap:mutable.HashMap[ByteArrayWrapper, WriterLength],
+                        stagingDir: String
+                         ): WriterLength = {
+
+    val wl = writerMap.getOrElseUpdate(new ByteArrayWrapper(family), {
+      val familyDir = new Path(stagingDir, Bytes.toString(family))
+
+      fs.mkdirs(familyDir)
+
+      val loc:HRegionLocation = {
+        try {
+          val locator =
+            conn.getRegionLocator(tableName)
+          locator.getRegionLocation(rowKey)
+        } catch {
+          case e: Throwable =>
+            logWarning("there's something wrong when locating rowkey: " +
+              Bytes.toString(rowKey))
+            null
+        }
+      }
+      if (null == loc) {
+        if (log.isTraceEnabled) {
+          logTrace("failed to get region location, so use default writer: " +
+            Bytes.toString(rowKey))
+        }
+        getNewHFileWriter(family = family,
+          conf = conf,
+          favoredNodes = null,
+          fs = fs,
+          familydir = familyDir,
+          familyHFileWriteOptionsMapInternal,
+          hfileCompression)
+      } else {
+        if (log.isDebugEnabled) {
+          logDebug("first rowkey: [" + Bytes.toString(rowKey) + "]")
+        }
+        val initialIsa =
+          new InetSocketAddress(loc.getHostname, loc.getPort)
+        if (initialIsa.isUnresolved) {
+          if (log.isTraceEnabled) {
+            logTrace("failed to resolve bind address: " + loc.getHostname + ":"
+              + loc.getPort + ", so use default writer")
+          }
+          getNewHFileWriter(family,
+            conf,
+            null,
+            fs,
+            familyDir,
+            familyHFileWriteOptionsMapInternal,
+            hfileCompression)
+        } else {
+          if(log.isDebugEnabled) {
+            logDebug("use favored nodes writer: " + initialIsa.getHostString)
+          }
+          getNewHFileWriter(family,
+            conf,
+            Array[InetSocketAddress](initialIsa),
+            fs,
+            familyDir,
+            familyHFileWriteOptionsMapInternal,
+            hfileCompression)
+        }
+      }
+    })
+
+    val keyValue = new KeyValue(rowKey,
+      family,
+      qualifier,
+      nowTimeStamp, cellValue)
+
+    wl.writer.append(keyValue)
+    wl.written += keyValue.getLength
+
+    wl
+  }
+
+  /**
+   * This will roll all Writers
+   * @param fs                     Hadoop FileSystem object
+   * @param writerMap              HashMap that contains all the writers
+   * @param regionSplitPartitioner The partitioner with knowledge of how the
+   *                               Regions are split by row key
+   * @param previousRow            The last row to fill the HFile ending range metadata
+   * @param compactionExclude      The exclude compaction metadata flag for the HFile
+   */
+  private def rollWriters(fs:FileSystem,
+                          writerMap:mutable.HashMap[ByteArrayWrapper, WriterLength],
+                  regionSplitPartitioner: BulkLoadPartitioner,
+                  previousRow: Array[Byte],
+                  compactionExclude: Boolean): Unit = {
+    writerMap.values.foreach( wl => {
+      if (wl.writer != null) {
+        logDebug("Writer=" + wl.writer.getPath +
+          (if (wl.written == 0) "" else ", wrote=" + wl.written))
+        closeHFileWriter(fs, wl.writer,
+          regionSplitPartitioner,
+          previousRow,
+          compactionExclude)
+      }
+    })
+    writerMap.clear()
+
+  }
+
+  /**
+   * Function to close an HFile
+   * @param fs                     Hadoop FileSystem object
+   * @param w                      HFile Writer
+   * @param regionSplitPartitioner The partitioner with knowledge of how the
+   *                               Regions are split by row key
+   * @param previousRow            The last row to fill the HFile ending range metadata
+   * @param compactionExclude      The exclude compaction metadata flag for the HFile
+   */
+  private def closeHFileWriter(fs:FileSystem,
+                               w: StoreFileWriter,
+                               regionSplitPartitioner: BulkLoadPartitioner,
+                               previousRow: Array[Byte],
+                               compactionExclude: Boolean): Unit = {
+    if (w != null) {
+      w.appendFileInfo(HStoreFile.BULKLOAD_TIME_KEY,
+        Bytes.toBytes(System.currentTimeMillis()))
+      w.appendFileInfo(HStoreFile.BULKLOAD_TASK_KEY,
+        Bytes.toBytes(regionSplitPartitioner.getPartition(previousRow)))
+      w.appendFileInfo(HStoreFile.MAJOR_COMPACTION_KEY,
+        Bytes.toBytes(true))
+      w.appendFileInfo(HStoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY,
+        Bytes.toBytes(compactionExclude))
+      w.appendTrackedTimestampsToMetadata()
+      w.close()
+
+      val srcPath = w.getPath
+
+      //In the new path you will see that we are using substring.  This is to
+      // remove the '_' character in front of the HFile name.  '_' is a character
+      // that will tell HBase that this file shouldn't be included in the bulk load.
+      // This feature is to protect against unfinished HFiles being submitted to HBase
+      val newPath = new Path(w.getPath.getParent, w.getPath.getName.substring(1))
+      if (!fs.rename(srcPath, newPath)) {
+        throw new IOException("Unable to rename '" + srcPath +
+          "' to " + newPath)
+      }
+    }
+  }
+
+  /**
+   * This is a wrapper class around StoreFileWriter.  The reason for the
+   * wrapper is to keep the length of the file alongside the writer
+   *
+   * @param written The number of bytes written to the writer
+   * @param writer  The writer to be wrapped
+   */
+  class WriterLength(var written:Long, val writer:StoreFileWriter)
+}
+
+@InterfaceAudience.Private
+object LatestHBaseContextCache {
+  var latest:HBaseContext = null
+}
diff --git a/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/HBaseDStreamFunctions.scala b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/HBaseDStreamFunctions.scala
new file mode 100644
index 0000000..4edde44
--- /dev/null
+++ b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/HBaseDStreamFunctions.scala
@@ -0,0 +1,160 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.spark
+
+import org.apache.hadoop.hbase.TableName
+import org.apache.yetus.audience.InterfaceAudience
+import org.apache.hadoop.hbase.client._
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable
+import org.apache.spark.streaming.dstream.DStream
+
+import scala.reflect.ClassTag
+
+/**
+ * HBaseDStreamFunctions contains a set of implicit functions that can be
+ * applied to a Spark DStream so that we can easily interact with HBase
+ */
+@InterfaceAudience.Public
+object HBaseDStreamFunctions {
+
+  /**
+   * These are implicit methods for a DStream that contains any type of
+   * data.
+   *
+   * @param dStream  This is for dStreams of any type
+   * @tparam T       Type T
+   */
+  implicit class GenericHBaseDStreamFunctions[T](val dStream: DStream[T]) {
+
+    /**
+     * Implicit method that gives easy access to HBaseContext's bulk
+     * put.  This will not return a new DStream.  Think of it like a foreach
+     *
+     * @param hc         The hbaseContext object to identify which
+     *                   HBase cluster connection to use
+     * @param tableName  The tableName that the put will be sent to
+     * @param f          The function that will turn the DStream values
+     *                   into HBase Put objects.
+     */
+    def hbaseBulkPut(hc: HBaseContext,
+                     tableName: TableName,
+                     f: (T) => Put): Unit = {
+      hc.streamBulkPut(dStream, tableName, f)
+    }
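+
+    // A minimal usage sketch, assuming (illustratively) a DStream[(Array[Byte], Array[Byte])]
+    // named dStream, an HBaseContext hbaseContext, a table "t1" and a family "cf1", with
+    // org.apache.hadoop.hbase.util.Bytes imported:
+    //
+    //   import org.apache.hadoop.hbase.spark.HBaseDStreamFunctions._
+    //   dStream.hbaseBulkPut(hbaseContext, TableName.valueOf("t1"),
+    //     record => new Put(record._1)
+    //       .addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("q1"), record._2))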
+
+    /**
+     * Implicit method that gives easy access to HBaseContext's bulk
+     * get.  This will return a new DStream.  Think of it as a DStream map
+     * function, in that every DStream value will get a new value out of
+     * HBase.  That new value will populate the newly generated DStream.
+     *
+     * @param hc             The hbaseContext object to identify which
+     *                       HBase cluster connection to use
+     * @param tableName      The tableName that the gets will be sent to
+     * @param batchSize      How many gets to execute in a single batch
+     * @param f              The function that will turn the DStream values
+     *                       into HBase Get objects
+     * @param convertResult  The function that will convert a HBase
+     *                       Result object into a value that will go
+     *                       into the resulting DStream
+     * @tparam R             The type of Object that will be coming
+     *                       out of the resulting DStream
+     * @return               A resulting DStream with type R objects
+     */
+    def hbaseBulkGet[R: ClassTag](hc: HBaseContext,
+                     tableName: TableName,
+                     batchSize:Int, f: (T) => Get, convertResult: (Result) => R):
+    DStream[R] = {
+      hc.streamBulkGet[T, R](tableName, batchSize, dStream, f, convertResult)
+    }
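+
+    // A minimal usage sketch, assuming (illustratively) a DStream[Array[Byte]] of row keys
+    // named keyStream, an HBaseContext hbaseContext and a table "t1" whose rows exist:
+    //
+    //   val valueStream: DStream[String] =
+    //     keyStream.hbaseBulkGet[String](hbaseContext, TableName.valueOf("t1"), 100,
+    //       rowKey => new Get(rowKey),
+    //       result => Bytes.toString(result.getRow))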
+
+    /**
+     * Implicit method that gives easy access to HBaseContext's bulk
+     * get.  This will return a new DStream.  Think of it as a DStream map
+     * function, in that every DStream value will get a new value out of
+     * HBase.  That new value will populate the newly generated DStream.
+     *
+     * @param hc             The hbaseContext object to identify which
+     *                       HBase cluster connection to use
+     * @param tableName      The tableName that the gets will be sent to
+     * @param batchSize      How many gets to execute in a single batch
+     * @param f              The function that will turn the DStream values
+     *                       into HBase Get objects
+     * @return               A resulting DStream of (ImmutableBytesWritable, Result) tuples
+     */
+    def hbaseBulkGet(hc: HBaseContext,
+                     tableName: TableName, batchSize:Int,
+                     f: (T) => Get): DStream[(ImmutableBytesWritable, Result)] = {
+        hc.streamBulkGet[T, (ImmutableBytesWritable, Result)](
+          tableName, batchSize, dStream, f,
+          result => (new ImmutableBytesWritable(result.getRow), result))
+    }
+
+    /**
+     * Implicit method that gives easy access to HBaseContext's bulk
+     * Delete.  This will not return a new DStream.
+     *
+     * @param hc         The hbaseContext object to identify which HBase
+     *                   cluster connection to use
+     * @param tableName  The tableName that the deletes will be sent to
+     * @param f          The function that will convert the DStream value into
+     *                   a HBase Delete Object
+     * @param batchSize  The number of Deletes to be sent in a single batch
+     */
+    def hbaseBulkDelete(hc: HBaseContext,
+                        tableName: TableName,
+                        f:(T) => Delete, batchSize:Int): Unit = {
+      hc.streamBulkDelete(dStream, tableName, f, batchSize)
+    }
+
+    /**
+     * Implicit method that gives easy access to HBaseContext's
+     * foreachPartition method.  This will act very much like a normal DStream
+     * foreach method but for the fact that you will now have a HBase connection
+     * while iterating through the values.
+     *
+     * @param hc  The hbaseContext object to identify which HBase
+     *            cluster connection to use
+     * @param f   This function will get an iterator for a Partition of a
+     *            DStream along with a connection object to HBase
+     */
+    def hbaseForeachPartition(hc: HBaseContext,
+                              f: (Iterator[T], Connection) => Unit): Unit = {
+      hc.streamForeachPartition(dStream, f)
+    }
+
+    /**
+     * Implicit method that gives easy access to HBaseContext's
+     * mapPartitions method.  This will act very much like a normal DStream
+     * map partitions method but for the fact that you will now have a
+     * HBase connection while iterating through the values
+     *
+     * @param hc  The hbaseContext object to identify which HBase
+     *            cluster connection to use
+     * @param f   This function will get an iterator for a Partition of a
+     *            DStream along with a connection object to HBase
+     * @tparam R  This is the type of objects that will go into the resulting
+     *            DStream
+     * @return    A resulting DStream of type R
+     */
+    def hbaseMapPartitions[R: ClassTag](hc: HBaseContext,
+                                        f: (Iterator[T], Connection) => Iterator[R]):
+    DStream[R] = {
+      hc.streamMapPartitions(dStream, f)
+    }
+  }
+}
diff --git a/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/HBaseRDDFunctions.scala b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/HBaseRDDFunctions.scala
new file mode 100644
index 0000000..2469c8e
--- /dev/null
+++ b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/HBaseRDDFunctions.scala
@@ -0,0 +1,253 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.spark
+
+import java.util
+
+import org.apache.hadoop.hbase.{HConstants, TableName}
+import org.apache.yetus.audience.InterfaceAudience
+import org.apache.hadoop.hbase.client._
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable
+import org.apache.spark.rdd.RDD
+
+import scala.reflect.ClassTag
+
+/**
+ * HBaseRDDFunctions contains a set of implicit functions that can be
+ * applied to a Spark RDD so that we can easily interact with HBase
+ */
+@InterfaceAudience.Public
+object HBaseRDDFunctions {
+
+  /**
+   * These are implicit methods for a RDD that contains any type of
+   * data.
+   *
+   * @param rdd This is for rdd of any type
+   * @tparam T  This is any type
+   */
+  implicit class GenericHBaseRDDFunctions[T](val rdd: RDD[T]) {
+
+    /**
+     * Implicit method that gives easy access to HBaseContext's bulk
+     * put.  This will not return a new RDD.  Think of it like a foreach
+     *
+     * @param hc         The hbaseContext object to identify which
+     *                   HBase cluster connection to use
+     * @param tableName  The tableName that the put will be sent to
+     * @param f          The function that will turn the RDD values
+     *                   into HBase Put objects.
+     */
+    def hbaseBulkPut(hc: HBaseContext,
+                     tableName: TableName,
+                     f: (T) => Put): Unit = {
+      hc.bulkPut(rdd, tableName, f)
+    }
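+
+    // A minimal usage sketch, assuming (illustratively) a SparkContext sc, an
+    // HBaseContext hbaseContext, a table "t1" and a family "cf1":
+    //
+    //   import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._
+    //   val putRDD = sc.parallelize(Seq((Bytes.toBytes("row1"), Bytes.toBytes("v1"))))
+    //   putRDD.hbaseBulkPut(hbaseContext, TableName.valueOf("t1"),
+    //     record => new Put(record._1)
+    //       .addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("q1"), record._2))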
+
+    /**
+     * Implicit method that gives easy access to HBaseContext's bulk
+     * get.  This will return a new RDD.  Think of it as an RDD map
+     * function, in that every RDD value will get a new value out of
+     * HBase.  That new value will populate the newly generated RDD.
+     *
+     * @param hc             The hbaseContext object to identify which
+     *                       HBase cluster connection to use
+     * @param tableName      The tableName that the gets will be sent to
+     * @param batchSize      How many gets to execute in a single batch
+     * @param f              The function that will turn the RDD values
+     *                       into HBase Get objects
+     * @param convertResult  The function that will convert a HBase
+     *                       Result object into a value that will go
+     *                       into the resulting RDD
+     * @tparam R             The type of Object that will be coming
+     *                       out of the resulting RDD
+     * @return               A resulting RDD with type R objects
+     */
+    def hbaseBulkGet[R: ClassTag](hc: HBaseContext,
+                            tableName: TableName, batchSize:Int,
+                            f: (T) => Get, convertResult: (Result) => R): RDD[R] = {
+      hc.bulkGet[T, R](tableName, batchSize, rdd, f, convertResult)
+    }
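+
+    // A minimal usage sketch, assuming (illustratively) an RDD[Array[Byte]] of row keys
+    // named keyRDD, an HBaseContext hbaseContext, and a table "t1" where family "cf1"
+    // and qualifier "q1" are populated for those keys:
+    //
+    //   val values: RDD[String] = keyRDD.hbaseBulkGet[String](hbaseContext,
+    //     TableName.valueOf("t1"), 100,
+    //     rowKey => new Get(rowKey),
+    //     result => Bytes.toString(
+    //       result.getValue(Bytes.toBytes("cf1"), Bytes.toBytes("q1"))))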
+
+    /**
+     * Implicit method that gives easy access to HBaseContext's bulk
+     * get.  This will return a new RDD.  Think of it as an RDD map
+     * function, in that every RDD value will get a new value out of
+     * HBase.  That new value will populate the newly generated RDD.
+     *
+     * @param hc             The hbaseContext object to identify which
+     *                       HBase cluster connection to use
+     * @param tableName      The tableName that the gets will be sent to
+     * @param batchSize      How many gets to execute in a single batch
+     * @param f              The function that will turn the RDD values
+     *                       into HBase Get objects
+     * @return               A resulting RDD of (ImmutableBytesWritable, Result) tuples
+     */
+    def hbaseBulkGet(hc: HBaseContext,
+                                  tableName: TableName, batchSize:Int,
+                                  f: (T) => Get): RDD[(ImmutableBytesWritable, Result)] = {
+      hc.bulkGet[T, (ImmutableBytesWritable, Result)](tableName,
+        batchSize, rdd, f,
+        result => if (result != null && result.getRow != null) {
+          (new ImmutableBytesWritable(result.getRow), result)
+        } else {
+          null
+        })
+    }
+
+    /**
+     * Implicit method that gives easy access to HBaseContext's bulk
+     * Delete.  This will not return a new RDD.
+     *
+     * @param hc         The hbaseContext object to identify which HBase
+     *                   cluster connection to use
+     * @param tableName  The tableName that the deletes will be sent to
+     * @param f          The function that will convert the RDD value into
+     *                   a HBase Delete Object
+     * @param batchSize  The number of Deletes to be sent in a single batch
+     */
+    def hbaseBulkDelete(hc: HBaseContext,
+                        tableName: TableName, f:(T) => Delete, batchSize:Int): Unit = {
+      hc.bulkDelete(rdd, tableName, f, batchSize)
+    }
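+
+    // A minimal usage sketch, assuming (illustratively) an RDD[Array[Byte]] of row keys
+    // named keyRDD, an HBaseContext hbaseContext and a table "t1":
+    //
+    //   keyRDD.hbaseBulkDelete(hbaseContext, TableName.valueOf("t1"),
+    //     rowKey => new Delete(rowKey), 100)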
+
+    /**
+     * Implicit method that gives easy access to HBaseContext's
+     * foreachPartition method.  This will act very much like a normal RDD
+     * foreach method but for the fact that you will now have a HBase connection
+     * while iterating through the values.
+     *
+     * @param hc  The hbaseContext object to identify which HBase
+     *            cluster connection to use
+     * @param f   This function will get an iterator for a Partition of an
+     *            RDD along with a connection object to HBase
+     */
+    def hbaseForeachPartition(hc: HBaseContext,
+                              f: (Iterator[T], Connection) => Unit): Unit = {
+      hc.foreachPartition(rdd, f)
+    }
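+
+    // A minimal usage sketch, assuming (illustratively) an RDD[(Array[Byte], Array[Byte])]
+    // named putRDD, an HBaseContext hbaseContext, a table "t1" and a family "cf1".  The
+    // supplied Connection is managed by HBaseContext and must not be closed here:
+    //
+    //   putRDD.hbaseForeachPartition(hbaseContext, (it, connection) => {
+    //     val mutator = connection.getBufferedMutator(TableName.valueOf("t1"))
+    //     it.foreach { case (rowKey, value) =>
+    //       mutator.mutate(new Put(rowKey)
+    //         .addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("q1"), value))
+    //     }
+    //     mutator.flush()
+    //     mutator.close()
+    //   })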
+
+    /**
+     * Implicit method that gives easy access to HBaseContext's
+     * mapPartitions method.  This will act very much like a normal RDD
+     * map partitions method but for the fact that you will now have a
+     * HBase connection while iterating through the values
+     *
+     * @param hc  The hbaseContext object to identify which HBase
+     *            cluster connection to use
+     * @param f   This function will get an iterator for a Partition of an
+     *            RDD along with a connection object to HBase
+     * @tparam R  This is the type of objects that will go into the resulting
+     *            RDD
+     * @return    A resulting RDD of type R
+     */
+    def hbaseMapPartitions[R: ClassTag](hc: HBaseContext,
+                                        f: (Iterator[T], Connection) => Iterator[R]):
+    RDD[R] = {
+      hc.mapPartitions[T,R](rdd, f)
+    }
+
+    /**
+     * Spark Implementation of HBase Bulk load for wide rows or when
+     * values are not already combined at the time of the map process
+     *
+     * This will take the content from an existing RDD then sort and shuffle
+     * it with respect to region splits.  The result of that sort and shuffle
+     * will be written to HFiles.
+     *
+     * After this function is executed the user will have to call
+     * LoadIncrementalHFiles.doBulkLoad(...) to move the files into HBase
+     *
+     * Also note this version of bulk load is different from past versions in
+     * that it includes the qualifier as part of the sort process. The
+     * reason for this is to be able to support rows with a very large number
+     * of columns.
+     *
+     * @param tableName                      The HBase table we are loading into
+     * @param flatMap                        A flatMap function that will make every row in the RDD
+     *                                       into N cells for the bulk load
+     * @param stagingDir                     The location on the FileSystem to bulk load into
+     * @param familyHFileWriteOptionsMap     Options that will define how the HFile for a
+     *                                       column family is written
+     * @param compactionExclude              Compaction excluded for the HFiles
+     * @param maxSize                        Max size for the HFiles before they roll
+     */
+    def hbaseBulkLoad(hc: HBaseContext,
+                         tableName: TableName,
+                         flatMap: (T) => Iterator[(KeyFamilyQualifier, Array[Byte])],
+                         stagingDir:String,
+                         familyHFileWriteOptionsMap:
+                         util.Map[Array[Byte], FamilyHFileWriteOptions] =
+                         new util.HashMap[Array[Byte], FamilyHFileWriteOptions](),
+                         compactionExclude: Boolean = false,
+                         maxSize:Long = HConstants.DEFAULT_MAX_FILE_SIZE):Unit = {
+      hc.bulkLoad(rdd, tableName,
+        flatMap, stagingDir, familyHFileWriteOptionsMap,
+        compactionExclude, maxSize)
+    }
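+
+    // A minimal usage sketch mirroring HBaseContext.bulkLoad, assuming (illustratively)
+    // an RDD[(Array[Byte], Array[Byte])] named cellRDD, an HBaseContext hbaseContext,
+    // a table "t1", a family "cf1" and the staging path shown:
+    //
+    //   cellRDD.hbaseBulkLoad(hbaseContext, TableName.valueOf("t1"),
+    //     { case (rowKey, value) =>
+    //       Seq((new KeyFamilyQualifier(rowKey, Bytes.toBytes("cf1"), Bytes.toBytes("q1")),
+    //         value)).iterator
+    //     },
+    //     "/tmp/bulkLoadStaging")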
+
+    /**
+     * Implicit method that gives easy access to HBaseContext's
+     * bulkLoadThinRows method.
+     *
+     * Spark Implementation of HBase Bulk load for short rows, somewhere less than
+     * 1000 columns.  This bulk load should be faster for tables with thinner
+     * rows than the other Spark implementation of bulk load, which puts only one
+     * value into a record going into a shuffle.
+     *
+     * This will take the content from an existing RDD then sort and shuffle
+     * it with respect to region splits.  The result of that sort and shuffle
+     * will be written to HFiles.
+     *
+     * After this function is executed the user will have to call
+     * LoadIncrementalHFiles.doBulkLoad(...) to move the files into HBase
+     *
+     * In this implementation only the rowKey is given to the shuffle as the key
+     * and all the columns are already linked to the RowKey before the shuffle
+     * stage.  The sorting of the qualifier is done in memory outside of the
+     * shuffle stage.
+     *
+     * @param tableName                      The HBase table we are loading into
+     * @param mapFunction                    A function that will convert the RDD records to
+     *                                       the key value format used for the shuffle to prep
+     *                                       for writing to the bulk loaded HFiles
+     * @param stagingDir                     The location on the FileSystem to bulk load into
+     * @param familyHFileWriteOptionsMap     Options that will define how the HFile for a
+     *                                       column family is written
+     * @param compactionExclude              Compaction excluded for the HFiles
+     * @param maxSize                        Max size for the HFiles before they roll
+     */
+    def hbaseBulkLoadThinRows(hc: HBaseContext,
+                      tableName: TableName,
+                      mapFunction: (T) =>
+                        (ByteArrayWrapper, FamiliesQualifiersValues),
+                      stagingDir:String,
+                      familyHFileWriteOptionsMap:
+                      util.Map[Array[Byte], FamilyHFileWriteOptions] =
+                      new util.HashMap[Array[Byte], FamilyHFileWriteOptions](),
+                      compactionExclude: Boolean = false,
+                      maxSize:Long = HConstants.DEFAULT_MAX_FILE_SIZE):Unit = {
+      hc.bulkLoadThinRows(rdd, tableName,
+        mapFunction, stagingDir, familyHFileWriteOptionsMap,
+        compactionExclude, maxSize)
+    }
+  }
+}
diff --git a/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/JavaHBaseContext.scala b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/JavaHBaseContext.scala
new file mode 100644
index 0000000..be6581a
--- /dev/null
+++ b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/JavaHBaseContext.scala
@@ -0,0 +1,404 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.spark
+
+import java.util.Map
+
+import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.hbase.TableName
+import org.apache.hadoop.hbase.util.Pair
+import org.apache.yetus.audience.InterfaceAudience
+import org.apache.hadoop.hbase.client.{Connection, Delete, Get, Put, Result, Scan}
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable
+import org.apache.spark.api.java.{JavaRDD, JavaSparkContext}
+import org.apache.spark.api.java.function.{FlatMapFunction, Function, VoidFunction}
+import org.apache.spark.streaming.api.java.JavaDStream
+
+import java.lang.Iterable
+
+import scala.collection.JavaConversions._
+import scala.reflect.ClassTag
+
+/**
+ * This is the Java Wrapper over HBaseContext which is written in
+ * Scala.  This class will be used by developers that want to
+ * work with Spark or Spark Streaming in Java
+ *
+ * @param jsc    This is the JavaSparkContext that we will wrap
+ * @param config This is the config information to our HBase cluster
+ */
+@InterfaceAudience.Public
+class JavaHBaseContext(@transient val jsc: JavaSparkContext,
+                       @transient val config: Configuration) extends Serializable {
+  val hbaseContext = new HBaseContext(jsc.sc, config)
+
+  /**
+   * A simple enrichment of the traditional Spark javaRdd foreachPartition.
+   * This function differs from the original in that it offers the
+   * developer access to an already connected Connection object
+   *
+   * Note: Do not close the Connection object.  All Connection
+   * management is handled outside this method
+   *
+   * @param javaRdd Original javaRdd with data to iterate over
+   * @param f       Function to be given an iterator to iterate through
+   *                the RDD values and a Connection object to interact
+   *                with HBase
+   */
+  def foreachPartition[T](javaRdd: JavaRDD[T],
+                          f: VoidFunction[(java.util.Iterator[T], Connection)]) = {
+
+    hbaseContext.foreachPartition(javaRdd.rdd,
+      (it: Iterator[T], conn: Connection) => {
+        f.call((it, conn))
+      })
+  }
+
+  /**
+   * A simple enrichment of the traditional Spark Streaming dStream foreach
+   * This function differs from the original in that it offers the
+   * developer access to an already connected Connection object
+   *
+   * Note: Do not close the Connection object.  All Connection
+   * management is handled outside this method
+   *
+   * @param javaDstream Original DStream with data to iterate over
+   * @param f           Function to be given an iterator to iterate through
+   *                    the JavaDStream values and a Connection object to
+   *                    interact with HBase
+   */
+  def foreachPartition[T](javaDstream: JavaDStream[T],
+                          f: VoidFunction[(Iterator[T], Connection)]) = {
+    hbaseContext.foreachPartition(javaDstream.dstream,
+      (it: Iterator[T], conn: Connection) => f.call(it, conn))
+  }
+
+  /**
+   * A simple enrichment of the traditional Spark JavaRDD mapPartition.
+   * This function differs from the original in that it offers the
+   * developer access to an already connected Connection object
+   *
+   * Note: Do not close the Connection object.  All Connection
+   * management is handled outside this method
+   *
+   * Note: Make sure to partition correctly to avoid memory issues when
+   * getting data from HBase
+   *
+   * @param javaRdd Original JavaRdd with data to iterate over
+   * @param f       Function to be given an iterator to iterate through
+   *                the RDD values and a Connection object to interact
+   *                with HBase
+   * @return        Returns a new RDD generated by the user definition
+   *                function just like normal mapPartition
+   */
+  def mapPartitions[T, R](javaRdd: JavaRDD[T],
+                          f: FlatMapFunction[(java.util.Iterator[T],
+                            Connection), R]): JavaRDD[R] = {
+    JavaRDD.fromRDD(hbaseContext.mapPartitions(javaRdd.rdd,
+      (it: Iterator[T], conn: Connection) =>
+        f.call(it, conn))(fakeClassTag[R]))(fakeClassTag[R])
+  }
+
+  /**
+   * A simple enrichment of the traditional Spark Streaming JavaDStream
+   * mapPartition.
+   *
+   * This function differs from the original in that it offers the
+   * developer access to an already connected Connection object
+   *
+   * Note: Do not close the Connection object.  All Connection
+   * management is handled outside this method
+   *
+   * Note: Make sure to partition correctly to avoid memory issues when
+   * getting data from HBase
+   *
+   * @param javaDstream Original JavaDStream with data to iterate over
+   * @param mp          Function to be given an iterator to iterate through
+   *                    the JavaDStream values and a Connection object to
+   *                    interact with HBase
+   * @return            Returns a new JavaDStream generated by the user
+   *                    definition function just like normal mapPartition
+   */
+  def streamMap[T, U](javaDstream: JavaDStream[T],
+                      mp: Function[(Iterator[T], Connection), Iterator[U]]):
+  JavaDStream[U] = {
+    JavaDStream.fromDStream(hbaseContext.streamMapPartitions(javaDstream.dstream,
+      (it: Iterator[T], conn: Connection) =>
+        mp.call(it, conn))(fakeClassTag[U]))(fakeClassTag[U])
+  }
+
+  /**
+   * A simple abstraction over the HBaseContext.foreachPartition method.
+   *
+   * It allows additional support for a user to take a JavaRDD,
+   * generate Puts and send them to HBase.
+   * The complexity of managing the Connection is
+   * removed from the developer
+   *
+   * @param javaRdd   Original JavaRDD with data to iterate over
+   * @param tableName The name of the table to put into
+   * @param f         Function to convert a value in the JavaRDD
+   *                  to a HBase Put
+   */
+  def bulkPut[T](javaRdd: JavaRDD[T],
+                 tableName: TableName,
+                 f: Function[(T), Put]) {
+
+    hbaseContext.bulkPut(javaRdd.rdd, tableName, (t: T) => f.call(t))
+  }
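+
+  // A minimal usage sketch written in Scala for brevity (the class itself targets Java
+  // callers); jsc, config, the table "t1" and the family "cf1" are illustrative
+  // assumptions:
+  //
+  //   val javaHBaseContext = new JavaHBaseContext(jsc, config)
+  //   val javaRdd: JavaRDD[Array[Byte]] =
+  //     jsc.parallelize(java.util.Collections.singletonList(Bytes.toBytes("row1")))
+  //   javaHBaseContext.bulkPut(javaRdd, TableName.valueOf("t1"),
+  //     new Function[Array[Byte], Put] {
+  //       override def call(rowKey: Array[Byte]): Put =
+  //         new Put(rowKey).addColumn(Bytes.toBytes("cf1"),
+  //           Bytes.toBytes("q1"), Bytes.toBytes("v1"))
+  //     })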
+
+  /**
+   * A simple abstraction over the HBaseContext.streamMapPartition method.
+   *
+   * It allows additional support for a user to take a JavaDStream and
+   * generate Puts and send them to HBase.
+   *
+   * The complexity of managing the Connection is
+   * removed from the developer
+   *
+   * @param javaDstream Original DStream with data to iterate over
+   * @param tableName   The name of the table to put into
+   * @param f           Function to convert a value in
+   *                    the JavaDStream to a HBase Put
+   */
+  def streamBulkPut[T](javaDstream: JavaDStream[T],
+                       tableName: TableName,
+                       f: Function[T, Put]) = {
+    hbaseContext.streamBulkPut(javaDstream.dstream,
+      tableName,
+      (t: T) => f.call(t))
+  }
+
+  /**
+   * A simple abstraction over the HBaseContext.foreachPartition method.
+   *
+   * It allows additional support for a user to take a JavaRDD and
+   * generate Deletes and send them to HBase.
+   *
+   * The complexity of managing the Connection is
+   * removed from the developer
+   *
+   * @param javaRdd   Original JavaRDD with data to iterate over
+   * @param tableName The name of the table to delete from
+   * @param f         Function to convert a value in the JavaRDD to a
+   *                  HBase Delete
+   * @param batchSize The number of deletes to batch before sending to HBase
+   */
+  def bulkDelete[T](javaRdd: JavaRDD[T], tableName: TableName,
+                    f: Function[T, Delete], batchSize: Integer) {
+    hbaseContext.bulkDelete(javaRdd.rdd, tableName, (t: T) => f.call(t), batchSize)
+  }
+
+  /**
+   * A simple abstraction over the HBaseContext.streamBulkMutation method.
+   *
+   * It allows additional support for a user to take a JavaDStream and
+   * generate Deletes and send them to HBase.
+   *
+   * The complexity of managing the Connection is
+   * removed from the developer
+   *
+   * @param javaDStream Original DStream with data to iterate over
+   * @param tableName   The name of the table to delete from
+   * @param f           Function to convert a value in the JavaDStream to a
+   *                    HBase Delete
+   * @param batchSize   The number of deletes to be sent at once
+   */
+  def streamBulkDelete[T](javaDStream: JavaDStream[T],
+                          tableName: TableName,
+                          f: Function[T, Delete],
+                          batchSize: Integer) = {
+    hbaseContext.streamBulkDelete(javaDStream.dstream, tableName,
+      (t: T) => f.call(t),
+      batchSize)
+  }
+
+  /**
+   * A simple abstraction over the HBaseContext.mapPartition method.
+   *
+   * It allows additional support for a user to take a JavaRDD and generate a
+   * new RDD based on Gets and the results they bring back from HBase
+   *
+   * @param tableName     The name of the table to get from
+   * @param batchSize     batch size of how many gets to retrieve in a single fetch
+   * @param javaRdd       Original JavaRDD with data to iterate over
+   * @param makeGet       Function to convert a value in the JavaRDD to a
+   *                      HBase Get
+   * @param convertResult This will convert the HBase Result object to
+   *                      whatever the user wants to put in the resulting
+   *                      JavaRDD
+   * @return              New JavaRDD that is created by the Get to HBase
+   */
+  def bulkGet[T, U](tableName: TableName,
+                    batchSize: Integer,
+                    javaRdd: JavaRDD[T],
+                    makeGet: Function[T, Get],
+                    convertResult: Function[Result, U]): JavaRDD[U] = {
+
+    JavaRDD.fromRDD(hbaseContext.bulkGet[T, U](tableName,
+      batchSize,
+      javaRdd.rdd,
+      (t: T) => makeGet.call(t),
+      (r: Result) => {
+        convertResult.call(r)
+      })(fakeClassTag[U]))(fakeClassTag[U])
+
+  }
+
+  /**
+   * A simple abstraction over the HBaseContext.streamMap method.
+   *
+   * It allows additional support for a user to take a DStream and
+   * generate a new DStream based on Gets and the results
+   * they bring back from HBase
+   *
+   * @param tableName     The name of the table to get from
+   * @param batchSize     The number of gets to be batched together
+   * @param javaDStream   Original DStream with data to iterate over
+   * @param makeGet       Function to convert a value in the JavaDStream to a
+   *                      HBase Get
+   * @param convertResult This will convert the HBase Result object to
+   *                      whatever the user wants to put in the resulting
+   *                      JavaDStream
+   * @return              New JavaDStream that is created by the Get to HBase
+   */
+  def streamBulkGet[T, U](tableName: TableName,
+                          batchSize: Integer,
+                          javaDStream: JavaDStream[T],
+                          makeGet: Function[T, Get],
+                          convertResult: Function[Result, U]): JavaDStream[U] = {
+    JavaDStream.fromDStream(hbaseContext.streamBulkGet(tableName,
+      batchSize,
+      javaDStream.dstream,
+      (t: T) => makeGet.call(t),
+      (r: Result) => convertResult.call(r))(fakeClassTag[U]))(fakeClassTag[U])
+  }
+
+  /**
+    * A simple abstraction over the HBaseContext.bulkLoad method.
+    * It allows additional support for a user to take a JavaRDD and
+    * convert it into a new JavaRDD[Pair] based on the MapFunction,
+    * and HFiles will be generated in stagingDir for bulk load
+    *
+    * @param javaRdd                        The javaRDD we are bulk loading from
+    * @param tableName                      The HBase table we are loading into
+    * @param mapFunc                        A Function that will convert a value in JavaRDD
+    *                                       to Pair(KeyFamilyQualifier, Array[Byte])
+    * @param stagingDir                     The location on the FileSystem to bulk load into
+    * @param familyHFileWriteOptionsMap     Options that will define how the HFile for a
+    *                                       column family is written
+    * @param compactionExclude              Compaction excluded for the HFiles
+    * @param maxSize                        Max size for the HFiles before they roll
+    */
+  def bulkLoad[T](javaRdd: JavaRDD[T],
+                  tableName: TableName,
+                  mapFunc : Function[T, Pair[KeyFamilyQualifier, Array[Byte]]],
+                  stagingDir: String,
+                  familyHFileWriteOptionsMap: Map[Array[Byte], FamilyHFileWriteOptions],
+                  compactionExclude: Boolean,
+                  maxSize: Long):
+  Unit = {
+    hbaseContext.bulkLoad[Pair[KeyFamilyQualifier, Array[Byte]]](javaRdd.map(mapFunc).rdd, tableName, t => {
+      val keyFamilyQualifier = t.getFirst
+      val value = t.getSecond
+      Seq((keyFamilyQualifier, value)).iterator
+    }, stagingDir, familyHFileWriteOptionsMap, compactionExclude, maxSize)
+  }
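+  // A minimal usage sketch (the RDD `cellRdd`, the staging path, the column family/qualifier
+  // names and the pre-built `familyHBaseWriterOptions` map are illustrative): each input
+  // record is mapped to a (KeyFamilyQualifier, value) pair and HFiles are written under the
+  // staging directory, from where they can be handed to the usual HBase bulk-load tooling.
+  //
+  //   javaHBaseContext.bulkLoad[(Array[Byte], Array[Byte])](
+  //     cellRdd,
+  //     TableName.valueOf("t1"),
+  //     new Function[(Array[Byte], Array[Byte]), Pair[KeyFamilyQualifier, Array[Byte]]] {
+  //       def call(t: (Array[Byte], Array[Byte])) =
+  //         new Pair(new KeyFamilyQualifier(t._1, Bytes.toBytes("cf"), Bytes.toBytes("q")), t._2)
+  //     },
+  //     "/tmp/hfile-staging",
+  //     familyHBaseWriterOptions,
+  //     false,
+  //     HConstants.DEFAULT_MAX_FILE_SIZE)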
+
+  /**
+    * A simple abstraction over the HBaseContext.bulkLoadThinRows method.
+    * It allows a user to take a JavaRDD and convert it into a new
+    * JavaRDD[Pair] based on a map function; HFiles are then generated
+    * in stagingDir for bulk loading
+    *
+    * @param javaRdd                        The javaRDD we are bulk loading from
+    * @param tableName                      The HBase table we are loading into
+    * @param mapFunc                        A Function that will convert a value in JavaRDD
+    *                                       to Pair(ByteArrayWrapper, FamiliesQualifiersValues)
+    * @param stagingDir                     The location on the FileSystem to bulk load into
+    * @param familyHFileWriteOptionsMap     Options that will define how the HFile for a
+    *                                       column family is written
+    * @param compactionExclude              Whether the generated HFiles should be excluded from compaction
+    * @param maxSize                        Max size for the HFiles before they roll
+    */
+  def bulkLoadThinRows[T](javaRdd: JavaRDD[T],
+                       tableName: TableName,
+                       mapFunc : Function[T, Pair[ByteArrayWrapper, FamiliesQualifiersValues]],
+                       stagingDir: String,
+                       familyHFileWriteOptionsMap: Map[Array[Byte], FamilyHFileWriteOptions],
+                       compactionExclude: Boolean,
+                       maxSize: Long):
+  Unit = {
+    hbaseContext.bulkLoadThinRows[Pair[ByteArrayWrapper, FamiliesQualifiersValues]](javaRdd.map(mapFunc).rdd,
+      tableName, t => {
+      (t.getFirst, t.getSecond)
+    }, stagingDir, familyHFileWriteOptionsMap, compactionExclude, maxSize)
+  }
+
+  /**
+   * This function will use the native HBase TableInputFormat with the
+   * given scan object to generate a new JavaRDD
+   *
+   * @param tableName The name of the table to scan
+   * @param scans     The HBase scan object to use to read data from HBase
+   * @param f         Function to convert a Result object from HBase into
+   *                  what the user wants in the final generated JavaRDD
+   * @return          New JavaRDD with results from scan
+   */
+  def hbaseRDD[U](tableName: TableName,
+                  scans: Scan,
+                  f: Function[(ImmutableBytesWritable, Result), U]):
+  JavaRDD[U] = {
+    JavaRDD.fromRDD(
+      hbaseContext.hbaseRDD[U](tableName,
+        scans,
+        (v: (ImmutableBytesWritable, Result)) =>
+          f.call(v._1, v._2))(fakeClassTag[U]))(fakeClassTag[U])
+  }
+
+  /**
+   * An overloaded version of HBaseContext hbaseRDD that defines the
+   * type of the resulting JavaRDD
+   *
+   * @param tableName The name of the table to scan
+   * @param scans     The HBase scan object to use to read data from HBase
+   * @return          New JavaRDD with results from scan
+   */
+  def hbaseRDD(tableName: TableName,
+               scans: Scan):
+  JavaRDD[(ImmutableBytesWritable, Result)] = {
+    JavaRDD.fromRDD(hbaseContext.hbaseRDD(tableName, scans))
+  }
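+  // A minimal usage sketch (table name and caching value are illustrative): scan a
+  // table into a JavaRDD of (rowkey, Result) pairs.
+  //
+  //   val scan = new Scan()
+  //   scan.setCaching(100)
+  //   val rdd: JavaRDD[(ImmutableBytesWritable, Result)] =
+  //     javaHBaseContext.hbaseRDD(TableName.valueOf("t1"), scan)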
+
+  /**
+   * Produces a ClassTag[T], which is actually just a casted ClassTag[AnyRef].
+   *
+   * This method is used to keep ClassTags out of the external Java API, as the Java compiler
+   * cannot produce them automatically. While this ClassTag-faking does please the compiler,
+   * it can cause problems at runtime if the Scala API relies on ClassTags for correctness.
+   *
+   * Often, though, a ClassTag[AnyRef] will not lead to incorrect behavior, just worse
+   * performance or security issues. For instance, an Array[AnyRef] can hold any type T,
+   * but may lose primitive specialization.
+   */
+  private[spark]
+  def fakeClassTag[T]: ClassTag[T] = ClassTag.AnyRef.asInstanceOf[ClassTag[T]]
+
+}
diff --git a/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/KeyFamilyQualifier.scala b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/KeyFamilyQualifier.scala
new file mode 100644
index 0000000..7fd5a62
--- /dev/null
+++ b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/KeyFamilyQualifier.scala
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.spark
+
+import java.io.Serializable
+
+import org.apache.yetus.audience.InterfaceAudience;
+import org.apache.hadoop.hbase.util.Bytes
+
+/**
+ * This is the key to be used for sorting and shuffling.
+ *
+ * We will only partition on the rowKey, but we will sort on all three fields.
+ *
+ * @param rowKey    Record RowKey
+ * @param family    Record ColumnFamily
+ * @param qualifier Cell Qualifier
+ */
+@InterfaceAudience.Public
+class KeyFamilyQualifier(val rowKey:Array[Byte], val family:Array[Byte], val qualifier:Array[Byte])
+  extends Comparable[KeyFamilyQualifier] with Serializable {
+  override def compareTo(o: KeyFamilyQualifier): Int = {
+    var result = Bytes.compareTo(rowKey, o.rowKey)
+    if (result == 0) {
+      result = Bytes.compareTo(family, o.family)
+      if (result == 0) result = Bytes.compareTo(qualifier, o.qualifier)
+    }
+    result
+  }
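+  // Ordering sketch (values are illustrative): rows compare first, then family, then
+  // qualifier, all via Bytes.compareTo, e.g. ("r1","cf","a") sorts before ("r1","cf","b"),
+  // and ("r1","cf","q") sorts before ("r2","a","a").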
+  override def toString: String = {
+    Bytes.toString(rowKey) + ":" + Bytes.toString(family) + ":" + Bytes.toString(qualifier)
+  }
+}
diff --git a/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/Logging.scala b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/Logging.scala
new file mode 100644
index 0000000..a92f4e0
--- /dev/null
+++ b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/Logging.scala
@@ -0,0 +1,121 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.spark
+
+import org.apache.yetus.audience.InterfaceAudience
+import org.slf4j.impl.StaticLoggerBinder
+import org.slf4j.Logger
+import org.slf4j.LoggerFactory
+
+/**
+ * Utility trait for classes that want to log data. Creates an SLF4J logger for the class and allows
+ * logging messages at different levels using methods that only evaluate parameters lazily if the
+ * log level is enabled.
+ * Logging is private in Spark 2.0, so a copy is kept here to isolate
+ * incompatibilities across Spark releases.
+ */
+@InterfaceAudience.Private
+trait Logging {
+
+  // Make the log field transient so that objects with Logging can
+  // be serialized and used on another machine
+  @transient private var log_ : Logger = null
+
+  // Method to get the logger name for this object
+  protected def logName = {
+    // Ignore trailing $'s in the class names for Scala objects
+    this.getClass.getName.stripSuffix("$")
+  }
+
+  // Method to get or create the logger for this object
+  protected def log: Logger = {
+    if (log_ == null) {
+      initializeLogIfNecessary(false)
+      log_ = LoggerFactory.getLogger(logName)
+    }
+    log_
+  }
+
+  // Log methods that take only a String
+  protected def logInfo(msg: => String) {
+    if (log.isInfoEnabled) log.info(msg)
+  }
+
+  protected def logDebug(msg: => String) {
+    if (log.isDebugEnabled) log.debug(msg)
+  }
+
+  protected def logTrace(msg: => String) {
+    if (log.isTraceEnabled) log.trace(msg)
+  }
+
+  protected def logWarning(msg: => String) {
+    if (log.isWarnEnabled) log.warn(msg)
+  }
+
+  protected def logError(msg: => String) {
+    if (log.isErrorEnabled) log.error(msg)
+  }
+
+  // Log methods that take Throwables (Exceptions/Errors) too
+  protected def logInfo(msg: => String, throwable: Throwable) {
+    if (log.isInfoEnabled) log.info(msg, throwable)
+  }
+
+  protected def logDebug(msg: => String, throwable: Throwable) {
+    if (log.isDebugEnabled) log.debug(msg, throwable)
+  }
+
+  protected def logTrace(msg: => String, throwable: Throwable) {
+    if (log.isTraceEnabled) log.trace(msg, throwable)
+  }
+
+  protected def logWarning(msg: => String, throwable: Throwable) {
+    if (log.isWarnEnabled) log.warn(msg, throwable)
+  }
+
+  protected def logError(msg: => String, throwable: Throwable) {
+    if (log.isErrorEnabled) log.error(msg, throwable)
+  }
+
+  protected def initializeLogIfNecessary(isInterpreter: Boolean): Unit = {
+    if (!Logging.initialized) {
+      Logging.initLock.synchronized {
+        if (!Logging.initialized) {
+          initializeLogging(isInterpreter)
+        }
+      }
+    }
+  }
+
+  private def initializeLogging(isInterpreter: Boolean): Unit = {
+    // Don't use a logger in here, as this is itself occurring during initialization of a logger
+    // If Log4j 1.2 is being used, but is not initialized, load a default properties file
+    val binderClass = StaticLoggerBinder.getSingleton.getLoggerFactoryClassStr
+    Logging.initialized = true
+
+    // Force a call into slf4j to initialize it. Avoids this happening from multiple threads
+    // and triggering this: http://mailman.qos.ch/pipermail/slf4j-dev/2010-April/002956.html
+    log
+  }
+}
+
+private object Logging {
+  @volatile private var initialized = false
+  val initLock = new Object()
+}
diff --git a/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/NewHBaseRDD.scala b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/NewHBaseRDD.scala
new file mode 100644
index 0000000..7088ce9
--- /dev/null
+++ b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/NewHBaseRDD.scala
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.spark
+
+import org.apache.hadoop.conf.Configuration
+import org.apache.yetus.audience.InterfaceAudience;
+import org.apache.hadoop.mapreduce.InputFormat
+import org.apache.spark.rdd.NewHadoopRDD
+import org.apache.spark.{InterruptibleIterator, Partition, SparkContext, TaskContext}
+
+@InterfaceAudience.Public
+class NewHBaseRDD[K,V](@transient val sc : SparkContext,
+                       @transient val inputFormatClass: Class[_ <: InputFormat[K, V]],
+                       @transient val keyClass: Class[K],
+                       @transient val valueClass: Class[V],
+                       @transient private val __conf: Configuration,
+                       val hBaseContext: HBaseContext) extends NewHadoopRDD(sc, inputFormatClass, keyClass, valueClass, __conf) {
+
+  override def compute(theSplit: Partition, context: TaskContext): InterruptibleIterator[(K, V)] = {
+    hBaseContext.applyCreds()
+    super.compute(theSplit, context)
+  }
+}
diff --git a/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/Bound.scala b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/Bound.scala
new file mode 100644
index 0000000..af372ea
--- /dev/null
+++ b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/Bound.scala
@@ -0,0 +1,120 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.spark.datasources
+
+import org.apache.yetus.audience.InterfaceAudience;
+import org.apache.hadoop.hbase.spark.hbase._
+
+/**
+ * A Bound represents a boundary of the scan
+ *
+ * @param b The byte array of the bound
+ * @param inc inclusive or not.
+ */
+@InterfaceAudience.Private
+case class Bound(b: Array[Byte], inc: Boolean)
+// The non-overlapping ranges we need to scan; if lower equals upper, it is a get request
+
+@InterfaceAudience.Private
+case class Range(lower: Option[Bound], upper: Option[Bound])
+
+@InterfaceAudience.Private
+object Range {
+  def apply(region: HBaseRegion): Range = {
+    Range(region.start.map(Bound(_, true)), if (region.end.get.size == 0) {
+      None
+    } else {
+      region.end.map((Bound(_, false)))
+    })
+  }
+}
+
+@InterfaceAudience.Private
+object Ranges {
+  // We assume that
+  // 1. r.lower.inc is true, and r.upper.inc is false
+  // 2. for each range in rs, its upper.inc is false
+  def and(r: Range, rs: Seq[Range]): Seq[Range] = {
+    rs.flatMap{ s =>
+      val lower = s.lower.map { x =>
+        // the scan has lower bound
+        r.lower.map { y =>
+          // the region has lower bound
+          if (ord.compare(x.b, y.b) < 0) {
+            // scan lower bound is smaller than region server lower bound
+            Some(y)
+          } else {
+            // scan low bound is greater or equal to region server lower bound
+            Some(x)
+          }
+        }.getOrElse(Some(x))
+      }.getOrElse(r.lower)
+
+      val upper =  s.upper.map { x =>
+        // the scan has upper bound
+        r.upper.map { y =>
+          // the region has upper bound
+          if (ord.compare(x.b, y.b) >= 0) {
+            // scan upper bound is larger than server upper bound
+            // but region server scan stop is exclusive. It is OK here.
+            Some(y)
+          } else {
+            // scan upper bound is less or equal to region server upper bound
+            Some(x)
+          }
+        }.getOrElse(Some(x))
+      }.getOrElse(r.upper)
+
+      val c = lower.map { case x =>
+        upper.map { case y =>
+          ord.compare(x.b, y.b)
+        }.getOrElse(-1)
+      }.getOrElse(-1)
+      if (c < 0) {
+        Some(Range(lower, upper))
+      } else {
+        None
+      }
+    }.seq
+  }
+}
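+// Intersection sketch (a, b, d, f are illustrative byte arrays with a < b < d < f in Bytes
+// order): intersecting a region [b, f) with a pushed-down scan range [a, d) keeps the
+// tighter of each bound, i.e.
+//   Ranges.and(Range(Some(Bound(b, true)), Some(Bound(f, false))),
+//     Seq(Range(Some(Bound(a, true)), Some(Bound(d, false)))))
+//   == Seq(Range(Some(Bound(b, true)), Some(Bound(d, false))))
+// while disjoint ranges produce an empty result.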
+
+@InterfaceAudience.Private
+object Points {
+  def and(r: Range, ps: Seq[Array[Byte]]): Seq[Array[Byte]] = {
+    ps.flatMap { p =>
+      if (ord.compare(r.lower.get.b, p) <= 0) {
+        // if region lower bound is less or equal to the point
+        if (r.upper.isDefined) {
+          // if region upper bound is defined
+          if (ord.compare(r.upper.get.b, p) > 0) {
+            // if the upper bound is greater than the point (because upper bound is exclusive)
+            Some(p)
+          } else {
+            None
+          }
+        } else {
+          // if the region upper bound is not defined (infinity)
+          Some(p)
+        }
+      } else {
+        None
+      }
+    }
+  }
+}
diff --git a/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/DataTypeParserWrapper.scala b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/DataTypeParserWrapper.scala
new file mode 100644
index 0000000..c0ccc92
--- /dev/null
+++ b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/DataTypeParserWrapper.scala
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.spark.datasources
+
+import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
+import org.apache.spark.sql.types.DataType
+import org.apache.yetus.audience.InterfaceAudience
+
+@InterfaceAudience.Private
+trait DataTypeParser {
+  def parse(dataTypeString: String): DataType
+}
+
+@InterfaceAudience.Private
+object DataTypeParserWrapper extends DataTypeParser{
+  def parse(dataTypeString: String): DataType = CatalystSqlParser.parseDataType(dataTypeString)
+}
diff --git a/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/HBaseResources.scala b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/HBaseResources.scala
new file mode 100644
index 0000000..0f467a7
--- /dev/null
+++ b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/HBaseResources.scala
@@ -0,0 +1,171 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.spark.datasources
+
+import org.apache.yetus.audience.InterfaceAudience;
+import org.apache.hadoop.hbase.TableName
+import org.apache.hadoop.hbase.client._
+import org.apache.hadoop.hbase.spark.{HBaseConnectionKey, SmartConnection,
+  HBaseConnectionCache, HBaseRelation}
+import scala.language.implicitConversions
+
+// Resource and ReferencedResources are defined for extensibility,
+// e.g., consolidating scan and bulkGet in future work.
+
+// User has to invoke release explicitly to release the resource,
+// and potentially parent resources
+@InterfaceAudience.Private
+trait Resource {
+  def release(): Unit
+}
+
+@InterfaceAudience.Private
+case class ScanResource(tbr: TableResource, rs: ResultScanner) extends Resource {
+  def release() {
+    rs.close()
+    tbr.release()
+  }
+}
+
+@InterfaceAudience.Private
+case class GetResource(tbr: TableResource, rs: Array[Result]) extends Resource {
+  def release() {
+    tbr.release()
+  }
+}
+
+@InterfaceAudience.Private
+trait ReferencedResource {
+  var count: Int = 0
+  def init(): Unit
+  def destroy(): Unit
+  def acquire() = synchronized {
+    try {
+      count += 1
+      if (count == 1) {
+        init()
+      }
+    } catch {
+      case e: Throwable =>
+        release()
+        throw e
+    }
+  }
+
+  def release() = synchronized {
+    count -= 1
+    if (count == 0) {
+      destroy()
+    }
+  }
+
+  def releaseOnException[T](func: => T): T = {
+    acquire()
+    val ret = {
+      try {
+        func
+      } catch {
+        case e: Throwable =>
+          release()
+          throw e
+      }
+    }
+    ret
+  }
+}
+
+@InterfaceAudience.Private
+case class TableResource(relation: HBaseRelation) extends ReferencedResource {
+  var connection: SmartConnection = _
+  var table: Table = _
+
+  override def init(): Unit = {
+    connection = HBaseConnectionCache.getConnection(relation.hbaseConf)
+    table = connection.getTable(TableName.valueOf(relation.tableName))
+  }
+
+  override def destroy(): Unit = {
+    if (table != null) {
+      table.close()
+      table = null
+    }
+    if (connection != null) {
+      connection.close()
+      connection = null
+    }
+  }
+
+  def getScanner(scan: Scan): ScanResource = releaseOnException {
+    ScanResource(this, table.getScanner(scan))
+  }
+
+  def get(list: java.util.List[org.apache.hadoop.hbase.client.Get]) = releaseOnException {
+    GetResource(this, table.get(list))
+  }
+}
+
+@InterfaceAudience.Private
+case class RegionResource(relation: HBaseRelation) extends ReferencedResource {
+  var connection: SmartConnection = _
+  var rl: RegionLocator = _
+  val regions = releaseOnException {
+    val keys = rl.getStartEndKeys
+    keys.getFirst.zip(keys.getSecond)
+      .zipWithIndex
+      .map(x =>
+      HBaseRegion(x._2,
+        Some(x._1._1),
+        Some(x._1._2),
+        Some(rl.getRegionLocation(x._1._1).getHostname)))
+  }
+
+  override def init(): Unit = {
+    connection = HBaseConnectionCache.getConnection(relation.hbaseConf)
+    rl = connection.getRegionLocator(TableName.valueOf(relation.tableName))
+  }
+
+  override def destroy(): Unit = {
+    if (rl != null) {
+      rl.close()
+      rl = null
+    }
+    if (connection != null) {
+      connection.close()
+      connection = null
+    }
+  }
+}
+
+@InterfaceAudience.Private
+object HBaseResources{
+  implicit def ScanResToScan(sr: ScanResource): ResultScanner = {
+    sr.rs
+  }
+
+  implicit def GetResToResult(gr: GetResource): Array[Result] = {
+    gr.rs
+  }
+
+  implicit def TableResToTable(tr: TableResource): Table = {
+    tr.table
+  }
+
+  implicit def RegionResToRegions(rr: RegionResource): Seq[HBaseRegion] = {
+    rr.regions
+  }
+}
diff --git a/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/HBaseSparkConf.scala b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/HBaseSparkConf.scala
new file mode 100644
index 0000000..dc497f9
--- /dev/null
+++ b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/HBaseSparkConf.scala
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.spark.datasources
+
+import org.apache.yetus.audience.InterfaceAudience;
+
+/**
+ * These are the HBase-Spark configuration options. Users can either set them in SparkConf,
+ * which takes effect globally, or configure them per table, which overrides the values
+ * set in SparkConf. If not set, the default values take effect.
+ */
+@InterfaceAudience.Public
+object HBaseSparkConf{
+  /** Set to false to disable server-side caching of blocks for this scan;
+   *  false by default, since full table scans generate too much block cache churn.
+   */
+  val QUERY_CACHEBLOCKS = "hbase.spark.query.cacheblocks"
+  val DEFAULT_QUERY_CACHEBLOCKS = false
+  /** The number of rows for caching that will be passed to scan. */
+  val QUERY_CACHEDROWS = "hbase.spark.query.cachedrows"
+  /** Set the maximum number of values to return for each call to next() in scan. */
+  val QUERY_BATCHSIZE = "hbase.spark.query.batchsize"
+  /** The number of gets grouped into a single bulk get request sent to HBase. */
+  val BULKGET_SIZE = "hbase.spark.bulkget.size"
+  val DEFAULT_BULKGET_SIZE = 1000
+  /** Set to specify the location of hbase configuration file. */
+  val HBASE_CONFIG_LOCATION = "hbase.spark.config.location"
+  /** Set to specify whether to create a new HBaseContext or reuse the latest cached one. */
+  val USE_HBASECONTEXT = "hbase.spark.use.hbasecontext"
+  val DEFAULT_USE_HBASECONTEXT = true
+  /** Push the filter down to the data source engine to increase query performance. */
+  val PUSHDOWN_COLUMN_FILTER = "hbase.spark.pushdown.columnfilter"
+  val DEFAULT_PUSHDOWN_COLUMN_FILTER= true
+  /** Class name of the encoder, which encodes data types from Spark to HBase bytes. */
+  val QUERY_ENCODER = "hbase.spark.query.encoder"
+  val DEFAULT_QUERY_ENCODER = classOf[NaiveEncoder].getCanonicalName
+  /** The timestamp used to filter columns with a specific timestamp. */
+  val TIMESTAMP = "hbase.spark.query.timestamp"
+  /** The starting timestamp used to filter columns with a specific range of versions. */
+  val TIMERANGE_START = "hbase.spark.query.timerange.start"
+  /** The ending timestamp used to filter columns with a specific range of versions. */
+  val TIMERANGE_END =  "hbase.spark.query.timerange.end"
+  /** The maximum number of versions to return. */
+  val MAX_VERSIONS = "hbase.spark.query.maxVersions"
+  /** The delay, in milliseconds, before closing an hbase-spark connection that no longer has any references. */
+  val DEFAULT_CONNECTION_CLOSE_DELAY = 10 * 60 * 1000
+}
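+// A minimal usage sketch (assumes the Spark SQL DataFrame API and a `catalog` json string
+// built with HBaseTableCatalog; the format name follows the package of this data source):
+//
+//   val df = sqlContext.read
+//     .options(Map(HBaseTableCatalog.tableCatalog -> catalog,
+//       HBaseSparkConf.QUERY_CACHEDROWS -> "100",
+//       HBaseSparkConf.PUSHDOWN_COLUMN_FILTER -> "true"))
+//     .format("org.apache.hadoop.hbase.spark")
+//     .load()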
diff --git a/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/HBaseTableCatalog.scala b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/HBaseTableCatalog.scala
new file mode 100644
index 0000000..d2a8a3e
--- /dev/null
+++ b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/HBaseTableCatalog.scala
@@ -0,0 +1,372 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.spark.datasources
+
+import org.apache.avro.Schema
+import org.apache.yetus.audience.InterfaceAudience
+import org.apache.hadoop.hbase.spark.{Logging, SchemaConverters}
+import org.apache.hadoop.hbase.util.Bytes
+import org.apache.spark.sql.types._
+import org.json4s.jackson.JsonMethods._
+
+import scala.collection.mutable
+
+// The definition of each column cell, which may be a composite type
+// TODO: add avro support
+@InterfaceAudience.Private
+case class Field(
+    colName: String,
+    cf: String,
+    col: String,
+    sType: Option[String] = None,
+    avroSchema: Option[String] = None,
+    serdes: Option[SerDes]= None,
+    len: Int = -1) extends Logging {
+  override def toString = s"$colName $cf $col"
+  val isRowKey = cf == HBaseTableCatalog.rowKey
+  var start: Int = _
+  def schema: Option[Schema] = avroSchema.map { x =>
+    logDebug(s"avro: $x")
+    val p = new Schema.Parser
+    p.parse(x)
+  }
+
+  lazy val exeSchema = schema
+
+  // converter from avro to catalyst structure
+  lazy val avroToCatalyst: Option[Any => Any] = {
+    schema.map(SchemaConverters.createConverterToSQL(_))
+  }
+
+  // converter from catalyst to avro
+  lazy val catalystToAvro: (Any) => Any ={
+    SchemaConverters.createConverterToAvro(dt, colName, "recordNamespace")
+  }
+
+  def cfBytes: Array[Byte] = {
+    if (isRowKey) {
+      Bytes.toBytes("")
+    } else {
+      Bytes.toBytes(cf)
+    }
+  }
+  def colBytes: Array[Byte] = {
+    if (isRowKey) {
+      Bytes.toBytes("key")
+    } else {
+      Bytes.toBytes(col)
+    }
+  }
+
+  val dt = {
+    sType.map(DataTypeParserWrapper.parse(_)).getOrElse{
+      schema.map{ x=>
+        SchemaConverters.toSqlType(x).dataType
+      }.get
+    }
+  }
+
+  var length: Int = {
+    if (len == -1) {
+      dt match {
+        case BinaryType | StringType => -1
+        case BooleanType => Bytes.SIZEOF_BOOLEAN
+        case ByteType => 1
+        case DoubleType => Bytes.SIZEOF_DOUBLE
+        case FloatType => Bytes.SIZEOF_FLOAT
+        case IntegerType => Bytes.SIZEOF_INT
+        case LongType => Bytes.SIZEOF_LONG
+        case ShortType => Bytes.SIZEOF_SHORT
+        case _ => -1
+      }
+    } else {
+      len
+    }
+
+  }
+
+  override def equals(other: Any): Boolean = other match {
+    case that: Field =>
+      colName == that.colName && cf == that.cf && col == that.col
+    case _ => false
+  }
+}
+
+// The row key definition, with each key referring to a col defined in Field, e.g.,
+// key1:key2:key3
+@InterfaceAudience.Private
+case class RowKey(k: String) {
+  val keys = k.split(":")
+  var fields: Seq[Field] = _
+  var varLength = false
+  def length = {
+    if (varLength) {
+      -1
+    } else {
+      fields.foldLeft(0){case (x, y) =>
+        x + y.length
+      }
+    }
+  }
+}
+// The map between the column presented to Spark and the HBase field
+@InterfaceAudience.Private
+case class SchemaMap(map: mutable.HashMap[String, Field]) {
+  def toFields = map.map { case (name, field) =>
+    StructField(name, field.dt)
+  }.toSeq
+
+  def fields = map.values
+
+  def getField(name: String) = map(name)
+}
+
+
+// The definition of the mapping between the HBase table and the relation schema presented to Spark
+@InterfaceAudience.Private
+case class HBaseTableCatalog(
+     namespace: String,
+     name: String,
+     row: RowKey,
+     sMap: SchemaMap,
+     @transient params: Map[String, String]) extends Logging {
+  def toDataType = StructType(sMap.toFields)
+  def getField(name: String) = sMap.getField(name)
+  def getRowKey: Seq[Field] = row.fields
+  def getPrimaryKey= row.keys(0)
+  def getColumnFamilies = {
+    sMap.fields.map(_.cf).filter(_ != HBaseTableCatalog.rowKey).toSeq.distinct
+  }
+
+  def get(key: String) = params.get(key)
+
+  // Setup the start and length for each dimension of row key at runtime.
+  def dynSetupRowKey(rowKey: Array[Byte]) {
+    logDebug(s"length: ${rowKey.length}")
+    if(row.varLength) {
+      var start = 0
+      row.fields.foreach { f =>
+        logDebug(s"start: $start")
+        f.start = start
+        f.length = {
+          // If the length is not defined
+          if (f.length == -1) {
+            f.dt match {
+              case StringType =>
+                var pos = rowKey.indexOf(HBaseTableCatalog.delimiter, start)
+                if (pos == -1 || pos > rowKey.length) {
+                  // this is at the last dimension
+                  pos = rowKey.length
+                }
+                pos - start
+              // We don't know the length, assume it extends to the end of the rowkey.
+              case _ => rowKey.length - start
+            }
+          } else {
+            f.length
+          }
+        }
+        start += f.length
+      }
+    }
+  }
+
+  def initRowKey = {
+    val fields = sMap.fields.filter(_.cf == HBaseTableCatalog.rowKey)
+    row.fields = row.keys.flatMap(n => fields.find(_.col == n))
+    // The length is determined at run time if it is string or binary and the length is undefined.
+    if (row.fields.filter(_.length == -1).isEmpty) {
+      var start = 0
+      row.fields.foreach { f =>
+        f.start = start
+        start += f.length
+      }
+    } else {
+      row.varLength = true
+    }
+  }
+  initRowKey
+}
+
+@InterfaceAudience.Public
+object HBaseTableCatalog {
+  // If defined and larger than 3, a new table will be created with the number of regions specified.
+  val newTable = "newtable"
+  val regionStart = "regionStart"
+  val defaultRegionStart = "aaaaaaa"
+  val regionEnd = "regionEnd"
+  val defaultRegionEnd = "zzzzzzz"
+  // The json string specifying hbase catalog information
+  val tableCatalog = "catalog"
+  // The row key with format key1:key2 specifying table row key
+  val rowKey = "rowkey"
+  // The key for hbase table whose value specify namespace and table name
+  val table = "table"
+  // The namespace of hbase table
+  val nameSpace = "namespace"
+  // The name of hbase table
+  val tableName = "name"
+  // The name of columns in hbase catalog
+  val columns = "columns"
+  val cf = "cf"
+  val col = "col"
+  val `type` = "type"
+  // the name of avro schema json string
+  val avro = "avro"
+  val delimiter: Byte = 0
+  val serdes = "serdes"
+  val length = "length"
+
+  /**
+    * User-provided table schema definition, e.g.
+    * {"tablename":"name", "rowkey":"key1:key2",
+    * "columns":{"col1":{"cf":"cf1", "col":"col1", "type":"type1"},
+    * "col2":{"cf":"cf2", "col":"col2", "type":"type2"}}}
+    * Note that for every col in the rowkey, there has to be a corresponding col defined in columns
+    */
+  def apply(params: Map[String, String]): HBaseTableCatalog = {
+    val parameters = convert(params)
+    //  println(jString)
+    val jString = parameters(tableCatalog)
+    val map = parse(jString).values.asInstanceOf[Map[String, _]]
+    val tableMeta = map.get(table).get.asInstanceOf[Map[String, _]]
+    val nSpace = tableMeta.get(nameSpace).getOrElse("default").asInstanceOf[String]
+    val tName = tableMeta.get(tableName).get.asInstanceOf[String]
+    val cIter = map.get(columns).get.asInstanceOf[Map[String, Map[String, String]]].toIterator
+    val schemaMap = mutable.HashMap.empty[String, Field]
+    cIter.foreach { case (name, column) =>
+      val sd = {
+        column.get(serdes).asInstanceOf[Option[String]].map(n =>
+          Class.forName(n).newInstance().asInstanceOf[SerDes]
+        )
+      }
+      val len = column.get(length).map(_.toInt).getOrElse(-1)
+      val sAvro = column.get(avro).map(parameters(_))
+      val f = Field(name, column.getOrElse(cf, rowKey),
+        column.get(col).get,
+        column.get(`type`),
+        sAvro, sd, len)
+      schemaMap.+=((name, f))
+    }
+    val rKey = RowKey(map.get(rowKey).get.asInstanceOf[String])
+    HBaseTableCatalog(nSpace, tName, rKey, SchemaMap(schemaMap), parameters)
+  }
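+  // A minimal usage sketch (table and column names are illustrative), following the json
+  // layout described above:
+  //
+  //   val catalog = s"""{
+  //     |"table":{"namespace":"default", "name":"htable"},
+  //     |"rowkey":"key1",
+  //     |"columns":{
+  //     |"col1":{"cf":"rowkey", "col":"key1", "type":"string"},
+  //     |"col2":{"cf":"cf1", "col":"c1", "type":"int"}
+  //     |}}""".stripMargin
+  //   val cat = HBaseTableCatalog(Map(HBaseTableCatalog.tableCatalog -> catalog))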
+
+  val TABLE_KEY: String = "hbase.table"
+  val SCHEMA_COLUMNS_MAPPING_KEY: String = "hbase.columns.mapping"
+
+  /* For backward compatibility: convert the old definition to the new json-based definition, formatted as below
+    val catalog = s"""{
+                      |"table":{"namespace":"default", "name":"htable"},
+                      |"rowkey":"key1:key2",
+                      |"columns":{
+                      |"col1":{"cf":"rowkey", "col":"key1", "type":"string"},
+                      |"col2":{"cf":"rowkey", "col":"key2", "type":"double"},
+                      |"col3":{"cf":"cf1", "col":"col2", "type":"binary"},
+                      |"col4":{"cf":"cf1", "col":"col3", "type":"timestamp"},
+                      |"col5":{"cf":"cf1", "col":"col4", "type":"double", "serdes":"${classOf[DoubleSerDes].getName}"},
+                      |"col6":{"cf":"cf1", "col":"col5", "type":"$map"},
+                      |"col7":{"cf":"cf1", "col":"col6", "type":"$array"},
+                      |"col8":{"cf":"cf1", "col":"col7", "type":"$arrayMap"}
+                      |}
+                      |}""".stripMargin
+   */
+  @deprecated("Please use new json format to define HBaseCatalog")
+  // TODO: There is no need to deprecate since this is the first release.
+  def convert(parameters: Map[String, String]): Map[String, String] = {
+    val tableName = parameters.get(TABLE_KEY).getOrElse(null)
+    // if the hbase.table is not defined, we assume it is json format already.
+    if (tableName == null) return parameters
+    val schemaMappingString = parameters.getOrElse(SCHEMA_COLUMNS_MAPPING_KEY, "")
+    import scala.collection.JavaConverters._
+    val schemaMap = generateSchemaMappingMap(schemaMappingString).asScala.map(_._2.asInstanceOf[SchemaQualifierDefinition])
+
+    val rowkey = schemaMap.filter {
+      _.columnFamily == "rowkey"
+    }.map(_.columnName)
+    val cols = schemaMap.map { x =>
+      s""""${x.columnName}":{"cf":"${x.columnFamily}", "col":"${x.qualifier}", "type":"${x.colType}"}""".stripMargin
+    }
+    val jsonCatalog =
+      s"""{
+         |"table":{"namespace":"default", "name":"${tableName}"},
+         |"rowkey":"${rowkey.mkString(":")}",
+         |"columns":{
+         |${cols.mkString(",")}
+         |}
+         |}
+       """.stripMargin
+    parameters ++ Map(HBaseTableCatalog.tableCatalog->jsonCatalog)
+  }
+
+  /**
+    * Reads the SCHEMA_COLUMNS_MAPPING_KEY and converts it to a map of
+    * SchemaQualifierDefinitions with the original sql column name as the key
+    *
+    * @param schemaMappingString The schema mapping string from the SparkSQL map
+    * @return                    A map of definitions keyed by the SparkSQL column name
+    */
+  @InterfaceAudience.Private
+  def generateSchemaMappingMap(schemaMappingString:String):
+  java.util.HashMap[String, SchemaQualifierDefinition] = {
+    println(schemaMappingString)
+    try {
+      val columnDefinitions = schemaMappingString.split(',')
+      val resultingMap = new java.util.HashMap[String, SchemaQualifierDefinition]()
+      columnDefinitions.map(cd => {
+        val parts = cd.trim.split(' ')
+
+        //Make sure we get three parts
+        //<ColumnName> <ColumnType> <ColumnFamily:Qualifier>
+        if (parts.length == 3) {
+          val hbaseDefinitionParts = if (parts(2).charAt(0) == ':') {
+            Array[String]("rowkey", parts(0))
+          } else {
+            parts(2).split(':')
+          }
+          resultingMap.put(parts(0), new SchemaQualifierDefinition(parts(0),
+            parts(1), hbaseDefinitionParts(0), hbaseDefinitionParts(1)))
+        } else {
+          throw new IllegalArgumentException("Invalid value for schema mapping '" + cd +
+            "' should be '<columnName> <columnType> <columnFamily>:<qualifier>' " +
+            "for columns and '<columnName> <columnType> :<qualifier>' for rowKeys")
+        }
+      })
+      resultingMap
+    } catch {
+      case e:Exception => throw
+        new IllegalArgumentException("Invalid value for " + SCHEMA_COLUMNS_MAPPING_KEY +
+          " '" +
+          schemaMappingString + "'", e )
+    }
+  }
+}
+
+/**
+  * Construct that contains column data spanning SparkSQL and HBase
+  *
+  * @param columnName   SparkSQL column name
+  * @param colType      SparkSQL column type
+  * @param columnFamily HBase column family
+  * @param qualifier    HBase qualifier name
+  */
+@InterfaceAudience.Private
+case class SchemaQualifierDefinition(columnName:String,
+    colType:String,
+    columnFamily:String,
+    qualifier:String)
diff --git a/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/HBaseTableScanRDD.scala b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/HBaseTableScanRDD.scala
new file mode 100644
index 0000000..6c06811
--- /dev/null
+++ b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/HBaseTableScanRDD.scala
@@ -0,0 +1,311 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.spark.datasources
+
+import java.util.ArrayList
+
+import org.apache.yetus.audience.InterfaceAudience;
+import org.apache.hadoop.hbase.client._
+import org.apache.hadoop.hbase.spark._
+import org.apache.hadoop.hbase.spark.hbase._
+import org.apache.hadoop.hbase.spark.datasources.HBaseResources._
+import org.apache.hadoop.hbase.util.ShutdownHookManager
+import org.apache.spark.{SparkEnv, TaskContext, Partition}
+import org.apache.spark.rdd.RDD
+
+import scala.collection.mutable
+
+@InterfaceAudience.Private
+class HBaseTableScanRDD(relation: HBaseRelation,
+                       val hbaseContext: HBaseContext,
+                       @transient val filter: Option[SparkSQLPushDownFilter] = None,
+                        val columns: Seq[Field] = Seq.empty
+     ) extends RDD[Result](relation.sqlContext.sparkContext, Nil)
+  {
+  private def sparkConf = SparkEnv.get.conf
+  @transient var ranges = Seq.empty[Range]
+  @transient var points = Seq.empty[Array[Byte]]
+  def addPoint(p: Array[Byte]) {
+    points :+= p
+  }
+
+  def addRange(r: ScanRange) = {
+    val lower = if (r.lowerBound != null && r.lowerBound.length > 0) {
+      Some(Bound(r.lowerBound, r.isLowerBoundEqualTo))
+    } else {
+      None
+    }
+    val upper = if (r.upperBound != null && r.upperBound.length > 0) {
+      if (!r.isUpperBoundEqualTo) {
+        Some(Bound(r.upperBound, false))
+      } else {
+
+        // HBase stopRow is exclusive, so by default it does NOT act like
+        // isUpperBoundEqualTo. We therefore append an extra byte to the stopRow key.
+        val newArray = new Array[Byte](r.upperBound.length + 1)
+        System.arraycopy(r.upperBound, 0, newArray, 0, r.upperBound.length)
+
+        // The appended byte makes the stop row sort after the inclusive upper bound
+        newArray(r.upperBound.length) = ByteMin
+        Some(Bound(newArray, false))
+      }
+    } else {
+      None
+    }
+    ranges :+= Range(lower, upper)
+  }
+
+  override def getPartitions: Array[Partition] = {
+    val regions = RegionResource(relation)
+    var idx = 0
+    logDebug(s"There are ${regions.size} regions")
+    val ps = regions.flatMap { x =>
+      val rs = Ranges.and(Range(x), ranges)
+      val ps = Points.and(Range(x), points)
+      if (rs.size > 0 || ps.size > 0) {
+        if(log.isDebugEnabled) {
+          rs.foreach(x => logDebug(x.toString))
+        }
+        idx += 1
+        Some(HBaseScanPartition(idx - 1, x, rs, ps, SerializedFilter.toSerializedTypedFilter(filter)))
+      } else {
+        None
+      }
+    }.toArray
+    regions.release()
+    ShutdownHookManager.affixShutdownHook( new Thread() {
+      override def run() {
+        HBaseConnectionCache.close()
+      }
+    }, 0)
+    ps.asInstanceOf[Array[Partition]]
+  }
+
+  override def getPreferredLocations(split: Partition): Seq[String] = {
+    split.asInstanceOf[HBaseScanPartition].regions.server.map {
+      identity
+    }.toSeq
+  }
+
+  private def buildGets(
+      tbr: TableResource,
+      g: Seq[Array[Byte]],
+      filter: Option[SparkSQLPushDownFilter],
+      columns: Seq[Field],
+      hbaseContext: HBaseContext): Iterator[Result] = {
+    g.grouped(relation.bulkGetSize).flatMap{ x =>
+      val gets = new ArrayList[Get](x.size)
+      x.foreach{ y =>
+        val g = new Get(y)
+        handleTimeSemantics(g)
+        columns.foreach { d =>
+          if (!d.isRowKey) {
+            g.addColumn(d.cfBytes, d.colBytes)
+          }
+        }
+        filter.foreach(g.setFilter(_))
+        gets.add(g)
+      }
+      hbaseContext.applyCreds()
+      val tmp = tbr.get(gets)
+      rddResources.addResource(tmp)
+      toResultIterator(tmp)
+    }
+  }
+
+  private def toResultIterator(result: GetResource): Iterator[Result] = {
+    val iterator = new Iterator[Result] {
+      var idx = 0
+      var cur: Option[Result] = None
+      override def hasNext: Boolean = {
+        while(idx < result.length && cur.isEmpty) {
+          val r = result(idx)
+          idx += 1
+          if (!r.isEmpty) {
+            cur = Some(r)
+          }
+        }
+        if (cur.isEmpty) {
+          rddResources.release(result)
+        }
+        cur.isDefined
+      }
+      override def next(): Result = {
+        hasNext
+        val ret = cur.get
+        cur = None
+        ret
+      }
+    }
+    iterator
+  }
+
+  private def buildScan(range: Range,
+      filter: Option[SparkSQLPushDownFilter],
+      columns: Seq[Field]): Scan = {
+    val scan = (range.lower, range.upper) match {
+      case (Some(Bound(a, b)), Some(Bound(c, d))) => new Scan(a, c)
+      case (None, Some(Bound(c, d))) => new Scan(Array[Byte](), c)
+      case (Some(Bound(a, b)), None) => new Scan(a)
+      case (None, None) => new Scan()
+    }
+    handleTimeSemantics(scan)
+
+    columns.foreach { d =>
+      if (!d.isRowKey) {
+        scan.addColumn(d.cfBytes, d.colBytes)
+      }
+    }
+    scan.setCacheBlocks(relation.blockCacheEnable)
+    scan.setBatch(relation.batchNum)
+    scan.setCaching(relation.cacheSize)
+    filter.foreach(scan.setFilter(_))
+    scan
+  }
+  private def toResultIterator(scanner: ScanResource): Iterator[Result] = {
+    val iterator = new Iterator[Result] {
+      var cur: Option[Result] = None
+      override def hasNext: Boolean = {
+        if (cur.isEmpty) {
+          val r = scanner.next()
+          if (r == null) {
+            rddResources.release(scanner)
+          } else {
+            cur = Some(r)
+          }
+        }
+        cur.isDefined
+      }
+      override def next(): Result = {
+        hasNext
+        val ret = cur.get
+        cur = None
+        ret
+      }
+    }
+    iterator
+  }
+
+  lazy val rddResources = RDDResources(new mutable.HashSet[Resource]())
+
+  private def close() {
+    rddResources.release()
+  }
+
+  override def compute(split: Partition, context: TaskContext): Iterator[Result] = {
+    val partition = split.asInstanceOf[HBaseScanPartition]
+    val filter = SerializedFilter.fromSerializedFilter(partition.sf)
+    val scans = partition.scanRanges
+      .map(buildScan(_, filter, columns))
+    val tableResource = TableResource(relation)
+    context.addTaskCompletionListener(context => close())
+    val points = partition.points
+    val gIt: Iterator[Result] =  {
+      if (points.isEmpty) {
+        Iterator.empty: Iterator[Result]
+      } else {
+        buildGets(tableResource, points, filter, columns, hbaseContext)
+      }
+    }
+    val rIts = scans.par
+      .map { scan =>
+      hbaseContext.applyCreds()
+      val scanner = tableResource.getScanner(scan)
+      rddResources.addResource(scanner)
+      scanner
+    }.map(toResultIterator(_))
+      .fold(Iterator.empty: Iterator[Result]){ case (x, y) =>
+      x ++ y
+    } ++ gIt
+    ShutdownHookManager.affixShutdownHook( new Thread() {
+      override def run() {
+        HBaseConnectionCache.close()
+      }
+    }, 0)
+    rIts
+  }
+
+  private def handleTimeSemantics(query: Query): Unit = {
+    // Set timestamp related values if present
+    (query, relation.timestamp, relation.minTimestamp, relation.maxTimestamp)  match {
+      case (q: Scan, Some(ts), None, None) => q.setTimeStamp(ts)
+      case (q: Get, Some(ts), None, None) => q.setTimeStamp(ts)
+
+      case (q:Scan, None, Some(minStamp), Some(maxStamp)) => q.setTimeRange(minStamp, maxStamp)
+      case (q:Get, None, Some(minStamp), Some(maxStamp)) => q.setTimeRange(minStamp, maxStamp)
+
+      case (q, None, None, None) =>
+
+      case _ => throw new IllegalArgumentException(s"Invalid combination of query/timestamp/time range provided. " +
+        s"timeStamp is: ${relation.timestamp.get}, minTimeStamp is: ${relation.minTimestamp.get}, " +
+        s"maxTimeStamp is: ${relation.maxTimestamp.get}")
+    }
+    if (relation.maxVersions.isDefined) {
+      query match {
+        case q: Scan => q.setMaxVersions(relation.maxVersions.get)
+        case q: Get => q.setMaxVersions(relation.maxVersions.get)
+        case _ => throw new IllegalArgumentException("Invalid query provided with maxVersions")
+      }
+    }
+  }
+}
+
+@InterfaceAudience.Private
+case class SerializedFilter(b: Option[Array[Byte]])
+
+object SerializedFilter {
+  def toSerializedTypedFilter(f: Option[SparkSQLPushDownFilter]): SerializedFilter = {
+    SerializedFilter(f.map(_.toByteArray))
+  }
+
+  def fromSerializedFilter(sf: SerializedFilter): Option[SparkSQLPushDownFilter] = {
+    sf.b.map(SparkSQLPushDownFilter.parseFrom(_))
+  }
+}
+
+@InterfaceAudience.Private
+private[hbase] case class HBaseRegion(
+    override val index: Int,
+    val start: Option[HBaseType] = None,
+    val end: Option[HBaseType] = None,
+    val server: Option[String] = None) extends Partition
+
+@InterfaceAudience.Private
+private[hbase] case class HBaseScanPartition(
+    override val index: Int,
+    val regions: HBaseRegion,
+    val scanRanges: Seq[Range],
+    val points: Seq[Array[Byte]],
+    val sf: SerializedFilter) extends Partition
+
+@InterfaceAudience.Private
+case class RDDResources(set: mutable.HashSet[Resource]) {
+  def addResource(s: Resource) {
+    set += s
+  }
+  def release() {
+    set.foreach(release(_))
+  }
+  def release(rs: Resource) {
+    try {
+      rs.release()
+    } finally {
+      set.remove(rs)
+    }
+  }
+}
diff --git a/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/JavaBytesEncoder.scala b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/JavaBytesEncoder.scala
new file mode 100644
index 0000000..95d4547
--- /dev/null
+++ b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/JavaBytesEncoder.scala
@@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.spark.datasources
+
+import org.apache.hadoop.hbase.HBaseInterfaceAudience;
+import org.apache.hadoop.hbase.spark.Logging
+import org.apache.yetus.audience.InterfaceAudience;
+import org.apache.yetus.audience.InterfaceStability;
+import org.apache.hadoop.hbase.spark.datasources.JavaBytesEncoder.JavaBytesEncoder
+import org.apache.hadoop.hbase.util.Bytes
+import org.apache.spark.sql.types._
+
+/**
+  * The ranges for the data type whose size is known. Whether the bound is inclusive
+  * or exclusive is undefined, and it is up to the caller to decide.
+  *
+  * @param low: the lower bound of the range.
+  * @param upper: the upper bound of the range.
+  */
+@InterfaceAudience.LimitedPrivate(Array(HBaseInterfaceAudience.SPARK))
+@InterfaceStability.Evolving
+case class BoundRange(low: Array[Byte], upper: Array[Byte])
+
+/**
+  * The class identifies the ranges for a java primitive type. The caller needs
+  * to decide on its own whether each bound is inclusive or exclusive.
+  *
+  * @param less: the set of ranges for LessThan/LessOrEqualThan
+  * @param greater: the set of ranges for GreaterThan/GreaterThanOrEqualTo
+  * @param value: the byte array of the original value
+  */
+@InterfaceAudience.LimitedPrivate(Array(HBaseInterfaceAudience.SPARK))
+@InterfaceStability.Evolving
+case class BoundRanges(less: Array[BoundRange], greater: Array[BoundRange], value: Array[Byte])
+
+/**
+  * The trait to support plugin architecture for different encoder/decoder.
+  * encode is used for serializing the data type to a byte array, and filter is
+  * used to filter out unnecessary records.
+  */
+@InterfaceAudience.LimitedPrivate(Array(HBaseInterfaceAudience.SPARK))
+@InterfaceStability.Evolving
+trait BytesEncoder {
+  def encode(dt: DataType, value: Any): Array[Byte]
+
+  /**
+    * The function performing real filtering operations. The format of filterBytes depends on the
+    * implementation of the BytesEncoder.
+    *
+    * @param input: the current input byte array that needs to be filtered out
+    * @param offset1: the starting offset of the input byte array.
+    * @param length1: the length of the input byte array.
+    * @param filterBytes: the byte array provided by query condition.
+    * @param offset2: the starting offset in the filterBytes.
+    * @param length2: the length of the bytes in the filterBytes
+    * @param ops: The operation of the filter operator.
+    * @return true: the record satisfies the predicates
+    *         false: the record does not satisfy the predicates.
+    */
+  def filter(input: Array[Byte], offset1: Int, length1: Int,
+             filterBytes: Array[Byte], offset2: Int, length2: Int,
+             ops: JavaBytesEncoder): Boolean
+
+  /**
+    * Currently, it is used for partition pruning.
+    * For some codecs, the order may be inconsistent between a java primitive
+    * type and its byte array. We may have to split the predicates on some
+    * of the java primitive types into multiple predicates.
+    *
+    * For example, in the naive codec, some of the java primitive types have to be
+    * split into multiple predicates, and these predicates are unioned together so
+    * that the filtering is performed correctly.
+    * For example, if we have "COLUMN < 2", we will transform it into
+    * "0 <= COLUMN < 2 OR Integer.MIN_VALUE <= COLUMN <= -1"
+    */
+  def ranges(in: Any): Option[BoundRanges]
+}
+
+@InterfaceAudience.LimitedPrivate(Array(HBaseInterfaceAudience.SPARK))
+@InterfaceStability.Evolving
+object JavaBytesEncoder extends Enumeration with Logging{
+  type JavaBytesEncoder = Value
+  val Greater, GreaterEqual, Less, LessEqual, Equal, Unknown = Value
+
+  /**
+    * create the encoder/decoder
+    *
+    * @param clsName: the class name of the encoder/decoder class
+    * @return the instance of the encoder plugin.
+    */
+  def create(clsName: String): BytesEncoder = {
+    try {
+      Class.forName(clsName).newInstance.asInstanceOf[BytesEncoder]
+    } catch {
+      case _: Throwable =>
+        logWarning(s"$clsName cannot be initiated, falling back to naive encoder")
+        new NaiveEncoder()
+    }
+  }
+}
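
A minimal, illustrative sketch (object name is hypothetical, not part of the patch) of how the
plugin API above might be exercised: create() loads the configured encoder, and ranges() reports
the split ranges for a predicate value such as the "COLUMN < 2" case described in the comments.

  import org.apache.hadoop.hbase.spark.datasources.{BoundRanges, JavaBytesEncoder}
  import org.apache.hadoop.hbase.util.Bytes

  object EncoderRangesSketch {
    def main(args: Array[String]): Unit = {
      // create() falls back to NaiveEncoder if the class cannot be instantiated.
      val encoder = JavaBytesEncoder.create(
        "org.apache.hadoop.hbase.spark.datasources.NaiveEncoder")

      // For "COLUMN < 2" the naive codec cannot rely on byte-array order alone, so
      // ranges(2) reports both [0, 2] and [Integer.MIN_VALUE, -1] on the "less" side.
      encoder.ranges(2) match {
        case Some(BoundRanges(less, _, _)) =>
          less.foreach(r => println(Bytes.toInt(r.low) + " .. " + Bytes.toInt(r.upper)))
        case None => println("unsupported type")
      }
    }
  }
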
diff --git a/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/NaiveEncoder.scala b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/NaiveEncoder.scala
new file mode 100644
index 0000000..a2a6828
--- /dev/null
+++ b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/NaiveEncoder.scala
@@ -0,0 +1,261 @@
+package org.apache.hadoop.hbase.spark.datasources
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.yetus.audience.InterfaceAudience;
+import org.apache.hadoop.hbase.spark.datasources.JavaBytesEncoder.JavaBytesEncoder
+import org.apache.hadoop.hbase.spark.Logging
+import org.apache.hadoop.hbase.spark.hbase._
+import org.apache.hadoop.hbase.util.Bytes
+import org.apache.spark.sql.types._
+import org.apache.spark.unsafe.types.UTF8String
+
+
+/**
+  * This is the naive, non-order-preserving encoder/decoder.
+  * Because the ordering of Java primitive types is inconsistent with the
+  * ordering of their byte-array representations, the data type has to be
+  * passed in so that the filter can work correctly, which is done by encoding
+  * the type into the first byte of the serialized array.
+  */
+@InterfaceAudience.Private
+class NaiveEncoder extends BytesEncoder with Logging {
+  var code = 0
+  def nextCode: Byte = {
+    code += 1
+    (code - 1).asInstanceOf[Byte]
+  }
+  val BooleanEnc = nextCode
+  val ShortEnc = nextCode
+  val IntEnc = nextCode
+  val LongEnc = nextCode
+  val FloatEnc = nextCode
+  val DoubleEnc = nextCode
+  val StringEnc = nextCode
+  val BinaryEnc = nextCode
+  val TimestampEnc = nextCode
+  val UnknownEnc = nextCode
+
+
+  /**
+    * Evaluate the Java primitive value and return its BoundRanges. A single value may map to
+    * multiple output ranges because the ordering of a Java primitive type is inconsistent
+    * with the ordering of its byte-array representation.
+    *
+    * For short, integer, and long, the numeric order is consistent with the byte-array order
+    * when two numbers have the same sign bit, but negative numbers sort after positive
+    * numbers in byte-array order.
+    *
+    * For double and float, positive numbers sort consistently with their byte-array order,
+    * while negative numbers sort in the reverse of their byte-array order. Please refer to
+    * IEEE 754 and https://en.wikipedia.org/wiki/Single-precision_floating-point_format
+    */
+  def ranges(in: Any): Option[BoundRanges] = in match {
+    case a: Integer =>
+      val b =  Bytes.toBytes(a)
+      if (a >= 0) {
+        logDebug(s"range is 0 to $a and ${Integer.MIN_VALUE} to -1")
+        Some(BoundRanges(
+          Array(BoundRange(Bytes.toBytes(0: Int), b),
+            BoundRange(Bytes.toBytes(Integer.MIN_VALUE),  Bytes.toBytes(-1: Int))),
+          Array(BoundRange(b,  Bytes.toBytes(Integer.MAX_VALUE))), b))
+      } else {
+        Some(BoundRanges(
+          Array(BoundRange(Bytes.toBytes(Integer.MIN_VALUE), b)),
+          Array(BoundRange(b, Bytes.toBytes(-1: Integer)),
+            BoundRange(Bytes.toBytes(0: Int), Bytes.toBytes(Integer.MAX_VALUE))), b))
+      }
+    case a: Long =>
+      val b =  Bytes.toBytes(a)
+      if (a >= 0) {
+        Some(BoundRanges(
+          Array(BoundRange(Bytes.toBytes(0: Long), b),
+            BoundRange(Bytes.toBytes(Long.MinValue),  Bytes.toBytes(-1: Long))),
+          Array(BoundRange(b,  Bytes.toBytes(Long.MaxValue))), b))
+      } else {
+        Some(BoundRanges(
+          Array(BoundRange(Bytes.toBytes(Long.MinValue), b)),
+          Array(BoundRange(b, Bytes.toBytes(-1: Long)),
+            BoundRange(Bytes.toBytes(0: Long), Bytes.toBytes(Long.MaxValue))), b))
+      }
+    case a: Short =>
+      val b =  Bytes.toBytes(a)
+      if (a >= 0) {
+        Some(BoundRanges(
+          Array(BoundRange(Bytes.toBytes(0: Short), b),
+            BoundRange(Bytes.toBytes(Short.MinValue),  Bytes.toBytes(-1: Short))),
+          Array(BoundRange(b,  Bytes.toBytes(Short.MaxValue))), b))
+      } else {
+        Some(BoundRanges(
+          Array(BoundRange(Bytes.toBytes(Short.MinValue), b)),
+          Array(BoundRange(b, Bytes.toBytes(-1: Short)),
+            BoundRange(Bytes.toBytes(0: Short), Bytes.toBytes(Short.MaxValue))), b))
+      }
+    case a: Double =>
+      val b =  Bytes.toBytes(a)
+      if (a >= 0.0f) {
+        Some(BoundRanges(
+          Array(BoundRange(Bytes.toBytes(0.0d), b),
+            BoundRange(Bytes.toBytes(-0.0d),  Bytes.toBytes(Double.MinValue))),
+          Array(BoundRange(b,  Bytes.toBytes(Double.MaxValue))), b))
+      } else {
+        Some(BoundRanges(
+          Array(BoundRange(b, Bytes.toBytes(Double.MinValue))),
+          Array(BoundRange(Bytes.toBytes(-0.0d), b),
+            BoundRange(Bytes.toBytes(0.0d), Bytes.toBytes(Double.MaxValue))), b))
+      }
+    case a: Float =>
+      val b =  Bytes.toBytes(a)
+      if (a >= 0.0f) {
+        Some(BoundRanges(
+          Array(BoundRange(Bytes.toBytes(0.0f), b),
+            BoundRange(Bytes.toBytes(-0.0f),  Bytes.toBytes(Float.MinValue))),
+          Array(BoundRange(b,  Bytes.toBytes(Float.MaxValue))), b))
+      } else {
+        Some(BoundRanges(
+          Array(BoundRange(b, Bytes.toBytes(Float.MinValue))),
+          Array(BoundRange(Bytes.toBytes(-0.0f), b),
+            BoundRange(Bytes.toBytes(0.0f), Bytes.toBytes(Float.MaxValue))), b))
+      }
+    case a: Array[Byte] =>
+      Some(BoundRanges(
+        Array(BoundRange(bytesMin, a)),
+        Array(BoundRange(a, bytesMax)), a))
+    case a: Byte =>
+      val b =  Array(a)
+      Some(BoundRanges(
+        Array(BoundRange(bytesMin, b)),
+        Array(BoundRange(b, bytesMax)), b))
+    case a: String =>
+      val b =  Bytes.toBytes(a)
+      Some(BoundRanges(
+        Array(BoundRange(bytesMin, b)),
+        Array(BoundRange(b, bytesMax)), b))
+    case a: UTF8String =>
+      val b = a.getBytes
+      Some(BoundRanges(
+        Array(BoundRange(bytesMin, b)),
+        Array(BoundRange(b, bytesMax)), b))
+    case _ => None
+  }
+
+  def compare(c: Int, ops: JavaBytesEncoder): Boolean = {
+    ops match {
+      case JavaBytesEncoder.Greater =>  c > 0
+      case JavaBytesEncoder.GreaterEqual =>  c >= 0
+      case JavaBytesEncoder.Less =>  c < 0
+      case JavaBytesEncoder.LessEqual =>  c <= 0
+    }
+  }
+
+  /**
+    * Encode the value into a byte array. Note that this is a naive implementation which
+    * prepends the data-type byte to the head of the serialized byte array.
+    *
+    * @param dt: The data type of the input
+    * @param value: the value of the input
+    * @return the byte array with the first byte indicating the data type.
+    */
+  override def encode(dt: DataType,
+                      value: Any): Array[Byte] = {
+    dt match {
+      case BooleanType =>
+        val result = new Array[Byte](Bytes.SIZEOF_BOOLEAN + 1)
+        result(0) = BooleanEnc
+        value.asInstanceOf[Boolean] match {
+          case true => result(1) = -1: Byte
+          case false => result(1) = 0: Byte
+        }
+        result
+      case ShortType =>
+        val result = new Array[Byte](Bytes.SIZEOF_SHORT + 1)
+        result(0) = ShortEnc
+        Bytes.putShort(result, 1, value.asInstanceOf[Short])
+        result
+      case IntegerType =>
+        val result = new Array[Byte](Bytes.SIZEOF_INT + 1)
+        result(0) = IntEnc
+        Bytes.putInt(result, 1, value.asInstanceOf[Int])
+        result
+      case LongType|TimestampType =>
+        val result = new Array[Byte](Bytes.SIZEOF_LONG + 1)
+        result(0) = LongEnc
+        Bytes.putLong(result, 1, value.asInstanceOf[Long])
+        result
+      case FloatType =>
+        val result = new Array[Byte](Bytes.SIZEOF_FLOAT + 1)
+        result(0) = FloatEnc
+        Bytes.putFloat(result, 1, value.asInstanceOf[Float])
+        result
+      case DoubleType =>
+        val result = new Array[Byte](Bytes.SIZEOF_DOUBLE + 1)
+        result(0) = DoubleEnc
+        Bytes.putDouble(result, 1, value.asInstanceOf[Double])
+        result
+      case BinaryType =>
+        val v = value.asInstanceOf[Array[Byte]]
+        val result = new Array[Byte](v.length + 1)
+        result(0) = BinaryEnc
+        System.arraycopy(v, 0, result, 1, v.length)
+        result
+      case StringType =>
+        val bytes = Bytes.toBytes(value.asInstanceOf[String])
+        val result = new Array[Byte](bytes.length + 1)
+        result(0) = StringEnc
+        System.arraycopy(bytes, 0, result, 1, bytes.length)
+        result
+      case _ =>
+        val bytes = Bytes.toBytes(value.toString)
+        val result = new Array[Byte](bytes.length + 1)
+        result(0) = UnknownEnc
+        System.arraycopy(bytes, 0, result, 1, bytes.length)
+        result
+    }
+  }
+
+  override def filter(input: Array[Byte], offset1: Int, length1: Int,
+                      filterBytes: Array[Byte], offset2: Int, length2: Int,
+                      ops: JavaBytesEncoder): Boolean = {
+    filterBytes(offset2) match {
+      case ShortEnc =>
+        val in = Bytes.toShort(input, offset1)
+        val value = Bytes.toShort(filterBytes, offset2 + 1)
+        compare(in.compareTo(value), ops)
+      case IntEnc =>
+        val in = Bytes.toInt(input, offset1)
+        val value = Bytes.toInt(filterBytes, offset2 + 1)
+        compare(in.compareTo(value), ops)
+      case LongEnc | TimestampEnc =>
+        val in = Bytes.toLong(input, offset1)
+        val value = Bytes.toLong(filterBytes, offset2 + 1)
+        compare(in.compareTo(value), ops)
+      case FloatEnc =>
+        val in = Bytes.toFloat(input, offset1)
+        val value = Bytes.toFloat(filterBytes, offset2 + 1)
+        compare(in.compareTo(value), ops)
+      case DoubleEnc =>
+        val in = Bytes.toDouble(input, offset1)
+        val value = Bytes.toDouble(filterBytes, offset2 + 1)
+        compare(in.compareTo(value), ops)
+      case _ =>
+        // for String, Byte, Binary, Boolean and other types
+        // we can use the order of byte array directly.
+        compare(
+          Bytes.compareTo(input, offset1, length1, filterBytes, offset2 + 1, length2 - 1), ops)
+    }
+  }
+}
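
A rough sketch (hypothetical object name, illustrative values) of the byte layout the naive
encoder produces and of how filter() consumes it; the cell side is passed with an offset of 1
here only because it was produced by encode() and therefore carries the leading type byte.

  import org.apache.hadoop.hbase.spark.datasources.{JavaBytesEncoder, NaiveEncoder}
  import org.apache.spark.sql.types.IntegerType

  object NaiveEncoderSketch {
    def main(args: Array[String]): Unit = {
      val enc = new NaiveEncoder()

      // encode() prepends one type byte, so an Int becomes 1 + 4 = 5 bytes.
      val filterBytes = enc.encode(IntegerType, 10)  // the predicate value
      val cellBytes = enc.encode(IntegerType, 7)     // stands in for a cell value
      println(filterBytes.length)                    // 5

      // The first byte of filterBytes tells filter() how to decode both sides;
      // here it checks whether 7 < 10 for a LessThan predicate.
      val matches = enc.filter(
        cellBytes, 1, cellBytes.length - 1,
        filterBytes, 0, filterBytes.length,
        JavaBytesEncoder.Less)
      println(matches)                               // true
    }
  }
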
diff --git a/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/SchemaConverters.scala b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/SchemaConverters.scala
new file mode 100644
index 0000000..9eeabc5
--- /dev/null
+++ b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/SchemaConverters.scala
@@ -0,0 +1,430 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.spark
+
+import java.io.ByteArrayInputStream
+import java.nio.ByteBuffer
+import java.sql.Timestamp
+import java.util
+import java.util.HashMap
+
+import org.apache.avro.SchemaBuilder.BaseFieldTypeBuilder
+import org.apache.avro.SchemaBuilder.BaseTypeBuilder
+import org.apache.avro.SchemaBuilder.FieldAssembler
+import org.apache.avro.SchemaBuilder.FieldDefault
+import org.apache.avro.SchemaBuilder.RecordBuilder
+import org.apache.avro.io._
+import org.apache.commons.io.output.ByteArrayOutputStream
+import org.apache.yetus.audience.InterfaceAudience;
+import org.apache.hadoop.hbase.util.Bytes
+
+import scala.collection.JavaConversions._
+
+import org.apache.avro.{SchemaBuilder, Schema}
+import org.apache.avro.Schema.Type._
+import org.apache.avro.generic.GenericData.{Record, Fixed}
+import org.apache.avro.generic.{GenericDatumReader, GenericDatumWriter, GenericData, GenericRecord}
+import org.apache.spark.sql.Row
+import org.apache.spark.sql.types._
+
+import scala.collection.immutable.Map
+
+@InterfaceAudience.Private
+abstract class AvroException(msg: String) extends Exception(msg)
+
+@InterfaceAudience.Private
+case class SchemaConversionException(msg: String) extends AvroException(msg)
+
+/***
+  * At the top level, the converters provide three high-level interfaces.
+  * 1. toSqlType: This function takes an Avro schema and returns a Spark SQL schema.
+  * 2. createConverterToSQL: Returns a function that is used to convert Avro types to their
+  *    corresponding Spark SQL representations.
+  * 3. convertTypeToAvro: This function constructs a converter function for a given Spark SQL
+  *    datatype. This is used when writing Avro records out to disk.
+  */
+@InterfaceAudience.Private
+object SchemaConverters {
+
+  case class SchemaType(dataType: DataType, nullable: Boolean)
+
+  /**
+    * This function takes an avro schema and returns a sql schema.
+    */
+  def toSqlType(avroSchema: Schema): SchemaType = {
+    avroSchema.getType match {
+      case INT => SchemaType(IntegerType, nullable = false)
+      case STRING => SchemaType(StringType, nullable = false)
+      case BOOLEAN => SchemaType(BooleanType, nullable = false)
+      case BYTES => SchemaType(BinaryType, nullable = false)
+      case DOUBLE => SchemaType(DoubleType, nullable = false)
+      case FLOAT => SchemaType(FloatType, nullable = false)
+      case LONG => SchemaType(LongType, nullable = false)
+      case FIXED => SchemaType(BinaryType, nullable = false)
+      case ENUM => SchemaType(StringType, nullable = false)
+
+      case RECORD =>
+        val fields = avroSchema.getFields.map { f =>
+          val schemaType = toSqlType(f.schema())
+          StructField(f.name, schemaType.dataType, schemaType.nullable)
+        }
+
+        SchemaType(StructType(fields), nullable = false)
+
+      case ARRAY =>
+        val schemaType = toSqlType(avroSchema.getElementType)
+        SchemaType(
+          ArrayType(schemaType.dataType, containsNull = schemaType.nullable),
+          nullable = false)
+
+      case MAP =>
+        val schemaType = toSqlType(avroSchema.getValueType)
+        SchemaType(
+          MapType(StringType, schemaType.dataType, valueContainsNull = schemaType.nullable),
+          nullable = false)
+
+      case UNION =>
+        if (avroSchema.getTypes.exists(_.getType == NULL)) {
+          // In case of a union with null, eliminate it and make a recursive call
+          val remainingUnionTypes = avroSchema.getTypes.filterNot(_.getType == NULL)
+          if (remainingUnionTypes.size == 1) {
+            toSqlType(remainingUnionTypes.get(0)).copy(nullable = true)
+          } else {
+            toSqlType(Schema.createUnion(remainingUnionTypes)).copy(nullable = true)
+          }
+        } else avroSchema.getTypes.map(_.getType) match {
+          case Seq(t1, t2) if Set(t1, t2) == Set(INT, LONG) =>
+            SchemaType(LongType, nullable = false)
+          case Seq(t1, t2) if Set(t1, t2) == Set(FLOAT, DOUBLE) =>
+            SchemaType(DoubleType, nullable = false)
+          case other => throw new SchemaConversionException(
+            s"This mix of union types is not supported: $other")
+        }
+
+      case other => throw new SchemaConversionException(s"Unsupported type $other")
+    }
+  }
+
+  /**
+    * This function converts sparkSQL StructType into avro schema. This method uses two other
+    * converter methods in order to do the conversion.
+    */
+  private def convertStructToAvro[T](
+                                      structType: StructType,
+                                      schemaBuilder: RecordBuilder[T],
+                                      recordNamespace: String): T = {
+    val fieldsAssembler: FieldAssembler[T] = schemaBuilder.fields()
+    structType.fields.foreach { field =>
+      val newField = fieldsAssembler.name(field.name).`type`()
+
+      if (field.nullable) {
+        convertFieldTypeToAvro(field.dataType, newField.nullable(), field.name, recordNamespace)
+          .noDefault
+      } else {
+        convertFieldTypeToAvro(field.dataType, newField, field.name, recordNamespace)
+          .noDefault
+      }
+    }
+    fieldsAssembler.endRecord()
+  }
+
+  /**
+    * Returns a function that is used to convert avro types to their
+    * corresponding sparkSQL representations.
+    */
+  def createConverterToSQL(schema: Schema): Any => Any = {
+    schema.getType match {
+      // Avro strings are in Utf8, so we have to call toString on them
+      case STRING | ENUM => (item: Any) => if (item == null) null else item.toString
+      case INT | BOOLEAN | DOUBLE | FLOAT | LONG => identity
+      // Byte arrays are reused by avro, so we have to make a copy of them.
+      case FIXED => (item: Any) => if (item == null) {
+        null
+      } else {
+        item.asInstanceOf[Fixed].bytes().clone()
+      }
+      case BYTES => (item: Any) => if (item == null) {
+        null
+      } else {
+        val bytes = item.asInstanceOf[ByteBuffer]
+        val javaBytes = new Array[Byte](bytes.remaining)
+        bytes.get(javaBytes)
+        javaBytes
+      }
+      case RECORD =>
+        val fieldConverters = schema.getFields.map(f => createConverterToSQL(f.schema))
+        (item: Any) => if (item == null) {
+          null
+        } else {
+          val record = item.asInstanceOf[GenericRecord]
+          val converted = new Array[Any](fieldConverters.size)
+          var idx = 0
+          while (idx < fieldConverters.size) {
+            converted(idx) = fieldConverters.apply(idx)(record.get(idx))
+            idx += 1
+          }
+          Row.fromSeq(converted.toSeq)
+        }
+      case ARRAY =>
+        val elementConverter = createConverterToSQL(schema.getElementType)
+        (item: Any) => if (item == null) {
+          null
+        } else {
+          try {
+            item.asInstanceOf[GenericData.Array[Any]].map(elementConverter)
+          } catch {
+            case e: Throwable =>
+              item.asInstanceOf[util.ArrayList[Any]].map(elementConverter)
+          }
+        }
+      case MAP =>
+        val valueConverter = createConverterToSQL(schema.getValueType)
+        (item: Any) => if (item == null) {
+          null
+        } else {
+          item.asInstanceOf[HashMap[Any, Any]].map(x => (x._1.toString, valueConverter(x._2))).toMap
+        }
+      case UNION =>
+        if (schema.getTypes.exists(_.getType == NULL)) {
+          val remainingUnionTypes = schema.getTypes.filterNot(_.getType == NULL)
+          if (remainingUnionTypes.size == 1) {
+            createConverterToSQL(remainingUnionTypes.get(0))
+          } else {
+            createConverterToSQL(Schema.createUnion(remainingUnionTypes))
+          }
+        } else schema.getTypes.map(_.getType) match {
+          case Seq(t1, t2) if Set(t1, t2) == Set(INT, LONG) =>
+            (item: Any) => {
+              item match {
+                case l: Long => l
+                case i: Int => i.toLong
+                case null => null
+              }
+            }
+          case Seq(t1, t2) if Set(t1, t2) == Set(FLOAT, DOUBLE) =>
+            (item: Any) => {
+              item match {
+                case d: Double => d
+                case f: Float => f.toDouble
+                case null => null
+              }
+            }
+          case other => throw new SchemaConversionException(
+            s"This mix of union types is not supported (see README): $other")
+        }
+      case other => throw new SchemaConversionException(s"invalid avro type: $other")
+    }
+  }
+
+  /**
+    * This function is used to convert a Spark SQL type to an Avro type. Note that this function
+    * is not used to construct the fields of an Avro record (convertFieldTypeToAvro is used for that).
+    */
+  private def convertTypeToAvro[T](
+                                    dataType: DataType,
+                                    schemaBuilder: BaseTypeBuilder[T],
+                                    structName: String,
+                                    recordNamespace: String): T = {
+    dataType match {
+      case ByteType => schemaBuilder.intType()
+      case ShortType => schemaBuilder.intType()
+      case IntegerType => schemaBuilder.intType()
+      case LongType => schemaBuilder.longType()
+      case FloatType => schemaBuilder.floatType()
+      case DoubleType => schemaBuilder.doubleType()
+      case _: DecimalType => schemaBuilder.stringType()
+      case StringType => schemaBuilder.stringType()
+      case BinaryType => schemaBuilder.bytesType()
+      case BooleanType => schemaBuilder.booleanType()
+      case TimestampType => schemaBuilder.longType()
+
+      case ArrayType(elementType, _) =>
+        val builder = getSchemaBuilder(dataType.asInstanceOf[ArrayType].containsNull)
+        val elementSchema = convertTypeToAvro(elementType, builder, structName, recordNamespace)
+        schemaBuilder.array().items(elementSchema)
+
+      case MapType(StringType, valueType, _) =>
+        val builder = getSchemaBuilder(dataType.asInstanceOf[MapType].valueContainsNull)
+        val valueSchema = convertTypeToAvro(valueType, builder, structName, recordNamespace)
+        schemaBuilder.map().values(valueSchema)
+
+      case structType: StructType =>
+        convertStructToAvro(
+          structType,
+          schemaBuilder.record(structName).namespace(recordNamespace),
+          recordNamespace)
+
+      case other => throw new IllegalArgumentException(s"Unexpected type $dataType.")
+    }
+  }
+
+  /**
+    * This function is used to construct the fields of an Avro record, where the schema of each
+    * field is specified by the Avro representation of its dataType. Since builders for record
+    * fields are different from those for everything else, we have to use a separate method.
+    */
+  private def convertFieldTypeToAvro[T](
+                                         dataType: DataType,
+                                         newFieldBuilder: BaseFieldTypeBuilder[T],
+                                         structName: String,
+                                         recordNamespace: String): FieldDefault[T, _] = {
+    dataType match {
+      case ByteType => newFieldBuilder.intType()
+      case ShortType => newFieldBuilder.intType()
+      case IntegerType => newFieldBuilder.intType()
+      case LongType => newFieldBuilder.longType()
+      case FloatType => newFieldBuilder.floatType()
+      case DoubleType => newFieldBuilder.doubleType()
+      case _: DecimalType => newFieldBuilder.stringType()
+      case StringType => newFieldBuilder.stringType()
+      case BinaryType => newFieldBuilder.bytesType()
+      case BooleanType => newFieldBuilder.booleanType()
+      case TimestampType => newFieldBuilder.longType()
+
+      case ArrayType(elementType, _) =>
+        val builder = getSchemaBuilder(dataType.asInstanceOf[ArrayType].containsNull)
+        val elementSchema = convertTypeToAvro(elementType, builder, structName, recordNamespace)
+        newFieldBuilder.array().items(elementSchema)
+
+      case MapType(StringType, valueType, _) =>
+        val builder = getSchemaBuilder(dataType.asInstanceOf[MapType].valueContainsNull)
+        val valueSchema = convertTypeToAvro(valueType, builder, structName, recordNamespace)
+        newFieldBuilder.map().values(valueSchema)
+
+      case structType: StructType =>
+        convertStructToAvro(
+          structType,
+          newFieldBuilder.record(structName).namespace(recordNamespace),
+          recordNamespace)
+
+      case other => throw new IllegalArgumentException(s"Unexpected type $dataType.")
+    }
+  }
+
+  private def getSchemaBuilder(isNullable: Boolean): BaseTypeBuilder[Schema] = {
+    if (isNullable) {
+      SchemaBuilder.builder().nullable()
+    } else {
+      SchemaBuilder.builder()
+    }
+  }
+  /**
+    * This function constructs a converter function for a given Spark SQL datatype. This is
+    * used when writing Avro records out to disk.
+    */
+  def createConverterToAvro(
+                             dataType: DataType,
+                             structName: String,
+                             recordNamespace: String): (Any) => Any = {
+    dataType match {
+      case BinaryType => (item: Any) => item match {
+        case null => null
+        case bytes: Array[Byte] => ByteBuffer.wrap(bytes)
+      }
+      case ByteType | ShortType | IntegerType | LongType |
+           FloatType | DoubleType | StringType | BooleanType => identity
+      case _: DecimalType => (item: Any) => if (item == null) null else item.toString
+      case TimestampType => (item: Any) =>
+        if (item == null) null else item.asInstanceOf[Timestamp].getTime
+      case ArrayType(elementType, _) =>
+        val elementConverter = createConverterToAvro(elementType, structName, recordNamespace)
+        (item: Any) => {
+          if (item == null) {
+            null
+          } else {
+            val sourceArray = item.asInstanceOf[Seq[Any]]
+            val sourceArraySize = sourceArray.size
+            val targetArray = new util.ArrayList[Any](sourceArraySize)
+            var idx = 0
+            while (idx < sourceArraySize) {
+              targetArray.add(elementConverter(sourceArray(idx)))
+              idx += 1
+            }
+            targetArray
+          }
+        }
+      case MapType(StringType, valueType, _) =>
+        val valueConverter = createConverterToAvro(valueType, structName, recordNamespace)
+        (item: Any) => {
+          if (item == null) {
+            null
+          } else {
+            val javaMap = new HashMap[String, Any]()
+            item.asInstanceOf[Map[String, Any]].foreach { case (key, value) =>
+              javaMap.put(key, valueConverter(value))
+            }
+            javaMap
+          }
+        }
+      case structType: StructType =>
+        val builder = SchemaBuilder.record(structName).namespace(recordNamespace)
+        val schema: Schema = SchemaConverters.convertStructToAvro(
+          structType, builder, recordNamespace)
+        val fieldConverters = structType.fields.map(field =>
+          createConverterToAvro(field.dataType, field.name, recordNamespace))
+        (item: Any) => {
+          if (item == null) {
+            null
+          } else {
+            val record = new Record(schema)
+            val convertersIterator = fieldConverters.iterator
+            val fieldNamesIterator = dataType.asInstanceOf[StructType].fieldNames.iterator
+            val rowIterator = item.asInstanceOf[Row].toSeq.iterator
+
+            while (convertersIterator.hasNext) {
+              val converter = convertersIterator.next()
+              record.put(fieldNamesIterator.next(), converter(rowIterator.next()))
+            }
+            record
+          }
+        }
+    }
+  }
+}
+
+@InterfaceAudience.Private
+object AvroSerdes {
+  // For now we only handle the case where the top level is a record or a primitive type
+  def serialize(input: Any, schema: Schema): Array[Byte]= {
+    schema.getType match {
+      case BOOLEAN => Bytes.toBytes(input.asInstanceOf[Boolean])
+      case BYTES | FIXED=> input.asInstanceOf[Array[Byte]]
+      case DOUBLE => Bytes.toBytes(input.asInstanceOf[Double])
+      case FLOAT => Bytes.toBytes(input.asInstanceOf[Float])
+      case INT => Bytes.toBytes(input.asInstanceOf[Int])
+      case LONG => Bytes.toBytes(input.asInstanceOf[Long])
+      case STRING => Bytes.toBytes(input.asInstanceOf[String])
+      case RECORD =>
+        val gr = input.asInstanceOf[GenericRecord]
+        val writer2 = new GenericDatumWriter[GenericRecord](schema)
+        val bao2 = new ByteArrayOutputStream()
+        val encoder2: BinaryEncoder = EncoderFactory.get().directBinaryEncoder(bao2, null)
+        writer2.write(gr, encoder2)
+        bao2.toByteArray()
+      case _ => throw new Exception(s"unsupported data type ${schema.getType}") //TODO
+    }
+  }
+
+  def deserialize(input: Array[Byte], schema: Schema): GenericRecord = {
+    val reader2: DatumReader[GenericRecord] = new GenericDatumReader[GenericRecord](schema)
+    val bai2 = new ByteArrayInputStream(input)
+    val decoder2: BinaryDecoder = DecoderFactory.get().directBinaryDecoder(bai2, null)
+    val gr2: GenericRecord = reader2.read(null, decoder2)
+    gr2
+  }
+}
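
A small sketch of the two entry points above: toSqlType converting an Avro record schema into a
Spark SQL StructType, and AvroSerdes round-tripping a GenericRecord through bytes. The object
name, schema, and values are made up for illustration and are not part of the patch.

  import org.apache.avro.{Schema, SchemaBuilder}
  import org.apache.avro.generic.GenericData
  import org.apache.hadoop.hbase.spark.{AvroSerdes, SchemaConverters}

  object AvroSerdesSketch {
    def main(args: Array[String]): Unit = {
      // An illustrative record schema with one string field and one int field.
      val schema: Schema = SchemaBuilder.record("User").namespace("example.avro")
        .fields()
        .requiredString("name")
        .requiredInt("age")
        .endRecord()

      // Avro record schema -> Spark SQL schema.
      println(SchemaConverters.toSqlType(schema).dataType)

      // GenericRecord -> bytes -> GenericRecord.
      val user = new GenericData.Record(schema)
      user.put("name", "alice")
      user.put("age", 30)
      val bytes = AvroSerdes.serialize(user, schema)
      val back = AvroSerdes.deserialize(bytes, schema)
      println(s"${back.get("name")} / ${back.get("age")}")  // alice / 30
    }
  }
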
diff --git a/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/SerDes.scala b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/SerDes.scala
new file mode 100644
index 0000000..59e44f3
--- /dev/null
+++ b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/SerDes.scala
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.spark.datasources
+
+import org.apache.hadoop.hbase.util.Bytes
+import org.apache.yetus.audience.InterfaceAudience
+
+// TODO: This is not really used in code.
+@InterfaceAudience.Public
+trait SerDes {
+  def serialize(value: Any): Array[Byte]
+  def deserialize(bytes: Array[Byte], start: Int, end: Int): Any
+}
+
+// TODO: This is not really used in code.
+@InterfaceAudience.Private
+class DoubleSerDes extends SerDes {
+  override def serialize(value: Any): Array[Byte] = Bytes.toBytes(value.asInstanceOf[Double])
+  override def deserialize(bytes: Array[Byte], start: Int, end: Int): Any = {
+    Bytes.toDouble(bytes, start)
+  }
+}
diff --git a/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/SerializableConfiguration.scala b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/SerializableConfiguration.scala
new file mode 100644
index 0000000..0e2b6f4
--- /dev/null
+++ b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/SerializableConfiguration.scala
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.spark.datasources
+
+import java.io.{IOException, ObjectInputStream, ObjectOutputStream}
+
+import org.apache.hadoop.conf.Configuration
+import org.apache.yetus.audience.InterfaceAudience;
+
+import scala.util.control.NonFatal
+
+@InterfaceAudience.Private
+class SerializableConfiguration(@transient var value: Configuration) extends Serializable {
+  private def writeObject(out: ObjectOutputStream): Unit = tryOrIOException {
+    out.defaultWriteObject()
+    value.write(out)
+  }
+
+  private def readObject(in: ObjectInputStream): Unit = tryOrIOException {
+    value = new Configuration(false)
+    value.readFields(in)
+  }
+
+  def tryOrIOException(block: => Unit) {
+    try {
+      block
+    } catch {
+      case e: IOException => throw e
+      case NonFatal(t) => throw new IOException(t)
+    }
+  }
+}
diff --git a/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/Utils.scala b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/Utils.scala
new file mode 100644
index 0000000..093c6ac
--- /dev/null
+++ b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/Utils.scala
@@ -0,0 +1,100 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.spark.datasources
+
+import org.apache.hadoop.hbase.spark.AvroSerdes
+import org.apache.hadoop.hbase.util.Bytes
+//import org.apache.spark.sql.execution.SparkSqlSerializer
+import org.apache.spark.sql.types._
+import org.apache.spark.unsafe.types.UTF8String
+
+import org.apache.yetus.audience.InterfaceAudience;
+
+@InterfaceAudience.Private
+object Utils {
+
+
+  /**
+    * Parses the HBase field to its corresponding
+    * Scala type, which can then be put into a Spark GenericRow
+    * and then automatically converted by Spark.
+    */
+  def hbaseFieldToScalaType(
+      f: Field,
+      src: Array[Byte],
+      offset: Int,
+      length: Int): Any = {
+    if (f.exeSchema.isDefined) {
+      // If an Avro schema is defined, use it to get the record, then convert it to a Catalyst data type
+      val m = AvroSerdes.deserialize(src, f.exeSchema.get)
+      val n = f.avroToCatalyst.map(_(m))
+      n.get
+    } else  {
+      // Fall back to atomic type
+      f.dt match {
+        case BooleanType => toBoolean(src, offset)
+        case ByteType => src(offset)
+        case DoubleType => Bytes.toDouble(src, offset)
+        case FloatType => Bytes.toFloat(src, offset)
+        case IntegerType => Bytes.toInt(src, offset)
+        case LongType|TimestampType => Bytes.toLong(src, offset)
+        case ShortType => Bytes.toShort(src, offset)
+        case StringType => toUTF8String(src, offset, length)
+        case BinaryType =>
+          val newArray = new Array[Byte](length)
+          System.arraycopy(src, offset, newArray, 0, length)
+          newArray
+        // TODO: SparkSqlSerializer.deserialize[Any](src)
+        case _ => throw new Exception(s"unsupported data type ${f.dt}")
+      }
+    }
+  }
+
+  // Convert the input value to a byte array according to the field's data type
+  def toBytes(input: Any, field: Field): Array[Byte] = {
+    if (field.schema.isDefined) {
+      // Here we assume the top level type is structType
+      val record = field.catalystToAvro(input)
+      AvroSerdes.serialize(record, field.schema.get)
+    } else {
+      input match {
+        case data: Boolean => Bytes.toBytes(data)
+        case data: Byte => Array(data)
+        case data: Array[Byte] => data
+        case data: Double => Bytes.toBytes(data)
+        case data: Float => Bytes.toBytes(data)
+        case data: Int => Bytes.toBytes(data)
+        case data: Long => Bytes.toBytes(data)
+        case data: Short => Bytes.toBytes(data)
+        case data: UTF8String => data.getBytes
+        case data: String => Bytes.toBytes(data)
+        // TODO: add more data type support
+        case _ => throw new Exception(s"unsupported data type ${field.dt}")
+      }
+    }
+  }
+
+  def toBoolean(input: Array[Byte], offset: Int): Boolean = {
+    input(offset) != 0
+  }
+
+  def toUTF8String(input: Array[Byte], offset: Int, length: Int): UTF8String = {
+    UTF8String.fromBytes(input.slice(offset, offset + length))
+  }
+}
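
A tiny sketch of the atomic helpers above (object name hypothetical; the Field-based paths are
omitted here because Field is defined elsewhere in this patch):

  import org.apache.hadoop.hbase.spark.datasources.Utils
  import org.apache.hadoop.hbase.util.Bytes

  object UtilsSketch {
    def main(args: Array[String]): Unit = {
      // A string column value as HBase stores it, converted back to Spark's UTF8String.
      val raw = Bytes.toBytes("row001")
      println(Utils.toUTF8String(raw, 0, raw.length))  // row001

      // Booleans are read as a single byte: any non-zero byte is true.
      println(Utils.toBoolean(Array[Byte](1), 0))      // true
      println(Utils.toBoolean(Array[Byte](0), 0))      // false
    }
  }
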
diff --git a/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/package.scala b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/package.scala
new file mode 100644
index 0000000..8f1f15c
--- /dev/null
+++ b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/package.scala
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.spark
+
+import org.apache.hadoop.hbase.util.Bytes
+
+import scala.math.Ordering
+
+// TODO: add @InterfaceAudience.Private if https://issues.scala-lang.org/browse/SI-3600 is resolved
+package object hbase {
+  type HBaseType = Array[Byte]
+  def bytesMin = new Array[Byte](0)
+  def bytesMax = null
+  val ByteMax = -1.asInstanceOf[Byte]
+  val ByteMin = 0.asInstanceOf[Byte]
+  val ord: Ordering[HBaseType] = new Ordering[HBaseType] {
+    def compare(x: Array[Byte], y: Array[Byte]): Int = {
+      return Bytes.compareTo(x, y)
+    }
+  }
+  //Do not use BinaryType.ordering
+  implicit val order: Ordering[HBaseType] = ord
+
+}
diff --git a/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/datasources/AvroSource.scala b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/datasources/AvroSource.scala
new file mode 100644
index 0000000..068b1af
--- /dev/null
+++ b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/datasources/AvroSource.scala
@@ -0,0 +1,163 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.spark.example.datasources
+
+import org.apache.avro.Schema
+import org.apache.avro.generic.GenericData
+import org.apache.hadoop.hbase.spark.AvroSerdes
+import org.apache.hadoop.hbase.spark.datasources.HBaseTableCatalog
+import org.apache.spark.sql.DataFrame
+import org.apache.spark.sql.SQLContext
+import org.apache.spark.SparkConf
+import org.apache.spark.SparkContext
+import org.apache.yetus.audience.InterfaceAudience
+
+/**
+ * @param col0 Column #0, Type is String
+ * @param col1 Column #1, Type is Array[Byte]
+ */
+@InterfaceAudience.Private
+case class AvroHBaseRecord(col0: String,
+                           col1: Array[Byte])
+@InterfaceAudience.Private
+object AvroHBaseRecord {
+  val schemaString =
+    s"""{"namespace": "example.avro",
+        |   "type": "record",      "name": "User",
+        |    "fields": [
+        |        {"name": "name", "type": "string"},
+        |        {"name": "favorite_number",  "type": ["int", "null"]},
+        |        {"name": "favorite_color", "type": ["string", "null"]},
+        |        {"name": "favorite_array", "type": {"type": "array", "items": "string"}},
+        |        {"name": "favorite_map", "type": {"type": "map", "values": "int"}}
+        |      ]    }""".stripMargin
+
+  val avroSchema: Schema = {
+    val p = new Schema.Parser
+    p.parse(schemaString)
+  }
+
+  def apply(i: Int): AvroHBaseRecord = {
+
+    val user = new GenericData.Record(avroSchema);
+    user.put("name", s"name${"%03d".format(i)}")
+    user.put("favorite_number", i)
+    user.put("favorite_color", s"color${"%03d".format(i)}")
+    val favoriteArray = new GenericData.Array[String](2, avroSchema.getField("favorite_array").schema())
+    favoriteArray.add(s"number${i}")
+    favoriteArray.add(s"number${i+1}")
+    user.put("favorite_array", favoriteArray)
+    import scala.collection.JavaConverters._
+    val favoriteMap = Map[String, Int](("key1" -> i), ("key2" -> (i+1))).asJava
+    user.put("favorite_map", favoriteMap)
+    val avroByte = AvroSerdes.serialize(user, avroSchema)
+    AvroHBaseRecord(s"name${"%03d".format(i)}", avroByte)
+  }
+}
+
+@InterfaceAudience.Private
+object AvroSource {
+  def catalog = s"""{
+                    |"table":{"namespace":"default", "name":"ExampleAvrotable"},
+                    |"rowkey":"key",
+                    |"columns":{
+                    |"col0":{"cf":"rowkey", "col":"key", "type":"string"},
+                    |"col1":{"cf":"cf1", "col":"col1", "type":"binary"}
+                    |}
+                    |}""".stripMargin
+
+  def avroCatalog = s"""{
+                        |"table":{"namespace":"default", "name":"ExampleAvrotable"},
+                        |"rowkey":"key",
+                        |"columns":{
+                        |"col0":{"cf":"rowkey", "col":"key", "type":"string"},
+                        |"col1":{"cf":"cf1", "col":"col1", "avro":"avroSchema"}
+                        |}
+                        |}""".stripMargin
+
+  def avroCatalogInsert = s"""{
+                              |"table":{"namespace":"default", "name":"ExampleAvrotableInsert"},
+                              |"rowkey":"key",
+                              |"columns":{
+                              |"col0":{"cf":"rowkey", "col":"key", "type":"string"},
+                              |"col1":{"cf":"cf1", "col":"col1", "avro":"avroSchema"}
+                              |}
+                              |}""".stripMargin
+
+  def main(args: Array[String]) {
+    val sparkConf = new SparkConf().setAppName("AvroSourceExample")
+    val sc = new SparkContext(sparkConf)
+    val sqlContext = new SQLContext(sc)
+
+    import sqlContext.implicits._
+
+    def withCatalog(cat: String): DataFrame = {
+      sqlContext
+        .read
+        .options(Map("avroSchema" -> AvroHBaseRecord.schemaString, HBaseTableCatalog.tableCatalog -> avroCatalog))
+        .format("org.apache.hadoop.hbase.spark")
+        .load()
+    }
+
+    val data = (0 to 255).map { i =>
+      AvroHBaseRecord(i)
+    }
+
+    sc.parallelize(data).toDF.write.options(
+      Map(HBaseTableCatalog.tableCatalog -> catalog, HBaseTableCatalog.newTable -> "5"))
+      .format("org.apache.hadoop.hbase.spark")
+      .save()
+
+    val df = withCatalog(catalog)
+    df.show()
+    df.printSchema()
+    df.registerTempTable("ExampleAvrotable")
+    val c = sqlContext.sql("select count(1) from ExampleAvrotable")
+    c.show()
+
+    val filtered = df.select($"col0", $"col1.favorite_array").where($"col0" === "name001")
+    filtered.show()
+    val collected = filtered.collect()
+    if (collected(0).getSeq[String](1)(0) != "number1") {
+      throw new UserCustomizedSampleException("value invalid")
+    }
+    if (collected(0).getSeq[String](1)(1) != "number2") {
+      throw new UserCustomizedSampleException("value invalid")
+    }
+
+    df.write.options(
+      Map("avroSchema"->AvroHBaseRecord.schemaString, HBaseTableCatalog.tableCatalog->avroCatalogInsert,
+        HBaseTableCatalog.newTable -> "5"))
+      .format("org.apache.hadoop.hbase.spark")
+      .save()
+    val newDF = withCatalog(avroCatalogInsert)
+    newDF.show()
+    newDF.printSchema()
+    if(newDF.count() != 256) {
+      throw new UserCustomizedSampleException("value invalid")
+    }
+
+    df.filter($"col1.name" === "name005" || $"col1.name" <= "name005")
+      .select("col0", "col1.favorite_color", "col1.favorite_number")
+      .show()
+
+    df.filter($"col1.name" <= "name005" || $"col1.name".contains("name007"))
+      .select("col0", "col1.favorite_color", "col1.favorite_number")
+      .show()
+  }
+}
diff --git a/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/datasources/DataType.scala b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/datasources/DataType.scala
new file mode 100644
index 0000000..ac7e776
--- /dev/null
+++ b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/datasources/DataType.scala
@@ -0,0 +1,172 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.spark.example.datasources
+
+import org.apache.hadoop.hbase.spark.datasources.HBaseTableCatalog
+import org.apache.spark.sql.DataFrame
+import org.apache.spark.sql.SQLContext
+import org.apache.spark.SparkConf
+import org.apache.spark.SparkContext
+import org.apache.yetus.audience.InterfaceAudience
+
+@InterfaceAudience.Private
+class UserCustomizedSampleException(message: String = null, cause: Throwable = null) extends
+  RuntimeException(UserCustomizedSampleException.message(message, cause), cause)
+
+@InterfaceAudience.Private
+object UserCustomizedSampleException {
+  def message(message: String, cause: Throwable) =
+    if (message != null) message
+    else if (cause != null) cause.toString()
+    else null
+}
+
+@InterfaceAudience.Private
+case class IntKeyRecord(
+  col0: Integer,
+  col1: Boolean,
+  col2: Double,
+  col3: Float,
+  col4: Int,
+  col5: Long,
+  col6: Short,
+  col7: String,
+  col8: Byte)
+
+object IntKeyRecord {
+  def apply(i: Int): IntKeyRecord = {
+    IntKeyRecord(if (i % 2 == 0) i else -i,
+      i % 2 == 0,
+      i.toDouble,
+      i.toFloat,
+      i,
+      i.toLong,
+      i.toShort,
+      s"String$i extra",
+      i.toByte)
+  }
+}
+
+@InterfaceAudience.Private
+object DataType {
+  val cat = s"""{
+                |"table":{"namespace":"default", "name":"DataTypeExampleTable"},
+                |"rowkey":"key",
+                |"columns":{
+                |"col0":{"cf":"rowkey", "col":"key", "type":"int"},
+                |"col1":{"cf":"cf1", "col":"col1", "type":"boolean"},
+                |"col2":{"cf":"cf2", "col":"col2", "type":"double"},
+                |"col3":{"cf":"cf3", "col":"col3", "type":"float"},
+                |"col4":{"cf":"cf4", "col":"col4", "type":"int"},
+                |"col5":{"cf":"cf5", "col":"col5", "type":"bigint"},
+                |"col6":{"cf":"cf6", "col":"col6", "type":"smallint"},
+                |"col7":{"cf":"cf7", "col":"col7", "type":"string"},
+                |"col8":{"cf":"cf8", "col":"col8", "type":"tinyint"}
+                |}
+                |}""".stripMargin
+
+  def main(args: Array[String]){
+    val sparkConf = new SparkConf().setAppName("DataTypeExample")
+    val sc = new SparkContext(sparkConf)
+    val sqlContext = new SQLContext(sc)
+
+    import sqlContext.implicits._
+
+    def withCatalog(cat: String): DataFrame = {
+      sqlContext
+        .read
+        .options(Map(HBaseTableCatalog.tableCatalog->cat))
+        .format("org.apache.hadoop.hbase.spark")
+        .load()
+    }
+
+    // test populate table
+    val data = (0 until 32).map { i =>
+      IntKeyRecord(i)
+    }
+    sc.parallelize(data).toDF.write.options(
+      Map(HBaseTableCatalog.tableCatalog -> cat, HBaseTableCatalog.newTable -> "5"))
+      .format("org.apache.hadoop.hbase.spark")
+      .save()
+
+    // test less than 0
+    val df = withCatalog(cat)
+    val s = df.filter($"col0" < 0)
+    s.show()
+    if(s.count() != 16){
+      throw new UserCustomizedSampleException("value invalid")
+    }
+
+    // test less than or equal to -10. The number of results is 11
+    val num1 = df.filter($"col0" <= -10)
+    num1.show()
+    val c1 = num1.count()
+    println(s"test result count should be 11: $c1")
+
+    // test less than or equal to -9. The number of results is 12
+    val num2 = df.filter($"col0" <= -9)
+    num2.show()
+    val c2 = num2.count()
+    println(s"test result count should be 12: $c2")
+
+    // test greater than or equal to -9. The number of results is 21
+    val num3 = df.filter($"col0" >= -9)
+    num3.show()
+    val c3 = num3.count()
+    println(s"test result count should be 21: $c3")
+
+    // test greater than or equal to 0. The number of results is 16
+    val num4 = df.filter($"col0" >= 0)
+    num4.show()
+    val c4 = num4.count()
+    println(s"test result count should be 16: $c4")
+
+    //test greater than 10. The number of results is 10
+    val num5 = df.filter($"col0" > 10)
+    num5.show()
+    val c5 = num5.count()
+    println(s"test result count should be 10: $c5")
+
+    // test "and". The number of results is 11
+    val num6 = df.filter($"col0" > -10 && $"col0" <= 10)
+    num6.show()
+    val c6 = num6.count()
+    println(s"test result count should be 11: $c6")
+
+    //test "or". The number of results is 21
+    val num7 = df.filter($"col0" <= -10 || $"col0" > 10)
+    num7.show()
+    val c7 = num7.count()
+    println(s"test result count should be 21: $c7")
+
+    //test "all". The number of results is 32
+    val num8 = df.filter($"col0" >= -100)
+    num8.show()
+    val c8 = num8.count()
+    println(s"test result count should be 32: $c8")
+
+    //test "full query"
+    val df1 = withCatalog(cat)
+    df1.show()
+    val c_df = df1.count()
+    println(s"df count should be 32: $c_df")
+    if(c_df != 32){
+      throw new UserCustomizedSampleException("value invalid")
+    }
+  }
+}
diff --git a/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/datasources/HBaseSource.scala b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/datasources/HBaseSource.scala
new file mode 100644
index 0000000..6accae0
--- /dev/null
+++ b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/datasources/HBaseSource.scala
@@ -0,0 +1,109 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.spark.example.datasources
+
+import org.apache.hadoop.hbase.spark.datasources.HBaseTableCatalog
+import org.apache.spark.sql.DataFrame
+import org.apache.spark.sql.SQLContext
+import org.apache.spark.SparkConf
+import org.apache.spark.SparkContext
+import org.apache.yetus.audience.InterfaceAudience
+
+@InterfaceAudience.Private
+case class HBaseRecord(
+  col0: String,
+  col1: Boolean,
+  col2: Double,
+  col3: Float,
+  col4: Int,
+  col5: Long,
+  col6: Short,
+  col7: String,
+  col8: Byte)
+
+@InterfaceAudience.Private
+object HBaseRecord {
+  def apply(i: Int): HBaseRecord = {
+    val s = s"""row${"%03d".format(i)}"""
+    HBaseRecord(s,
+      i % 2 == 0,
+      i.toDouble,
+      i.toFloat,
+      i,
+      i.toLong,
+      i.toShort,
+      s"String$i extra",
+      i.toByte)
+  }
+}
+
+@InterfaceAudience.Private
+object HBaseSource {
+  val cat = s"""{
+                |"table":{"namespace":"default", "name":"HBaseSourceExampleTable"},
+                |"rowkey":"key",
+                |"columns":{
+                |"col0":{"cf":"rowkey", "col":"key", "type":"string"},
+                |"col1":{"cf":"cf1", "col":"col1", "type":"boolean"},
+                |"col2":{"cf":"cf2", "col":"col2", "type":"double"},
+                |"col3":{"cf":"cf3", "col":"col3", "type":"float"},
+                |"col4":{"cf":"cf4", "col":"col4", "type":"int"},
+                |"col5":{"cf":"cf5", "col":"col5", "type":"bigint"},
+                |"col6":{"cf":"cf6", "col":"col6", "type":"smallint"},
+                |"col7":{"cf":"cf7", "col":"col7", "type":"string"},
+                |"col8":{"cf":"cf8", "col":"col8", "type":"tinyint"}
+                |}
+                |}""".stripMargin
+
+  def main(args: Array[String]) {
+    val sparkConf = new SparkConf().setAppName("HBaseSourceExample")
+    val sc = new SparkContext(sparkConf)
+    val sqlContext = new SQLContext(sc)
+
+    import sqlContext.implicits._
+
+    def withCatalog(cat: String): DataFrame = {
+      sqlContext
+        .read
+        .options(Map(HBaseTableCatalog.tableCatalog->cat))
+        .format("org.apache.hadoop.hbase.spark")
+        .load()
+    }
+
+    val data = (0 to 255).map { i =>
+      HBaseRecord(i)
+    }
+
+    sc.parallelize(data).toDF.write.options(
+      Map(HBaseTableCatalog.tableCatalog -> cat, HBaseTableCatalog.newTable -> "5"))
+      .format("org.apache.hadoop.hbase.spark")
+      .save()
+
+    val df = withCatalog(cat)
+    df.show()
+    df.filter($"col0" <= "row005")
+      .select($"col0", $"col1").show
+    df.filter($"col0" === "row005" || $"col0" <= "row005")
+      .select($"col0", $"col1").show
+    df.filter($"col0" > "row250")
+      .select($"col0", $"col1").show
+    df.registerTempTable("table1")
+    val c = sqlContext.sql("select count(col1) from table1 where col0 < 'row050'")
+    c.show()
+  }
+}
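
For reference, the same catalog-driven read also works through the Spark 2.x
SparkSession entry point (SQLContext and registerTempTable are the older,
deprecated API used above). A minimal sketch, assuming Spark 2.x, the connector
jar on the classpath, and the cat catalog JSON defined in the example:

    import org.apache.hadoop.hbase.spark.datasources.HBaseTableCatalog
    import org.apache.spark.sql.SparkSession

    val spark = SparkSession.builder().appName("HBaseSourceExample").getOrCreate()
    val df = spark.read
      .options(Map(HBaseTableCatalog.tableCatalog -> cat))
      .format("org.apache.hadoop.hbase.spark")
      .load()
    df.createOrReplaceTempView("table1")
    spark.sql("select count(col1) from table1 where col0 < 'row050'").show()
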
diff --git a/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/hbasecontext/HBaseBulkDeleteExample.scala b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/hbasecontext/HBaseBulkDeleteExample.scala
new file mode 100644
index 0000000..506fd22
--- /dev/null
+++ b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/hbasecontext/HBaseBulkDeleteExample.scala
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.spark.example.hbasecontext
+
+import org.apache.hadoop.hbase.client.Delete
+import org.apache.hadoop.hbase.spark.HBaseContext
+import org.apache.hadoop.hbase.util.Bytes
+import org.apache.hadoop.hbase.HBaseConfiguration
+import org.apache.hadoop.hbase.TableName
+import org.apache.spark.SparkConf
+import org.apache.spark.SparkContext
+import org.apache.yetus.audience.InterfaceAudience
+
+/**
+ * This is a simple example of deleting records in HBase
+ * with the bulkDelete function.
+ */
+@InterfaceAudience.Private
+object HBaseBulkDeleteExample {
+  def main(args: Array[String]) {
+    if (args.length < 1) {
+      println("HBaseBulkDeleteExample {tableName} missing an argument")
+      return
+    }
+
+    val tableName = args(0)
+
+    val sparkConf = new SparkConf().setAppName("HBaseBulkDeleteExample " + tableName)
+    val sc = new SparkContext(sparkConf)
+    try {
+      //[Array[Byte]]
+      val rdd = sc.parallelize(Array(
+        Bytes.toBytes("1"),
+        Bytes.toBytes("2"),
+        Bytes.toBytes("3"),
+        Bytes.toBytes("4"),
+        Bytes.toBytes("5")
+      ))
+
+      val conf = HBaseConfiguration.create()
+
+      val hbaseContext = new HBaseContext(sc, conf)
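+      // each RDD element becomes a Delete; the final argument (4) is the batch size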
+      hbaseContext.bulkDelete[Array[Byte]](rdd,
+        TableName.valueOf(tableName),
+        putRecord => new Delete(putRecord),
+        4)
+    } finally {
+      sc.stop()
+    }
+  }
+}
diff --git a/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/hbasecontext/HBaseBulkGetExample.scala b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/hbasecontext/HBaseBulkGetExample.scala
new file mode 100644
index 0000000..58bc1d4
--- /dev/null
+++ b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/hbasecontext/HBaseBulkGetExample.scala
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.spark.example.hbasecontext
+
+import org.apache.hadoop.hbase.client.Get
+import org.apache.hadoop.hbase.client.Result
+import org.apache.hadoop.hbase.spark.HBaseContext
+import org.apache.hadoop.hbase.util.Bytes
+import org.apache.hadoop.hbase.CellUtil
+import org.apache.hadoop.hbase.HBaseConfiguration
+import org.apache.hadoop.hbase.TableName
+import org.apache.spark.SparkConf
+import org.apache.spark.SparkContext
+import org.apache.yetus.audience.InterfaceAudience
+
+/**
+ * This is a simple example of getting records from HBase
+ * with the bulkGet function.
+ */
+@InterfaceAudience.Private
+object HBaseBulkGetExample {
+  def main(args: Array[String]) {
+    if (args.length < 1) {
+      println("HBaseBulkGetExample {tableName} missing an argument")
+      return
+    }
+
+    val tableName = args(0)
+
+    val sparkConf = new SparkConf().setAppName("HBaseBulkGetExample " + tableName)
+    val sc = new SparkContext(sparkConf)
+
+    try {
+
+      //[(Array[Byte])]
+      val rdd = sc.parallelize(Array(
+        Bytes.toBytes("1"),
+        Bytes.toBytes("2"),
+        Bytes.toBytes("3"),
+        Bytes.toBytes("4"),
+        Bytes.toBytes("5"),
+        Bytes.toBytes("6"),
+        Bytes.toBytes("7")))
+
+      val conf = HBaseConfiguration.create()
+
+      val hbaseContext = new HBaseContext(sc, conf)
+
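+      // bulkGet(table, batchSize, rdd, makeGet, convertResult): Gets go out in batches of 2,
+      // and each Result is rendered as "row:(qualifier,value)..."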
+      val getRdd = hbaseContext.bulkGet[Array[Byte], String](
+        TableName.valueOf(tableName),
+        2,
+        rdd,
+        record => {
+          System.out.println("making Get")
+          new Get(record)
+        },
+        (result: Result) => {
+
+          val it = result.listCells().iterator()
+          val b = new StringBuilder
+
+          b.append(Bytes.toString(result.getRow) + ":")
+
+          while (it.hasNext) {
+            val cell = it.next()
+            val q = Bytes.toString(CellUtil.cloneQualifier(cell))
+            if (q.equals("counter")) {
+              b.append("(" + q + "," + Bytes.toLong(CellUtil.cloneValue(cell)) + ")")
+            } else {
+              b.append("(" + q + "," + Bytes.toString(CellUtil.cloneValue(cell)) + ")")
+            }
+          }
+          b.toString()
+        })
+
+      getRdd.collect().foreach(v => println(v))
+
+    } finally {
+      sc.stop()
+    }
+  }
+}
diff --git a/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/hbasecontext/HBaseBulkPutExample.scala b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/hbasecontext/HBaseBulkPutExample.scala
new file mode 100644
index 0000000..0a6f379
--- /dev/null
+++ b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/hbasecontext/HBaseBulkPutExample.scala
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.spark.example.hbasecontext
+
+import org.apache.hadoop.hbase.client.Put
+import org.apache.hadoop.hbase.spark.HBaseContext
+import org.apache.hadoop.hbase.util.Bytes
+import org.apache.hadoop.hbase.HBaseConfiguration
+import org.apache.hadoop.hbase.TableName
+import org.apache.spark.SparkConf
+import org.apache.spark.SparkContext
+import org.apache.yetus.audience.InterfaceAudience
+
+/**
+ * This is a simple example of putting records in HBase
+ * with the bulkPut function.
+ */
+@InterfaceAudience.Private
+object HBaseBulkPutExample {
+  def main(args: Array[String]) {
+    if (args.length < 2) {
+      println("HBaseBulkPutExample {tableName} {columnFamily} are missing an arguments")
+      return
+    }
+
+    val tableName = args(0)
+    val columnFamily = args(1)
+
+    val sparkConf = new SparkConf().setAppName("HBaseBulkPutExample " +
+      tableName + " " + columnFamily)
+    val sc = new SparkContext(sparkConf)
+
+    try {
+      //[(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])]
+      val rdd = sc.parallelize(Array(
+        (Bytes.toBytes("1"),
+          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("1")))),
+        (Bytes.toBytes("2"),
+          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("2")))),
+        (Bytes.toBytes("3"),
+          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("3")))),
+        (Bytes.toBytes("4"),
+          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("4")))),
+        (Bytes.toBytes("5"),
+          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("5"))))
+      ))
+
+      val conf = HBaseConfiguration.create()
+
+      val hbaseContext = new HBaseContext(sc, conf)
+      hbaseContext.bulkPut[(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])](rdd,
+        TableName.valueOf(tableName),
+        (putRecord) => {
+          val put = new Put(putRecord._1)
+          putRecord._2.foreach((putValue) =>
+            put.addColumn(putValue._1, putValue._2, putValue._3))
+          put
+        });
+    } finally {
+      sc.stop()
+    }
+  }
+}
diff --git a/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/hbasecontext/HBaseBulkPutExampleFromFile.scala b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/hbasecontext/HBaseBulkPutExampleFromFile.scala
new file mode 100644
index 0000000..51ff0da
--- /dev/null
+++ b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/hbasecontext/HBaseBulkPutExampleFromFile.scala
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.spark.example.hbasecontext
+
+import org.apache.hadoop.hbase.client.Put
+import org.apache.hadoop.hbase.spark.HBaseContext
+import org.apache.hadoop.hbase.util.Bytes
+import org.apache.hadoop.hbase.HBaseConfiguration
+import org.apache.hadoop.hbase.TableName
+import org.apache.hadoop.io.LongWritable
+import org.apache.hadoop.io.Text
+import org.apache.hadoop.mapred.TextInputFormat
+import org.apache.spark.SparkConf
+import org.apache.spark.SparkContext
+import org.apache.yetus.audience.InterfaceAudience
+
+/**
+ * This is a simple example of putting records in HBase
+ * with the bulkPut function.  In this example we are
+ * getting the put information from a file
+ */
+@InterfaceAudience.Private
+object HBaseBulkPutExampleFromFile {
+  def main(args: Array[String]) {
+    if (args.length < 3) {
+      println("HBaseBulkPutExampleFromFile {tableName} {columnFamily} {inputFile} are missing an argument")
+      return
+    }
+
+    val tableName = args(0)
+    val columnFamily = args(1)
+    val inputFile = args(2)
+
+    val sparkConf = new SparkConf().setAppName("HBaseBulkPutExampleFromFile " +
+      tableName + " " + columnFamily + " " + inputFile)
+    val sc = new SparkContext(sparkConf)
+
+    try {
+      val rdd = sc.hadoopFile(
+        inputFile,
+        classOf[TextInputFormat],
+        classOf[LongWritable],
+        classOf[Text]).map(v => {
+        System.out.println("reading-" + v._2.toString)
+        v._2.toString
+      })
+
+      val conf = HBaseConfiguration.create()
+
+      val hbaseContext = new HBaseContext(sc, conf)
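+      // each input line becomes one Put: row key "Value- <line>", column c:1 = the line length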
+      hbaseContext.bulkPut[String](rdd,
+        TableName.valueOf(tableName),
+        (putRecord) => {
+          System.out.println("hbase-" + putRecord)
+          val put = new Put(Bytes.toBytes("Value- " + putRecord))
+          put.addColumn(Bytes.toBytes("c"), Bytes.toBytes("1"),
+            Bytes.toBytes(putRecord.length()))
+          put
+        });
+    } finally {
+      sc.stop()
+    }
+  }
+}
diff --git a/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/hbasecontext/HBaseBulkPutTimestampExample.scala b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/hbasecontext/HBaseBulkPutTimestampExample.scala
new file mode 100644
index 0000000..9bfcc2c
--- /dev/null
+++ b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/hbasecontext/HBaseBulkPutTimestampExample.scala
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.spark.example.hbasecontext
+
+import org.apache.hadoop.hbase.spark.HBaseContext
+import org.apache.spark.SparkContext
+import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
+import org.apache.hadoop.hbase.util.Bytes
+import org.apache.hadoop.hbase.client.Put
+import org.apache.spark.SparkConf
+import org.apache.yetus.audience.InterfaceAudience
+
+/**
+ * This is a simple example of putting records in HBase
+ * with the bulkPut function.  In this example we are
+ * also setting the timestamp in the put
+ */
+@InterfaceAudience.Private
+object HBaseBulkPutTimestampExample {
+  def main(args: Array[String]) {
+    if (args.length < 2) {
+      System.out.println("HBaseBulkPutTimestampExample {tableName} {columnFamily} are missing an argument")
+      return
+    }
+
+    val tableName = args(0)
+    val columnFamily = args(1)
+
+    val sparkConf = new SparkConf().setAppName("HBaseBulkPutTimestampExample " +
+      tableName + " " + columnFamily)
+    val sc = new SparkContext(sparkConf)
+
+    try {
+
+      val rdd = sc.parallelize(Array(
+        (Bytes.toBytes("6"),
+          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("1")))),
+        (Bytes.toBytes("7"),
+          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("2")))),
+        (Bytes.toBytes("8"),
+          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("3")))),
+        (Bytes.toBytes("9"),
+          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("4")))),
+        (Bytes.toBytes("10"),
+          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("5"))))))
+
+      val conf = HBaseConfiguration.create()
+
+      val timeStamp = System.currentTimeMillis()
+
+      val hbaseContext = new HBaseContext(sc, conf)
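+      // same as HBaseBulkPutExample, except every cell is written with the
+      // explicit timeStamp captured above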
+      hbaseContext.bulkPut[(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])](rdd,
+        TableName.valueOf(tableName),
+        (putRecord) => {
+          val put = new Put(putRecord._1)
+          putRecord._2.foreach((putValue) => put.addColumn(putValue._1, putValue._2,
+            timeStamp, putValue._3))
+          put
+        })
+    } finally {
+      sc.stop()
+    }
+  }
+}
diff --git a/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/hbasecontext/HBaseDistributedScanExample.scala b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/hbasecontext/HBaseDistributedScanExample.scala
new file mode 100644
index 0000000..7d8643a
--- /dev/null
+++ b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/hbasecontext/HBaseDistributedScanExample.scala
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.spark.example.hbasecontext
+
+import org.apache.hadoop.hbase.client.Scan
+import org.apache.hadoop.hbase.spark.HBaseContext
+import org.apache.hadoop.hbase.util.Bytes
+import org.apache.hadoop.hbase.HBaseConfiguration
+import org.apache.hadoop.hbase.TableName
+import org.apache.spark.SparkConf
+import org.apache.spark.SparkContext
+import org.apache.yetus.audience.InterfaceAudience
+/**
+ * This is a simple example of scanning records from HBase
+ * with the hbaseRDD function in a distributed fashion.
+ */
+@InterfaceAudience.Private
+object HBaseDistributedScanExample {
+  def main(args: Array[String]) {
+    if (args.length < 1) {
+      println("HBaseDistributedScanExample {tableName} missing an argument")
+      return
+    }
+
+    val tableName = args(0)
+
+    val sparkConf = new SparkConf().setAppName("HBaseDistributedScanExample " + tableName )
+    val sc = new SparkContext(sparkConf)
+
+    try {
+      val conf = HBaseConfiguration.create()
+
+      val hbaseContext = new HBaseContext(sc, conf)
+
+      val scan = new Scan()
+      scan.setCaching(100)
+
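+      // hbaseRDD runs the scan as a distributed job, yielding (row key, Result) pairs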
+      val getRdd = hbaseContext.hbaseRDD(TableName.valueOf(tableName), scan)
+
+      getRdd.foreach(v => println(Bytes.toString(v._1.get())))
+
+      println("Length: " + getRdd.map(r => r._1.copyBytes()).collect().length);
+    } finally {
+      sc.stop()
+    }
+  }
+
+}
diff --git a/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/hbasecontext/HBaseStreamingBulkPutExample.scala b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/hbasecontext/HBaseStreamingBulkPutExample.scala
new file mode 100644
index 0000000..20a22f7
--- /dev/null
+++ b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/hbasecontext/HBaseStreamingBulkPutExample.scala
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.spark.example.hbasecontext
+
+import org.apache.hadoop.hbase.client.Put
+import org.apache.hadoop.hbase.spark.HBaseContext
+import org.apache.hadoop.hbase.util.Bytes
+import org.apache.hadoop.hbase.HBaseConfiguration
+import org.apache.hadoop.hbase.TableName
+import org.apache.spark.SparkConf
+import org.apache.spark.SparkContext
+import org.apache.spark.streaming.Seconds
+import org.apache.spark.streaming.StreamingContext
+import org.apache.yetus.audience.InterfaceAudience
+
+/**
+ * This is a simple example of BulkPut with Spark Streaming
+ */
+@InterfaceAudience.Private
+object HBaseStreamingBulkPutExample {
+  def main(args: Array[String]) {
+    if (args.length < 4) {
+      println("HBaseStreamingBulkPutExample " +
+        "{host} {port} {tableName} {columnFamily} are missing an argument")
+      return
+    }
+
+    val host = args(0)
+    val port = args(1)
+    val tableName = args(2)
+    val columnFamily = args(3)
+
+    val sparkConf = new SparkConf().setAppName("HBaseStreamingBulkPutExample " +
+      tableName + " " + columnFamily)
+    val sc = new SparkContext(sparkConf)
+    try {
+      val ssc = new StreamingContext(sc, Seconds(1))
+
+      val lines = ssc.socketTextStream(host, port.toInt)
+
+      val conf = HBaseConfiguration.create()
+
+      val hbaseContext = new HBaseContext(sc, conf)
+
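+      // streamBulkPut writes each micro-batch of the socket stream to HBase
+      // using the supplied Put function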
+      hbaseContext.streamBulkPut[String](lines,
+        TableName.valueOf(tableName),
+        (putRecord) => {
+          if (putRecord.length() > 0) {
+            val put = new Put(Bytes.toBytes(putRecord))
+            put.addColumn(Bytes.toBytes("c"), Bytes.toBytes("foo"), Bytes.toBytes("bar"))
+            put
+          } else {
+            null
+          }
+        })
+      ssc.start()
+      ssc.awaitTerminationOrTimeout(60000)
+    } finally {
+      sc.stop()
+    }
+  }
+}
diff --git a/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/rdd/HBaseBulkDeleteExample.scala b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/rdd/HBaseBulkDeleteExample.scala
new file mode 100644
index 0000000..0ba4d1c
--- /dev/null
+++ b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/rdd/HBaseBulkDeleteExample.scala
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.spark.example.rdd
+
+import org.apache.hadoop.hbase.client.Delete
+import org.apache.hadoop.hbase.spark.HBaseContext
+import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._
+import org.apache.hadoop.hbase.util.Bytes
+import org.apache.hadoop.hbase.HBaseConfiguration
+import org.apache.hadoop.hbase.TableName
+import org.apache.spark.SparkConf
+import org.apache.spark.SparkContext
+import org.apache.yetus.audience.InterfaceAudience
+
+/**
+ * This is a simple example of deleting records in HBase
+ * with the bulkDelete function.
+ */
+@InterfaceAudience.Private
+object HBaseBulkDeleteExample {
+  def main(args: Array[String]) {
+    if (args.length < 1) {
+      println("HBaseBulkDeleteExample {tableName} are missing an argument")
+      return
+    }
+
+    val tableName = args(0)
+
+    val sparkConf = new SparkConf().setAppName("HBaseBulkDeleteExample " + tableName)
+    val sc = new SparkContext(sparkConf)
+    try {
+      //[Array[Byte]]
+      val rdd = sc.parallelize(Array(
+        Bytes.toBytes("1"),
+        Bytes.toBytes("2"),
+        Bytes.toBytes("3"),
+        Bytes.toBytes("4"),
+        Bytes.toBytes("5")
+      ))
+
+      val conf = HBaseConfiguration.create()
+
+      val hbaseContext = new HBaseContext(sc, conf)
+
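+      // same effect as the hbasecontext example, but via the implicit rdd.hbaseBulkDelete
+      // brought in by importing HBaseRDDFunctions._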
+      rdd.hbaseBulkDelete(hbaseContext, TableName.valueOf(tableName),
+        putRecord => new Delete(putRecord),
+        4)
+
+    } finally {
+      sc.stop()
+    }
+  }
+}
diff --git a/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/rdd/HBaseBulkGetExample.scala b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/rdd/HBaseBulkGetExample.scala
new file mode 100644
index 0000000..0736f6e
--- /dev/null
+++ b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/rdd/HBaseBulkGetExample.scala
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.spark.example.rdd
+
+import org.apache.hadoop.hbase.client.Get
+import org.apache.hadoop.hbase.client.Result
+import org.apache.hadoop.hbase.spark.HBaseContext
+import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._
+import org.apache.hadoop.hbase.util.Bytes
+import org.apache.hadoop.hbase.CellUtil
+import org.apache.hadoop.hbase.HBaseConfiguration
+import org.apache.hadoop.hbase.TableName
+import org.apache.spark.SparkConf
+import org.apache.spark.SparkContext
+import org.apache.yetus.audience.InterfaceAudience
+
+/**
+ * This is a simple example of getting records from HBase
+ * with the bulkGet function.
+ */
+@InterfaceAudience.Private
+object HBaseBulkGetExample {
+  def main(args: Array[String]) {
+    if (args.length < 1) {
+      println("HBaseBulkGetExample {tableName} is missing an argument")
+      return
+    }
+
+    val tableName = args(0)
+
+    val sparkConf = new SparkConf().setAppName("HBaseBulkGetExample " + tableName)
+    val sc = new SparkContext(sparkConf)
+
+    try {
+
+      //[(Array[Byte])]
+      val rdd = sc.parallelize(Array(
+        Bytes.toBytes("1"),
+        Bytes.toBytes("2"),
+        Bytes.toBytes("3"),
+        Bytes.toBytes("4"),
+        Bytes.toBytes("5"),
+        Bytes.toBytes("6"),
+        Bytes.toBytes("7")))
+
+      val conf = HBaseConfiguration.create()
+
+      val hbaseContext = new HBaseContext(sc, conf)
+
+      val getRdd = rdd.hbaseBulkGet[String](hbaseContext, TableName.valueOf(tableName), 2,
+        record => {
+          System.out.println("making Get")
+          new Get(record)
+        },
+        (result: Result) => {
+
+          val it = result.listCells().iterator()
+          val b = new StringBuilder
+
+          b.append(Bytes.toString(result.getRow) + ":")
+
+          while (it.hasNext) {
+            val cell = it.next()
+            val q = Bytes.toString(CellUtil.cloneQualifier(cell))
+            if (q.equals("counter")) {
+              b.append("(" + q + "," + Bytes.toLong(CellUtil.cloneValue(cell)) + ")")
+            } else {
+              b.append("(" + q + "," + Bytes.toString(CellUtil.cloneValue(cell)) + ")")
+            }
+          }
+          b.toString()
+        })
+
+      getRdd.collect().foreach(v => println(v))
+
+    } finally {
+      sc.stop()
+    }
+  }
+}
diff --git a/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/rdd/HBaseBulkPutExample.scala b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/rdd/HBaseBulkPutExample.scala
new file mode 100644
index 0000000..9f5885f
--- /dev/null
+++ b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/rdd/HBaseBulkPutExample.scala
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.spark.example.rdd
+
+import org.apache.hadoop.hbase.client.Put
+import org.apache.hadoop.hbase.spark.HBaseContext
+import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._
+import org.apache.hadoop.hbase.util.Bytes
+import org.apache.hadoop.hbase.HBaseConfiguration
+import org.apache.hadoop.hbase.TableName
+import org.apache.spark.SparkConf
+import org.apache.spark.SparkContext
+import org.apache.yetus.audience.InterfaceAudience
+
+/**
+ * This is a simple example of putting records in HBase
+ * with the bulkPut function.
+ */
+@InterfaceAudience.Private
+object HBaseBulkPutExample {
+   def main(args: Array[String]) {
+     if (args.length < 2) {
+       println("HBaseBulkPutExample {tableName} {columnFamily} are missing an arguments")
+       return
+     }
+
+     val tableName = args(0)
+     val columnFamily = args(1)
+
+     val sparkConf = new SparkConf().setAppName("HBaseBulkPutExample " +
+       tableName + " " + columnFamily)
+     val sc = new SparkContext(sparkConf)
+
+     try {
+       //[(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])]
+       val rdd = sc.parallelize(Array(
+         (Bytes.toBytes("1"),
+           Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("1")))),
+         (Bytes.toBytes("2"),
+           Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("2")))),
+         (Bytes.toBytes("3"),
+           Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("3")))),
+         (Bytes.toBytes("4"),
+           Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("4")))),
+         (Bytes.toBytes("5"),
+           Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("5"))))
+       ))
+
+       val conf = HBaseConfiguration.create()
+
+       val hbaseContext = new HBaseContext(sc, conf)
+
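+       // hbaseBulkPut is the implicit RDD form of HBaseContext.bulkPut: one Put per tuple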
+       rdd.hbaseBulkPut(hbaseContext, TableName.valueOf(tableName),
+         (putRecord) => {
+           val put = new Put(putRecord._1)
+           putRecord._2.foreach((putValue) => put.addColumn(putValue._1, putValue._2,
+             putValue._3))
+           put
+         })
+
+     } finally {
+       sc.stop()
+     }
+   }
+ }
diff --git a/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/rdd/HBaseForeachPartitionExample.scala b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/rdd/HBaseForeachPartitionExample.scala
new file mode 100644
index 0000000..be257ee
--- /dev/null
+++ b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/rdd/HBaseForeachPartitionExample.scala
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.spark.example.rdd
+
+import org.apache.hadoop.hbase.HBaseConfiguration
+import org.apache.hadoop.hbase.TableName
+import org.apache.hadoop.hbase.client.Put
+import org.apache.hadoop.hbase.spark.HBaseContext
+import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._
+import org.apache.hadoop.hbase.util.Bytes
+import org.apache.spark.SparkConf
+import org.apache.spark.SparkContext
+import org.apache.yetus.audience.InterfaceAudience
+
+/**
+ * This is a simple example of using the foreachPartition
+ * method with an HBase connection
+ */
+@InterfaceAudience.Private
+object HBaseForeachPartitionExample {
+  def main(args: Array[String]) {
+    if (args.length < 2) {
+      println("HBaseForeachPartitionExample {tableName} {columnFamily} are missing an arguments")
+      return
+    }
+
+    val tableName = args(0)
+    val columnFamily = args(1)
+
+    val sparkConf = new SparkConf().setAppName("HBaseForeachPartitionExample " +
+      tableName + " " + columnFamily)
+    val sc = new SparkContext(sparkConf)
+
+    try {
+      //[(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])]
+      val rdd = sc.parallelize(Array(
+        (Bytes.toBytes("1"),
+          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("1")))),
+        (Bytes.toBytes("2"),
+          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("2")))),
+        (Bytes.toBytes("3"),
+          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("3")))),
+        (Bytes.toBytes("4"),
+          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("4")))),
+        (Bytes.toBytes("5"),
+          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("5"))))
+      ))
+
+      val conf = HBaseConfiguration.create()
+
+      val hbaseContext = new HBaseContext(sc, conf)
+
+
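+      // hbaseForeachPartition hands each partition an open Connection; the Puts for that
+      // partition are batched through a single BufferedMutator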
+      rdd.hbaseForeachPartition(hbaseContext,
+        (it, connection) => {
+          val m = connection.getBufferedMutator(TableName.valueOf(tableName))
+
+          it.foreach(r => {
+            val put = new Put(r._1)
+            r._2.foreach((putValue) =>
+              put.addColumn(putValue._1, putValue._2, putValue._3))
+            m.mutate(put)
+          })
+          m.flush()
+          m.close()
+        })
+
+    } finally {
+      sc.stop()
+    }
+  }
+}
diff --git a/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/rdd/HBaseMapPartitionExample.scala b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/rdd/HBaseMapPartitionExample.scala
new file mode 100644
index 0000000..0793524
--- /dev/null
+++ b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/rdd/HBaseMapPartitionExample.scala
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.spark.example.rdd
+
+import org.apache.hadoop.hbase.client.Get
+import org.apache.hadoop.hbase.spark.HBaseContext
+import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._
+import org.apache.hadoop.hbase.util.Bytes
+import org.apache.hadoop.hbase.CellUtil
+import org.apache.hadoop.hbase.HBaseConfiguration
+import org.apache.hadoop.hbase.TableName
+import org.apache.spark.SparkConf
+import org.apache.spark.SparkContext
+import org.apache.yetus.audience.InterfaceAudience
+
+/**
+ * This is a simple example of using the mapPartitions
+ * method with an HBase connection
+ */
+@InterfaceAudience.Private
+object HBaseMapPartitionExample {
+  def main(args: Array[String]) {
+    if (args.length < 1) {
+      println("HBaseMapPartitionExample {tableName} is missing an argument")
+      return
+    }
+
+    val tableName = args(0)
+
+    val sparkConf = new SparkConf().setAppName("HBaseMapPartitionExample " + tableName)
+    val sc = new SparkContext(sparkConf)
+
+    try {
+
+      //[(Array[Byte])]
+      val rdd = sc.parallelize(Array(
+        Bytes.toBytes("1"),
+        Bytes.toBytes("2"),
+        Bytes.toBytes("3"),
+        Bytes.toBytes("4"),
+        Bytes.toBytes("5"),
+        Bytes.toBytes("6"),
+        Bytes.toBytes("7")))
+
+      val conf = HBaseConfiguration.create()
+
+      val hbaseContext = new HBaseContext(sc, conf)
+
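+      // hbaseMapPartitions shares one Connection per partition; a Table from it
+      // serves the point Gets inside the map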
+      val getRdd = rdd.hbaseMapPartitions[String](hbaseContext, (it, connection) => {
+        val table = connection.getTable(TableName.valueOf(tableName))
+        it.map{r =>
+          //batching would be faster.  This is just an example
+          val result = table.get(new Get(r))
+
+          // copy just the qualifier/value bytes; the raw backing arrays
+          // (getQualifierArray/getValueArray) span the whole result buffer
+          val cellIt = result.listCells().iterator()
+          val b = new StringBuilder
+
+          b.append(Bytes.toString(result.getRow) + ":")
+
+          while (cellIt.hasNext) {
+            val cell = cellIt.next()
+            val q = Bytes.toString(CellUtil.cloneQualifier(cell))
+            if (q.equals("counter")) {
+              b.append("(" + q + "," + Bytes.toLong(CellUtil.cloneValue(cell)) + ")")
+            } else {
+              b.append("(" + q + "," + Bytes.toString(CellUtil.cloneValue(cell)) + ")")
+            }
+          }
+          b.toString()
+        }
+      })
+
+      getRdd.collect().foreach(v => println(v))
+
+    } finally {
+      sc.stop()
+    }
+  }
+}
diff --git a/spark/hbase-spark/src/test/java/org/apache/hadoop/hbase/spark/TestJavaHBaseContext.java b/spark/hbase-spark/src/test/java/org/apache/hadoop/hbase/spark/TestJavaHBaseContext.java
new file mode 100644
index 0000000..723b09a
--- /dev/null
+++ b/spark/hbase-spark/src/test/java/org/apache/hadoop/hbase/spark/TestJavaHBaseContext.java
@@ -0,0 +1,538 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.spark;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.Get;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.spark.example.hbasecontext.JavaHBaseBulkDeleteExample;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.hadoop.hbase.testclassification.MiscTests;
+import org.apache.hadoop.hbase.tool.LoadIncrementalHFiles;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.Pair;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.Function;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import scala.Tuple2;
+
+import org.apache.hbase.thirdparty.com.google.common.io.Files;
+
+@Category({MiscTests.class, MediumTests.class})
+public class TestJavaHBaseContext implements Serializable {
+
+  @ClassRule
+  public static final HBaseClassTestRule TIMEOUT =
+      HBaseClassTestRule.forClass(TestJavaHBaseContext.class);
+
+  private transient JavaSparkContext jsc;
+  HBaseTestingUtility htu;
+  protected static final Logger LOG = LoggerFactory.getLogger(TestJavaHBaseContext.class);
+
+
+
+  byte[] tableName = Bytes.toBytes("t1");
+  byte[] columnFamily = Bytes.toBytes("c");
+  byte[] columnFamily1 = Bytes.toBytes("d");
+  String columnFamilyStr = Bytes.toString(columnFamily);
+  String columnFamilyStr1 = Bytes.toString(columnFamily1);
+
+
+  @Before
+  public void setUp() {
+    jsc = new JavaSparkContext("local", "JavaHBaseContextSuite");
+
+    File tempDir = Files.createTempDir();
+    tempDir.deleteOnExit();
+
+    htu = new HBaseTestingUtility();
+    try {
+      LOG.info("cleaning up test dir");
+
+      htu.cleanupTestDir();
+
+      LOG.info("starting minicluster");
+
+      htu.startMiniZKCluster();
+      htu.startMiniHBaseCluster();
+
+      LOG.info(" - minicluster started");
+
+      try {
+        htu.deleteTable(TableName.valueOf(tableName));
+      } catch (Exception e) {
+        LOG.info(" - no table " + Bytes.toString(tableName) + " found");
+      }
+
+      LOG.info(" - creating table " + Bytes.toString(tableName));
+      htu.createTable(TableName.valueOf(tableName),
+          new byte[][]{columnFamily, columnFamily1});
+      LOG.info(" - created table");
+    } catch (Exception e1) {
+      throw new RuntimeException(e1);
+    }
+  }
+
+  @After
+  public void tearDown() {
+    try {
+      htu.deleteTable(TableName.valueOf(tableName));
+      LOG.info("shuting down minicluster");
+      htu.shutdownMiniHBaseCluster();
+      htu.shutdownMiniZKCluster();
+      LOG.info(" - minicluster shut down");
+      htu.cleanupTestDir();
+    } catch (Exception e) {
+      throw new RuntimeException(e);
+    }
+    jsc.stop();
+    jsc = null;
+  }
+
+  @Test
+  public void testBulkPut() throws IOException {
+
+    List<String> list = new ArrayList<>(5);
+    list.add("1," + columnFamilyStr + ",a,1");
+    list.add("2," + columnFamilyStr + ",a,2");
+    list.add("3," + columnFamilyStr + ",a,3");
+    list.add("4," + columnFamilyStr + ",a,4");
+    list.add("5," + columnFamilyStr + ",a,5");
+
+    JavaRDD<String> rdd = jsc.parallelize(list);
+
+    Configuration conf = htu.getConfiguration();
+
+    JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf);
+
+    Connection conn = ConnectionFactory.createConnection(conf);
+    Table table = conn.getTable(TableName.valueOf(tableName));
+
+    try {
+      List<Delete> deletes = new ArrayList<>(5);
+      for (int i = 1; i < 6; i++) {
+        deletes.add(new Delete(Bytes.toBytes(Integer.toString(i))));
+      }
+      table.delete(deletes);
+    } finally {
+      table.close();
+    }
+
+    hbaseContext.bulkPut(rdd,
+            TableName.valueOf(tableName),
+            new PutFunction());
+
+    table = conn.getTable(TableName.valueOf(tableName));
+
+    try {
+      Result result1 = table.get(new Get(Bytes.toBytes("1")));
+      Assert.assertNotNull("Row 1 should have been put", result1.getRow());
+
+      Result result2 = table.get(new Get(Bytes.toBytes("2")));
+      Assert.assertNotNull("Row 2 should have been put", result2.getRow());
+
+      Result result3 = table.get(new Get(Bytes.toBytes("3")));
+      Assert.assertNotNull("Row 3 should have been put", result3.getRow());
+
+      Result result4 = table.get(new Get(Bytes.toBytes("4")));
+      Assert.assertNotNull("Row 4 should have been put", result4.getRow());
+
+      Result result5 = table.get(new Get(Bytes.toBytes("5")));
+      Assert.assertNotNull("Row 5 should have been put", result5.getRow());
+    } finally {
+      table.close();
+      conn.close();
+    }
+  }
+
+  public static class PutFunction implements Function<String, Put> {
+
+    private static final long serialVersionUID = 1L;
+
+    @Override
+    public Put call(String v) throws Exception {
+      String[] cells = v.split(",");
+      Put put = new Put(Bytes.toBytes(cells[0]));
+
+      put.addColumn(Bytes.toBytes(cells[1]), Bytes.toBytes(cells[2]),
+              Bytes.toBytes(cells[3]));
+      return put;
+    }
+  }
+
+  @Test
+  public void testBulkDelete() throws IOException {
+    List<byte[]> list = new ArrayList<>(3);
+    list.add(Bytes.toBytes("1"));
+    list.add(Bytes.toBytes("2"));
+    list.add(Bytes.toBytes("3"));
+
+    JavaRDD<byte[]> rdd = jsc.parallelize(list);
+
+    Configuration conf = htu.getConfiguration();
+
+    populateTableWithMockData(conf, TableName.valueOf(tableName));
+
+    JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf);
+
+    hbaseContext.bulkDelete(rdd, TableName.valueOf(tableName),
+            new JavaHBaseBulkDeleteExample.DeleteFunction(), 2);
+
+
+
+    try (
+            Connection conn = ConnectionFactory.createConnection(conf);
+            Table table = conn.getTable(TableName.valueOf(tableName))
+    ){
+      Result result1 = table.get(new Get(Bytes.toBytes("1")));
+      Assert.assertNull("Row 1 should have been deleted", result1.getRow());
+
+      Result result2 = table.get(new Get(Bytes.toBytes("2")));
+      Assert.assertNull("Row 2 should have been deleted", result2.getRow());
+
+      Result result3 = table.get(new Get(Bytes.toBytes("3")));
+      Assert.assertNull("Row 3 should have been deleted", result3.getRow());
+
+      Result result4 = table.get(new Get(Bytes.toBytes("4")));
+      Assert.assertNotNull("Row 4 should not have been deleted", result4.getRow());
+
+      Result result5 = table.get(new Get(Bytes.toBytes("5")));
+      Assert.assertNotNull("Row 5 should not have been deleted", result5.getRow());
+    }
+  }
+
+  @Test
+  public void testDistributedScan() throws IOException {
+    Configuration conf = htu.getConfiguration();
+
+    populateTableWithMockData(conf, TableName.valueOf(tableName));
+
+    JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf);
+
+    Scan scan = new Scan();
+    scan.setCaching(100);
+
+    JavaRDD<String> javaRdd =
+            hbaseContext.hbaseRDD(TableName.valueOf(tableName), scan)
+                    .map(new ScanConvertFunction());
+
+    List<String> results = javaRdd.collect();
+
+    Assert.assertEquals(results.size(), 5);
+  }
+
+  private static class ScanConvertFunction implements
+          Function<Tuple2<ImmutableBytesWritable, Result>, String> {
+    @Override
+    public String call(Tuple2<ImmutableBytesWritable, Result> v1) throws Exception {
+      return Bytes.toString(v1._1().copyBytes());
+    }
+  }
+
+  @Test
+  public void testBulkGet() throws IOException {
+    List<byte[]> list = new ArrayList<>(5);
+    list.add(Bytes.toBytes("1"));
+    list.add(Bytes.toBytes("2"));
+    list.add(Bytes.toBytes("3"));
+    list.add(Bytes.toBytes("4"));
+    list.add(Bytes.toBytes("5"));
+
+    JavaRDD<byte[]> rdd = jsc.parallelize(list);
+
+    Configuration conf = htu.getConfiguration();
+
+    populateTableWithMockData(conf, TableName.valueOf(tableName));
+
+    JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf);
+
+    final JavaRDD<String> stringJavaRDD =
+            hbaseContext.bulkGet(TableName.valueOf(tableName), 2, rdd,
+            new GetFunction(),
+            new ResultFunction());
+
+    Assert.assertEquals(stringJavaRDD.count(), 5);
+  }
+
+  @Test
+  public void testBulkLoad() throws Exception {
+
+    Path output = htu.getDataTestDir("testBulkLoad");
+    // Add cell as String: "row,falmily,qualifier,value"
+    List<String> list= new ArrayList<String>();
+    // row1
+    list.add("1," + columnFamilyStr + ",b,1");
+    // row3
+    list.add("3," + columnFamilyStr + ",a,2");
+    list.add("3," + columnFamilyStr + ",b,1");
+    list.add("3," + columnFamilyStr1 + ",a,1");
+    //row2
+    list.add("2," + columnFamilyStr + ",a,3");
+    list.add("2," + columnFamilyStr + ",b,3");
+
+    JavaRDD<String> rdd = jsc.parallelize(list);
+
+    Configuration conf = htu.getConfiguration();
+    JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf);
+
+
+
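+    // bulkLoad only writes HFiles under 'output'; LoadIncrementalHFiles below
+    // moves them into the live table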
+    hbaseContext.bulkLoad(rdd, TableName.valueOf(tableName), new BulkLoadFunction(),
+            output.toUri().getPath(), new HashMap<byte[], FamilyHFileWriteOptions>(), false,
+            HConstants.DEFAULT_MAX_FILE_SIZE);
+
+    try (Connection conn = ConnectionFactory.createConnection(conf);
+         Admin admin = conn.getAdmin()) {
+      Table table = conn.getTable(TableName.valueOf(tableName));
+      // Do bulk load
+      LoadIncrementalHFiles load = new LoadIncrementalHFiles(conf);
+      load.doBulkLoad(output, admin, table, conn.getRegionLocator(TableName.valueOf(tableName)));
+
+
+
+      // Check row1
+      List<Cell> cell1 = table.get(new Get(Bytes.toBytes("1"))).listCells();
+      Assert.assertEquals(cell1.size(), 1);
+      Assert.assertEquals(Bytes.toString(CellUtil.cloneFamily(cell1.get(0))), columnFamilyStr);
+      Assert.assertEquals(Bytes.toString(CellUtil.cloneQualifier(cell1.get(0))), "b");
+      Assert.assertEquals(Bytes.toString(CellUtil.cloneValue(cell1.get(0))), "1");
+
+      // Check row3
+      List<Cell> cell3 = table.get(new Get(Bytes.toBytes("3"))).listCells();
+      Assert.assertEquals(cell3.size(), 3);
+      Assert.assertEquals(Bytes.toString(CellUtil.cloneFamily(cell3.get(0))), columnFamilyStr);
+      Assert.assertEquals(Bytes.toString(CellUtil.cloneQualifier(cell3.get(0))), "a");
+      Assert.assertEquals(Bytes.toString(CellUtil.cloneValue(cell3.get(0))), "2");
+      Assert.assertEquals(Bytes.toString(CellUtil.cloneFamily(cell3.get(1))), columnFamilyStr);
+      Assert.assertEquals(Bytes.toString(CellUtil.cloneQualifier(cell3.get(1))), "b");
+      Assert.assertEquals(Bytes.toString(CellUtil.cloneValue(cell3.get(1))), "1");
+      Assert.assertEquals(Bytes.toString(CellUtil.cloneFamily(cell3.get(2))), columnFamilyStr1);
+      Assert.assertEquals(Bytes.toString(CellUtil.cloneQualifier(cell3.get(2))), "a");
+      Assert.assertEquals(Bytes.toString(CellUtil.cloneValue(cell3.get(2))), "1");
+
+      // Check row2
+      List<Cell> cell2 = table.get(new Get(Bytes.toBytes("2"))).listCells();
+      Assert.assertEquals(cell2.size(), 2);
+      Assert.assertEquals(Bytes.toString(CellUtil.cloneFamily(cell2.get(0))), columnFamilyStr);
+      Assert.assertEquals(Bytes.toString(CellUtil.cloneQualifier(cell2.get(0))), "a");
+      Assert.assertEquals(Bytes.toString(CellUtil.cloneValue(cell2.get(0))), "3");
+      Assert.assertEquals(Bytes.toString(CellUtil.cloneFamily(cell2.get(1))), columnFamilyStr);
+      Assert.assertEquals(Bytes.toString(CellUtil.cloneQualifier(cell2.get(1))), "b");
+      Assert.assertEquals(Bytes.toString(CellUtil.cloneValue(cell2.get(1))), "3");
+    }
+  }
+
+  @Test
+  public void testBulkLoadThinRows() throws Exception {
+    Path output = htu.getDataTestDir("testBulkLoadThinRows");
+    // because of the limitation of scala bulkLoadThinRows API
+    // we need to provide data as <row, all cells in that row>
+    List<List<String>> list= new ArrayList<List<String>>();
+    // row1
+    List<String> list1 = new ArrayList<String>();
+    list1.add("1," + columnFamilyStr + ",b,1");
+    list.add(list1);
+    // row3
+    List<String> list3 = new ArrayList<String>();
+    list3.add("3," + columnFamilyStr + ",a,2");
+    list3.add("3," + columnFamilyStr + ",b,1");
+    list3.add("3," + columnFamilyStr1 + ",a,1");
+    list.add(list3);
+    //row2
+    List<String> list2 = new ArrayList<String>();
+    list2.add("2," + columnFamilyStr + ",a,3");
+    list2.add("2," + columnFamilyStr + ",b,3");
+    list.add(list2);
+
+    JavaRDD<List<String>> rdd = jsc.parallelize(list);
+
+    Configuration conf = htu.getConfiguration();
+    JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf);
+
+    hbaseContext.bulkLoadThinRows(rdd, TableName.valueOf(tableName), new BulkLoadThinRowsFunction(),
+            output.toString(), new HashMap<byte[], FamilyHFileWriteOptions>(), false,
+            HConstants.DEFAULT_MAX_FILE_SIZE);
+
+
+    try (Connection conn = ConnectionFactory.createConnection(conf);
+         Admin admin = conn.getAdmin()) {
+      Table table = conn.getTable(TableName.valueOf(tableName));
+      // Do bulk load
+      LoadIncrementalHFiles load = new LoadIncrementalHFiles(conf);
+      load.doBulkLoad(output, admin, table, conn.getRegionLocator(TableName.valueOf(tableName)));
+
+      // Check row1
+      List<Cell> cell1 = table.get(new Get(Bytes.toBytes("1"))).listCells();
+      Assert.assertEquals(cell1.size(), 1);
+      Assert.assertEquals(Bytes.toString(CellUtil.cloneFamily(cell1.get(0))), columnFamilyStr);
+      Assert.assertEquals(Bytes.toString(CellUtil.cloneQualifier(cell1.get(0))), "b");
+      Assert.assertEquals(Bytes.toString(CellUtil.cloneValue(cell1.get(0))), "1");
+
+      // Check row3
+      List<Cell> cell3 = table.get(new Get(Bytes.toBytes("3"))).listCells();
+      Assert.assertEquals(cell3.size(), 3);
+      Assert.assertEquals(Bytes.toString(CellUtil.cloneFamily(cell3.get(0))), columnFamilyStr);
+      Assert.assertEquals(Bytes.toString(CellUtil.cloneQualifier(cell3.get(0))), "a");
+      Assert.assertEquals(Bytes.toString(CellUtil.cloneValue(cell3.get(0))), "2");
+      Assert.assertEquals(Bytes.toString(CellUtil.cloneFamily(cell3.get(1))), columnFamilyStr);
+      Assert.assertEquals(Bytes.toString(CellUtil.cloneQualifier(cell3.get(1))), "b");
+      Assert.assertEquals(Bytes.toString(CellUtil.cloneValue(cell3.get(1))), "1");
+      Assert.assertEquals(Bytes.toString(CellUtil.cloneFamily(cell3.get(2))), columnFamilyStr1);
+      Assert.assertEquals(Bytes.toString(CellUtil.cloneQualifier(cell3.get(2))), "a");
+      Assert.assertEquals(Bytes.toString(CellUtil.cloneValue(cell3.get(2))), "1");
+
+      // Check row2
+      List<Cell> cell2 = table.get(new Get(Bytes.toBytes("2"))).listCells();
+      Assert.assertEquals(cell2.size(), 2);
+      Assert.assertEquals(Bytes.toString(CellUtil.cloneFamily(cell2.get(0))), columnFamilyStr);
+      Assert.assertEquals(Bytes.toString(CellUtil.cloneQualifier(cell2.get(0))), "a");
+      Assert.assertEquals(Bytes.toString(CellUtil.cloneValue(cell2.get(0))), "3");
+      Assert.assertEquals(Bytes.toString(CellUtil.cloneFamily(cell2.get(1))), columnFamilyStr);
+      Assert.assertEquals(Bytes.toString(CellUtil.cloneQualifier(cell2.get(1))), "b");
+      Assert.assertEquals(Bytes.toString(CellUtil.cloneValue(cell2.get(1))), "3");
+    }
+
+  }
+  public static class BulkLoadFunction
+          implements Function<String, Pair<KeyFamilyQualifier, byte[]>> {
+    @Override public Pair<KeyFamilyQualifier, byte[]> call(String v1) throws Exception {
+      if (v1 == null) {
+        return null;
+      }
+
+      String[] strs = v1.split(",");
+      if (strs.length != 4) {
+        return null;
+      }
+
+      KeyFamilyQualifier kfq = new KeyFamilyQualifier(Bytes.toBytes(strs[0]),
+              Bytes.toBytes(strs[1]), Bytes.toBytes(strs[2]));
+      return new Pair<>(kfq, Bytes.toBytes(strs[3]));
+    }
+  }
+
+  public static class BulkLoadThinRowsFunction
+          implements Function<List<String>, Pair<ByteArrayWrapper, FamiliesQualifiersValues>> {
+    @Override public Pair<ByteArrayWrapper, FamiliesQualifiersValues> call(List<String> list) {
+      if (list == null) {
+        return null;
+      }
+
+      ByteArrayWrapper rowKey = null;
+      FamiliesQualifiersValues fqv = new FamiliesQualifiersValues();
+      for (String cell : list) {
+        String[] strs = cell.split(",");
+        if (rowKey == null) {
+          rowKey = new ByteArrayWrapper(Bytes.toBytes(strs[0]));
+        }
+        fqv.add(Bytes.toBytes(strs[1]), Bytes.toBytes(strs[2]), Bytes.toBytes(strs[3]));
+      }
+      return new Pair<>(rowKey, fqv);
+    }
+  }
+
+  public static class GetFunction implements Function<byte[], Get> {
+
+    private static final long serialVersionUID = 1L;
+
+    @Override
+    public Get call(byte[] v) throws Exception {
+      return new Get(v);
+    }
+  }
+
+  public static class ResultFunction implements Function<Result, String> {
+
+    private static final long serialVersionUID = 1L;
+
+    @Override
+    public String call(Result result) throws Exception {
+      Iterator<Cell> it = result.listCells().iterator();
+      StringBuilder b = new StringBuilder();
+
+      b.append(Bytes.toString(result.getRow())).append(":");
+
+      while (it.hasNext()) {
+        Cell cell = it.next();
+        String q = Bytes.toString(CellUtil.cloneQualifier(cell));
+        if ("counter".equals(q)) {
+          b.append("(")
+                  .append(q)
+                  .append(",")
+                  .append(Bytes.toLong(CellUtil.cloneValue(cell)))
+                  .append(")");
+        } else {
+          b.append("(")
+                  .append(q)
+                  .append(",")
+                  .append(Bytes.toString(CellUtil.cloneValue(cell)))
+                  .append(")");
+        }
+      }
+      return b.toString();
+    }
+  }
+
+  private void populateTableWithMockData(Configuration conf, TableName tableName)
+          throws IOException {
+    try (
+      Connection conn = ConnectionFactory.createConnection(conf);
+      Table table = conn.getTable(tableName)) {
+
+      List<Put> puts = new ArrayList<>(5);
+
+      for (int i = 1; i < 6; i++) {
+        Put put = new Put(Bytes.toBytes(Integer.toString(i)));
+        put.addColumn(columnFamily, columnFamily, columnFamily);
+        puts.add(put);
+      }
+      table.put(puts);
+    }
+  }
+
+}
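
A minimal Scala sketch (names and values are illustrative; it only assumes the hbase-spark classes that appear elsewhere in this diff) of the two input shapes the bulk load APIs exercised above expect: the wide-row path takes one element per cell keyed by (row, family, qualifier), while the thin-row path takes one element per row carrying all of that row's cells, which is why the Java test groups its strings per row first.

    import org.apache.hadoop.hbase.spark.{ByteArrayWrapper, FamiliesQualifiersValues, KeyFamilyQualifier}
    import org.apache.hadoop.hbase.util.Bytes

    // Wide-row shape: (KeyFamilyQualifier, value), one pair per cell.
    val wideCell = (new KeyFamilyQualifier(Bytes.toBytes("row1"), Bytes.toBytes("f1"), Bytes.toBytes("a")),
      Bytes.toBytes("v1"))

    // Thin-row shape: (ByteArrayWrapper, FamiliesQualifiersValues), one pair per row.
    val thinRow = {
      val fqv = new FamiliesQualifiersValues
      fqv += (Bytes.toBytes("f1"), Bytes.toBytes("a"), Bytes.toBytes("v1"))
      fqv += (Bytes.toBytes("f1"), Bytes.toBytes("b"), Bytes.toBytes("v2"))
      (new ByteArrayWrapper(Bytes.toBytes("row1")), fqv)
    }
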
diff --git a/spark/hbase-spark/src/test/resources/hbase-site.xml b/spark/hbase-spark/src/test/resources/hbase-site.xml
new file mode 100644
index 0000000..b3fb0d9
--- /dev/null
+++ b/spark/hbase-spark/src/test/resources/hbase-site.xml
@@ -0,0 +1,157 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+-->
+<configuration>
+  <property>
+    <name>hbase.regionserver.msginterval</name>
+    <value>1000</value>
+    <description>Interval between messages from the RegionServer to HMaster
+    in milliseconds.  Default is 15. Set this value low if you want unit
+    tests to be responsive.
+    </description>
+  </property>
+  <property>
+    <name>hbase.defaults.for.version.skip</name>
+    <value>true</value>
+  </property>
+  <property>
+    <name>hbase.server.thread.wakefrequency</name>
+    <value>1000</value>
+    <description>Time to sleep in between searches for work (in milliseconds).
+    Used as sleep interval by service threads such as hbase:meta scanner and log roller.
+    </description>
+  </property>
+  <property>
+    <name>hbase.master.event.waiting.time</name>
+    <value>50</value>
+    <description>Time to sleep between checks to see if a table event took place.
+    </description>
+  </property>
+  <property>
+    <name>hbase.regionserver.handler.count</name>
+    <value>5</value>
+  </property>
+  <property>
+    <name>hbase.regionserver.metahandler.count</name>
+    <value>5</value>
+  </property>
+  <property>
+      <name>hbase.ipc.server.read.threadpool.size</name>
+    <value>3</value>
+  </property>
+  <property>
+    <name>hbase.master.info.port</name>
+    <value>-1</value>
+    <description>The port for the hbase master web UI.
+    Set to -1 if you do not want the info server to run.
+    </description>
+  </property>
+  <property>
+    <name>hbase.master.port</name>
+    <value>0</value>
+    <description>Always have masters and regionservers come up on port '0' so we don't clash over
+      default ports.
+    </description>
+  </property>
+  <property>
+    <name>hbase.regionserver.port</name>
+    <value>0</value>
+    <description>Always have masters and regionservers come up on port '0' so we don't clash over
+      default ports.
+    </description>
+  </property>
+  <property>
+    <name>hbase.ipc.client.fallback-to-simple-auth-allowed</name>
+    <value>true</value>
+  </property>
+
+  <property>
+    <name>hbase.regionserver.info.port</name>
+    <value>-1</value>
+    <description>The port for the hbase regionserver web UI.
+    Set to -1 if you do not want the info server to run.
+    </description>
+  </property>
+  <property>
+    <name>hbase.regionserver.info.port.auto</name>
+    <value>true</value>
+    <description>Info server auto port bind. Enables automatic port
+    search if hbase.regionserver.info.port is already in use.
+    Enabled for testing to run multiple tests on one machine.
+    </description>
+  </property>
+  <property>
+    <name>hbase.regionserver.safemode</name>
+    <value>false</value>
+    <description>
+    Turn on/off safe mode in region server. Always on for production, always off
+    for tests.
+    </description>
+  </property>
+  <property>
+    <name>hbase.hregion.max.filesize</name>
+    <value>67108864</value>
+    <description>
+    Maximum desired file size for an HRegion.  If filesize exceeds
+    value + (value / 2), the HRegion is split in two.  Default: 256M.
+
+    Keep the maximum filesize small so we split more often in tests.
+    </description>
+  </property>
+  <property>
+    <name>hadoop.log.dir</name>
+    <value>${user.dir}/../logs</value>
+  </property>
+  <property>
+    <name>hbase.zookeeper.property.clientPort</name>
+    <value>21818</value>
+    <description>Property from ZooKeeper's config zoo.cfg.
+    The port at which the clients will connect.
+    </description>
+  </property>
+  <property>
+    <name>hbase.defaults.for.version.skip</name>
+    <value>true</value>
+    <description>
+    Set to true to skip the 'hbase.defaults.for.version'.
+    Setting this to true can be useful in contexts other than
+    the other side of a maven generation; i.e. running in an
+    IDE.  You'll want to set this boolean to true to avoid
+    seeing the RuntimeException complaint: "hbase-default.xml file
+    seems to be for an old version of HBase (@@@VERSION@@@), this
+    version is X.X.X-SNAPSHOT"
+    </description>
+  </property>
+  <property>
+    <name>hbase.table.sanity.checks</name>
+    <value>false</value>
+    <description>Skip sanity checks in tests
+    </description>
+  </property>
+  <property>
+    <name>hbase.procedure.fail.on.corruption</name>
+    <value>true</value>
+    <description>
+      Enable replay sanity checks on procedure tests.
+    </description>
+  </property>
+</configuration>
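
The properties above take effect because the file sits on the test classpath: HBaseConfiguration loads hbase-default.xml first and then lets this hbase-site.xml override it. A minimal sketch of reading the overrides back (illustrative; uses only the standard HBase client API):

    import org.apache.hadoop.hbase.HBaseConfiguration

    // Loads hbase-default.xml, then applies the hbase-site.xml overrides above.
    val conf = HBaseConfiguration.create()
    // ZooKeeper client port pinned for the mini cluster:
    val zkPort = conf.getInt("hbase.zookeeper.property.clientPort", 2181)
    // Version check skipped, per hbase.defaults.for.version.skip:
    val skipVersionCheck = conf.getBoolean("hbase.defaults.for.version.skip", false)
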
diff --git a/spark/hbase-spark/src/test/resources/log4j.properties b/spark/hbase-spark/src/test/resources/log4j.properties
new file mode 100644
index 0000000..cd3b8e9
--- /dev/null
+++ b/spark/hbase-spark/src/test/resources/log4j.properties
@@ -0,0 +1,76 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Define some default values that can be overridden by system properties
+hbase.root.logger=INFO,FA
+hbase.log.dir=.
+hbase.log.file=hbase.log
+
+# Define the root logger to the system property "hbase.root.logger".
+log4j.rootLogger=${hbase.root.logger}
+
+# Logging Threshold
+log4j.threshold=ALL
+
+#
+# Daily Rolling File Appender
+#
+log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender
+log4j.appender.DRFA.File=${hbase.log.dir}/${hbase.log.file}
+
+# Roll over at midnight
+log4j.appender.DRFA.DatePattern=.yyyy-MM-dd
+
+# 30-day backup
+#log4j.appender.DRFA.MaxBackupIndex=30
+log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout
+# Debugging Pattern format
+log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %C{2}(%L): %m%n
+
+
+#
+# console
+# Add "console" to rootLogger above if you want to use this
+#
+log4j.appender.console=org.apache.log4j.ConsoleAppender
+log4j.appender.console.target=System.err
+log4j.appender.console.layout=org.apache.log4j.PatternLayout
+log4j.appender.console.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %C{2}(%L): %m%n
+
+#File Appender
+log4j.appender.FA=org.apache.log4j.FileAppender
+log4j.appender.FA.append=false
+log4j.appender.FA.file=target/log-output.txt
+log4j.appender.FA.layout=org.apache.log4j.PatternLayout
+log4j.appender.FA.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %C{2}(%L): %m%n
+log4j.appender.FA.Threshold = INFO
+
+# Custom Logging levels
+
+#log4j.logger.org.apache.hadoop.fs.FSNamesystem=DEBUG
+
+log4j.logger.org.apache.hadoop=WARN
+log4j.logger.org.apache.zookeeper=ERROR
+log4j.logger.org.apache.hadoop.hbase=DEBUG
+
+#These settings are workarounds against spurious logs from the minicluster.
+#See HBASE-4709
+log4j.logger.org.apache.hadoop.metrics2.impl.MetricsConfig=WARN
+log4j.logger.org.apache.hadoop.metrics2.impl.MetricsSinkAdapter=WARN
+log4j.logger.org.apache.hadoop.metrics2.impl.MetricsSystemImpl=WARN
+log4j.logger.org.apache.hadoop.metrics2.util.MBeans=WARN
+# Enable this to get detailed connection error/retry logging.
+# log4j.logger.org.apache.hadoop.hbase.client.ConnectionImplementation=TRACE
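
Because the root logger is defined as ${hbase.root.logger}, log4j resolves it from a system property when the configuration file is first loaded, falling back to the INFO,FA default above if none is set. A minimal sketch of overriding it from a test JVM (illustrative; in practice the property is usually passed as -Dhbase.root.logger=... on the command line):

    // Must run before the first Logger is created, otherwise the file's default wins.
    System.setProperty("hbase.root.logger", "DEBUG,console")

    import org.apache.log4j.Logger
    val log = Logger.getLogger("org.apache.hadoop.hbase.spark")
    log.debug("routed to the console appender at DEBUG once the override is in effect")
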
diff --git a/spark/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/BulkLoadSuite.scala b/spark/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/BulkLoadSuite.scala
new file mode 100644
index 0000000..dc328f3
--- /dev/null
+++ b/spark/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/BulkLoadSuite.scala
@@ -0,0 +1,956 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.spark
+
+import org.apache.hadoop.fs.{FileSystem, Path}
+import org.apache.hadoop.hbase.client.{Get, ConnectionFactory}
+import org.apache.hadoop.hbase.io.hfile.{CacheConfig, HFile}
+import org.apache.hadoop.hbase.tool.LoadIncrementalHFiles
+import org.apache.hadoop.hbase.{HConstants, CellUtil, HBaseTestingUtility, TableName}
+import org.apache.hadoop.hbase.util.Bytes
+import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._
+import org.apache.spark.SparkContext
+import org.junit.rules.TemporaryFolder
+import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite}
+
+class BulkLoadSuite extends FunSuite with
+BeforeAndAfterEach with BeforeAndAfterAll  with Logging {
+  @transient var sc: SparkContext = null
+  var TEST_UTIL = new HBaseTestingUtility
+
+  val tableName = "t1"
+  val columnFamily1 = "f1"
+  val columnFamily2 = "f2"
+  val testFolder = new TemporaryFolder()
+
+
+  override def beforeAll() {
+    TEST_UTIL.startMiniCluster()
+    logInfo(" - minicluster started")
+
+    try {
+      TEST_UTIL.deleteTable(TableName.valueOf(tableName))
+    } catch {
+      case e: Exception =>
+        logInfo(" - no table " + tableName + " found")
+    }
+
+    logInfo(" - created table")
+
+    val envMap = Map[String,String](("Xmx", "512m"))
+
+    sc = new SparkContext("local", "test", null, Nil, envMap)
+  }
+
+  override def afterAll() {
+    logInfo("shutting down minicluster")
+    TEST_UTIL.shutdownMiniCluster()
+    logInfo(" - minicluster shut down")
+    TEST_UTIL.cleanupTestDir()
+    sc.stop()
+  }
+
+  test("Wide Row Bulk Load: Test multi family and multi column tests " +
+    "with all default HFile Configs.") {
+    val config = TEST_UTIL.getConfiguration
+
+    logInfo(" - creating table " + tableName)
+    TEST_UTIL.createTable(TableName.valueOf(tableName),
+      Array(Bytes.toBytes(columnFamily1), Bytes.toBytes(columnFamily2)))
+
+    // There are a number of tests in here.
+    // 1. Row keys are not in order
+    // 2. Qualifiers are not in order
+    // 3. Column Families are not in order
+    // 4. Some records are in one column family and some are in two column families
+    // 5. Some records have a single qualifier and some have two
+    val rdd = sc.parallelize(Array(
+      (Bytes.toBytes("1"),
+        (Bytes.toBytes(columnFamily1), Bytes.toBytes("a"), Bytes.toBytes("foo1"))),
+      (Bytes.toBytes("3"),
+        (Bytes.toBytes(columnFamily2), Bytes.toBytes("b"), Bytes.toBytes("foo2.a"))),
+      (Bytes.toBytes("3"),
+        (Bytes.toBytes(columnFamily2), Bytes.toBytes("a"), Bytes.toBytes("foo2.b"))),
+      (Bytes.toBytes("3"),
+        (Bytes.toBytes(columnFamily1), Bytes.toBytes("a"), Bytes.toBytes("foo2.c"))),
+      (Bytes.toBytes("5"),
+        (Bytes.toBytes(columnFamily1), Bytes.toBytes("a"), Bytes.toBytes("foo3"))),
+      (Bytes.toBytes("4"),
+        (Bytes.toBytes(columnFamily1), Bytes.toBytes("a"), Bytes.toBytes("foo.1"))),
+      (Bytes.toBytes("4"),
+        (Bytes.toBytes(columnFamily2), Bytes.toBytes("b"), Bytes.toBytes("foo.2"))),
+      (Bytes.toBytes("2"),
+        (Bytes.toBytes(columnFamily1), Bytes.toBytes("a"), Bytes.toBytes("bar.1"))),
+      (Bytes.toBytes("2"),
+        (Bytes.toBytes(columnFamily1), Bytes.toBytes("b"), Bytes.toBytes("bar.2")))))
+
+
+
+    val hbaseContext = new HBaseContext(sc, config)
+
+    testFolder.create()
+    val stagingFolder = testFolder.newFolder()
+
+    hbaseContext.bulkLoad[(Array[Byte], (Array[Byte], Array[Byte], Array[Byte]))](rdd,
+      TableName.valueOf(tableName),
+      t => {
+        val rowKey = t._1
+        val family:Array[Byte] = t._2._1
+        val qualifier = t._2._2
+        val value:Array[Byte] = t._2._3
+
+        val keyFamilyQualifier= new KeyFamilyQualifier(rowKey, family, qualifier)
+
+        Seq((keyFamilyQualifier, value)).iterator
+      },
+      stagingFolder.getPath)
+
+    val fs = FileSystem.get(config)
+    assert(fs.listStatus(new Path(stagingFolder.getPath)).length == 2)
+
+    val conn = ConnectionFactory.createConnection(config)
+
+    val load = new LoadIncrementalHFiles(config)
+    val table = conn.getTable(TableName.valueOf(tableName))
+    try {
+      load.doBulkLoad(new Path(stagingFolder.getPath), conn.getAdmin, table,
+        conn.getRegionLocator(TableName.valueOf(tableName)))
+
+      val cells5 = table.get(new Get(Bytes.toBytes("5"))).listCells()
+      assert(cells5.size == 1)
+      assert(Bytes.toString(CellUtil.cloneValue(cells5.get(0))).equals("foo3"))
+      assert(Bytes.toString(CellUtil.cloneFamily(cells5.get(0))).equals("f1"))
+      assert(Bytes.toString(CellUtil.cloneQualifier(cells5.get(0))).equals("a"))
+
+      val cells4 = table.get(new Get(Bytes.toBytes("4"))).listCells()
+      assert(cells4.size == 2)
+      assert(Bytes.toString(CellUtil.cloneValue(cells4.get(0))).equals("foo.1"))
+      assert(Bytes.toString(CellUtil.cloneFamily(cells4.get(0))).equals("f1"))
+      assert(Bytes.toString(CellUtil.cloneQualifier(cells4.get(0))).equals("a"))
+      assert(Bytes.toString(CellUtil.cloneValue(cells4.get(1))).equals("foo.2"))
+      assert(Bytes.toString(CellUtil.cloneFamily(cells4.get(1))).equals("f2"))
+      assert(Bytes.toString(CellUtil.cloneQualifier(cells4.get(1))).equals("b"))
+
+      val cells3 = table.get(new Get(Bytes.toBytes("3"))).listCells()
+      assert(cells3.size == 3)
+      assert(Bytes.toString(CellUtil.cloneValue(cells3.get(0))).equals("foo2.c"))
+      assert(Bytes.toString(CellUtil.cloneFamily(cells3.get(0))).equals("f1"))
+      assert(Bytes.toString(CellUtil.cloneQualifier(cells3.get(0))).equals("a"))
+      assert(Bytes.toString(CellUtil.cloneValue(cells3.get(1))).equals("foo2.b"))
+      assert(Bytes.toString(CellUtil.cloneFamily(cells3.get(1))).equals("f2"))
+      assert(Bytes.toString(CellUtil.cloneQualifier(cells3.get(1))).equals("a"))
+      assert(Bytes.toString(CellUtil.cloneValue(cells3.get(2))).equals("foo2.a"))
+      assert(Bytes.toString(CellUtil.cloneFamily(cells3.get(2))).equals("f2"))
+      assert(Bytes.toString(CellUtil.cloneQualifier(cells3.get(2))).equals("b"))
+
+
+      val cells2 = table.get(new Get(Bytes.toBytes("2"))).listCells()
+      assert(cells2.size == 2)
+      assert(Bytes.toString(CellUtil.cloneValue(cells2.get(0))).equals("bar.1"))
+      assert(Bytes.toString(CellUtil.cloneFamily(cells2.get(0))).equals("f1"))
+      assert(Bytes.toString(CellUtil.cloneQualifier(cells2.get(0))).equals("a"))
+      assert(Bytes.toString(CellUtil.cloneValue(cells2.get(1))).equals("bar.2"))
+      assert(Bytes.toString(CellUtil.cloneFamily(cells2.get(1))).equals("f1"))
+      assert(Bytes.toString(CellUtil.cloneQualifier(cells2.get(1))).equals("b"))
+
+      val cells1 = table.get(new Get(Bytes.toBytes("1"))).listCells()
+      assert(cells1.size == 1)
+      assert(Bytes.toString(CellUtil.cloneValue(cells1.get(0))).equals("foo1"))
+      assert(Bytes.toString(CellUtil.cloneFamily(cells1.get(0))).equals("f1"))
+      assert(Bytes.toString(CellUtil.cloneQualifier(cells1.get(0))).equals("a"))
+
+    } finally {
+      table.close()
+      val admin = ConnectionFactory.createConnection(config).getAdmin
+      try {
+        admin.disableTable(TableName.valueOf(tableName))
+        admin.deleteTable(TableName.valueOf(tableName))
+      } finally {
+        admin.close()
+      }
+      fs.delete(new Path(stagingFolder.getPath), true)
+
+      testFolder.delete()
+
+    }
+  }
+
+  test("Wide Row Bulk Load: Test HBase client: Test Roll Over and " +
+    "using an implicit call to bulk load") {
+    val config = TEST_UTIL.getConfiguration
+
+    logInfo(" - creating table " + tableName)
+    TEST_UTIL.createTable(TableName.valueOf(tableName),
+      Array(Bytes.toBytes(columnFamily1), Bytes.toBytes(columnFamily2)))
+
+    // There are a number of tests in here.
+    // 1. Row keys are not in order
+    // 2. Qualifiers are not in order
+    // 3. Column Families are not in order
+    // 4. Some records are in one column family and some are in two column families
+    // 5. Some records have a single qualifier and some have two
+    val rdd = sc.parallelize(Array(
+      (Bytes.toBytes("1"),
+        (Bytes.toBytes(columnFamily1), Bytes.toBytes("a"), Bytes.toBytes("foo1"))),
+      (Bytes.toBytes("3"),
+        (Bytes.toBytes(columnFamily1), Bytes.toBytes("b"), Bytes.toBytes("foo2.b"))),
+      (Bytes.toBytes("3"),
+        (Bytes.toBytes(columnFamily1), Bytes.toBytes("a"), Bytes.toBytes("foo2.a"))),
+      (Bytes.toBytes("3"),
+        (Bytes.toBytes(columnFamily1), Bytes.toBytes("c"), Bytes.toBytes("foo2.c"))),
+      (Bytes.toBytes("5"),
+        (Bytes.toBytes(columnFamily1), Bytes.toBytes("a"), Bytes.toBytes("foo3"))),
+      (Bytes.toBytes("4"),
+        (Bytes.toBytes(columnFamily1), Bytes.toBytes("a"), Bytes.toBytes("foo.1"))),
+      (Bytes.toBytes("4"),
+        (Bytes.toBytes(columnFamily1), Bytes.toBytes("b"), Bytes.toBytes("foo.2"))),
+      (Bytes.toBytes("2"),
+        (Bytes.toBytes(columnFamily1), Bytes.toBytes("a"), Bytes.toBytes("bar.1"))),
+      (Bytes.toBytes("2"),
+        (Bytes.toBytes(columnFamily1), Bytes.toBytes("b"), Bytes.toBytes("bar.2")))))
+
+    val hbaseContext = new HBaseContext(sc, config)
+
+    testFolder.create()
+    val stagingFolder = testFolder.newFolder()
+
+    rdd.hbaseBulkLoad(hbaseContext,
+      TableName.valueOf(tableName),
+      t => {
+        val rowKey = t._1
+        val family:Array[Byte] = t._2._1
+        val qualifier = t._2._2
+        val value = t._2._3
+
+        val keyFamilyQualifier= new KeyFamilyQualifier(rowKey, family, qualifier)
+
+        Seq((keyFamilyQualifier, value)).iterator
+      },
+      stagingFolder.getPath,
+      new java.util.HashMap[Array[Byte], FamilyHFileWriteOptions],
+      compactionExclude = false,
+      20)
+
+    val fs = FileSystem.get(config)
+    assert(fs.listStatus(new Path(stagingFolder.getPath)).length == 1)
+
+    assert(fs.listStatus(new Path(stagingFolder.getPath+ "/f1")).length == 5)
+
+    val conn = ConnectionFactory.createConnection(config)
+
+    val load = new LoadIncrementalHFiles(config)
+    val table = conn.getTable(TableName.valueOf(tableName))
+    try {
+      load.doBulkLoad(new Path(stagingFolder.getPath),
+        conn.getAdmin, table, conn.getRegionLocator(TableName.valueOf(tableName)))
+
+      val cells5 = table.get(new Get(Bytes.toBytes("5"))).listCells()
+      assert(cells5.size == 1)
+      assert(Bytes.toString(CellUtil.cloneValue(cells5.get(0))).equals("foo3"))
+      assert(Bytes.toString(CellUtil.cloneFamily(cells5.get(0))).equals("f1"))
+      assert(Bytes.toString(CellUtil.cloneQualifier(cells5.get(0))).equals("a"))
+
+      val cells4 = table.get(new Get(Bytes.toBytes("4"))).listCells()
+      assert(cells4.size == 2)
+      assert(Bytes.toString(CellUtil.cloneValue(cells4.get(0))).equals("foo.1"))
+      assert(Bytes.toString(CellUtil.cloneFamily(cells4.get(0))).equals("f1"))
+      assert(Bytes.toString(CellUtil.cloneQualifier(cells4.get(0))).equals("a"))
+      assert(Bytes.toString(CellUtil.cloneValue(cells4.get(1))).equals("foo.2"))
+      assert(Bytes.toString(CellUtil.cloneFamily(cells4.get(1))).equals("f1"))
+      assert(Bytes.toString(CellUtil.cloneQualifier(cells4.get(1))).equals("b"))
+
+      val cells3 = table.get(new Get(Bytes.toBytes("3"))).listCells()
+      assert(cells3.size == 3)
+      assert(Bytes.toString(CellUtil.cloneValue(cells3.get(0))).equals("foo2.a"))
+      assert(Bytes.toString(CellUtil.cloneFamily(cells3.get(0))).equals("f1"))
+      assert(Bytes.toString(CellUtil.cloneQualifier(cells3.get(0))).equals("a"))
+      assert(Bytes.toString(CellUtil.cloneValue(cells3.get(1))).equals("foo2.b"))
+      assert(Bytes.toString(CellUtil.cloneFamily(cells3.get(1))).equals("f1"))
+      assert(Bytes.toString(CellUtil.cloneQualifier(cells3.get(1))).equals("b"))
+      assert(Bytes.toString(CellUtil.cloneValue(cells3.get(2))).equals("foo2.c"))
+      assert(Bytes.toString(CellUtil.cloneFamily(cells3.get(2))).equals("f1"))
+      assert(Bytes.toString(CellUtil.cloneQualifier(cells3.get(2))).equals("c"))
+
+      val cells2 = table.get(new Get(Bytes.toBytes("2"))).listCells()
+      assert(cells2.size == 2)
+      assert(Bytes.toString(CellUtil.cloneValue(cells2.get(0))).equals("bar.1"))
+      assert(Bytes.toString(CellUtil.cloneFamily(cells2.get(0))).equals("f1"))
+      assert(Bytes.toString(CellUtil.cloneQualifier(cells2.get(0))).equals("a"))
+      assert(Bytes.toString(CellUtil.cloneValue(cells2.get(1))).equals("bar.2"))
+      assert(Bytes.toString(CellUtil.cloneFamily(cells2.get(1))).equals("f1"))
+      assert(Bytes.toString(CellUtil.cloneQualifier(cells2.get(1))).equals("b"))
+
+      val cells1 = table.get(new Get(Bytes.toBytes("1"))).listCells()
+      assert(cells1.size == 1)
+      assert(Bytes.toString(CellUtil.cloneValue(cells1.get(0))).equals("foo1"))
+      assert(Bytes.toString(CellUtil.cloneFamily(cells1.get(0))).equals("f1"))
+      assert(Bytes.toString(CellUtil.cloneQualifier(cells1.get(0))).equals("a"))
+
+    } finally {
+      table.close()
+      val admin = ConnectionFactory.createConnection(config).getAdmin
+      try {
+        admin.disableTable(TableName.valueOf(tableName))
+        admin.deleteTable(TableName.valueOf(tableName))
+      } finally {
+        admin.close()
+      }
+      fs.delete(new Path(stagingFolder.getPath), true)
+
+      testFolder.delete()
+    }
+  }
+
+  test("Wide Row Bulk Load: Test multi family and multi column tests" +
+    " with one column family with custom configs plus multi region") {
+    val config = TEST_UTIL.getConfiguration
+
+    val splitKeys:Array[Array[Byte]] = new Array[Array[Byte]](2)
+    splitKeys(0) = Bytes.toBytes("2")
+    splitKeys(1) = Bytes.toBytes("4")
+
+    logInfo(" - creating table " + tableName)
+    TEST_UTIL.createTable(TableName.valueOf(tableName),
+      Array(Bytes.toBytes(columnFamily1), Bytes.toBytes(columnFamily2)),
+      splitKeys)
+
+    // There are a number of tests in here.
+    // 1. Row keys are not in order
+    // 2. Qualifiers are not in order
+    // 3. Column Families are not in order
+    // 4. Some records are in one column family and some are in two column families
+    // 5. Some records have a single qualifier and some have two
+    val rdd = sc.parallelize(Array(
+      (Bytes.toBytes("1"),
+        (Bytes.toBytes(columnFamily1), Bytes.toBytes("a"), Bytes.toBytes("foo1"))),
+      (Bytes.toBytes("3"),
+        (Bytes.toBytes(columnFamily2), Bytes.toBytes("b"), Bytes.toBytes("foo2.a"))),
+      (Bytes.toBytes("3"),
+        (Bytes.toBytes(columnFamily2), Bytes.toBytes("a"), Bytes.toBytes("foo2.b"))),
+      (Bytes.toBytes("3"),
+        (Bytes.toBytes(columnFamily1), Bytes.toBytes("a"), Bytes.toBytes("foo2.c"))),
+      (Bytes.toBytes("5"),
+        (Bytes.toBytes(columnFamily1), Bytes.toBytes("a"), Bytes.toBytes("foo3"))),
+      (Bytes.toBytes("4"),
+        (Bytes.toBytes(columnFamily1), Bytes.toBytes("a"), Bytes.toBytes("foo.1"))),
+      (Bytes.toBytes("4"),
+        (Bytes.toBytes(columnFamily2), Bytes.toBytes("b"), Bytes.toBytes("foo.2"))),
+      (Bytes.toBytes("2"),
+        (Bytes.toBytes(columnFamily1), Bytes.toBytes("a"), Bytes.toBytes("bar.1"))),
+      (Bytes.toBytes("2"),
+        (Bytes.toBytes(columnFamily1), Bytes.toBytes("b"), Bytes.toBytes("bar.2")))))
+
+    val hbaseContext = new HBaseContext(sc, config)
+
+    testFolder.create()
+    val stagingFolder = testFolder.newFolder()
+
+    val familyHBaseWriterOptions = new java.util.HashMap[Array[Byte], FamilyHFileWriteOptions]
+
+    val f1Options = new FamilyHFileWriteOptions("GZ", "ROW", 128,
+      "PREFIX")
+
+    familyHBaseWriterOptions.put(Bytes.toBytes(columnFamily1), f1Options)
+
+    hbaseContext.bulkLoad[(Array[Byte], (Array[Byte], Array[Byte], Array[Byte]))](rdd,
+      TableName.valueOf(tableName),
+      t => {
+        val rowKey = t._1
+        val family:Array[Byte] = t._2._1
+        val qualifier = t._2._2
+        val value = t._2._3
+
+        val keyFamilyQualifier= new KeyFamilyQualifier(rowKey, family, qualifier)
+
+        Seq((keyFamilyQualifier, value)).iterator
+      },
+      stagingFolder.getPath,
+      familyHBaseWriterOptions,
+      compactionExclude = false,
+      HConstants.DEFAULT_MAX_FILE_SIZE)
+
+    val fs = FileSystem.get(config)
+    assert(fs.listStatus(new Path(stagingFolder.getPath)).length == 2)
+
+    val f1FileList = fs.listStatus(new Path(stagingFolder.getPath +"/f1"))
+    for ( i <- 0 until f1FileList.length) {
+      val reader = HFile.createReader(fs, f1FileList(i).getPath,
+        new CacheConfig(config), true, config)
+      assert(reader.getCompressionAlgorithm.getName.equals("gz"))
+      assert(reader.getDataBlockEncoding.name().equals("PREFIX"))
+    }
+
+    assert( 3 ==  f1FileList.length)
+
+    val f2FileList = fs.listStatus(new Path(stagingFolder.getPath +"/f2"))
+    for ( i <- 0 until f2FileList.length) {
+      val reader = HFile.createReader(fs, f2FileList(i).getPath,
+        new CacheConfig(config), true, config)
+      assert(reader.getCompressionAlgorithm.getName.equals("none"))
+      assert(reader.getDataBlockEncoding.name().equals("NONE"))
+    }
+
+    assert( 2 ==  f2FileList.length)
+
+
+    val conn = ConnectionFactory.createConnection(config)
+
+    val load = new LoadIncrementalHFiles(config)
+    val table = conn.getTable(TableName.valueOf(tableName))
+    try {
+      load.doBulkLoad(new Path(stagingFolder.getPath),
+        conn.getAdmin, table, conn.getRegionLocator(TableName.valueOf(tableName)))
+
+      val cells5 = table.get(new Get(Bytes.toBytes("5"))).listCells()
+      assert(cells5.size == 1)
+      assert(Bytes.toString(CellUtil.cloneValue(cells5.get(0))).equals("foo3"))
+      assert(Bytes.toString(CellUtil.cloneFamily(cells5.get(0))).equals("f1"))
+      assert(Bytes.toString(CellUtil.cloneQualifier(cells5.get(0))).equals("a"))
+
+      val cells4 = table.get(new Get(Bytes.toBytes("4"))).listCells()
+      assert(cells4.size == 2)
+      assert(Bytes.toString(CellUtil.cloneValue(cells4.get(0))).equals("foo.1"))
+      assert(Bytes.toString(CellUtil.cloneFamily(cells4.get(0))).equals("f1"))
+      assert(Bytes.toString(CellUtil.cloneQualifier(cells4.get(0))).equals("a"))
+      assert(Bytes.toString(CellUtil.cloneValue(cells4.get(1))).equals("foo.2"))
+      assert(Bytes.toString(CellUtil.cloneFamily(cells4.get(1))).equals("f2"))
+      assert(Bytes.toString(CellUtil.cloneQualifier(cells4.get(1))).equals("b"))
+
+      val cells3 = table.get(new Get(Bytes.toBytes("3"))).listCells()
+      assert(cells3.size == 3)
+      assert(Bytes.toString(CellUtil.cloneValue(cells3.get(0))).equals("foo2.c"))
+      assert(Bytes.toString(CellUtil.cloneFamily(cells3.get(0))).equals("f1"))
+      assert(Bytes.toString(CellUtil.cloneQualifier(cells3.get(0))).equals("a"))
+      assert(Bytes.toString(CellUtil.cloneValue(cells3.get(1))).equals("foo2.b"))
+      assert(Bytes.toString(CellUtil.cloneFamily(cells3.get(1))).equals("f2"))
+      assert(Bytes.toString(CellUtil.cloneQualifier(cells3.get(1))).equals("a"))
+      assert(Bytes.toString(CellUtil.cloneValue(cells3.get(2))).equals("foo2.a"))
+      assert(Bytes.toString(CellUtil.cloneFamily(cells3.get(2))).equals("f2"))
+      assert(Bytes.toString(CellUtil.cloneQualifier(cells3.get(2))).equals("b"))
+
+
+      val cells2 = table.get(new Get(Bytes.toBytes("2"))).listCells()
+      assert(cells2.size == 2)
+      assert(Bytes.toString(CellUtil.cloneValue(cells2.get(0))).equals("bar.1"))
+      assert(Bytes.toString(CellUtil.cloneFamily(cells2.get(0))).equals("f1"))
+      assert(Bytes.toString(CellUtil.cloneQualifier(cells2.get(0))).equals("a"))
+      assert(Bytes.toString(CellUtil.cloneValue(cells2.get(1))).equals("bar.2"))
+      assert(Bytes.toString(CellUtil.cloneFamily(cells2.get(1))).equals("f1"))
+      assert(Bytes.toString(CellUtil.cloneQualifier(cells2.get(1))).equals("b"))
+
+      val cells1 = table.get(new Get(Bytes.toBytes("1"))).listCells()
+      assert(cells1.size == 1)
+      assert(Bytes.toString(CellUtil.cloneValue(cells1.get(0))).equals("foo1"))
+      assert(Bytes.toString(CellUtil.cloneFamily(cells1.get(0))).equals("f1"))
+      assert(Bytes.toString(CellUtil.cloneQualifier(cells1.get(0))).equals("a"))
+
+    } finally {
+      table.close()
+      val admin = ConnectionFactory.createConnection(config).getAdmin
+      try {
+        admin.disableTable(TableName.valueOf(tableName))
+        admin.deleteTable(TableName.valueOf(tableName))
+      } finally {
+        admin.close()
+      }
+      fs.delete(new Path(stagingFolder.getPath), true)
+
+      testFolder.delete()
+
+    }
+  }
+
+  test("Test partitioner") {
+
+    var splitKeys:Array[Array[Byte]] = new Array[Array[Byte]](3)
+    splitKeys(0) = Bytes.toBytes("")
+    splitKeys(1) = Bytes.toBytes("3")
+    splitKeys(2) = Bytes.toBytes("7")
+
+    var partitioner = new BulkLoadPartitioner(splitKeys)
+
+    assert(0 == partitioner.getPartition(Bytes.toBytes("")))
+    assert(0 == partitioner.getPartition(Bytes.toBytes("1")))
+    assert(0 == partitioner.getPartition(Bytes.toBytes("2")))
+    assert(1 == partitioner.getPartition(Bytes.toBytes("3")))
+    assert(1 == partitioner.getPartition(Bytes.toBytes("4")))
+    assert(1 == partitioner.getPartition(Bytes.toBytes("6")))
+    assert(2 == partitioner.getPartition(Bytes.toBytes("7")))
+    assert(2 == partitioner.getPartition(Bytes.toBytes("8")))
+
+
+    splitKeys = new Array[Array[Byte]](1)
+    splitKeys(0) = Bytes.toBytes("")
+
+    partitioner = new BulkLoadPartitioner(splitKeys)
+
+    assert(0 == partitioner.getPartition(Bytes.toBytes("")))
+    assert(0 == partitioner.getPartition(Bytes.toBytes("1")))
+    assert(0 == partitioner.getPartition(Bytes.toBytes("2")))
+    assert(0 == partitioner.getPartition(Bytes.toBytes("3")))
+    assert(0 == partitioner.getPartition(Bytes.toBytes("4")))
+    assert(0 == partitioner.getPartition(Bytes.toBytes("6")))
+    assert(0 == partitioner.getPartition(Bytes.toBytes("7")))
+
+    splitKeys = new Array[Array[Byte]](7)
+    splitKeys(0) = Bytes.toBytes("")
+    splitKeys(1) = Bytes.toBytes("02")
+    splitKeys(2) = Bytes.toBytes("04")
+    splitKeys(3) = Bytes.toBytes("06")
+    splitKeys(4) = Bytes.toBytes("08")
+    splitKeys(5) = Bytes.toBytes("10")
+    splitKeys(6) = Bytes.toBytes("12")
+
+    partitioner = new BulkLoadPartitioner(splitKeys)
+
+    assert(0 == partitioner.getPartition(Bytes.toBytes("")))
+    assert(0 == partitioner.getPartition(Bytes.toBytes("01")))
+    assert(1 == partitioner.getPartition(Bytes.toBytes("02")))
+    assert(1 == partitioner.getPartition(Bytes.toBytes("03")))
+    assert(2 == partitioner.getPartition(Bytes.toBytes("04")))
+    assert(2 == partitioner.getPartition(Bytes.toBytes("05")))
+    assert(3 == partitioner.getPartition(Bytes.toBytes("06")))
+    assert(3 == partitioner.getPartition(Bytes.toBytes("07")))
+    assert(4 == partitioner.getPartition(Bytes.toBytes("08")))
+    assert(4 == partitioner.getPartition(Bytes.toBytes("09")))
+    assert(5 == partitioner.getPartition(Bytes.toBytes("10")))
+    assert(5 == partitioner.getPartition(Bytes.toBytes("11")))
+    assert(6 == partitioner.getPartition(Bytes.toBytes("12")))
+    assert(6 == partitioner.getPartition(Bytes.toBytes("13")))
+  }
+
+  test("Thin Row Bulk Load: Test multi family and multi column tests " +
+    "with all default HFile Configs") {
+    val config = TEST_UTIL.getConfiguration
+
+    logInfo(" - creating table " + tableName)
+    TEST_UTIL.createTable(TableName.valueOf(tableName),
+      Array(Bytes.toBytes(columnFamily1), Bytes.toBytes(columnFamily2)))
+
+    // There are a number of tests in here.
+    // 1. Row keys are not in order
+    // 2. Qualifiers are not in order
+    // 3. Column Families are not in order
+    // 4. Some records are in one column family and some are in two column families
+    // 5. Some records have a single qualifier and some have two
+    val rdd = sc.parallelize(Array(
+      ("1",
+        (Bytes.toBytes(columnFamily1), Bytes.toBytes("a"), Bytes.toBytes("foo1"))),
+      ("3",
+        (Bytes.toBytes(columnFamily2), Bytes.toBytes("b"), Bytes.toBytes("foo2.a"))),
+      ("3",
+        (Bytes.toBytes(columnFamily2), Bytes.toBytes("a"), Bytes.toBytes("foo2.b"))),
+      ("3",
+        (Bytes.toBytes(columnFamily1), Bytes.toBytes("a"), Bytes.toBytes("foo2.c"))),
+      ("5",
+        (Bytes.toBytes(columnFamily1), Bytes.toBytes("a"), Bytes.toBytes("foo3"))),
+      ("4",
+        (Bytes.toBytes(columnFamily1), Bytes.toBytes("a"), Bytes.toBytes("foo.1"))),
+      ("4",
+        (Bytes.toBytes(columnFamily2), Bytes.toBytes("b"), Bytes.toBytes("foo.2"))),
+      ("2",
+        (Bytes.toBytes(columnFamily1), Bytes.toBytes("a"), Bytes.toBytes("bar.1"))),
+      ("2",
+        (Bytes.toBytes(columnFamily1), Bytes.toBytes("b"), Bytes.toBytes("bar.2"))))).
+      groupByKey()
+
+    val hbaseContext = new HBaseContext(sc, config)
+
+    testFolder.create()
+    val stagingFolder = testFolder.newFolder()
+
+    hbaseContext.bulkLoadThinRows[(String, Iterable[(Array[Byte], Array[Byte], Array[Byte])])](rdd,
+      TableName.valueOf(tableName),
+      t => {
+        val rowKey = Bytes.toBytes(t._1)
+
+        val familyQualifiersValues = new FamiliesQualifiersValues
+        t._2.foreach(f => {
+          val family:Array[Byte] = f._1
+          val qualifier = f._2
+          val value:Array[Byte] = f._3
+
+          familyQualifiersValues +=(family, qualifier, value)
+        })
+        (new ByteArrayWrapper(rowKey), familyQualifiersValues)
+      },
+      stagingFolder.getPath)
+
+    val fs = FileSystem.get(config)
+    assert(fs.listStatus(new Path(stagingFolder.getPath)).length == 2)
+
+    val conn = ConnectionFactory.createConnection(config)
+
+    val load = new LoadIncrementalHFiles(config)
+    val table = conn.getTable(TableName.valueOf(tableName))
+    try {
+      load.doBulkLoad(new Path(stagingFolder.getPath), conn.getAdmin, table,
+        conn.getRegionLocator(TableName.valueOf(tableName)))
+
+      val cells5 = table.get(new Get(Bytes.toBytes("5"))).listCells()
+      assert(cells5.size == 1)
+      assert(Bytes.toString(CellUtil.cloneValue(cells5.get(0))).equals("foo3"))
+      assert(Bytes.toString(CellUtil.cloneFamily(cells5.get(0))).equals("f1"))
+      assert(Bytes.toString(CellUtil.cloneQualifier(cells5.get(0))).equals("a"))
+
+      val cells4 = table.get(new Get(Bytes.toBytes("4"))).listCells()
+      assert(cells4.size == 2)
+      assert(Bytes.toString(CellUtil.cloneValue(cells4.get(0))).equals("foo.1"))
+      assert(Bytes.toString(CellUtil.cloneFamily(cells4.get(0))).equals("f1"))
+      assert(Bytes.toString(CellUtil.cloneQualifier(cells4.get(0))).equals("a"))
+      assert(Bytes.toString(CellUtil.cloneValue(cells4.get(1))).equals("foo.2"))
+      assert(Bytes.toString(CellUtil.cloneFamily(cells4.get(1))).equals("f2"))
+      assert(Bytes.toString(CellUtil.cloneQualifier(cells4.get(1))).equals("b"))
+
+      val cells3 = table.get(new Get(Bytes.toBytes("3"))).listCells()
+      assert(cells3.size == 3)
+      assert(Bytes.toString(CellUtil.cloneValue(cells3.get(0))).equals("foo2.c"))
+      assert(Bytes.toString(CellUtil.cloneFamily(cells3.get(0))).equals("f1"))
+      assert(Bytes.toString(CellUtil.cloneQualifier(cells3.get(0))).equals("a"))
+      assert(Bytes.toString(CellUtil.cloneValue(cells3.get(1))).equals("foo2.b"))
+      assert(Bytes.toString(CellUtil.cloneFamily(cells3.get(1))).equals("f2"))
+      assert(Bytes.toString(CellUtil.cloneQualifier(cells3.get(1))).equals("a"))
+      assert(Bytes.toString(CellUtil.cloneValue(cells3.get(2))).equals("foo2.a"))
+      assert(Bytes.toString(CellUtil.cloneFamily(cells3.get(2))).equals("f2"))
+      assert(Bytes.toString(CellUtil.cloneQualifier(cells3.get(2))).equals("b"))
+
+
+      val cells2 = table.get(new Get(Bytes.toBytes("2"))).listCells()
+      assert(cells2.size == 2)
+      assert(Bytes.toString(CellUtil.cloneValue(cells2.get(0))).equals("bar.1"))
+      assert(Bytes.toString(CellUtil.cloneFamily(cells2.get(0))).equals("f1"))
+      assert(Bytes.toString(CellUtil.cloneQualifier(cells2.get(0))).equals("a"))
+      assert(Bytes.toString(CellUtil.cloneValue(cells2.get(1))).equals("bar.2"))
+      assert(Bytes.toString(CellUtil.cloneFamily(cells2.get(1))).equals("f1"))
+      assert(Bytes.toString(CellUtil.cloneQualifier(cells2.get(1))).equals("b"))
+
+      val cells1 = table.get(new Get(Bytes.toBytes("1"))).listCells()
+      assert(cells1.size == 1)
+      assert(Bytes.toString(CellUtil.cloneValue(cells1.get(0))).equals("foo1"))
+      assert(Bytes.toString(CellUtil.cloneFamily(cells1.get(0))).equals("f1"))
+      assert(Bytes.toString(CellUtil.cloneQualifier(cells1.get(0))).equals("a"))
+
+    } finally {
+      table.close()
+      val admin = ConnectionFactory.createConnection(config).getAdmin
+      try {
+        admin.disableTable(TableName.valueOf(tableName))
+        admin.deleteTable(TableName.valueOf(tableName))
+      } finally {
+        admin.close()
+      }
+      fs.delete(new Path(stagingFolder.getPath), true)
+
+      testFolder.delete()
+
+    }
+  }
+
+  test("Thin Row Bulk Load: Test HBase client: Test Roll Over and " +
+    "using an implicit call to bulk load") {
+    val config = TEST_UTIL.getConfiguration
+
+    logInfo(" - creating table " + tableName)
+    TEST_UTIL.createTable(TableName.valueOf(tableName),
+      Array(Bytes.toBytes(columnFamily1), Bytes.toBytes(columnFamily2)))
+
+    // There are a number of tests in here.
+    // 1. Row keys are not in order
+    // 2. Qualifiers are not in order
+    // 3. Column Families are not in order
+    // 4. Some records are in one column family and some are in two column families
+    // 5. Some records have a single qualifier and some have two
+    val rdd = sc.parallelize(Array(
+      ("1",
+        (Bytes.toBytes(columnFamily1), Bytes.toBytes("a"), Bytes.toBytes("foo1"))),
+      ("3",
+        (Bytes.toBytes(columnFamily1), Bytes.toBytes("b"), Bytes.toBytes("foo2.b"))),
+      ("3",
+        (Bytes.toBytes(columnFamily1), Bytes.toBytes("a"), Bytes.toBytes("foo2.a"))),
+      ("3",
+        (Bytes.toBytes(columnFamily1), Bytes.toBytes("c"), Bytes.toBytes("foo2.c"))),
+      ("5",
+        (Bytes.toBytes(columnFamily1), Bytes.toBytes("a"), Bytes.toBytes("foo3"))),
+      ("4",
+        (Bytes.toBytes(columnFamily1), Bytes.toBytes("a"), Bytes.toBytes("foo.1"))),
+      ("4",
+        (Bytes.toBytes(columnFamily1), Bytes.toBytes("b"), Bytes.toBytes("foo.2"))),
+      ("2",
+        (Bytes.toBytes(columnFamily1), Bytes.toBytes("a"), Bytes.toBytes("bar.1"))),
+      ("2",
+        (Bytes.toBytes(columnFamily1), Bytes.toBytes("b"), Bytes.toBytes("bar.2"))))).
+      groupByKey()
+
+    val hbaseContext = new HBaseContext(sc, config)
+
+    testFolder.create()
+    val stagingFolder = testFolder.newFolder()
+
+    rdd.hbaseBulkLoadThinRows(hbaseContext,
+      TableName.valueOf(tableName),
+      t => {
+        val rowKey = t._1
+
+        val familyQualifiersValues = new FamiliesQualifiersValues
+        t._2.foreach(f => {
+          val family:Array[Byte] = f._1
+          val qualifier = f._2
+          val value:Array[Byte] = f._3
+
+          familyQualifiersValues +=(family, qualifier, value)
+        })
+        (new ByteArrayWrapper(Bytes.toBytes(rowKey)), familyQualifiersValues)
+      },
+      stagingFolder.getPath,
+      new java.util.HashMap[Array[Byte], FamilyHFileWriteOptions],
+      compactionExclude = false,
+      20)
+
+    val fs = FileSystem.get(config)
+    assert(fs.listStatus(new Path(stagingFolder.getPath)).length == 1)
+
+    assert(fs.listStatus(new Path(stagingFolder.getPath+ "/f1")).length == 5)
+
+    val conn = ConnectionFactory.createConnection(config)
+
+    val load = new LoadIncrementalHFiles(config)
+    val table = conn.getTable(TableName.valueOf(tableName))
+    try {
+      load.doBulkLoad(new Path(stagingFolder.getPath),
+        conn.getAdmin, table, conn.getRegionLocator(TableName.valueOf(tableName)))
+
+      val cells5 = table.get(new Get(Bytes.toBytes("5"))).listCells()
+      assert(cells5.size == 1)
+      assert(Bytes.toString(CellUtil.cloneValue(cells5.get(0))).equals("foo3"))
+      assert(Bytes.toString(CellUtil.cloneFamily(cells5.get(0))).equals("f1"))
+      assert(Bytes.toString(CellUtil.cloneQualifier(cells5.get(0))).equals("a"))
+
+      val cells4 = table.get(new Get(Bytes.toBytes("4"))).listCells()
+      assert(cells4.size == 2)
+      assert(Bytes.toString(CellUtil.cloneValue(cells4.get(0))).equals("foo.1"))
+      assert(Bytes.toString(CellUtil.cloneFamily(cells4.get(0))).equals("f1"))
+      assert(Bytes.toString(CellUtil.cloneQualifier(cells4.get(0))).equals("a"))
+      assert(Bytes.toString(CellUtil.cloneValue(cells4.get(1))).equals("foo.2"))
+      assert(Bytes.toString(CellUtil.cloneFamily(cells4.get(1))).equals("f1"))
+      assert(Bytes.toString(CellUtil.cloneQualifier(cells4.get(1))).equals("b"))
+
+      val cells3 = table.get(new Get(Bytes.toBytes("3"))).listCells()
+      assert(cells3.size == 3)
+      assert(Bytes.toString(CellUtil.cloneValue(cells3.get(0))).equals("foo2.a"))
+      assert(Bytes.toString(CellUtil.cloneFamily(cells3.get(0))).equals("f1"))
+      assert(Bytes.toString(CellUtil.cloneQualifier(cells3.get(0))).equals("a"))
+      assert(Bytes.toString(CellUtil.cloneValue(cells3.get(1))).equals("foo2.b"))
+      assert(Bytes.toString(CellUtil.cloneFamily(cells3.get(1))).equals("f1"))
+      assert(Bytes.toString(CellUtil.cloneQualifier(cells3.get(1))).equals("b"))
+      assert(Bytes.toString(CellUtil.cloneValue(cells3.get(2))).equals("foo2.c"))
+      assert(Bytes.toString(CellUtil.cloneFamily(cells3.get(2))).equals("f1"))
+      assert(Bytes.toString(CellUtil.cloneQualifier(cells3.get(2))).equals("c"))
+
+      val cells2 = table.get(new Get(Bytes.toBytes("2"))).listCells()
+      assert(cells2.size == 2)
+      assert(Bytes.toString(CellUtil.cloneValue(cells2.get(0))).equals("bar.1"))
+      assert(Bytes.toString(CellUtil.cloneFamily(cells2.get(0))).equals("f1"))
+      assert(Bytes.toString(CellUtil.cloneQualifier(cells2.get(0))).equals("a"))
+      assert(Bytes.toString(CellUtil.cloneValue(cells2.get(1))).equals("bar.2"))
+      assert(Bytes.toString(CellUtil.cloneFamily(cells2.get(1))).equals("f1"))
+      assert(Bytes.toString(CellUtil.cloneQualifier(cells2.get(1))).equals("b"))
+
+      val cells1 = table.get(new Get(Bytes.toBytes("1"))).listCells()
+      assert(cells1.size == 1)
+      assert(Bytes.toString(CellUtil.cloneValue(cells1.get(0))).equals("foo1"))
+      assert(Bytes.toString(CellUtil.cloneFamily(cells1.get(0))).equals("f1"))
+      assert(Bytes.toString(CellUtil.cloneQualifier(cells1.get(0))).equals("a"))
+
+    } finally {
+      table.close()
+      val admin = ConnectionFactory.createConnection(config).getAdmin
+      try {
+        admin.disableTable(TableName.valueOf(tableName))
+        admin.deleteTable(TableName.valueOf(tableName))
+      } finally {
+        admin.close()
+      }
+      fs.delete(new Path(stagingFolder.getPath), true)
+
+      testFolder.delete()
+    }
+  }
+
+  test("Thin Row Bulk Load: Test multi family and multi column tests" +
+    " with one column family with custom configs plus multi region") {
+    val config = TEST_UTIL.getConfiguration
+
+    val splitKeys:Array[Array[Byte]] = new Array[Array[Byte]](2)
+    splitKeys(0) = Bytes.toBytes("2")
+    splitKeys(1) = Bytes.toBytes("4")
+
+    logInfo(" - creating table " + tableName)
+    TEST_UTIL.createTable(TableName.valueOf(tableName),
+      Array(Bytes.toBytes(columnFamily1), Bytes.toBytes(columnFamily2)),
+      splitKeys)
+
+    // There are a number of tests in here.
+    // 1. Row keys are not in order
+    // 2. Qualifiers are not in order
+    // 3. Column Families are not in order
+    // 4. Some records are in one column family and some are in two column families
+    // 5. Some records have a single qualifier and some have two
+    val rdd = sc.parallelize(Array(
+      ("1",
+        (Bytes.toBytes(columnFamily1), Bytes.toBytes("a"), Bytes.toBytes("foo1"))),
+      ("3",
+        (Bytes.toBytes(columnFamily2), Bytes.toBytes("b"), Bytes.toBytes("foo2.a"))),
+      ("3",
+        (Bytes.toBytes(columnFamily2), Bytes.toBytes("a"), Bytes.toBytes("foo2.b"))),
+      ("3",
+        (Bytes.toBytes(columnFamily1), Bytes.toBytes("a"), Bytes.toBytes("foo2.c"))),
+      ("5",
+        (Bytes.toBytes(columnFamily1), Bytes.toBytes("a"), Bytes.toBytes("foo3"))),
+      ("4",
+        (Bytes.toBytes(columnFamily1), Bytes.toBytes("a"), Bytes.toBytes("foo.1"))),
+      ("4",
+        (Bytes.toBytes(columnFamily2), Bytes.toBytes("b"), Bytes.toBytes("foo.2"))),
+      ("2",
+        (Bytes.toBytes(columnFamily1), Bytes.toBytes("a"), Bytes.toBytes("bar.1"))),
+      ("2",
+        (Bytes.toBytes(columnFamily1), Bytes.toBytes("b"), Bytes.toBytes("bar.2"))))).
+      groupByKey()
+
+    val hbaseContext = new HBaseContext(sc, config)
+
+    testFolder.create()
+    val stagingFolder = testFolder.newFolder()
+
+    val familyHBaseWriterOptions = new java.util.HashMap[Array[Byte], FamilyHFileWriteOptions]
+
+    val f1Options = new FamilyHFileWriteOptions("GZ", "ROW", 128,
+      "PREFIX")
+
+    familyHBaseWriterOptions.put(Bytes.toBytes(columnFamily1), f1Options)
+
+    hbaseContext.bulkLoadThinRows[(String, Iterable[(Array[Byte], Array[Byte], Array[Byte])])](rdd,
+      TableName.valueOf(tableName),
+      t => {
+        val rowKey = t._1
+
+        val familyQualifiersValues = new FamiliesQualifiersValues
+        t._2.foreach(f => {
+          val family:Array[Byte] = f._1
+          val qualifier = f._2
+          val value:Array[Byte] = f._3
+
+          familyQualifiersValues +=(family, qualifier, value)
+        })
+        (new ByteArrayWrapper(Bytes.toBytes(rowKey)), familyQualifiersValues)
+      },
+      stagingFolder.getPath,
+      familyHBaseWriterOptions,
+      compactionExclude = false,
+      HConstants.DEFAULT_MAX_FILE_SIZE)
+
+    val fs = FileSystem.get(config)
+    assert(fs.listStatus(new Path(stagingFolder.getPath)).length == 2)
+
+    val f1FileList = fs.listStatus(new Path(stagingFolder.getPath +"/f1"))
+    for ( i <- 0 until f1FileList.length) {
+      val reader = HFile.createReader(fs, f1FileList(i).getPath,
+        new CacheConfig(config), true, config)
+      assert(reader.getCompressionAlgorithm.getName.equals("gz"))
+      assert(reader.getDataBlockEncoding.name().equals("PREFIX"))
+    }
+
+    assert( 3 ==  f1FileList.length)
+
+    val f2FileList = fs.listStatus(new Path(stagingFolder.getPath +"/f2"))
+    for ( i <- 0 until f2FileList.length) {
+      val reader = HFile.createReader(fs, f2FileList(i).getPath,
+        new CacheConfig(config), true, config)
+      assert(reader.getCompressionAlgorithm.getName.equals("none"))
+      assert(reader.getDataBlockEncoding.name().equals("NONE"))
+    }
+
+    assert( 2 ==  f2FileList.length)
+
+
+    val conn = ConnectionFactory.createConnection(config)
+
+    val load = new LoadIncrementalHFiles(config)
+    val table = conn.getTable(TableName.valueOf(tableName))
+    try {
+      load.doBulkLoad(new Path(stagingFolder.getPath),
+        conn.getAdmin, table, conn.getRegionLocator(TableName.valueOf(tableName)))
+
+      val cells5 = table.get(new Get(Bytes.toBytes("5"))).listCells()
+      assert(cells5.size == 1)
+      assert(Bytes.toString(CellUtil.cloneValue(cells5.get(0))).equals("foo3"))
+      assert(Bytes.toString(CellUtil.cloneFamily(cells5.get(0))).equals("f1"))
+      assert(Bytes.toString(CellUtil.cloneQualifier(cells5.get(0))).equals("a"))
+
+      val cells4 = table.get(new Get(Bytes.toBytes("4"))).listCells()
+      assert(cells4.size == 2)
+      assert(Bytes.toString(CellUtil.cloneValue(cells4.get(0))).equals("foo.1"))
+      assert(Bytes.toString(CellUtil.cloneFamily(cells4.get(0))).equals("f1"))
+      assert(Bytes.toString(CellUtil.cloneQualifier(cells4.get(0))).equals("a"))
+      assert(Bytes.toString(CellUtil.cloneValue(cells4.get(1))).equals("foo.2"))
+      assert(Bytes.toString(CellUtil.cloneFamily(cells4.get(1))).equals("f2"))
+      assert(Bytes.toString(CellUtil.cloneQualifier(cells4.get(1))).equals("b"))
+
+      val cells3 = table.get(new Get(Bytes.toBytes("3"))).listCells()
+      assert(cells3.size == 3)
+      assert(Bytes.toString(CellUtil.cloneValue(cells3.get(0))).equals("foo2.c"))
+      assert(Bytes.toString(CellUtil.cloneFamily(cells3.get(0))).equals("f1"))
+      assert(Bytes.toString(CellUtil.cloneQualifier(cells3.get(0))).equals("a"))
+      assert(Bytes.toString(CellUtil.cloneValue(cells3.get(1))).equals("foo2.b"))
+      assert(Bytes.toString(CellUtil.cloneFamily(cells3.get(1))).equals("f2"))
+      assert(Bytes.toString(CellUtil.cloneQualifier(cells3.get(1))).equals("a"))
+      assert(Bytes.toString(CellUtil.cloneValue(cells3.get(2))).equals("foo2.a"))
+      assert(Bytes.toString(CellUtil.cloneFamily(cells3.get(2))).equals("f2"))
+      assert(Bytes.toString(CellUtil.cloneQualifier(cells3.get(2))).equals("b"))
+
+
+      val cells2 = table.get(new Get(Bytes.toBytes("2"))).listCells()
+      assert(cells2.size == 2)
+      assert(Bytes.toString(CellUtil.cloneValue(cells2.get(0))).equals("bar.1"))
+      assert(Bytes.toString(CellUtil.cloneFamily(cells2.get(0))).equals("f1"))
+      assert(Bytes.toString(CellUtil.cloneQualifier(cells2.get(0))).equals("a"))
+      assert(Bytes.toString(CellUtil.cloneValue(cells2.get(1))).equals("bar.2"))
+      assert(Bytes.toString(CellUtil.cloneFamily(cells2.get(1))).equals("f1"))
+      assert(Bytes.toString(CellUtil.cloneQualifier(cells2.get(1))).equals("b"))
+
+      val cells1 = table.get(new Get(Bytes.toBytes("1"))).listCells()
+      assert(cells1.size == 1)
+      assert(Bytes.toString(CellUtil.cloneValue(cells1.get(0))).equals("foo1"))
+      assert(Bytes.toString(CellUtil.cloneFamily(cells1.get(0))).equals("f1"))
+      assert(Bytes.toString(CellUtil.cloneQualifier(cells1.get(0))).equals("a"))
+
+    } finally {
+      table.close()
+      val admin = ConnectionFactory.createConnection(config).getAdmin
+      try {
+        admin.disableTable(TableName.valueOf(tableName))
+        admin.deleteTable(TableName.valueOf(tableName))
+      } finally {
+        admin.close()
+      }
+      fs.delete(new Path(stagingFolder.getPath), true)
+
+      testFolder.delete()
+
+    }
+  }
+}
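
In the custom-config tests above, the per-family HFile settings are passed positionally as FamilyHFileWriteOptions("GZ", "ROW", 128, "PREFIX"). The assertions on the generated files confirm the compression codec and data block encoding positions; the middle two arguments appear to be the bloom filter type and HFile block size. A minimal sketch with the positions spelled out (illustrative, under that assumption):

    import org.apache.hadoop.hbase.spark.FamilyHFileWriteOptions
    import org.apache.hadoop.hbase.util.Bytes

    val familyOptions = new java.util.HashMap[Array[Byte], FamilyHFileWriteOptions]
    // compression = gzip, bloom type = ROW, HFile block size = 128, encoding = PREFIX
    familyOptions.put(Bytes.toBytes("f1"), new FamilyHFileWriteOptions("GZ", "ROW", 128, "PREFIX"))
    // Families without an entry (f2 in the tests) fall back to "none" compression and "NONE" encoding.
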
diff --git a/spark/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/DefaultSourceSuite.scala b/spark/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/DefaultSourceSuite.scala
new file mode 100644
index 0000000..afe515b
--- /dev/null
+++ b/spark/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/DefaultSourceSuite.scala
@@ -0,0 +1,1063 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.spark
+
+import org.apache.avro.Schema
+import org.apache.avro.generic.GenericData
+import org.apache.hadoop.hbase.client.{ConnectionFactory, Put}
+import org.apache.hadoop.hbase.spark.datasources.{HBaseSparkConf, HBaseTableCatalog}
+import org.apache.hadoop.hbase.util.Bytes
+import org.apache.hadoop.hbase.{HBaseTestingUtility, TableName}
+import org.apache.spark.sql.functions._
+import org.apache.spark.sql.{DataFrame, SQLContext}
+import org.apache.spark.{SparkConf, SparkContext}
+import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite}
+import org.xml.sax.SAXParseException
+
+case class HBaseRecord(
+  col0: String,
+  col1: Boolean,
+  col2: Double,
+  col3: Float,
+  col4: Int,
+  col5: Long,
+  col6: Short,
+  col7: String,
+  col8: Byte)
+
+object HBaseRecord {
+  def apply(i: Int, t: String): HBaseRecord = {
+    val s = s"""row${"%03d".format(i)}"""
+    HBaseRecord(s,
+      i % 2 == 0,
+      i.toDouble,
+      i.toFloat,
+      i,
+      i.toLong,
+      i.toShort,
+      s"String$i: $t",
+      i.toByte)
+  }
+}
+
+
+case class AvroHBaseKeyRecord(col0: Array[Byte],
+                              col1: Array[Byte])
+
+object AvroHBaseKeyRecord {
+  val schemaString =
+    s"""{"namespace": "example.avro",
+        |   "type": "record",      "name": "User",
+        |    "fields": [      {"name": "name", "type": "string"},
+        |      {"name": "favorite_number",  "type": ["int", "null"]},
+        |        {"name": "favorite_color", "type": ["string", "null"]}      ]    }""".stripMargin
+
+  val avroSchema: Schema = {
+    val p = new Schema.Parser
+    p.parse(schemaString)
+  }
+
+  def apply(i: Int): AvroHBaseKeyRecord = {
+    val user = new GenericData.Record(avroSchema)
+    user.put("name", s"name${"%03d".format(i)}")
+    user.put("favorite_number", i)
+    user.put("favorite_color", s"color${"%03d".format(i)}")
+    val avroByte = AvroSerdes.serialize(user, avroSchema)
+    AvroHBaseKeyRecord(avroByte, avroByte)
+  }
+}
+
+class DefaultSourceSuite extends FunSuite with
+BeforeAndAfterEach with BeforeAndAfterAll with Logging {
+  @transient var sc: SparkContext = null
+  var TEST_UTIL: HBaseTestingUtility = new HBaseTestingUtility
+
+  val t1TableName = "t1"
+  val t2TableName = "t2"
+  val columnFamily = "c"
+
+  var sqlContext:SQLContext = null
+  var df:DataFrame = null
+
+  override def beforeAll() {
+
+    TEST_UTIL.startMiniCluster
+
+    logInfo(" - minicluster started")
+    try
+      TEST_UTIL.deleteTable(TableName.valueOf(t1TableName))
+    catch {
+      case e: Exception => logInfo(" - no table " + t1TableName + " found")
+    }
+    try
+      TEST_UTIL.deleteTable(TableName.valueOf(t2TableName))
+    catch {
+      case e: Exception => logInfo(" - no table " + t2TableName + " found")
+    }
+    logInfo(" - creating table " + t1TableName)
+    TEST_UTIL.createTable(TableName.valueOf(t1TableName), Bytes.toBytes(columnFamily))
+    logInfo(" - created table")
+    logInfo(" - creating table " + t2TableName)
+    TEST_UTIL.createTable(TableName.valueOf(t2TableName), Bytes.toBytes(columnFamily))
+    logInfo(" - created table")
+    val sparkConf = new SparkConf
+    sparkConf.set(HBaseSparkConf.QUERY_CACHEBLOCKS, "true")
+    sparkConf.set(HBaseSparkConf.QUERY_BATCHSIZE, "100")
+    sparkConf.set(HBaseSparkConf.QUERY_CACHEDROWS, "100")
+
+    sc  = new SparkContext("local", "test", sparkConf)
+
+    val connection = ConnectionFactory.createConnection(TEST_UTIL.getConfiguration)
+    try {
+      val t1Table = connection.getTable(TableName.valueOf("t1"))
+
+      try {
+        var put = new Put(Bytes.toBytes("get1"))
+        put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("a"), Bytes.toBytes("foo1"))
+        put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("b"), Bytes.toBytes("1"))
+        put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("i"), Bytes.toBytes(1))
+        t1Table.put(put)
+        put = new Put(Bytes.toBytes("get2"))
+        put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("a"), Bytes.toBytes("foo2"))
+        put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("b"), Bytes.toBytes("4"))
+        put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("i"), Bytes.toBytes(4))
+        put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("z"), Bytes.toBytes("FOO"))
+        t1Table.put(put)
+        put = new Put(Bytes.toBytes("get3"))
+        put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("a"), Bytes.toBytes("foo3"))
+        put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("b"), Bytes.toBytes("8"))
+        put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("i"), Bytes.toBytes(8))
+        t1Table.put(put)
+        put = new Put(Bytes.toBytes("get4"))
+        put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("a"), Bytes.toBytes("foo4"))
+        put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("b"), Bytes.toBytes("10"))
+        put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("i"), Bytes.toBytes(10))
+        put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("z"), Bytes.toBytes("BAR"))
+        t1Table.put(put)
+        put = new Put(Bytes.toBytes("get5"))
+        put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("a"), Bytes.toBytes("foo5"))
+        put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("b"), Bytes.toBytes("8"))
+        put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("i"), Bytes.toBytes(8))
+        t1Table.put(put)
+      } finally {
+        t1Table.close()
+      }
+
+      val t2Table = connection.getTable(TableName.valueOf("t2"))
+
+      try {
+        var put = new Put(Bytes.toBytes(1))
+        put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("a"), Bytes.toBytes("foo1"))
+        put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("b"), Bytes.toBytes("1"))
+        put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("i"), Bytes.toBytes(1))
+        t2Table.put(put)
+        put = new Put(Bytes.toBytes(2))
+        put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("a"), Bytes.toBytes("foo2"))
+        put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("b"), Bytes.toBytes("4"))
+        put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("i"), Bytes.toBytes(4))
+        put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("z"), Bytes.toBytes("FOO"))
+        t2Table.put(put)
+        put = new Put(Bytes.toBytes(3))
+        put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("a"), Bytes.toBytes("foo3"))
+        put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("b"), Bytes.toBytes("8"))
+        put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("i"), Bytes.toBytes(8))
+        t2Table.put(put)
+        put = new Put(Bytes.toBytes(4))
+        put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("a"), Bytes.toBytes("foo4"))
+        put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("b"), Bytes.toBytes("10"))
+        put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("i"), Bytes.toBytes(10))
+        put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("z"), Bytes.toBytes("BAR"))
+        t2Table.put(put)
+        put = new Put(Bytes.toBytes(5))
+        put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("a"), Bytes.toBytes("foo5"))
+        put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("b"), Bytes.toBytes("8"))
+        put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("i"), Bytes.toBytes(8))
+        t2Table.put(put)
+      } finally {
+        t2Table.close()
+      }
+    } finally {
+      connection.close()
+    }
+
+    def hbaseTable1Catalog = s"""{
+            |"table":{"namespace":"default", "name":"t1"},
+            |"rowkey":"key",
+            |"columns":{
+              |"KEY_FIELD":{"cf":"rowkey", "col":"key", "type":"string"},
+              |"A_FIELD":{"cf":"c", "col":"a", "type":"string"},
+              |"B_FIELD":{"cf":"c", "col":"b", "type":"string"}
+            |}
+          |}""".stripMargin
+
+    new HBaseContext(sc, TEST_UTIL.getConfiguration)
+    sqlContext = new SQLContext(sc)
+
+    df = sqlContext.load("org.apache.hadoop.hbase.spark",
+      Map(HBaseTableCatalog.tableCatalog->hbaseTable1Catalog))
+
+    df.registerTempTable("hbaseTable1")
+
+    def hbaseTable2Catalog = s"""{
+            |"table":{"namespace":"default", "name":"t2"},
+            |"rowkey":"key",
+            |"columns":{
+              |"KEY_FIELD":{"cf":"rowkey", "col":"key", "type":"int"},
+              |"A_FIELD":{"cf":"c", "col":"a", "type":"string"},
+              |"B_FIELD":{"cf":"c", "col":"b", "type":"string"}
+            |}
+          |}""".stripMargin
+
+
+    df = sqlContext.load("org.apache.hadoop.hbase.spark",
+      Map(HBaseTableCatalog.tableCatalog->hbaseTable2Catalog))
+
+    df.registerTempTable("hbaseTable2")
+  }
+
+  override def afterAll() {
+    TEST_UTIL.deleteTable(TableName.valueOf(t1TableName))
+    logInfo("shuting down minicluster")
+    TEST_UTIL.shutdownMiniCluster()
+
+    sc.stop()
+  }
+
+  override def beforeEach(): Unit = {
+    DefaultSourceStaticUtils.lastFiveExecutionRules.clear()
+  }
+
+
+  /**
+   * An example of querying three fields while using only rowkey points for the filter
+   */
+  test("Test rowKey point only rowKey query") {
+    val results = sqlContext.sql("SELECT KEY_FIELD, B_FIELD, A_FIELD FROM hbaseTable1 " +
+      "WHERE " +
+      "(KEY_FIELD = 'get1' or KEY_FIELD = 'get2' or KEY_FIELD = 'get3')").take(10)
+
+    val executionRules = DefaultSourceStaticUtils.lastFiveExecutionRules.poll()
+
+    assert(results.length == 3)
+
+    assert(executionRules.dynamicLogicExpression.toExpressionString.
+      equals("( ( KEY_FIELD == 0 OR KEY_FIELD == 1 ) OR KEY_FIELD == 2 )"))
+
+    assert(executionRules.rowKeyFilter.points.size == 3)
+    assert(executionRules.rowKeyFilter.ranges.size == 0)
+  }
+
+  /**
+   * An example of querying three fields while using only cell points for the filter
+   */
+  test("Test cell point only rowKey query") {
+    val results = sqlContext.sql("SELECT KEY_FIELD, B_FIELD, A_FIELD FROM hbaseTable1 " +
+      "WHERE " +
+      "(B_FIELD = '4' or B_FIELD = '10' or A_FIELD = 'foo1')").take(10)
+
+    val executionRules = DefaultSourceStaticUtils.lastFiveExecutionRules.poll()
+
+    assert(results.length == 3)
+
+    assert(executionRules.dynamicLogicExpression.toExpressionString.
+      equals("( ( B_FIELD == 0 OR B_FIELD == 1 ) OR A_FIELD == 2 )"))
+  }
+
+  /**
+   * An example of an OR merge between two ranges where the result is one range.
+   * Also an example of less than and greater than.
+   */
+  test("Test two range rowKey query") {
+    val results = sqlContext.sql("SELECT KEY_FIELD, B_FIELD, A_FIELD FROM hbaseTable1 " +
+      "WHERE " +
+      "( KEY_FIELD < 'get2' or KEY_FIELD > 'get3')").take(10)
+
+    val executionRules = DefaultSourceStaticUtils.lastFiveExecutionRules.poll()
+
+    assert(results.length == 3)
+
+    assert(executionRules.dynamicLogicExpression.toExpressionString.
+      equals("( KEY_FIELD < 0 OR KEY_FIELD > 1 )"))
+
+    assert(executionRules.rowKeyFilter.points.size == 0)
+    assert(executionRules.rowKeyFilter.ranges.size == 2)
+
+    val scanRange1 = executionRules.rowKeyFilter.ranges.get(0).get
+    assert(Bytes.equals(scanRange1.lowerBound,Bytes.toBytes("")))
+    assert(Bytes.equals(scanRange1.upperBound,Bytes.toBytes("get2")))
+    assert(scanRange1.isLowerBoundEqualTo)
+    assert(!scanRange1.isUpperBoundEqualTo)
+
+    val scanRange2 = executionRules.rowKeyFilter.ranges.get(1).get
+    assert(Bytes.equals(scanRange2.lowerBound,Bytes.toBytes("get3")))
+    assert(scanRange2.upperBound == null)
+    assert(!scanRange2.isLowerBoundEqualTo)
+    assert(scanRange2.isUpperBoundEqualTo)
+  }
+
+  /**
+   * An example of an OR merge between two ranges where the result is one range.
+   * Also an example of less than and greater than.
+   *
+   * This example makes sure the code works for an int rowKey.
+   */
+  test("Test two range rowKey query where the rowKey is Int and there is a range over lap") {
+    val results = sqlContext.sql("SELECT KEY_FIELD, B_FIELD, A_FIELD FROM hbaseTable2 " +
+      "WHERE " +
+      "( KEY_FIELD < 4 or KEY_FIELD > 2)").take(10)
+
+    val executionRules = DefaultSourceStaticUtils.lastFiveExecutionRules.poll()
+
+
+
+    assert(executionRules.dynamicLogicExpression.toExpressionString.
+      equals("( KEY_FIELD < 0 OR KEY_FIELD > 1 )"))
+
+    assert(executionRules.rowKeyFilter.points.size == 0)
+    assert(executionRules.rowKeyFilter.ranges.size == 2)
+    assert(results.length == 5)
+  }
+
+  /**
+   * An example of an OR merge between two ranges where the result is two ranges.
+   * Also an example of less than and greater than.
+   *
+   * This example makes sure the code works for an int rowKey.
+   */
+  test("Test two range rowKey query where the rowKey is Int and the ranges don't over lap") {
+    val results = sqlContext.sql("SELECT KEY_FIELD, B_FIELD, A_FIELD FROM hbaseTable2 " +
+      "WHERE " +
+      "( KEY_FIELD < 2 or KEY_FIELD > 4)").take(10)
+
+    val executionRules = DefaultSourceStaticUtils.lastFiveExecutionRules.poll()
+
+    assert(executionRules.dynamicLogicExpression.toExpressionString.
+      equals("( KEY_FIELD < 0 OR KEY_FIELD > 1 )"))
+
+    assert(executionRules.rowKeyFilter.points.size == 0)
+
+    assert(executionRules.rowKeyFilter.ranges.size == 3)
+
+    val scanRange1 = executionRules.rowKeyFilter.ranges.get(0).get
+    assert(Bytes.equals(scanRange1.upperBound, Bytes.toBytes(2)))
+    assert(scanRange1.isLowerBoundEqualTo)
+    assert(!scanRange1.isUpperBoundEqualTo)
+
+    val scanRange2 = executionRules.rowKeyFilter.ranges.get(1).get
+    assert(scanRange2.isUpperBoundEqualTo)
+
+    assert(results.length == 2)
+  }
+
+  /**
+   * An example of an AND merge between two ranges where the result is one range.
+   * Also an example of less than or equal to and greater than or equal to.
+   */
+  test("Test one combined range rowKey query") {
+    val results = sqlContext.sql("SELECT KEY_FIELD, B_FIELD, A_FIELD FROM hbaseTable1 " +
+      "WHERE " +
+      "(KEY_FIELD <= 'get3' and KEY_FIELD >= 'get2')").take(10)
+
+    val executionRules = DefaultSourceStaticUtils.lastFiveExecutionRules.poll()
+
+    assert(results.length == 2)
+
+    val expr = executionRules.dynamicLogicExpression.toExpressionString
+    assert(expr.equals("( ( KEY_FIELD isNotNull AND KEY_FIELD <= 0 ) AND KEY_FIELD >= 1 )"), expr)
+
+    assert(executionRules.rowKeyFilter.points.size == 0)
+    assert(executionRules.rowKeyFilter.ranges.size == 1)
+
+    val scanRange1 = executionRules.rowKeyFilter.ranges.get(0).get
+    assert(Bytes.equals(scanRange1.lowerBound,Bytes.toBytes("get2")))
+    assert(Bytes.equals(scanRange1.upperBound, Bytes.toBytes("get3")))
+    assert(scanRange1.isLowerBoundEqualTo)
+    assert(scanRange1.isUpperBoundEqualTo)
+
+  }
+
+  /**
+   * Do a select with no filters
+   */
+  test("Test select only query") {
+
+    val results = df.select("KEY_FIELD").take(10)
+    assert(results.length == 5)
+
+    val executionRules = DefaultSourceStaticUtils.lastFiveExecutionRules.poll()
+
+    assert(executionRules.dynamicLogicExpression == null)
+
+  }
+
+  /**
+   * A complex query with one point and one range for both the
+   * rowKey and a column
+   */
+  test("Test SQL point and range combo") {
+    val results = sqlContext.sql("SELECT KEY_FIELD FROM hbaseTable1 " +
+      "WHERE " +
+      "(KEY_FIELD = 'get1' and B_FIELD < '3') or " +
+      "(KEY_FIELD >= 'get3' and B_FIELD = '8')").take(5)
+
+    val executionRules = DefaultSourceStaticUtils.lastFiveExecutionRules.poll()
+
+    assert(executionRules.dynamicLogicExpression.toExpressionString.
+      equals("( ( KEY_FIELD == 0 AND B_FIELD < 1 ) OR " +
+      "( KEY_FIELD >= 2 AND B_FIELD == 3 ) )"))
+
+    assert(executionRules.rowKeyFilter.points.size == 1)
+    assert(executionRules.rowKeyFilter.ranges.size == 1)
+
+    val scanRange1 = executionRules.rowKeyFilter.ranges.get(0).get
+    assert(Bytes.equals(scanRange1.lowerBound,Bytes.toBytes("get3")))
+    assert(scanRange1.upperBound == null)
+    assert(scanRange1.isLowerBoundEqualTo)
+    assert(scanRange1.isUpperBoundEqualTo)
+
+
+    assert(results.length == 3)
+  }
+
+  /**
+   * A complex query with two complex ranges that don't merge into one
+   */
+  test("Test two complete range non merge rowKey query") {
+
+    val results = sqlContext.sql("SELECT KEY_FIELD, B_FIELD, A_FIELD FROM hbaseTable2 " +
+      "WHERE " +
+      "( KEY_FIELD >= 1 and KEY_FIELD <= 2) or" +
+      "( KEY_FIELD > 3 and KEY_FIELD <= 5)").take(10)
+
+
+    assert(results.length == 4)
+    val executionRules = DefaultSourceStaticUtils.lastFiveExecutionRules.poll()
+    assert(executionRules.dynamicLogicExpression.toExpressionString.
+      equals("( ( KEY_FIELD >= 0 AND KEY_FIELD <= 1 ) OR " +
+      "( KEY_FIELD > 2 AND KEY_FIELD <= 3 ) )"))
+
+    assert(executionRules.rowKeyFilter.points.size == 0)
+    assert(executionRules.rowKeyFilter.ranges.size == 2)
+
+    val scanRange1 = executionRules.rowKeyFilter.ranges.get(0).get
+    assert(Bytes.equals(scanRange1.lowerBound,Bytes.toBytes(1)))
+    assert(Bytes.equals(scanRange1.upperBound, Bytes.toBytes(2)))
+    assert(scanRange1.isLowerBoundEqualTo)
+    assert(scanRange1.isUpperBoundEqualTo)
+
+    val scanRange2 = executionRules.rowKeyFilter.ranges.get(1).get
+    assert(Bytes.equals(scanRange2.lowerBound,Bytes.toBytes(3)))
+    assert(Bytes.equals(scanRange2.upperBound, Bytes.toBytes(5)))
+    assert(!scanRange2.isLowerBoundEqualTo)
+    assert(scanRange2.isUpperBoundEqualTo)
+
+  }
+
+  /**
+   * A complex query with two complex ranges that do merge into one
+   */
+  test("Test two complete range merge rowKey query") {
+    val results = sqlContext.sql("SELECT KEY_FIELD, B_FIELD, A_FIELD FROM hbaseTable1 " +
+      "WHERE " +
+      "( KEY_FIELD >= 'get1' and KEY_FIELD <= 'get2') or" +
+      "( KEY_FIELD > 'get3' and KEY_FIELD <= 'get5')").take(10)
+
+    val executionRules = DefaultSourceStaticUtils.lastFiveExecutionRules.poll()
+
+    assert(results.length == 4)
+
+    assert(executionRules.dynamicLogicExpression.toExpressionString.
+      equals("( ( KEY_FIELD >= 0 AND KEY_FIELD <= 1 ) OR " +
+      "( KEY_FIELD > 2 AND KEY_FIELD <= 3 ) )"))
+
+    assert(executionRules.rowKeyFilter.points.size == 0)
+    assert(executionRules.rowKeyFilter.ranges.size == 2)
+
+    val scanRange1 = executionRules.rowKeyFilter.ranges.get(0).get
+    assert(Bytes.equals(scanRange1.lowerBound,Bytes.toBytes("get1")))
+    assert(Bytes.equals(scanRange1.upperBound, Bytes.toBytes("get2")))
+    assert(scanRange1.isLowerBoundEqualTo)
+    assert(scanRange1.isUpperBoundEqualTo)
+
+    val scanRange2 = executionRules.rowKeyFilter.ranges.get(1).get
+    assert(Bytes.equals(scanRange2.lowerBound, Bytes.toBytes("get3")))
+    assert(Bytes.equals(scanRange2.upperBound, Bytes.toBytes("get5")))
+    assert(!scanRange2.isLowerBoundEqualTo)
+    assert(scanRange2.isUpperBoundEqualTo)
+  }
+
+  test("Test OR logic with a one RowKey and One column") {
+
+    val results = sqlContext.sql("SELECT KEY_FIELD, B_FIELD, A_FIELD FROM hbaseTable1 " +
+      "WHERE " +
+      "( KEY_FIELD >= 'get1' or A_FIELD <= 'foo2') or" +
+      "( KEY_FIELD > 'get3' or B_FIELD <= '4')").take(10)
+
+    val executionRules = DefaultSourceStaticUtils.lastFiveExecutionRules.poll()
+
+    assert(results.length == 5)
+
+    assert(executionRules.dynamicLogicExpression.toExpressionString.
+      equals("( ( KEY_FIELD >= 0 OR A_FIELD <= 1 ) OR " +
+      "( KEY_FIELD > 2 OR B_FIELD <= 3 ) )"))
+
+    assert(executionRules.rowKeyFilter.points.size == 0)
+    assert(executionRules.rowKeyFilter.ranges.size == 1)
+
+    val scanRange1 = executionRules.rowKeyFilter.ranges.get(0).get
+    // This is the main test for 14406:
+    // because the key is joined through an OR with a qualifier,
+    // there is no filter on the rowKey
+    assert(Bytes.equals(scanRange1.lowerBound,Bytes.toBytes("")))
+    assert(scanRange1.upperBound == null)
+    assert(scanRange1.isLowerBoundEqualTo)
+    assert(scanRange1.isUpperBoundEqualTo)
+  }
+
+  test("Test OR logic with a two columns") {
+    val results = sqlContext.sql("SELECT KEY_FIELD, B_FIELD, A_FIELD FROM hbaseTable1 " +
+      "WHERE " +
+      "( B_FIELD > '4' or A_FIELD <= 'foo2') or" +
+      "( A_FIELD > 'foo2' or B_FIELD < '4')").take(10)
+
+    val executionRules = DefaultSourceStaticUtils.lastFiveExecutionRules.poll()
+
+    assert(results.length == 5)
+
+    assert(executionRules.dynamicLogicExpression.toExpressionString.
+      equals("( ( B_FIELD > 0 OR A_FIELD <= 1 ) OR " +
+      "( A_FIELD > 2 OR B_FIELD < 3 ) )"))
+
+    assert(executionRules.rowKeyFilter.points.size == 0)
+    assert(executionRules.rowKeyFilter.ranges.size == 1)
+
+    val scanRange1 = executionRules.rowKeyFilter.ranges.get(0).get
+    assert(Bytes.equals(scanRange1.lowerBound,Bytes.toBytes("")))
+    assert(scanRange1.upperBound == null)
+    assert(scanRange1.isLowerBoundEqualTo)
+    assert(scanRange1.isUpperBoundEqualTo)
+
+  }
+
+  test("Test single RowKey Or Column logic") {
+    val results = sqlContext.sql("SELECT KEY_FIELD, B_FIELD, A_FIELD FROM hbaseTable1 " +
+      "WHERE " +
+      "( KEY_FIELD >= 'get4' or A_FIELD <= 'foo2' )").take(10)
+
+    val executionRules = DefaultSourceStaticUtils.lastFiveExecutionRules.poll()
+
+    assert(results.length == 4)
+
+    assert(executionRules.dynamicLogicExpression.toExpressionString.
+      equals("( KEY_FIELD >= 0 OR A_FIELD <= 1 )"))
+
+    assert(executionRules.rowKeyFilter.points.size == 0)
+    assert(executionRules.rowKeyFilter.ranges.size == 1)
+
+    val scanRange1 = executionRules.rowKeyFilter.ranges.get(0).get
+    assert(Bytes.equals(scanRange1.lowerBound,Bytes.toBytes("")))
+    assert(scanRange1.upperBound == null)
+    assert(scanRange1.isLowerBoundEqualTo)
+    assert(scanRange1.isUpperBoundEqualTo)
+  }
+
+  test("Test table that doesn't exist") {
+    val catalog = s"""{
+            |"table":{"namespace":"default", "name":"t1NotThere"},
+            |"rowkey":"key",
+            |"columns":{
+              |"KEY_FIELD":{"cf":"rowkey", "col":"key", "type":"string"},
+              |"A_FIELD":{"cf":"c", "col":"a", "type":"string"},
+              |"B_FIELD":{"cf":"c", "col":"c", "type":"string"}
+            |}
+          |}""".stripMargin
+
+    intercept[Exception] {
+      df = sqlContext.load("org.apache.hadoop.hbase.spark",
+        Map(HBaseTableCatalog.tableCatalog->catalog))
+
+      df.registerTempTable("hbaseNonExistingTmp")
+
+      sqlContext.sql("SELECT KEY_FIELD, B_FIELD, A_FIELD FROM hbaseNonExistingTmp " +
+        "WHERE " +
+        "( KEY_FIELD >= 'get1' and KEY_FIELD <= 'get3') or" +
+        "( KEY_FIELD > 'get3' and KEY_FIELD <= 'get5')").count()
+    }
+    DefaultSourceStaticUtils.lastFiveExecutionRules.poll()
+  }
+
+
+  test("Test table with column that doesn't exist") {
+    val catalog = s"""{
+            |"table":{"namespace":"default", "name":"t1"},
+            |"rowkey":"key",
+            |"columns":{
+              |"KEY_FIELD":{"cf":"rowkey", "col":"key", "type":"string"},
+              |"A_FIELD":{"cf":"c", "col":"a", "type":"string"},
+              |"B_FIELD":{"cf":"c", "col":"b", "type":"string"},
+              |"C_FIELD":{"cf":"c", "col":"c", "type":"string"}
+            |}
+          |}""".stripMargin
+    df = sqlContext.load("org.apache.hadoop.hbase.spark",
+      Map(HBaseTableCatalog.tableCatalog->catalog))
+
+    df.registerTempTable("hbaseFactColumnTmp")
+
+    val result = sqlContext.sql("SELECT KEY_FIELD, " +
+      "B_FIELD, A_FIELD FROM hbaseFactColumnTmp")
+
+    assert(result.count() == 5)
+
+    val executionRules = DefaultSourceStaticUtils.lastFiveExecutionRules.poll()
+    assert(executionRules.dynamicLogicExpression == null)
+
+  }
+
+  test("Test table with INT column") {
+    val catalog = s"""{
+            |"table":{"namespace":"default", "name":"t1"},
+            |"rowkey":"key",
+            |"columns":{
+              |"KEY_FIELD":{"cf":"rowkey", "col":"key", "type":"string"},
+              |"A_FIELD":{"cf":"c", "col":"a", "type":"string"},
+              |"B_FIELD":{"cf":"c", "col":"b", "type":"string"},
+              |"I_FIELD":{"cf":"c", "col":"i", "type":"int"}
+            |}
+          |}""".stripMargin
+    df = sqlContext.load("org.apache.hadoop.hbase.spark",
+      Map(HBaseTableCatalog.tableCatalog->catalog))
+
+    df.registerTempTable("hbaseIntTmp")
+
+    val result = sqlContext.sql("SELECT KEY_FIELD, B_FIELD, I_FIELD FROM hbaseIntTmp"+
+    " where I_FIELD > 4 and I_FIELD < 10")
+
+    val localResult = result.take(5)
+
+    assert(localResult.length == 2)
+    assert(localResult(0).getInt(2) == 8)
+
+    val executionRules = DefaultSourceStaticUtils.lastFiveExecutionRules.poll()
+    val expr = executionRules.dynamicLogicExpression.toExpressionString
+    logInfo(expr)
+    assert(expr.equals("( ( I_FIELD isNotNull AND I_FIELD > 0 ) AND I_FIELD < 1 )"), expr)
+
+  }
+
+  test("Test table with INT column defined at wrong type") {
+    val catalog = s"""{
+            |"table":{"namespace":"default", "name":"t1"},
+            |"rowkey":"key",
+            |"columns":{
+              |"KEY_FIELD":{"cf":"rowkey", "col":"key", "type":"string"},
+              |"A_FIELD":{"cf":"c", "col":"a", "type":"string"},
+              |"B_FIELD":{"cf":"c", "col":"b", "type":"string"},
+              |"I_FIELD":{"cf":"c", "col":"i", "type":"string"}
+            |}
+          |}""".stripMargin
+    df = sqlContext.load("org.apache.hadoop.hbase.spark",
+      Map(HBaseTableCatalog.tableCatalog->catalog))
+
+    df.registerTempTable("hbaseIntWrongTypeTmp")
+
+    val result = sqlContext.sql("SELECT KEY_FIELD, " +
+      "B_FIELD, I_FIELD FROM hbaseIntWrongTypeTmp")
+
+    val localResult = result.take(10)
+    assert(localResult.length == 5)
+
+    val executionRules = DefaultSourceStaticUtils.lastFiveExecutionRules.poll()
+    assert(executionRules.dynamicLogicExpression == null)
+
+    assert(localResult(0).getString(2).length == 4)
+    assert(localResult(0).getString(2).charAt(0).toByte == 0)
+    assert(localResult(0).getString(2).charAt(1).toByte == 0)
+    assert(localResult(0).getString(2).charAt(2).toByte == 0)
+    assert(localResult(0).getString(2).charAt(3).toByte == 1)
+  }
+
+  test("Test bad column type") {
+    val catalog = s"""{
+            |"table":{"namespace":"default", "name":"t1"},
+            |"rowkey":"key",
+            |"columns":{
+              |"KEY_FIELD":{"cf":"rowkey", "col":"key", "type":"FOOBAR"},
+              |"A_FIELD":{"cf":"c", "col":"a", "type":"string"},
+              |"I_FIELD":{"cf":"c", "col":"i", "type":"string"}
+            |}
+          |}""".stripMargin
+    intercept[Exception] {
+      df = sqlContext.load("org.apache.hadoop.hbase.spark",
+        Map(HBaseTableCatalog.tableCatalog->catalog))
+
+      df.registerTempTable("hbaseIntWrongTypeTmp")
+
+      val result = sqlContext.sql("SELECT KEY_FIELD, " +
+        "B_FIELD, I_FIELD FROM hbaseIntWrongTypeTmp")
+
+      val localResult = result.take(10)
+      assert(localResult.length == 5)
+
+      val executionRules = DefaultSourceStaticUtils.lastFiveExecutionRules.poll()
+      assert(executionRules.dynamicLogicExpression == null)
+
+    }
+  }
+
+  test("Test HBaseSparkConf matching") {
+    val df = sqlContext.load("org.apache.hadoop.hbase.spark.HBaseTestSource",
+      Map("cacheSize" -> "100",
+        "batchNum" -> "100",
+        "blockCacheingEnable" -> "true", "rowNum" -> "10"))
+    assert(df.count() == 10)
+
+    val df1 = sqlContext.load("org.apache.hadoop.hbase.spark.HBaseTestSource",
+      Map("cacheSize" -> "1000",
+        "batchNum" -> "100", "blockCacheingEnable" -> "true", "rowNum" -> "10"))
+    intercept[Exception] {
+      assert(df1.count() == 10)
+    }
+
+    val df2 = sqlContext.load("org.apache.hadoop.hbase.spark.HBaseTestSource",
+      Map("cacheSize" -> "100",
+        "batchNum" -> "1000", "blockCacheingEnable" -> "true", "rowNum" -> "10"))
+    intercept[Exception] {
+      assert(df2.count() == 10)
+    }
+
+    val df3 = sqlContext.load("org.apache.hadoop.hbase.spark.HBaseTestSource",
+      Map("cacheSize" -> "100",
+        "batchNum" -> "100", "blockCacheingEnable" -> "false", "rowNum" -> "10"))
+    intercept[Exception] {
+      assert(df3.count() == 10)
+    }
+  }
+
+  test("Test table with sparse column") {
+    val catalog = s"""{
+            |"table":{"namespace":"default", "name":"t1"},
+            |"rowkey":"key",
+            |"columns":{
+              |"KEY_FIELD":{"cf":"rowkey", "col":"key", "type":"string"},
+              |"A_FIELD":{"cf":"c", "col":"a", "type":"string"},
+              |"B_FIELD":{"cf":"c", "col":"b", "type":"string"},
+              |"Z_FIELD":{"cf":"c", "col":"z", "type":"string"}
+            |}
+          |}""".stripMargin
+    df = sqlContext.load("org.apache.hadoop.hbase.spark",
+      Map(HBaseTableCatalog.tableCatalog->catalog))
+
+    df.registerTempTable("hbaseZTmp")
+
+    val result = sqlContext.sql("SELECT KEY_FIELD, B_FIELD, Z_FIELD FROM hbaseZTmp")
+
+    val localResult = result.take(10)
+    assert(localResult.length == 5)
+
+    assert(localResult(0).getString(2) == null)
+    assert(localResult(1).getString(2) == "FOO")
+    assert(localResult(2).getString(2) == null)
+    assert(localResult(3).getString(2) == "BAR")
+    assert(localResult(4).getString(2) == null)
+
+    val executionRules = DefaultSourceStaticUtils.lastFiveExecutionRules.poll()
+    assert(executionRules.dynamicLogicExpression == null)
+  }
+
+  test("Test with column logic disabled") {
+    val catalog = s"""{
+            |"table":{"namespace":"default", "name":"t1"},
+            |"rowkey":"key",
+            |"columns":{
+              |"KEY_FIELD":{"cf":"rowkey", "col":"key", "type":"string"},
+              |"A_FIELD":{"cf":"c", "col":"a", "type":"string"},
+              |"B_FIELD":{"cf":"c", "col":"b", "type":"string"},
+              |"Z_FIELD":{"cf":"c", "col":"z", "type":"string"}
+            |}
+          |}""".stripMargin
+    df = sqlContext.load("org.apache.hadoop.hbase.spark",
+      Map(HBaseTableCatalog.tableCatalog->catalog,
+        HBaseSparkConf.PUSHDOWN_COLUMN_FILTER -> "false"))
+
+    df.registerTempTable("hbaseNoPushDownTmp")
+
+    val results = sqlContext.sql("SELECT KEY_FIELD, B_FIELD, A_FIELD FROM hbaseNoPushDownTmp " +
+      "WHERE " +
+      "(KEY_FIELD <= 'get3' and KEY_FIELD >= 'get2')").take(10)
+
+    val executionRules = DefaultSourceStaticUtils.lastFiveExecutionRules.poll()
+
+    assert(results.length == 2)
+
+    assert(executionRules.dynamicLogicExpression == null)
+  }
+
+  def writeCatalog = s"""{
+                    |"table":{"namespace":"default", "name":"table1"},
+                    |"rowkey":"key",
+                    |"columns":{
+                    |"col0":{"cf":"rowkey", "col":"key", "type":"string"},
+                    |"col1":{"cf":"cf1", "col":"col1", "type":"boolean"},
+                    |"col2":{"cf":"cf1", "col":"col2", "type":"double"},
+                    |"col3":{"cf":"cf3", "col":"col3", "type":"float"},
+                    |"col4":{"cf":"cf3", "col":"col4", "type":"int"},
+                    |"col5":{"cf":"cf5", "col":"col5", "type":"bigint"},
+                    |"col6":{"cf":"cf6", "col":"col6", "type":"smallint"},
+                    |"col7":{"cf":"cf7", "col":"col7", "type":"string"},
+                    |"col8":{"cf":"cf8", "col":"col8", "type":"tinyint"}
+                    |}
+                    |}""".stripMargin
+
+  def withCatalog(cat: String): DataFrame = {
+    sqlContext
+      .read
+      .options(Map(HBaseTableCatalog.tableCatalog->cat))
+      .format("org.apache.hadoop.hbase.spark")
+      .load()
+  }
+
+  test("populate table") {
+    val sql = sqlContext
+    import sql.implicits._
+    val data = (0 to 255).map { i =>
+      HBaseRecord(i, "extra")
+    }
+    sc.parallelize(data).toDF.write.options(
+      Map(HBaseTableCatalog.tableCatalog -> writeCatalog, HBaseTableCatalog.newTable -> "5"))
+      .format("org.apache.hadoop.hbase.spark")
+      .save()
+  }
+
+  test("empty column") {
+    val df = withCatalog(writeCatalog)
+    df.registerTempTable("table0")
+    val c = sqlContext.sql("select count(1) from table0").rdd.collect()(0)(0).asInstanceOf[Long]
+    assert(c == 256)
+  }
+
+  test("full query") {
+    val df = withCatalog(writeCatalog)
+    df.show()
+    assert(df.count() == 256)
+  }
+
+  test("filtered query0") {
+    val sql = sqlContext
+    import sql.implicits._
+    val df = withCatalog(writeCatalog)
+    val s = df.filter($"col0" <= "row005")
+      .select("col0", "col1")
+    s.show()
+    assert(s.count() == 6)
+  }
+
+  test("Timestamp semantics") {
+    val sql = sqlContext
+    import sql.implicits._
+
+    // There's already some recent data in here. Let's throw in something from 1993
+    // which we can include/exclude, and add some data with the implicit (now) timestamp.
+    // Then we should be able to cross-section it and get only the points in between,
+    // the most recent view, and an old view.
+    val oldMs = 754869600000L
+    val startMs = System.currentTimeMillis()
+    val oldData = (0 to 100).map { i =>
+      HBaseRecord(i, "old")
+    }
+    val newData = (200 to 255).map { i =>
+      HBaseRecord(i, "new")
+    }
+
+    sc.parallelize(oldData).toDF.write.options(
+      Map(HBaseTableCatalog.tableCatalog -> writeCatalog, HBaseTableCatalog.tableName -> "5",
+        HBaseSparkConf.TIMESTAMP -> oldMs.toString))
+      .format("org.apache.hadoop.hbase.spark")
+      .save()
+    sc.parallelize(newData).toDF.write.options(
+      Map(HBaseTableCatalog.tableCatalog -> writeCatalog, HBaseTableCatalog.tableName -> "5"))
+      .format("org.apache.hadoop.hbase.spark")
+      .save()
+
+    // Test specific timestamp -- Full scan, Timestamp
+    val individualTimestamp = sqlContext.read
+      .options(Map(HBaseTableCatalog.tableCatalog -> writeCatalog, HBaseSparkConf.TIMESTAMP -> oldMs.toString))
+      .format("org.apache.hadoop.hbase.spark")
+      .load()
+    assert(individualTimestamp.count() == 101)
+
+    // Test getting everything -- Full Scan, No range
+    val everything = sqlContext.read
+      .options(Map(HBaseTableCatalog.tableCatalog -> writeCatalog))
+      .format("org.apache.hadoop.hbase.spark")
+      .load()
+    assert(everything.count() == 256)
+    // Test getting everything -- Pruned Scan, TimeRange
+    val element50 = everything.where(col("col0") === lit("row050")).select("col7").collect()(0)(0)
+    assert(element50 == "String50: extra")
+    val element200 = everything.where(col("col0") === lit("row200")).select("col7").collect()(0)(0)
+    assert(element200 == "String200: new")
+
+    // Test Getting old stuff -- Full Scan, TimeRange
+    val oldRange = sqlContext.read
+      .options(Map(HBaseTableCatalog.tableCatalog -> writeCatalog, HBaseSparkConf.TIMERANGE_START -> "0",
+        HBaseSparkConf.TIMERANGE_END -> (oldMs + 100).toString))
+      .format("org.apache.hadoop.hbase.spark")
+      .load()
+    assert(oldRange.count() == 101)
+    // Test Getting old stuff -- Pruned Scan, TimeRange
+    val oldElement50 = oldRange.where(col("col0") === lit("row050")).select("col7").collect()(0)(0)
+    assert(oldElement50 == "String50: old")
+
+    // Test Getting middle stuff -- Full Scan, TimeRange
+    val middleRange = sqlContext.read
+      .options(Map(HBaseTableCatalog.tableCatalog -> writeCatalog, HBaseSparkConf.TIMERANGE_START -> "0",
+        HBaseSparkConf.TIMERANGE_END -> (startMs + 100).toString))
+      .format("org.apache.hadoop.hbase.spark")
+      .load()
+    assert(middleRange.count() == 256)
+    // Test Getting middle stuff -- Pruned Scan, TimeRange
+    val middleElement200 = middleRange.where(col("col0") === lit("row200")).select("col7").collect()(0)(0)
+    assert(middleElement200 == "String200: extra")
+  }
+
+
+  // catalog for insertion
+  def avroWriteCatalog = s"""{
+                             |"table":{"namespace":"default", "name":"avrotable"},
+                             |"rowkey":"key",
+                             |"columns":{
+                             |"col0":{"cf":"rowkey", "col":"key", "type":"binary"},
+                             |"col1":{"cf":"cf1", "col":"col1", "type":"binary"}
+                             |}
+                             |}""".stripMargin
+
+  // catalog for read
+  def avroCatalog = s"""{
+                        |"table":{"namespace":"default", "name":"avrotable"},
+                        |"rowkey":"key",
+                        |"columns":{
+                        |"col0":{"cf":"rowkey", "col":"key",  "avro":"avroSchema"},
+                        |"col1":{"cf":"cf1", "col":"col1", "avro":"avroSchema"}
+                        |}
+                        |}""".stripMargin
+
+  // for insert to another table
+  def avroCatalogInsert = s"""{
+                              |"table":{"namespace":"default", "name":"avrotableInsert"},
+                              |"rowkey":"key",
+                              |"columns":{
+                              |"col0":{"cf":"rowkey", "col":"key", "avro":"avroSchema"},
+                              |"col1":{"cf":"cf1", "col":"col1", "avro":"avroSchema"}
+                              |}
+                              |}""".stripMargin
+
+  def withAvroCatalog(cat: String): DataFrame = {
+    sqlContext
+      .read
+      .options(Map("avroSchema"->AvroHBaseKeyRecord.schemaString,
+        HBaseTableCatalog.tableCatalog->avroCatalog))
+      .format("org.apache.hadoop.hbase.spark")
+      .load()
+  }
+
+
+  test("populate avro table") {
+    val sql = sqlContext
+    import sql.implicits._
+
+    val data = (0 to 255).map { i =>
+      AvroHBaseKeyRecord(i)
+    }
+    sc.parallelize(data).toDF.write.options(
+      Map(HBaseTableCatalog.tableCatalog -> avroWriteCatalog,
+        HBaseTableCatalog.newTable -> "5"))
+      .format("org.apache.hadoop.hbase.spark")
+      .save()
+  }
+
+  test("avro empty column") {
+    val df = withAvroCatalog(avroCatalog)
+    df.registerTempTable("avrotable")
+    val c = sqlContext.sql("select count(1) from avrotable")
+      .rdd.collect()(0)(0).asInstanceOf[Long]
+    assert(c == 256)
+  }
+
+  test("avro full query") {
+    val df = withAvroCatalog(avroCatalog)
+    df.show()
+    df.printSchema()
+    assert(df.count() == 256)
+  }
+
+  test("avro serialization and deserialization query") {
+    val df = withAvroCatalog(avroCatalog)
+    df.write.options(
+      Map("avroSchema"->AvroHBaseKeyRecord.schemaString,
+        HBaseTableCatalog.tableCatalog->avroCatalogInsert,
+        HBaseTableCatalog.newTable -> "5"))
+      .format("org.apache.hadoop.hbase.spark")
+      .save()
+    val newDF = withAvroCatalog(avroCatalogInsert)
+    newDF.show()
+    newDF.printSchema()
+    assert(newDF.count() == 256)
+  }
+
+  test("avro filtered query") {
+    val sql = sqlContext
+    import sql.implicits._
+    val df = withAvroCatalog(avroCatalog)
+    val r = df.filter($"col1.name" === "name005" || $"col1.name" <= "name005")
+      .select("col0", "col1.favorite_color", "col1.favorite_number")
+    r.show()
+    assert(r.count() == 6)
+  }
+
+  test("avro Or filter") {
+    val sql = sqlContext
+    import sql.implicits._
+    val df = withAvroCatalog(avroCatalog)
+    val s = df.filter($"col1.name" <= "name005" || $"col1.name".contains("name007"))
+      .select("col0", "col1.favorite_color", "col1.favorite_number")
+    s.show()
+    assert(s.count() == 7)
+  }
+
+  test("test create HBaseRelation with new context throws SAXParseException") {
+    val catalog = s"""{
+                     |"table":{"namespace":"default", "name":"t1NotThere"},
+                     |"rowkey":"key",
+                     |"columns":{
+                     |"KEY_FIELD":{"cf":"rowkey", "col":"key", "type":"string"},
+                     |"A_FIELD":{"cf":"c", "col":"a", "type":"string"},
+                     |"B_FIELD":{"cf":"c", "col":"c", "type":"string"}
+                     |}
+                     |}""".stripMargin
+    try {
+      HBaseRelation(Map(HBaseTableCatalog.tableCatalog -> catalog,
+        HBaseSparkConf.USE_HBASECONTEXT -> "false"), None)(sqlContext)
+    } catch {
+        case e: Throwable => if(e.getCause.isInstanceOf[SAXParseException]) {
+          fail("SAXParseException due to configuration loading empty resource")
+        } else {
+          println("Failed due to some other exception, ignore " + e.getMessage)
+        }
+    }
+  }
+}
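
For anyone reviewing the data source itself rather than the test harness, the round trip this suite exercises is: define a catalog JSON, write a DataFrame through the "org.apache.hadoop.hbase.spark" format, and read it back with predicates pushed down. A minimal sketch along those lines, not part of the patch: the table name `sketchTable`, the sample data, and the pre-existing `sc`/`config` handles are assumptions for illustration only.

    // Assumes a SparkContext `sc` and an HBase Configuration `config` already exist.
    import org.apache.hadoop.hbase.spark.HBaseContext
    import org.apache.hadoop.hbase.spark.datasources.HBaseTableCatalog
    import org.apache.spark.sql.SQLContext

    new HBaseContext(sc, config)            // registers the context the data source uses
    val sqlContext = new SQLContext(sc)
    import sqlContext.implicits._

    // The catalog JSON maps DataFrame columns to the row key and to cf:qualifier pairs.
    val catalog = s"""{
      |"table":{"namespace":"default", "name":"sketchTable"},
      |"rowkey":"key",
      |"columns":{
      |"col0":{"cf":"rowkey", "col":"key", "type":"string"},
      |"col7":{"cf":"cf7", "col":"col7", "type":"string"}
      |}
      |}""".stripMargin

    // Write: HBaseTableCatalog.newTable asks the source to create the table with 5 regions.
    val data = (0 to 9).map(i => (f"row$i%03d", s"value$i"))
    sc.parallelize(data).toDF("col0", "col7")
      .write
      .options(Map(HBaseTableCatalog.tableCatalog -> catalog, HBaseTableCatalog.newTable -> "5"))
      .format("org.apache.hadoop.hbase.spark")
      .save()

    // Read back; the col0 predicate is pushed down as a row key range.
    val df = sqlContext.read
      .options(Map(HBaseTableCatalog.tableCatalog -> catalog))
      .format("org.apache.hadoop.hbase.spark")
      .load()
    df.filter($"col0" <= "row005").select("col0", "col7").show()
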
diff --git a/spark/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/DynamicLogicExpressionSuite.scala b/spark/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/DynamicLogicExpressionSuite.scala
new file mode 100644
index 0000000..0424527
--- /dev/null
+++ b/spark/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/DynamicLogicExpressionSuite.scala
@@ -0,0 +1,338 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.spark
+
+import java.util
+
+import org.apache.hadoop.hbase.spark.datasources.{HBaseSparkConf, JavaBytesEncoder}
+import org.apache.hadoop.hbase.util.Bytes
+import org.apache.spark.sql.types._
+import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite}
+
+class DynamicLogicExpressionSuite  extends FunSuite with
+BeforeAndAfterEach with BeforeAndAfterAll with Logging {
+
+  val encoder = JavaBytesEncoder.create(HBaseSparkConf.DEFAULT_QUERY_ENCODER)
+
+  test("Basic And Test") {
+    val leftLogic = new LessThanLogicExpression("Col1", 0)
+    leftLogic.setEncoder(encoder)
+    val rightLogic = new GreaterThanLogicExpression("Col1", 1)
+    rightLogic.setEncoder(encoder)
+    val andLogic = new AndLogicExpression(leftLogic, rightLogic)
+
+    val columnToCurrentRowValueMap = new util.HashMap[String, ByteArrayComparable]()
+
+    columnToCurrentRowValueMap.put("Col1", new ByteArrayComparable(Bytes.toBytes(10)))
+    val valueFromQueryValueArray = new Array[Array[Byte]](2)
+    valueFromQueryValueArray(0) = encoder.encode(IntegerType, 15)
+    valueFromQueryValueArray(1) = encoder.encode(IntegerType, 5)
+    assert(andLogic.execute(columnToCurrentRowValueMap, valueFromQueryValueArray))
+
+    valueFromQueryValueArray(0) = encoder.encode(IntegerType, 10)
+    valueFromQueryValueArray(1) = encoder.encode(IntegerType, 5)
+    assert(!andLogic.execute(columnToCurrentRowValueMap, valueFromQueryValueArray))
+
+    valueFromQueryValueArray(0) = encoder.encode(IntegerType, 15)
+    valueFromQueryValueArray(1) = encoder.encode(IntegerType, 10)
+    assert(!andLogic.execute(columnToCurrentRowValueMap, valueFromQueryValueArray))
+
+    val expressionString = andLogic.toExpressionString
+
+    assert(expressionString.equals("( Col1 < 0 AND Col1 > 1 )"))
+
+    val builtExpression = DynamicLogicExpressionBuilder.build(expressionString, encoder)
+    valueFromQueryValueArray(0) = encoder.encode(IntegerType, 15)
+    valueFromQueryValueArray(1) = encoder.encode(IntegerType, 5)
+    assert(builtExpression.execute(columnToCurrentRowValueMap, valueFromQueryValueArray))
+
+    valueFromQueryValueArray(0) = encoder.encode(IntegerType, 10)
+    valueFromQueryValueArray(1) = encoder.encode(IntegerType, 5)
+    assert(!builtExpression.execute(columnToCurrentRowValueMap, valueFromQueryValueArray))
+
+    valueFromQueryValueArray(0) = encoder.encode(IntegerType, 15)
+    valueFromQueryValueArray(1) = encoder.encode(IntegerType, 10)
+    assert(!builtExpression.execute(columnToCurrentRowValueMap, valueFromQueryValueArray))
+
+  }
+
+  test("Basic OR Test") {
+    val leftLogic = new LessThanLogicExpression("Col1", 0)
+    leftLogic.setEncoder(encoder)
+    val rightLogic = new GreaterThanLogicExpression("Col1", 1)
+    rightLogic.setEncoder(encoder)
+    val OrLogic = new OrLogicExpression(leftLogic, rightLogic)
+
+    val columnToCurrentRowValueMap = new util.HashMap[String, ByteArrayComparable]()
+
+    columnToCurrentRowValueMap.put("Col1", new ByteArrayComparable(Bytes.toBytes(10)))
+    val valueFromQueryValueArray = new Array[Array[Byte]](2)
+    valueFromQueryValueArray(0) = encoder.encode(IntegerType, 15)
+    valueFromQueryValueArray(1) = encoder.encode(IntegerType, 5)
+    assert(OrLogic.execute(columnToCurrentRowValueMap, valueFromQueryValueArray))
+
+    valueFromQueryValueArray(0) = encoder.encode(IntegerType, 10)
+    valueFromQueryValueArray(1) = encoder.encode(IntegerType, 5)
+    assert(OrLogic.execute(columnToCurrentRowValueMap, valueFromQueryValueArray))
+
+    valueFromQueryValueArray(0) = encoder.encode(IntegerType, 15)
+    valueFromQueryValueArray(1) = encoder.encode(IntegerType, 10)
+    assert(OrLogic.execute(columnToCurrentRowValueMap, valueFromQueryValueArray))
+
+    valueFromQueryValueArray(0) = encoder.encode(IntegerType, 10)
+    valueFromQueryValueArray(1) = encoder.encode(IntegerType, 10)
+    assert(!OrLogic.execute(columnToCurrentRowValueMap, valueFromQueryValueArray))
+
+    val expressionString = OrLogic.toExpressionString
+
+    assert(expressionString.equals("( Col1 < 0 OR Col1 > 1 )"))
+
+    val builtExpression = DynamicLogicExpressionBuilder.build(expressionString, encoder)
+    valueFromQueryValueArray(0) = encoder.encode(IntegerType, 15)
+    valueFromQueryValueArray(1) = encoder.encode(IntegerType, 5)
+    assert(builtExpression.execute(columnToCurrentRowValueMap, valueFromQueryValueArray))
+
+    valueFromQueryValueArray(0) = encoder.encode(IntegerType, 10)
+    valueFromQueryValueArray(1) = encoder.encode(IntegerType, 5)
+    assert(builtExpression.execute(columnToCurrentRowValueMap, valueFromQueryValueArray))
+
+    valueFromQueryValueArray(0) = encoder.encode(IntegerType, 15)
+    valueFromQueryValueArray(1) = encoder.encode(IntegerType, 10)
+    assert(builtExpression.execute(columnToCurrentRowValueMap, valueFromQueryValueArray))
+
+    valueFromQueryValueArray(0) = encoder.encode(IntegerType, 10)
+    valueFromQueryValueArray(1) = encoder.encode(IntegerType, 10)
+    assert(!builtExpression.execute(columnToCurrentRowValueMap, valueFromQueryValueArray))
+  }
+
+  test("Basic Command Test") {
+    val greaterLogic = new GreaterThanLogicExpression("Col1", 0)
+    greaterLogic.setEncoder(encoder)
+    val greaterAndEqualLogic = new GreaterThanOrEqualLogicExpression("Col1", 0)
+    greaterAndEqualLogic.setEncoder(encoder)
+    val lessLogic = new LessThanLogicExpression("Col1", 0)
+    lessLogic.setEncoder(encoder)
+    val lessAndEqualLogic = new LessThanOrEqualLogicExpression("Col1", 0)
+    lessAndEqualLogic.setEncoder(encoder)
+    val equalLogic = new EqualLogicExpression("Col1", 0, false)
+    val notEqualLogic = new EqualLogicExpression("Col1", 0, true)
+    val passThrough = new PassThroughLogicExpression
+
+    val columnToCurrentRowValueMap = new util.HashMap[String, ByteArrayComparable]()
+    columnToCurrentRowValueMap.put("Col1", new ByteArrayComparable(Bytes.toBytes(10)))
+    val valueFromQueryValueArray = new Array[Array[Byte]](1)
+
+    // greater than
+    valueFromQueryValueArray(0) = encoder.encode(IntegerType, 10)
+    assert(!greaterLogic.execute(columnToCurrentRowValueMap, valueFromQueryValueArray))
+
+    valueFromQueryValueArray(0) = encoder.encode(IntegerType, 20)
+    assert(!greaterLogic.execute(columnToCurrentRowValueMap, valueFromQueryValueArray))
+
+    // greater than or equal
+    valueFromQueryValueArray(0) = encoder.encode(IntegerType, 5)
+    assert(greaterAndEqualLogic.execute(columnToCurrentRowValueMap,
+      valueFromQueryValueArray))
+
+    valueFromQueryValueArray(0) = encoder.encode(IntegerType, 10)
+    assert(greaterAndEqualLogic.execute(columnToCurrentRowValueMap,
+      valueFromQueryValueArray))
+
+    valueFromQueryValueArray(0) = encoder.encode(IntegerType, 20)
+    assert(!greaterAndEqualLogic.execute(columnToCurrentRowValueMap,
+      valueFromQueryValueArray))
+
+    // less than
+    valueFromQueryValueArray(0) = encoder.encode(IntegerType, 10)
+    assert(!lessLogic.execute(columnToCurrentRowValueMap, valueFromQueryValueArray))
+
+    valueFromQueryValueArray(0) = encoder.encode(IntegerType, 5)
+    assert(!lessLogic.execute(columnToCurrentRowValueMap, valueFromQueryValueArray))
+
+    // less than or equal
+    valueFromQueryValueArray(0) = encoder.encode(IntegerType, 20)
+    assert(lessAndEqualLogic.execute(columnToCurrentRowValueMap, valueFromQueryValueArray))
+
+    valueFromQueryValueArray(0) = encoder.encode(IntegerType, 20)
+    assert(lessAndEqualLogic.execute(columnToCurrentRowValueMap, valueFromQueryValueArray))
+
+    valueFromQueryValueArray(0) = encoder.encode(IntegerType, 10)
+    assert(lessAndEqualLogic.execute(columnToCurrentRowValueMap, valueFromQueryValueArray))
+
+    // equal to
+    valueFromQueryValueArray(0) = Bytes.toBytes(10)
+    assert(equalLogic.execute(columnToCurrentRowValueMap, valueFromQueryValueArray))
+
+    valueFromQueryValueArray(0) = Bytes.toBytes(5)
+    assert(!equalLogic.execute(columnToCurrentRowValueMap, valueFromQueryValueArray))
+
+    // not equal to
+    valueFromQueryValueArray(0) = Bytes.toBytes(10)
+    assert(!notEqualLogic.execute(columnToCurrentRowValueMap, valueFromQueryValueArray))
+
+    valueFromQueryValueArray(0) = Bytes.toBytes(5)
+    assert(notEqualLogic.execute(columnToCurrentRowValueMap, valueFromQueryValueArray))
+
+    // pass through
+    valueFromQueryValueArray(0) = Bytes.toBytes(10)
+    assert(passThrough.execute(columnToCurrentRowValueMap, valueFromQueryValueArray))
+
+    valueFromQueryValueArray(0) = Bytes.toBytes(5)
+    assert(passThrough.execute(columnToCurrentRowValueMap, valueFromQueryValueArray))
+  }
+
+
+  test("Double Type") {
+    val leftLogic = new LessThanLogicExpression("Col1", 0)
+    leftLogic.setEncoder(encoder)
+    val rightLogic = new GreaterThanLogicExpression("Col1", 1)
+    rightLogic.setEncoder(encoder)
+    val andLogic = new AndLogicExpression(leftLogic, rightLogic)
+
+    val columnToCurrentRowValueMap = new util.HashMap[String, ByteArrayComparable]()
+
+    columnToCurrentRowValueMap.put("Col1", new ByteArrayComparable(Bytes.toBytes(-4.0d)))
+    val valueFromQueryValueArray = new Array[Array[Byte]](2)
+    valueFromQueryValueArray(0) = encoder.encode(DoubleType, 15.0d)
+    valueFromQueryValueArray(1) = encoder.encode(DoubleType, -5.0d)
+    assert(andLogic.execute(columnToCurrentRowValueMap, valueFromQueryValueArray))
+
+    valueFromQueryValueArray(0) = encoder.encode(DoubleType, 10.0d)
+    valueFromQueryValueArray(1) = encoder.encode(DoubleType, -1.0d)
+    assert(!andLogic.execute(columnToCurrentRowValueMap, valueFromQueryValueArray))
+
+    valueFromQueryValueArray(0) = encoder.encode(DoubleType, -10.0d)
+    valueFromQueryValueArray(1) = encoder.encode(DoubleType, -20.0d)
+    assert(!andLogic.execute(columnToCurrentRowValueMap, valueFromQueryValueArray))
+
+    val expressionString = andLogic.toExpressionString
+    // Note that here 0 and 1 are indices, not values.
+    assert(expressionString.equals("( Col1 < 0 AND Col1 > 1 )"))
+
+    val builtExpression = DynamicLogicExpressionBuilder.build(expressionString, encoder)
+    valueFromQueryValueArray(0) = encoder.encode(DoubleType, 15.0d)
+    valueFromQueryValueArray(1) = encoder.encode(DoubleType, -5.0d)
+    assert(builtExpression.execute(columnToCurrentRowValueMap, valueFromQueryValueArray))
+
+    valueFromQueryValueArray(0) = encoder.encode(DoubleType, 10.0d)
+    valueFromQueryValueArray(1) = encoder.encode(DoubleType, -1.0d)
+    assert(!builtExpression.execute(columnToCurrentRowValueMap, valueFromQueryValueArray))
+
+    valueFromQueryValueArray(0) = encoder.encode(DoubleType, -10.0d)
+    valueFromQueryValueArray(1) = encoder.encode(DoubleType, -20.0d)
+    assert(!builtExpression.execute(columnToCurrentRowValueMap, valueFromQueryValueArray))
+  }
+
+  test("Float Type") {
+    val leftLogic = new LessThanLogicExpression("Col1", 0)
+    leftLogic.setEncoder(encoder)
+    val rightLogic = new GreaterThanLogicExpression("Col1", 1)
+    rightLogic.setEncoder(encoder)
+    val andLogic = new AndLogicExpression(leftLogic, rightLogic)
+
+    val columnToCurrentRowValueMap = new util.HashMap[String, ByteArrayComparable]()
+
+    columnToCurrentRowValueMap.put("Col1", new ByteArrayComparable(Bytes.toBytes(-4.0f)))
+    val valueFromQueryValueArray = new Array[Array[Byte]](2)
+    valueFromQueryValueArray(0) = encoder.encode(FloatType, 15.0f)
+    valueFromQueryValueArray(1) = encoder.encode(FloatType, -5.0f)
+    assert(andLogic.execute(columnToCurrentRowValueMap, valueFromQueryValueArray))
+
+    valueFromQueryValueArray(0) = encoder.encode(FloatType, 10.0f)
+    valueFromQueryValueArray(1) = encoder.encode(FloatType, -1.0f)
+    assert(!andLogic.execute(columnToCurrentRowValueMap, valueFromQueryValueArray))
+
+    valueFromQueryValueArray(0) = encoder.encode(FloatType, -10.0f)
+    valueFromQueryValueArray(1) = encoder.encode(FloatType, -20.0f)
+    assert(!andLogic.execute(columnToCurrentRowValueMap, valueFromQueryValueArray))
+
+    val expressionString = andLogic.toExpressionString
+    // Note that here 0 and 1 are indices, not values.
+    assert(expressionString.equals("( Col1 < 0 AND Col1 > 1 )"))
+
+    val builtExpression = DynamicLogicExpressionBuilder.build(expressionString, encoder)
+    valueFromQueryValueArray(0) = encoder.encode(FloatType, 15.0f)
+    valueFromQueryValueArray(1) = encoder.encode(FloatType, -5.0f)
+    assert(builtExpression.execute(columnToCurrentRowValueMap, valueFromQueryValueArray))
+
+    valueFromQueryValueArray(0) = encoder.encode(FloatType, 10.0f)
+    valueFromQueryValueArray(1) = encoder.encode(FloatType, -1.0f)
+    assert(!builtExpression.execute(columnToCurrentRowValueMap, valueFromQueryValueArray))
+
+    valueFromQueryValueArray(0) = encoder.encode(FloatType, -10.0f)
+    valueFromQueryValueArray(1) = encoder.encode(FloatType, -20.0f)
+    assert(!builtExpression.execute(columnToCurrentRowValueMap, valueFromQueryValueArray))
+  }
+
+  test("String Type") {
+    val leftLogic = new LessThanLogicExpression("Col1", 0)
+    leftLogic.setEncoder(encoder)
+    val rightLogic = new GreaterThanLogicExpression("Col1", 1)
+    rightLogic.setEncoder(encoder)
+    val andLogic = new AndLogicExpression(leftLogic, rightLogic)
+
+    val columnToCurrentRowValueMap = new util.HashMap[String, ByteArrayComparable]()
+
+    columnToCurrentRowValueMap.put("Col1", new ByteArrayComparable(Bytes.toBytes("row005")))
+    val valueFromQueryValueArray = new Array[Array[Byte]](2)
+    valueFromQueryValueArray(0) = encoder.encode(StringType, "row015")
+    valueFromQueryValueArray(1) = encoder.encode(StringType, "row000")
+    assert(andLogic.execute(columnToCurrentRowValueMap, valueFromQueryValueArray))
+
+    valueFromQueryValueArray(0) = encoder.encode(StringType, "row004")
+    valueFromQueryValueArray(1) = encoder.encode(StringType, "row000")
+    assert(!andLogic.execute(columnToCurrentRowValueMap, valueFromQueryValueArray))
+
+    valueFromQueryValueArray(0) = encoder.encode(StringType, "row020")
+    valueFromQueryValueArray(1) = encoder.encode(StringType, "row010")
+    assert(!andLogic.execute(columnToCurrentRowValueMap, valueFromQueryValueArray))
+
+    val expressionString = andLogic.toExpressionString
+    // Note that here 0 and 1 are indices, not values.
+    assert(expressionString.equals("( Col1 < 0 AND Col1 > 1 )"))
+
+    val builtExpression = DynamicLogicExpressionBuilder.build(expressionString, encoder)
+    valueFromQueryValueArray(0) = encoder.encode(StringType, "row015")
+    valueFromQueryValueArray(1) = encoder.encode(StringType, "row000")
+    assert(builtExpression.execute(columnToCurrentRowValueMap, valueFromQueryValueArray))
+
+    valueFromQueryValueArray(0) = encoder.encode(StringType, "row004")
+    valueFromQueryValueArray(1) = encoder.encode(StringType, "row000")
+    assert(!builtExpression.execute(columnToCurrentRowValueMap, valueFromQueryValueArray))
+
+    valueFromQueryValueArray(0) = encoder.encode(StringType, "row020")
+    valueFromQueryValueArray(1) = encoder.encode(StringType, "row010")
+    assert(!builtExpression.execute(columnToCurrentRowValueMap, valueFromQueryValueArray))
+  }
+
+  test("Boolean Type") {
+    val leftLogic = new LessThanLogicExpression("Col1", 0)
+    leftLogic.setEncoder(encoder)
+    val rightLogic = new GreaterThanLogicExpression("Col1", 1)
+    rightLogic.setEncoder(encoder)
+
+    val columnToCurrentRowValueMap = new util.HashMap[String, ByteArrayComparable]()
+
+    columnToCurrentRowValueMap.put("Col1", new ByteArrayComparable(Bytes.toBytes(false)))
+    val valueFromQueryValueArray = new Array[Array[Byte]](2)
+    valueFromQueryValueArray(0) = encoder.encode(BooleanType, true)
+    valueFromQueryValueArray(1) = encoder.encode(BooleanType, false)
+    assert(leftLogic.execute(columnToCurrentRowValueMap, valueFromQueryValueArray))
+    assert(!rightLogic.execute(columnToCurrentRowValueMap, valueFromQueryValueArray))
+  }
+}
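
For orientation, the round trip these cases exercise can be condensed as below. This is only a sketch: `encoder` stands for the bytes encoder the suite constructs earlier in the file (outside this hunk), the expression classes come from the same org.apache.hadoop.hbase.spark package as the suite, and the column name and values are illustrative.

    import java.util
    import org.apache.hadoop.hbase.util.Bytes
    import org.apache.spark.sql.types.FloatType

    // 0 and 1 are indexes into the query-value array, not literal values.
    val left = new LessThanLogicExpression("Col1", 0)
    val right = new GreaterThanLogicExpression("Col1", 1)
    left.setEncoder(encoder)
    right.setEncoder(encoder)
    val and = new AndLogicExpression(left, right)

    val currentRow = new util.HashMap[String, ByteArrayComparable]()
    currentRow.put("Col1", new ByteArrayComparable(Bytes.toBytes(10.0f)))
    val queryValues = Array(encoder.encode(FloatType, 15.0f), encoder.encode(FloatType, -5.0f))

    // Serialize to "( Col1 < 0 AND Col1 > 1 )", rebuild, and evaluate.
    val rebuilt = DynamicLogicExpressionBuilder.build(and.toExpressionString, encoder)
    rebuilt.execute(currentRow, queryValues)  // true: -5.0f < 10.0f < 15.0f
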
diff --git a/spark/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/HBaseCatalogSuite.scala b/spark/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/HBaseCatalogSuite.scala
new file mode 100644
index 0000000..74bf912
--- /dev/null
+++ b/spark/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/HBaseCatalogSuite.scala
@@ -0,0 +1,109 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.spark
+
+import org.apache.hadoop.hbase.spark.datasources.{DataTypeParserWrapper, DoubleSerDes, HBaseTableCatalog}
+import org.apache.hadoop.hbase.util.Bytes
+import org.apache.spark.sql.types._
+import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite}
+
+class HBaseCatalogSuite extends FunSuite with BeforeAndAfterEach with BeforeAndAfterAll  with Logging {
+
+  val map = s"""MAP<int, struct<varchar:string>>"""
+  val array = s"""array<struct<tinYint:tinyint>>"""
+  val arrayMap = s"""MAp<int, ARRAY<double>>"""
+  val catalog = s"""{
+                    |"table":{"namespace":"default", "name":"htable"},
+                    |"rowkey":"key1:key2",
+                    |"columns":{
+                    |"col1":{"cf":"rowkey", "col":"key1", "type":"string"},
+                    |"col2":{"cf":"rowkey", "col":"key2", "type":"double"},
+                    |"col3":{"cf":"cf1", "col":"col2", "type":"binary"},
+                    |"col4":{"cf":"cf1", "col":"col3", "type":"timestamp"},
+                    |"col5":{"cf":"cf1", "col":"col4", "type":"double", "serdes":"${classOf[DoubleSerDes].getName}"},
+                    |"col6":{"cf":"cf1", "col":"col5", "type":"$map"},
+                    |"col7":{"cf":"cf1", "col":"col6", "type":"$array"},
+                    |"col8":{"cf":"cf1", "col":"col7", "type":"$arrayMap"}
+                    |}
+                    |}""".stripMargin
+  val parameters = Map(HBaseTableCatalog.tableCatalog->catalog)
+  val t = HBaseTableCatalog(parameters)
+
+  def checkDataType(dataTypeString: String, expectedDataType: DataType): Unit = {
+    test(s"parse ${dataTypeString.replace("\n", "")}") {
+      assert(DataTypeParserWrapper.parse(dataTypeString) === expectedDataType)
+    }
+  }
+  test("basic") {
+    assert(t.getField("col1").isRowKey)
+    assert(t.getPrimaryKey == "key1")
+    assert(t.getField("col3").dt == BinaryType)
+    assert(t.getField("col4").dt == TimestampType)
+    assert(t.getField("col5").dt == DoubleType)
+    assert(t.getField("col5").serdes != None)
+    assert(t.getField("col4").serdes == None)
+    assert(t.getField("col1").isRowKey)
+    assert(t.getField("col2").isRowKey)
+    assert(!t.getField("col3").isRowKey)
+    assert(t.getField("col2").length == Bytes.SIZEOF_DOUBLE)
+    assert(t.getField("col1").length == -1)
+    assert(t.getField("col8").length == -1)
+  }
+
+  checkDataType(
+    map,
+    t.getField("col6").dt
+  )
+
+  checkDataType(
+    array,
+    t.getField("col7").dt
+  )
+
+  checkDataType(
+    arrayMap,
+    t.getField("col8").dt
+  )
+
+  test("convert") {
+    val m = Map("hbase.columns.mapping" ->
+      "KEY_FIELD STRING :key, A_FIELD STRING c:a, B_FIELD DOUBLE c:b, C_FIELD BINARY c:c,",
+      "hbase.table" -> "t1")
+    val map = HBaseTableCatalog.convert(m)
+    val json = map.get(HBaseTableCatalog.tableCatalog).get
+    val parameters = Map(HBaseTableCatalog.tableCatalog->json)
+    val t = HBaseTableCatalog(parameters)
+    assert(t.getField("KEY_FIELD").isRowKey)
+    assert(DataTypeParserWrapper.parse("STRING") === t.getField("A_FIELD").dt)
+    assert(!t.getField("A_FIELD").isRowKey)
+    assert(DataTypeParserWrapper.parse("DOUBLE") === t.getField("B_FIELD").dt)
+    assert(DataTypeParserWrapper.parse("BINARY") === t.getField("C_FIELD").dt)
+  }
+
+  test("compatiblity") {
+    val m = Map("hbase.columns.mapping" ->
+      "KEY_FIELD STRING :key, A_FIELD STRING c:a, B_FIELD DOUBLE c:b, C_FIELD BINARY c:c,",
+      "hbase.table" -> "t1")
+    val t = HBaseTableCatalog(m)
+    assert(t.getField("KEY_FIELD").isRowKey)
+    assert(DataTypeParserWrapper.parse("STRING") === t.getField("A_FIELD").dt)
+    assert(!t.getField("A_FIELD").isRowKey)
+    assert(DataTypeParserWrapper.parse("DOUBLE") === t.getField("B_FIELD").dt)
+    assert(DataTypeParserWrapper.parse("BINARY") === t.getField("C_FIELD").dt)
+  }
+}
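
The catalog JSON the suite parses doubles as the contract for the DataFrame reader. A minimal sketch, assuming a live SQLContext named sqlContext and a table laid out as described; the table and column names here are illustrative only.

    import org.apache.hadoop.hbase.spark.datasources.HBaseTableCatalog

    val demoCatalog = s"""{
                          |"table":{"namespace":"default", "name":"demo"},
                          |"rowkey":"key",
                          |"columns":{
                          |"id":{"cf":"rowkey", "col":"key", "type":"string"},
                          |"value":{"cf":"cf1", "col":"v", "type":"double"}
                          |}
                          |}""".stripMargin

    // Parse it directly, as the "basic" test does ...
    val parsed = HBaseTableCatalog(Map(HBaseTableCatalog.tableCatalog -> demoCatalog))
    assert(parsed.getField("id").isRowKey)

    // ... or hand the same JSON to the DataFrame reader, as the later suites do.
    val df = sqlContext.read
      .options(Map(HBaseTableCatalog.tableCatalog -> demoCatalog))
      .format("org.apache.hadoop.hbase.spark")
      .load()
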
diff --git a/spark/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/HBaseConnectionCacheSuite.scala b/spark/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/HBaseConnectionCacheSuite.scala
new file mode 100644
index 0000000..5b42bd9
--- /dev/null
+++ b/spark/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/HBaseConnectionCacheSuite.scala
@@ -0,0 +1,236 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.spark
+
+import java.util.concurrent.ExecutorService
+import scala.util.Random
+
+import org.apache.hadoop.hbase.client.{BufferedMutator, Table, RegionLocator,
+  Connection, BufferedMutatorParams, Admin, TableBuilder}
+import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.hbase.TableName
+import org.scalatest.FunSuite
+
+case class HBaseConnectionKeyMocker (confId: Int) extends HBaseConnectionKey (null) {
+  override def hashCode: Int = {
+    confId
+  }
+
+  override def equals(obj: Any): Boolean = {
+    if(!obj.isInstanceOf[HBaseConnectionKeyMocker])
+      false
+    else
+      confId == obj.asInstanceOf[HBaseConnectionKeyMocker].confId
+  }
+}
+
+class ConnectionMocker extends Connection {
+  var isClosed: Boolean = false
+
+  def getRegionLocator (tableName: TableName): RegionLocator = null
+  def getConfiguration: Configuration = null
+  def getTable (tableName: TableName): Table = null
+  def getTable(tableName: TableName, pool: ExecutorService): Table = null
+  def getBufferedMutator (params: BufferedMutatorParams): BufferedMutator = null
+  def getBufferedMutator (tableName: TableName): BufferedMutator = null
+  def getAdmin: Admin = null
+  def getTableBuilder(tableName: TableName, pool: ExecutorService): TableBuilder = null
+
+  def close(): Unit = {
+    if (isClosed)
+      throw new IllegalStateException()
+    isClosed = true
+  }
+
+  def isAborted: Boolean = true
+  def abort(why: String, e: Throwable) = {}
+}
+
+class HBaseConnectionCacheSuite extends FunSuite with Logging {
+  /*
+   * These tests must be performed sequentially because they operate on a
+   * single shared housekeeping thread and resource.
+   *
+   * There appears to be no way to tell FunSuite to enforce that, so the
+   * test cases are written as plain functions that are called sequentially
+   * from a single test case.
+   */
+  test("all test cases") {
+    testBasic()
+    testWithPressureWithoutClose()
+    testWithPressureWithClose()
+  }
+
+  def cleanEnv() {
+    HBaseConnectionCache.connectionMap.synchronized {
+      HBaseConnectionCache.connectionMap.clear()
+      HBaseConnectionCache.cacheStat.numActiveConnections = 0
+      HBaseConnectionCache.cacheStat.numActualConnectionsCreated = 0
+      HBaseConnectionCache.cacheStat.numTotalRequests = 0
+    }
+  }
+
+  def testBasic() {
+    cleanEnv()
+    HBaseConnectionCache.setTimeout(1 * 1000)
+
+    val connKeyMocker1 = new HBaseConnectionKeyMocker(1)
+    val connKeyMocker1a = new HBaseConnectionKeyMocker(1)
+    val connKeyMocker2 = new HBaseConnectionKeyMocker(2)
+
+    val c1 = HBaseConnectionCache
+      .getConnection(connKeyMocker1, new ConnectionMocker)
+
+    assert(HBaseConnectionCache.connectionMap.size === 1)
+    assert(HBaseConnectionCache.getStat.numTotalRequests === 1)
+    assert(HBaseConnectionCache.getStat.numActualConnectionsCreated === 1)
+    assert(HBaseConnectionCache.getStat.numActiveConnections === 1)
+
+    val c1a = HBaseConnectionCache
+      .getConnection(connKeyMocker1a, new ConnectionMocker)
+
+    HBaseConnectionCache.connectionMap.synchronized {
+      assert(HBaseConnectionCache.connectionMap.size === 1)
+      assert(HBaseConnectionCache.getStat.numTotalRequests === 2)
+      assert(HBaseConnectionCache.getStat.numActualConnectionsCreated === 1)
+      assert(HBaseConnectionCache.getStat.numActiveConnections === 1)
+    }
+
+    val c2 = HBaseConnectionCache
+      .getConnection(connKeyMocker2, new ConnectionMocker)
+
+    HBaseConnectionCache.connectionMap.synchronized {
+      assert(HBaseConnectionCache.connectionMap.size === 2)
+      assert(HBaseConnectionCache.getStat.numTotalRequests === 3)
+      assert(HBaseConnectionCache.getStat.numActualConnectionsCreated === 2)
+      assert(HBaseConnectionCache.getStat.numActiveConnections === 2)
+    }
+
+    c1.close()
+    HBaseConnectionCache.connectionMap.synchronized {
+      assert(HBaseConnectionCache.connectionMap.size === 2)
+      assert(HBaseConnectionCache.getStat.numActiveConnections === 2)
+    }
+
+    c1a.close()
+    HBaseConnectionCache.connectionMap.synchronized {
+      assert(HBaseConnectionCache.connectionMap.size === 2)
+      assert(HBaseConnectionCache.getStat.numActiveConnections === 2)
+    }
+
+    Thread.sleep(3 * 1000) // Give the housekeeping thread enough time to evict timed-out entries
+    HBaseConnectionCache.connectionMap.synchronized {
+      assert(HBaseConnectionCache.connectionMap.size === 1)
+      assert(HBaseConnectionCache.connectionMap.iterator.next()._1
+        .asInstanceOf[HBaseConnectionKeyMocker].confId === 2)
+      assert(HBaseConnectionCache.getStat.numActiveConnections === 1)
+    }
+
+    c2.close()
+  }
+
+  def testWithPressureWithoutClose() {
+    cleanEnv()
+
+    class TestThread extends Runnable {
+      override def run() {
+        for (i <- 0 to 999) {
+          val c = HBaseConnectionCache.getConnection(
+            new HBaseConnectionKeyMocker(Random.nextInt(10)), new ConnectionMocker)
+        }
+      }
+    }
+
+    HBaseConnectionCache.setTimeout(500)
+    val threads: Array[Thread] = new Array[Thread](100)
+    for (i <- 0 to 99) {
+      threads.update(i, new Thread(new TestThread()))
+      threads(i).run() // run() (not start()) executes on the calling thread, so the loops run sequentially
+    }
+    try {
+      threads.foreach { x => x.join() }
+    } catch {
+      case e: InterruptedException => println(e.getMessage)
+    }
+
+    Thread.sleep(1000)
+    HBaseConnectionCache.connectionMap.synchronized {
+      assert(HBaseConnectionCache.connectionMap.size === 10)
+      assert(HBaseConnectionCache.getStat.numTotalRequests === 100 * 1000)
+      assert(HBaseConnectionCache.getStat.numActualConnectionsCreated === 10)
+      assert(HBaseConnectionCache.getStat.numActiveConnections === 10)
+
+      var totalRc : Int = 0
+      HBaseConnectionCache.connectionMap.foreach {
+        x => totalRc += x._2.refCount
+      }
+      assert(totalRc === 100 * 1000)
+      HBaseConnectionCache.connectionMap.foreach {
+        x => {
+          x._2.refCount = 0
+          x._2.timestamp = System.currentTimeMillis() - 1000
+        }
+      }
+    }
+    Thread.sleep(1000)
+    assert(HBaseConnectionCache.connectionMap.size === 0)
+    assert(HBaseConnectionCache.getStat.numActualConnectionsCreated === 10)
+    assert(HBaseConnectionCache.getStat.numActiveConnections === 0)
+  }
+
+  def testWithPressureWithClose() {
+    cleanEnv()
+
+    class TestThread extends Runnable {
+      override def run() {
+        for (i <- 0 to 999) {
+          val c = HBaseConnectionCache.getConnection(
+            new HBaseConnectionKeyMocker(Random.nextInt(10)), new ConnectionMocker)
+          Thread.`yield`()
+          c.close()
+        }
+      }
+    }
+
+    HBaseConnectionCache.setTimeout(3 * 1000)
+    val threads: Array[Thread] = new Array[Thread](100)
+    for (i <- threads.indices) {
+      threads.update(i, new Thread(new TestThread()))
+      threads(i).run() // run() (not start()) executes on the calling thread, so the loops run sequentially
+    }
+    try {
+      threads.foreach { x => x.join() }
+    } catch {
+      case e: InterruptedException => println(e.getMessage)
+    }
+
+    HBaseConnectionCache.connectionMap.synchronized {
+      assert(HBaseConnectionCache.connectionMap.size === 10)
+      assert(HBaseConnectionCache.getStat.numTotalRequests === 100 * 1000)
+      assert(HBaseConnectionCache.getStat.numActualConnectionsCreated === 10)
+      assert(HBaseConnectionCache.getStat.numActiveConnections === 10)
+    }
+
+    Thread.sleep(6 * 1000)
+    HBaseConnectionCache.connectionMap.synchronized {
+      assert(HBaseConnectionCache.connectionMap.size === 0)
+      assert(HBaseConnectionCache.getStat.numActualConnectionsCreated === 10)
+      assert(HBaseConnectionCache.getStat.numActiveConnections === 0)
+    }
+  }
+}
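
In miniature, the lifecycle these cache tests pin down, using the mockers defined above; a real caller would pass a key derived from an HBase Configuration and a factory for a real Connection rather than the stubs.

    HBaseConnectionCache.setTimeout(1000)                      // eviction timeout in ms
    val key = new HBaseConnectionKeyMocker(42)
    val c1 = HBaseConnectionCache.getConnection(key, new ConnectionMocker)
    val c2 = HBaseConnectionCache.getConnection(key, new ConnectionMocker) // same key: cache hit
    c1.close()                                                 // drops a reference only
    c2.close()                                                 // refCount reaches 0
    // The housekeeping thread evicts the idle entry once the timeout elapses.
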
diff --git a/spark/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/HBaseContextSuite.scala b/spark/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/HBaseContextSuite.scala
new file mode 100644
index 0000000..83e2ac6
--- /dev/null
+++ b/spark/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/HBaseContextSuite.scala
@@ -0,0 +1,356 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.spark
+
+import org.apache.hadoop.hbase.client._
+import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter
+import org.apache.hadoop.hbase.util.Bytes
+import org.apache.hadoop.hbase.{ CellUtil, TableName, HBaseTestingUtility}
+import org.apache.spark.{SparkException, SparkContext}
+import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite}
+
+class HBaseContextSuite extends FunSuite with
+BeforeAndAfterEach with BeforeAndAfterAll  with Logging {
+
+  @transient var sc: SparkContext = null
+  var TEST_UTIL = new HBaseTestingUtility
+
+  val tableName = "t1"
+  val columnFamily = "c"
+
+  override def beforeAll() {
+    TEST_UTIL.startMiniCluster()
+    logInfo(" - minicluster started")
+
+    try {
+      TEST_UTIL.deleteTable(TableName.valueOf(tableName))
+    } catch {
+      case e: Exception =>
+        logInfo(" - no table " + tableName + " found")
+    }
+    logInfo(" - creating table " + tableName)
+    TEST_UTIL.createTable(TableName.valueOf(tableName), Bytes.toBytes(columnFamily))
+    logInfo(" - created table")
+
+    val envMap = Map[String,String](("Xmx", "512m"))
+
+    sc = new SparkContext("local", "test", null, Nil, envMap)
+  }
+
+  override def afterAll() {
+    logInfo("shuting down minicluster")
+    TEST_UTIL.shutdownMiniCluster()
+    logInfo(" - minicluster shut down")
+    TEST_UTIL.cleanupTestDir()
+    sc.stop()
+  }
+
+  test("bulkput to test HBase client") {
+    val config = TEST_UTIL.getConfiguration
+    val rdd = sc.parallelize(Array(
+      (Bytes.toBytes("1"),
+        Array((Bytes.toBytes(columnFamily), Bytes.toBytes("a"), Bytes.toBytes("foo1")))),
+      (Bytes.toBytes("2"),
+        Array((Bytes.toBytes(columnFamily), Bytes.toBytes("b"), Bytes.toBytes("foo2")))),
+      (Bytes.toBytes("3"),
+        Array((Bytes.toBytes(columnFamily), Bytes.toBytes("c"), Bytes.toBytes("foo3")))),
+      (Bytes.toBytes("4"),
+        Array((Bytes.toBytes(columnFamily), Bytes.toBytes("d"), Bytes.toBytes("foo")))),
+      (Bytes.toBytes("5"),
+        Array((Bytes.toBytes(columnFamily), Bytes.toBytes("e"), Bytes.toBytes("bar"))))))
+
+    val hbaseContext = new HBaseContext(sc, config)
+    hbaseContext.bulkPut[(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])](rdd,
+      TableName.valueOf(tableName),
+      (putRecord) => {
+        val put = new Put(putRecord._1)
+        putRecord._2.foreach((putValue) => put.addColumn(putValue._1, putValue._2, putValue._3))
+        put
+      })
+
+    val connection = ConnectionFactory.createConnection(config)
+    val table = connection.getTable(TableName.valueOf("t1"))
+
+    try {
+      val foo1 = Bytes.toString(CellUtil.cloneValue(table.get(new Get(Bytes.toBytes("1"))).
+        getColumnLatestCell(Bytes.toBytes(columnFamily), Bytes.toBytes("a"))))
+      assert(foo1 == "foo1")
+
+      val foo2 = Bytes.toString(CellUtil.cloneValue(table.get(new Get(Bytes.toBytes("2"))).
+        getColumnLatestCell(Bytes.toBytes(columnFamily), Bytes.toBytes("b"))))
+      assert(foo2 == "foo2")
+
+      val foo3 = Bytes.toString(CellUtil.cloneValue(table.get(new Get(Bytes.toBytes("3"))).
+        getColumnLatestCell(Bytes.toBytes(columnFamily), Bytes.toBytes("c"))))
+      assert(foo3 == "foo3")
+
+      val foo4 = Bytes.toString(CellUtil.cloneValue(table.get(new Get(Bytes.toBytes("4"))).
+        getColumnLatestCell(Bytes.toBytes(columnFamily), Bytes.toBytes("d"))))
+      assert(foo4 == "foo")
+
+      val foo5 = Bytes.toString(CellUtil.cloneValue(table.get(new Get(Bytes.toBytes("5"))).
+        getColumnLatestCell(Bytes.toBytes(columnFamily), Bytes.toBytes("e"))))
+      assert(foo5 == "bar")
+
+    } finally {
+      table.close()
+      connection.close()
+    }
+  }
+
+  test("bulkDelete to test HBase client") {
+    val config = TEST_UTIL.getConfiguration
+    val connection = ConnectionFactory.createConnection(config)
+    val table = connection.getTable(TableName.valueOf("t1"))
+
+    try {
+      var put = new Put(Bytes.toBytes("delete1"))
+      put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("a"), Bytes.toBytes("foo1"))
+      table.put(put)
+      put = new Put(Bytes.toBytes("delete2"))
+      put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("a"), Bytes.toBytes("foo2"))
+      table.put(put)
+      put = new Put(Bytes.toBytes("delete3"))
+      put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("a"), Bytes.toBytes("foo3"))
+      table.put(put)
+
+      val rdd = sc.parallelize(Array(
+        Bytes.toBytes("delete1"),
+        Bytes.toBytes("delete3")))
+
+      val hbaseContext = new HBaseContext(sc, config)
+      hbaseContext.bulkDelete[Array[Byte]](rdd,
+        TableName.valueOf(tableName),
+        putRecord => new Delete(putRecord),
+        4)
+
+      assert(table.get(new Get(Bytes.toBytes("delete1"))).
+        getColumnLatestCell(Bytes.toBytes(columnFamily), Bytes.toBytes("a")) == null)
+      assert(table.get(new Get(Bytes.toBytes("delete3"))).
+        getColumnLatestCell(Bytes.toBytes(columnFamily), Bytes.toBytes("a")) == null)
+      assert(Bytes.toString(CellUtil.cloneValue(table.get(new Get(Bytes.toBytes("delete2"))).
+        getColumnLatestCell(Bytes.toBytes(columnFamily), Bytes.toBytes("a")))).equals("foo2"))
+    } finally {
+      table.close()
+      connection.close()
+    }
+  }
+
+  test("bulkGet to test HBase client") {
+    val config = TEST_UTIL.getConfiguration
+    val connection = ConnectionFactory.createConnection(config)
+    val table = connection.getTable(TableName.valueOf("t1"))
+
+    try {
+      var put = new Put(Bytes.toBytes("get1"))
+      put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("a"), Bytes.toBytes("foo1"))
+      table.put(put)
+      put = new Put(Bytes.toBytes("get2"))
+      put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("a"), Bytes.toBytes("foo2"))
+      table.put(put)
+      put = new Put(Bytes.toBytes("get3"))
+      put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("a"), Bytes.toBytes("foo3"))
+      table.put(put)
+    } finally {
+      table.close()
+      connection.close()
+    }
+    val rdd = sc.parallelize(Array(
+      Bytes.toBytes("get1"),
+      Bytes.toBytes("get2"),
+      Bytes.toBytes("get3"),
+      Bytes.toBytes("get4")))
+    val hbaseContext = new HBaseContext(sc, config)
+
+    val getRdd = hbaseContext.bulkGet[Array[Byte], String](
+      TableName.valueOf(tableName),
+      2,
+      rdd,
+      record => {
+        new Get(record)
+      },
+      (result: Result) => {
+        if (result.listCells() != null) {
+          val it = result.listCells().iterator()
+          val B = new StringBuilder
+
+          B.append(Bytes.toString(result.getRow) + ":")
+
+          while (it.hasNext) {
+            val cell = it.next()
+            val q = Bytes.toString(CellUtil.cloneQualifier(cell))
+            if (q.equals("counter")) {
+              B.append("(" + q + "," + Bytes.toLong(CellUtil.cloneValue(cell)) + ")")
+            } else {
+              B.append("(" + q + "," + Bytes.toString(CellUtil.cloneValue(cell)) + ")")
+            }
+          }
+          "" + B.toString
+        } else {
+          ""
+        }
+      })
+    val getArray = getRdd.collect()
+
+    assert(getArray.length == 4)
+    assert(getArray.contains("get1:(a,foo1)"))
+    assert(getArray.contains("get2:(a,foo2)"))
+    assert(getArray.contains("get3:(a,foo3)"))
+
+  }
+
+  test("BulkGet failure test: bad table") {
+    val config = TEST_UTIL.getConfiguration
+
+    val rdd = sc.parallelize(Array(
+      Bytes.toBytes("get1"),
+      Bytes.toBytes("get2"),
+      Bytes.toBytes("get3"),
+      Bytes.toBytes("get4")))
+    val hbaseContext = new HBaseContext(sc, config)
+
+    intercept[SparkException] {
+      try {
+        val getRdd = hbaseContext.bulkGet[Array[Byte], String](
+          TableName.valueOf("badTableName"),
+          2,
+          rdd,
+          record => {
+            new Get(record)
+          },
+          (result: Result) => "1")
+
+        getRdd.collect()
+
+        fail("We should have failed and not reached this line")
+      } catch {
+        case ex: SparkException => {
+          assert(
+            ex.getMessage.contains(
+              "org.apache.hadoop.hbase.client.RetriesExhaustedWithDetailsException"))
+          throw ex
+        }
+      }
+    }
+  }
+
+  test("BulkGet failure test: bad column") {
+
+    val config = TEST_UTIL.getConfiguration
+    val connection = ConnectionFactory.createConnection(config)
+    val table = connection.getTable(TableName.valueOf("t1"))
+
+    try {
+      var put = new Put(Bytes.toBytes("get1"))
+      put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("a"), Bytes.toBytes("foo1"))
+      table.put(put)
+      put = new Put(Bytes.toBytes("get2"))
+      put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("a"), Bytes.toBytes("foo2"))
+      table.put(put)
+      put = new Put(Bytes.toBytes("get3"))
+      put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("a"), Bytes.toBytes("foo3"))
+      table.put(put)
+    } finally {
+      table.close()
+      connection.close()
+    }
+
+    val rdd = sc.parallelize(Array(
+      Bytes.toBytes("get1"),
+      Bytes.toBytes("get2"),
+      Bytes.toBytes("get3"),
+      Bytes.toBytes("get4")))
+    val hbaseContext = new HBaseContext(sc, config)
+
+    val getRdd = hbaseContext.bulkGet[Array[Byte], String](
+      TableName.valueOf(tableName),
+      2,
+      rdd,
+      record => {
+        new Get(record)
+      },
+      (result: Result) => {
+        if (result.listCells() != null) {
+          val cellValue = result.getColumnLatestCell(
+            Bytes.toBytes("c"), Bytes.toBytes("bad_column"))
+          if (cellValue == null) "null" else "bad"
+        } else "noValue"
+      })
+    var nullCounter = 0
+    var noValueCounter = 0
+    getRdd.collect().foreach(r => {
+      if ("null".equals(r)) nullCounter += 1
+      else if ("noValue".equals(r)) noValueCounter += 1
+    })
+    assert(nullCounter == 3)
+    assert(noValueCounter == 1)
+  }
+
+  test("distributedScan to test HBase client") {
+    val config = TEST_UTIL.getConfiguration
+    val connection = ConnectionFactory.createConnection(config)
+    val table = connection.getTable(TableName.valueOf("t1"))
+
+    try {
+      var put = new Put(Bytes.toBytes("scan1"))
+      put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("a"), Bytes.toBytes("foo1"))
+      table.put(put)
+      put = new Put(Bytes.toBytes("scan2"))
+      put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("a"), Bytes.toBytes("foo2"))
+      table.put(put)
+      put = new Put(Bytes.toBytes("scan2"))
+      put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("b"), Bytes.toBytes("foo-2"))
+      table.put(put)
+      put = new Put(Bytes.toBytes("scan3"))
+      put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("a"), Bytes.toBytes("foo3"))
+      table.put(put)
+      put = new Put(Bytes.toBytes("scan4"))
+      put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("a"), Bytes.toBytes("foo3"))
+      table.put(put)
+      put = new Put(Bytes.toBytes("scan5"))
+      put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("a"), Bytes.toBytes("foo3"))
+      table.put(put)
+    } finally {
+      table.close()
+      connection.close()
+    }
+
+    val hbaseContext = new HBaseContext(sc, config)
+
+    val scan = new Scan()
+    val filter = new FirstKeyOnlyFilter()
+    scan.setCaching(100)
+    scan.setStartRow(Bytes.toBytes("scan2"))
+    scan.setStopRow(Bytes.toBytes("scan4_"))
+    scan.setFilter(filter)
+
+    val scanRdd = hbaseContext.hbaseRDD(TableName.valueOf(tableName), scan)
+
+    try {
+      val scanList = scanRdd.map(r => r._1.copyBytes()).collect()
+      assert(scanList.length == 3)
+      var cnt = 0
+      scanRdd.map(r => r._2.listCells().size()).collect().foreach(l => {
+        cnt += l
+      })
+      // the number of cells returned would be 4 without the Filter
+      assert(cnt == 3)
+    } catch {
+      case ex: Exception => ex.printStackTrace()
+    }
+  }
+}
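
Stripped of the assertions, the HBaseContext calls this suite covers reduce to the shape below. It is a sketch under the suite's own setup: `sc` and `config` are the SparkContext and HBase Configuration from beforeAll, and table t1 with family c already exists.

    import org.apache.hadoop.hbase.TableName
    import org.apache.hadoop.hbase.client.{Delete, Get, Put, Scan}
    import org.apache.hadoop.hbase.util.Bytes

    val hbaseContext = new HBaseContext(sc, config)

    // Write: one Put per RDD element.
    hbaseContext.bulkPut[(Array[Byte], Array[Byte])](
      sc.parallelize(Seq((Bytes.toBytes("r1"), Bytes.toBytes("v1")))),
      TableName.valueOf("t1"),
      rec => new Put(rec._1).addColumn(Bytes.toBytes("c"), Bytes.toBytes("a"), rec._2))

    // Read: Gets issued in batches of 2, each Result mapped by a converter.
    val rows = hbaseContext.bulkGet[Array[Byte], String](
      TableName.valueOf("t1"), 2,
      sc.parallelize(Seq(Bytes.toBytes("r1"))),
      rowKey => new Get(rowKey),
      result => Bytes.toString(result.getRow))

    // Delete and scan follow the same pattern.
    hbaseContext.bulkDelete[Array[Byte]](
      sc.parallelize(Seq(Bytes.toBytes("r1"))),
      TableName.valueOf("t1"), rowKey => new Delete(rowKey), 4)
    val scanned = hbaseContext.hbaseRDD(TableName.valueOf("t1"), new Scan())
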
diff --git a/spark/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/HBaseDStreamFunctionsSuite.scala b/spark/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/HBaseDStreamFunctionsSuite.scala
new file mode 100644
index 0000000..7592525
--- /dev/null
+++ b/spark/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/HBaseDStreamFunctionsSuite.scala
@@ -0,0 +1,142 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.spark
+
+import org.apache.hadoop.hbase.client._
+import org.apache.hadoop.hbase.util.Bytes
+import org.apache.hadoop.hbase.{CellUtil, TableName, HBaseTestingUtility}
+import org.apache.spark.rdd.RDD
+import org.apache.spark.streaming.{Milliseconds, StreamingContext}
+import org.apache.spark.SparkContext
+import org.apache.hadoop.hbase.spark.HBaseDStreamFunctions._
+import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite}
+
+import scala.collection.mutable
+
+class HBaseDStreamFunctionsSuite  extends FunSuite with
+BeforeAndAfterEach with BeforeAndAfterAll with Logging {
+  @transient var sc: SparkContext = null
+
+  var TEST_UTIL: HBaseTestingUtility = new HBaseTestingUtility
+
+  val tableName = "t1"
+  val columnFamily = "c"
+
+  override def beforeAll() {
+
+    TEST_UTIL.startMiniCluster()
+
+    logInfo(" - minicluster started")
+    try
+      TEST_UTIL.deleteTable(TableName.valueOf(tableName))
+    catch {
+      case e: Exception => logInfo(" - no table " + tableName + " found")
+
+    }
+    logInfo(" - creating table " + tableName)
+    TEST_UTIL.createTable(TableName.valueOf(tableName), Bytes.toBytes(columnFamily))
+    logInfo(" - created table")
+
+    sc = new SparkContext("local", "test")
+  }
+
+  override def afterAll() {
+    TEST_UTIL.deleteTable(TableName.valueOf(tableName))
+    TEST_UTIL.shutdownMiniCluster()
+    sc.stop()
+  }
+
+  test("bulkput to test HBase client") {
+    val config = TEST_UTIL.getConfiguration
+    val rdd1 = sc.parallelize(Array(
+      (Bytes.toBytes("1"),
+        Array((Bytes.toBytes(columnFamily), Bytes.toBytes("a"), Bytes.toBytes("foo1")))),
+      (Bytes.toBytes("2"),
+        Array((Bytes.toBytes(columnFamily), Bytes.toBytes("b"), Bytes.toBytes("foo2")))),
+      (Bytes.toBytes("3"),
+        Array((Bytes.toBytes(columnFamily), Bytes.toBytes("c"), Bytes.toBytes("foo3"))))))
+
+    val rdd2 = sc.parallelize(Array(
+      (Bytes.toBytes("4"),
+        Array((Bytes.toBytes(columnFamily), Bytes.toBytes("d"), Bytes.toBytes("foo")))),
+      (Bytes.toBytes("5"),
+        Array((Bytes.toBytes(columnFamily), Bytes.toBytes("e"), Bytes.toBytes("bar"))))))
+
+    var isFinished = false
+
+    val hbaseContext = new HBaseContext(sc, config)
+    val ssc = new StreamingContext(sc, Milliseconds(200))
+
+    val queue = mutable.Queue[RDD[(Array[Byte], Array[(Array[Byte],
+      Array[Byte], Array[Byte])])]]()
+    queue += rdd1
+    queue += rdd2
+    val dStream = ssc.queueStream(queue)
+
+    dStream.hbaseBulkPut(
+      hbaseContext,
+      TableName.valueOf(tableName),
+      (putRecord) => {
+        val put = new Put(putRecord._1)
+        putRecord._2.foreach((putValue) => put.addColumn(putValue._1, putValue._2, putValue._3))
+        put
+      })
+
+    dStream.foreachRDD(rdd => {
+      if (rdd.count() == 0) {
+        isFinished = true
+      }
+    })
+
+    ssc.start()
+
+    while (!isFinished) {
+      Thread.sleep(100)
+    }
+
+    ssc.stop(true, true)
+
+    val connection = ConnectionFactory.createConnection(config)
+    val table = connection.getTable(TableName.valueOf("t1"))
+
+    try {
+      val foo1 = Bytes.toString(CellUtil.cloneValue(table.get(new Get(Bytes.toBytes("1"))).
+        getColumnLatestCell(Bytes.toBytes(columnFamily), Bytes.toBytes("a"))))
+      assert(foo1 == "foo1")
+
+      val foo2 = Bytes.toString(CellUtil.cloneValue(table.get(new Get(Bytes.toBytes("2"))).
+        getColumnLatestCell(Bytes.toBytes(columnFamily), Bytes.toBytes("b"))))
+      assert(foo2 == "foo2")
+
+      val foo3 = Bytes.toString(CellUtil.cloneValue(table.get(new Get(Bytes.toBytes("3"))).
+        getColumnLatestCell(Bytes.toBytes(columnFamily), Bytes.toBytes("c"))))
+      assert(foo3 == "foo3")
+
+      val foo4 = Bytes.toString(CellUtil.cloneValue(table.get(new Get(Bytes.toBytes("4"))).
+        getColumnLatestCell(Bytes.toBytes(columnFamily), Bytes.toBytes("d"))))
+      assert(foo4 == "foo")
+
+      val foo5 = Bytes.toString(CellUtil.cloneValue(table.get(new Get(Bytes.toBytes("5"))).
+        getColumnLatestCell(Bytes.toBytes(columnFamily), Bytes.toBytes("e"))))
+      assert(foo5 == "bar")
+    } finally {
+      table.close()
+      connection.close()
+    }
+  }
+
+}
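
The streaming decorator this suite drives can be summarized briefly: importing HBaseDStreamFunctions._ adds hbaseBulkPut to a DStream, and each micro-batch is written with the same record-to-Put function used on RDDs. A sketch under the suite's setup (ssc, hbaseContext, and a stream of (rowKey, Array[(family, qualifier, value)]) records):

    import org.apache.hadoop.hbase.spark.HBaseDStreamFunctions._

    dStream.hbaseBulkPut(
      hbaseContext,
      TableName.valueOf("t1"),
      rec => {
        val put = new Put(rec._1)
        rec._2.foreach(v => put.addColumn(v._1, v._2, v._3))
        put
      })
    ssc.start()
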
diff --git a/spark/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/HBaseRDDFunctionsSuite.scala b/spark/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/HBaseRDDFunctionsSuite.scala
new file mode 100644
index 0000000..9ea2c7f
--- /dev/null
+++ b/spark/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/HBaseRDDFunctionsSuite.scala
@@ -0,0 +1,398 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.spark
+
+import org.apache.hadoop.hbase.client._
+import org.apache.hadoop.hbase.util.Bytes
+import org.apache.hadoop.hbase.{CellUtil, TableName, HBaseTestingUtility}
+import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._
+import org.apache.spark.SparkContext
+import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite}
+
+import scala.collection.mutable
+
+class HBaseRDDFunctionsSuite extends FunSuite with
+BeforeAndAfterEach with BeforeAndAfterAll with Logging {
+  @transient var sc: SparkContext = null
+  var TEST_UTIL: HBaseTestingUtility = new HBaseTestingUtility
+
+  val tableName = "t1"
+  val columnFamily = "c"
+
+  override def beforeAll() {
+
+    TEST_UTIL.startMiniCluster
+
+    logInfo(" - minicluster started")
+    try
+      TEST_UTIL.deleteTable(TableName.valueOf(tableName))
+    catch {
+      case e: Exception => logInfo(" - no table " + tableName + " found")
+
+    }
+    logInfo(" - creating table " + tableName)
+    TEST_UTIL.createTable(TableName.valueOf(tableName), Bytes.toBytes(columnFamily))
+    logInfo(" - created table")
+
+    sc = new SparkContext("local", "test")
+  }
+
+  override def afterAll() {
+    TEST_UTIL.deleteTable(TableName.valueOf(tableName))
+    logInfo("shuting down minicluster")
+    TEST_UTIL.shutdownMiniCluster()
+
+    sc.stop()
+  }
+
+  test("bulkput to test HBase client") {
+    val config = TEST_UTIL.getConfiguration
+    val rdd = sc.parallelize(Array(
+      (Bytes.toBytes("1"),
+        Array((Bytes.toBytes(columnFamily), Bytes.toBytes("a"), Bytes.toBytes("foo1")))),
+      (Bytes.toBytes("2"),
+        Array((Bytes.toBytes(columnFamily), Bytes.toBytes("b"), Bytes.toBytes("foo2")))),
+      (Bytes.toBytes("3"),
+        Array((Bytes.toBytes(columnFamily), Bytes.toBytes("c"), Bytes.toBytes("foo3")))),
+      (Bytes.toBytes("4"),
+        Array((Bytes.toBytes(columnFamily), Bytes.toBytes("d"), Bytes.toBytes("foo")))),
+      (Bytes.toBytes("5"),
+        Array((Bytes.toBytes(columnFamily), Bytes.toBytes("e"), Bytes.toBytes("bar"))))))
+
+    val hbaseContext = new HBaseContext(sc, config)
+
+    rdd.hbaseBulkPut(
+    hbaseContext,
+      TableName.valueOf(tableName),
+      (putRecord) => {
+        val put = new Put(putRecord._1)
+        putRecord._2.foreach((putValue) => put.addColumn(putValue._1, putValue._2, putValue._3))
+        put
+      })
+
+    val connection = ConnectionFactory.createConnection(config)
+    val table = connection.getTable(TableName.valueOf("t1"))
+
+    try {
+      val foo1 = Bytes.toString(CellUtil.cloneValue(table.get(new Get(Bytes.toBytes("1"))).
+        getColumnLatestCell(Bytes.toBytes(columnFamily), Bytes.toBytes("a"))))
+      assert(foo1 == "foo1")
+
+      val foo2 = Bytes.toString(CellUtil.cloneValue(table.get(new Get(Bytes.toBytes("2"))).
+        getColumnLatestCell(Bytes.toBytes(columnFamily), Bytes.toBytes("b"))))
+      assert(foo2 == "foo2")
+
+      val foo3 = Bytes.toString(CellUtil.cloneValue(table.get(new Get(Bytes.toBytes("3"))).
+        getColumnLatestCell(Bytes.toBytes(columnFamily), Bytes.toBytes("c"))))
+      assert(foo3 == "foo3")
+
+      val foo4 = Bytes.toString(CellUtil.cloneValue(table.get(new Get(Bytes.toBytes("4"))).
+        getColumnLatestCell(Bytes.toBytes(columnFamily), Bytes.toBytes("d"))))
+      assert(foo4 == "foo")
+
+      val foo5 = Bytes.toString(CellUtil.cloneValue(table.get(new Get(Bytes.toBytes("5"))).
+        getColumnLatestCell(Bytes.toBytes(columnFamily), Bytes.toBytes("e"))))
+      assert(foo5 == "bar")
+    } finally {
+      table.close()
+      connection.close()
+    }
+  }
+
+  test("bulkDelete to test HBase client") {
+    val config = TEST_UTIL.getConfiguration
+    val connection = ConnectionFactory.createConnection(config)
+    val table = connection.getTable(TableName.valueOf("t1"))
+
+    try {
+      var put = new Put(Bytes.toBytes("delete1"))
+      put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("a"), Bytes.toBytes("foo1"))
+      table.put(put)
+      put = new Put(Bytes.toBytes("delete2"))
+      put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("a"), Bytes.toBytes("foo2"))
+      table.put(put)
+      put = new Put(Bytes.toBytes("delete3"))
+      put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("a"), Bytes.toBytes("foo3"))
+      table.put(put)
+
+      val rdd = sc.parallelize(Array(
+        Bytes.toBytes("delete1"),
+        Bytes.toBytes("delete3")))
+
+      val hbaseContext = new HBaseContext(sc, config)
+
+      rdd.hbaseBulkDelete(hbaseContext,
+        TableName.valueOf(tableName),
+        putRecord => new Delete(putRecord),
+        4)
+
+      assert(table.get(new Get(Bytes.toBytes("delete1"))).
+        getColumnLatestCell(Bytes.toBytes(columnFamily), Bytes.toBytes("a")) == null)
+      assert(table.get(new Get(Bytes.toBytes("delete3"))).
+        getColumnLatestCell(Bytes.toBytes(columnFamily), Bytes.toBytes("a")) == null)
+      assert(Bytes.toString(CellUtil.cloneValue(table.get(new Get(Bytes.toBytes("delete2"))).
+        getColumnLatestCell(Bytes.toBytes(columnFamily), Bytes.toBytes("a")))).equals("foo2"))
+    } finally {
+      table.close()
+      connection.close()
+    }
+
+  }
+
+  test("bulkGet to test HBase client") {
+    val config = TEST_UTIL.getConfiguration
+    val connection = ConnectionFactory.createConnection(config)
+    val table = connection.getTable(TableName.valueOf("t1"))
+
+    try {
+      var put = new Put(Bytes.toBytes("get1"))
+      put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("a"), Bytes.toBytes("foo1"))
+      table.put(put)
+      put = new Put(Bytes.toBytes("get2"))
+      put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("a"), Bytes.toBytes("foo2"))
+      table.put(put)
+      put = new Put(Bytes.toBytes("get3"))
+      put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("a"), Bytes.toBytes("foo3"))
+      table.put(put)
+    } finally {
+      table.close()
+      connection.close()
+    }
+
+    val rdd = sc.parallelize(Array(
+      Bytes.toBytes("get1"),
+      Bytes.toBytes("get2"),
+      Bytes.toBytes("get3"),
+      Bytes.toBytes("get4")))
+    val hbaseContext = new HBaseContext(sc, config)
+
+    // Get with custom conversion logic
+    val getRdd = rdd.hbaseBulkGet[String](hbaseContext, TableName.valueOf(tableName), 2,
+      record => {
+        new Get(record)
+      },
+      (result: Result) => {
+        if (result.listCells() != null) {
+          val it = result.listCells().iterator()
+          val B = new StringBuilder
+
+          B.append(Bytes.toString(result.getRow) + ":")
+
+          while (it.hasNext) {
+            val cell = it.next
+            val q = Bytes.toString(CellUtil.cloneQualifier(cell))
+            if (q.equals("counter")) {
+              B.append("(" + q + "," + Bytes.toLong(CellUtil.cloneValue(cell)) + ")")
+            } else {
+              B.append("(" + q + "," + Bytes.toString(CellUtil.cloneValue(cell)) + ")")
+            }
+          }
+          "" + B.toString
+        } else {
+          ""
+        }
+      })
+
+    val getArray = getRdd.collect()
+
+    assert(getArray.length == 4)
+    assert(getArray.contains("get1:(a,foo1)"))
+    assert(getArray.contains("get2:(a,foo2)"))
+    assert(getArray.contains("get3:(a,foo3)"))
+  }
+
+  test("bulkGet default converter to test HBase client") {
+    val config = TEST_UTIL.getConfiguration
+    val connection = ConnectionFactory.createConnection(config)
+    val table = connection.getTable(TableName.valueOf("t1"))
+
+    try {
+      var put = new Put(Bytes.toBytes("get1"))
+      put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("a"), Bytes.toBytes("foo1"))
+      table.put(put)
+      put = new Put(Bytes.toBytes("get2"))
+      put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("a"), Bytes.toBytes("foo2"))
+      table.put(put)
+      put = new Put(Bytes.toBytes("get3"))
+      put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("a"), Bytes.toBytes("foo3"))
+      table.put(put)
+    } finally {
+      table.close()
+      connection.close()
+    }
+
+    val rdd = sc.parallelize(Array(
+      Bytes.toBytes("get1"),
+      Bytes.toBytes("get2"),
+      Bytes.toBytes("get3"),
+      Bytes.toBytes("get4")))
+    val hbaseContext = new HBaseContext(sc, config)
+
+    val getRdd = rdd.hbaseBulkGet(hbaseContext, TableName.valueOf("t1"), 2,
+      record => {
+        new Get(record)
+      }).map((row) => {
+      if (row != null && row._2.listCells() != null) {
+        val it = row._2.listCells().iterator()
+        val B = new StringBuilder
+
+        B.append(Bytes.toString(row._2.getRow) + ":")
+
+        while (it.hasNext) {
+          val cell = it.next
+          val q = Bytes.toString(CellUtil.cloneQualifier(cell))
+          if (q.equals("counter")) {
+            B.append("(" + q + "," + Bytes.toLong(CellUtil.cloneValue(cell)) + ")")
+          } else {
+            B.append("(" + q + "," + Bytes.toString(CellUtil.cloneValue(cell)) + ")")
+          }
+        }
+        "" + B.toString
+      } else {
+        ""
+      }})
+
+    val getArray = getRdd.collect()
+
+    assert(getArray.length == 4)
+    assert(getArray.contains("get1:(a,foo1)"))
+    assert(getArray.contains("get2:(a,foo2)"))
+    assert(getArray.contains("get3:(a,foo3)"))
+  }
+
+  test("foreachPartition with puts to test HBase client") {
+    val config = TEST_UTIL.getConfiguration
+    val rdd = sc.parallelize(Array(
+      (Bytes.toBytes("1foreach"),
+        Array((Bytes.toBytes(columnFamily), Bytes.toBytes("a"), Bytes.toBytes("foo1")))),
+      (Bytes.toBytes("2foreach"),
+        Array((Bytes.toBytes(columnFamily), Bytes.toBytes("b"), Bytes.toBytes("foo2")))),
+      (Bytes.toBytes("3foreach"),
+        Array((Bytes.toBytes(columnFamily), Bytes.toBytes("c"), Bytes.toBytes("foo3")))),
+      (Bytes.toBytes("4foreach"),
+        Array((Bytes.toBytes(columnFamily), Bytes.toBytes("d"), Bytes.toBytes("foo")))),
+      (Bytes.toBytes("5foreach"),
+        Array((Bytes.toBytes(columnFamily), Bytes.toBytes("e"), Bytes.toBytes("bar"))))))
+
+    val hbaseContext = new HBaseContext(sc, config)
+
+    rdd.hbaseForeachPartition(hbaseContext, (it, conn) => {
+      val bufferedMutator = conn.getBufferedMutator(TableName.valueOf("t1"))
+      it.foreach((putRecord) => {
+        val put = new Put(putRecord._1)
+        putRecord._2.foreach((putValue) => put.addColumn(putValue._1, putValue._2, putValue._3))
+        bufferedMutator.mutate(put)
+      })
+      bufferedMutator.flush()
+      bufferedMutator.close()
+    })
+
+    val connection = ConnectionFactory.createConnection(config)
+    val table = connection.getTable(TableName.valueOf("t1"))
+
+    try {
+      val foo1 = Bytes.toString(CellUtil.cloneValue(table.get(new Get(Bytes.toBytes("1foreach"))).
+        getColumnLatestCell(Bytes.toBytes(columnFamily), Bytes.toBytes("a"))))
+      assert(foo1 == "foo1")
+
+      val foo2 = Bytes.toString(CellUtil.cloneValue(table.get(new Get(Bytes.toBytes("2foreach"))).
+        getColumnLatestCell(Bytes.toBytes(columnFamily), Bytes.toBytes("b"))))
+      assert(foo2 == "foo2")
+
+      val foo3 = Bytes.toString(CellUtil.cloneValue(table.get(new Get(Bytes.toBytes("3foreach"))).
+        getColumnLatestCell(Bytes.toBytes(columnFamily), Bytes.toBytes("c"))))
+      assert(foo3 == "foo3")
+
+      val foo4 = Bytes.toString(CellUtil.cloneValue(table.get(new Get(Bytes.toBytes("4foreach"))).
+        getColumnLatestCell(Bytes.toBytes(columnFamily), Bytes.toBytes("d"))))
+      assert(foo4 == "foo")
+
+      val foo5 = Bytes.toString(CellUtil.cloneValue(table.get(new Get(Bytes.toBytes("5foreach"))).
+        getColumnLatestCell(Bytes.toBytes(columnFamily), Bytes.toBytes("e"))))
+      assert(foo5 == "bar")
+    } finally {
+      table.close()
+      connection.close()
+    }
+  }
+
+  test("mapPartitions with Get from test HBase client") {
+    val config = TEST_UTIL.getConfiguration
+    val connection = ConnectionFactory.createConnection(config)
+    val table = connection.getTable(TableName.valueOf("t1"))
+
+    try {
+      var put = new Put(Bytes.toBytes("get1"))
+      put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("a"), Bytes.toBytes("foo1"))
+      table.put(put)
+      put = new Put(Bytes.toBytes("get2"))
+      put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("a"), Bytes.toBytes("foo2"))
+      table.put(put)
+      put = new Put(Bytes.toBytes("get3"))
+      put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("a"), Bytes.toBytes("foo3"))
+      table.put(put)
+    } finally {
+      table.close()
+      connection.close()
+    }
+
+    val rdd = sc.parallelize(Array(
+      Bytes.toBytes("get1"),
+      Bytes.toBytes("get2"),
+      Bytes.toBytes("get3"),
+      Bytes.toBytes("get4")))
+    val hbaseContext = new HBaseContext(sc, config)
+
+    // Get with custom conversion logic
+    val getRdd = rdd.hbaseMapPartitions(hbaseContext, (it, conn) => {
+      val table = conn.getTable(TableName.valueOf("t1"))
+      var res = mutable.MutableList[String]()
+
+      it.foreach(r => {
+        val get = new Get(r)
+        val result = table.get(get)
+        if (result.listCells != null) {
+          val it = result.listCells().iterator()
+          val B = new StringBuilder
+
+          B.append(Bytes.toString(result.getRow) + ":")
+
+          while (it.hasNext) {
+            val cell = it.next()
+            val q = Bytes.toString(CellUtil.cloneQualifier(cell))
+            if (q.equals("counter")) {
+              B.append("(" + q + "," + Bytes.toLong(CellUtil.cloneValue(cell)) + ")")
+            } else {
+              B.append("(" + q + "," + Bytes.toString(CellUtil.cloneValue(cell)) + ")")
+            }
+          }
+          res += "" + B.toString
+        } else {
+          res += ""
+        }
+      })
+      res.iterator
+    })
+
+    val getArray = getRdd.collect()
+
+    assert(getArray.length == 4)
+    assert(getArray.contains("get1:(a,foo1)"))
+    assert(getArray.contains("get2:(a,foo2)"))
+    assert(getArray.contains("get3:(a,foo3)"))
+  }
+}
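
The same verbs expressed as RDD decorators: importing HBaseRDDFunctions._ puts hbaseBulkGet, hbaseBulkPut, hbaseBulkDelete, hbaseForeachPartition and hbaseMapPartitions on the RDD itself, with the HBaseContext passed along. A condensed sketch under the suite's setup (sc, config, table t1, family c):

    import org.apache.hadoop.hbase.TableName
    import org.apache.hadoop.hbase.client.{Get, Put, Result}
    import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._
    import org.apache.hadoop.hbase.util.Bytes

    val hbaseContext = new HBaseContext(sc, config)
    val rowKeys = sc.parallelize(Seq(Bytes.toBytes("get1"), Bytes.toBytes("get2")))

    // Same semantics as hbaseContext.bulkGet, written on the RDD.
    val rows = rowKeys.hbaseBulkGet[String](hbaseContext, TableName.valueOf("t1"), 2,
      rowKey => new Get(rowKey),
      (result: Result) => Bytes.toString(result.getRow))

    // foreachPartition / mapPartitions hand each partition a live Connection.
    rowKeys.hbaseForeachPartition(hbaseContext, (it, conn) => {
      val mutator = conn.getBufferedMutator(TableName.valueOf("t1"))
      it.foreach(k => mutator.mutate(
        new Put(k).addColumn(Bytes.toBytes("c"), Bytes.toBytes("a"), Bytes.toBytes("x"))))
      mutator.flush()
      mutator.close()
    })
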
diff --git a/spark/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/HBaseTestSource.scala b/spark/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/HBaseTestSource.scala
new file mode 100644
index 0000000..ccb4625
--- /dev/null
+++ b/spark/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/HBaseTestSource.scala
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.spark
+
+import org.apache.hadoop.hbase.spark.datasources.HBaseSparkConf
+import org.apache.spark.SparkEnv
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.{Row, SQLContext}
+import org.apache.spark.sql.sources._
+import org.apache.spark.sql.types._
+
+class HBaseTestSource extends RelationProvider {
+  override def createRelation(
+      sqlContext: SQLContext,
+      parameters: Map[String, String]): BaseRelation = {
+    DummyScan(
+      parameters("cacheSize").toInt,
+      parameters("batchNum").toInt,
+      parameters("blockCacheingEnable").toBoolean,
+      parameters("rowNum").toInt)(sqlContext)
+  }
+}
+
+case class DummyScan(
+     cacheSize: Int,
+     batchNum: Int,
+     blockCachingEnable: Boolean,
+     rowNum: Int)(@transient val sqlContext: SQLContext)
+  extends BaseRelation with TableScan {
+  private def sparkConf = SparkEnv.get.conf
+  override def schema: StructType =
+    StructType(StructField("i", IntegerType, nullable = false) :: Nil)
+
+  override def buildScan(): RDD[Row] = sqlContext.sparkContext.parallelize(0 until rowNum)
+    .map(Row(_))
+    .map{ x =>
+      if (sparkConf.getInt(HBaseSparkConf.QUERY_BATCHSIZE,
+          -1) != batchNum ||
+        sparkConf.getInt(HBaseSparkConf.QUERY_CACHEDROWS,
+          -1) != cacheSize ||
+        sparkConf.getBoolean(HBaseSparkConf.QUERY_CACHEBLOCKS,
+          false) != blockCachingEnable) {
+        throw new Exception("HBase Spark configuration was not set as expected")
+      }
+      x
+  }
+}
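
This dummy source appears to exist only to check that the HBaseSparkConf values survive into the Spark conf seen where tasks run; it would be loaded by its fully-qualified class name, roughly as below (parameter values illustrative).

    val checked = sqlContext.read
      .format("org.apache.hadoop.hbase.spark.HBaseTestSource")
      .options(Map(
        "cacheSize" -> "100",
        "batchNum" -> "100",
        "blockCacheingEnable" -> "true",  // key spelling matches the source above
        "rowNum" -> "10"))
      .load()
    checked.count()  // forces buildScan, which throws if the conf did not propagate
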
diff --git a/spark/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/PartitionFilterSuite.scala b/spark/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/PartitionFilterSuite.scala
new file mode 100644
index 0000000..4960084
--- /dev/null
+++ b/spark/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/PartitionFilterSuite.scala
@@ -0,0 +1,522 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.spark
+
+import org.apache.hadoop.hbase.spark.datasources.{HBaseSparkConf, HBaseTableCatalog}
+import org.apache.hadoop.hbase.{HBaseTestingUtility, TableName}
+import org.apache.spark.sql.{DataFrame, SQLContext}
+import org.apache.spark.{SparkConf, SparkContext}
+import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite}
+
+case class FilterRangeRecord(
+                              intCol0: Int,
+                              boolCol1: Boolean,
+                              doubleCol2: Double,
+                              floatCol3: Float,
+                              intCol4: Int,
+                              longCol5: Long,
+                              shortCol6: Short,
+                              stringCol7: String,
+                              byteCol8: Byte)
+
+object FilterRangeRecord {
+  def apply(i: Int): FilterRangeRecord = {
+    FilterRangeRecord(if (i % 2 == 0) i else -i,
+      i % 2 == 0,
+      if (i % 2 == 0) i.toDouble else -i.toDouble,
+      i.toFloat,
+      if (i % 2 == 0) i else -i,
+      i.toLong,
+      i.toShort,
+      s"String$i extra",
+      i.toByte)
+  }
+}
+
+class PartitionFilterSuite extends FunSuite with
+  BeforeAndAfterEach with BeforeAndAfterAll with Logging {
+  @transient var sc: SparkContext = null
+  var TEST_UTIL: HBaseTestingUtility = new HBaseTestingUtility
+
+  var sqlContext: SQLContext = null
+  var df: DataFrame = null
+
+  def withCatalog(cat: String): DataFrame = {
+    sqlContext
+      .read
+      .options(Map(HBaseTableCatalog.tableCatalog -> cat))
+      .format("org.apache.hadoop.hbase.spark")
+      .load()
+  }
+
+  override def beforeAll() {
+
+    TEST_UTIL.startMiniCluster
+    val sparkConf = new SparkConf
+    sparkConf.set(HBaseSparkConf.QUERY_CACHEBLOCKS, "true")
+    sparkConf.set(HBaseSparkConf.QUERY_BATCHSIZE, "100")
+    sparkConf.set(HBaseSparkConf.QUERY_CACHEDROWS, "100")
+
+    sc = new SparkContext("local", "test", sparkConf)
+    new HBaseContext(sc, TEST_UTIL.getConfiguration)
+    sqlContext = new SQLContext(sc)
+  }
+
+  override def afterAll() {
+    logInfo("shutting down minicluster")
+    TEST_UTIL.shutdownMiniCluster()
+
+    sc.stop()
+  }
+
+  override def beforeEach(): Unit = {
+    DefaultSourceStaticUtils.lastFiveExecutionRules.clear()
+  }
+
+  // The original raw data, used to construct the expected result set without going
+  // through the DataFrame logic. It verifies the result set retrieved via the DataFrame path.
+  val rawResult = (0 until 32).map { i =>
+    FilterRangeRecord(i)
+  }
+
+  def collectToSet[T](df: DataFrame): Set[T] = {
+    df.collect().map(_.getAs[T](0)).toSet
+  }
+  val catalog = s"""{
+                    |"table":{"namespace":"default", "name":"rangeTable"},
+                    |"rowkey":"key",
+                    |"columns":{
+                    |"intCol0":{"cf":"rowkey", "col":"key", "type":"int"},
+                    |"boolCol1":{"cf":"cf1", "col":"boolCol1", "type":"boolean"},
+                    |"doubleCol2":{"cf":"cf2", "col":"doubleCol2", "type":"double"},
+                    |"floatCol3":{"cf":"cf3", "col":"floatCol3", "type":"float"},
+                    |"intCol4":{"cf":"cf4", "col":"intCol4", "type":"int"},
+                    |"longCol5":{"cf":"cf5", "col":"longCol5", "type":"bigint"},
+                    |"shortCol6":{"cf":"cf6", "col":"shortCol6", "type":"smallint"},
+                    |"stringCol7":{"cf":"cf7", "col":"stringCol7", "type":"string"},
+                    |"byteCol8":{"cf":"cf8", "col":"byteCol8", "type":"tinyint"}
+                    |}
+                    |}""".stripMargin
+
+  test("populate rangeTable") {
+    val sql = sqlContext
+    import sql.implicits._
+
+    sc.parallelize(rawResult).toDF.write.options(
+      Map(HBaseTableCatalog.tableCatalog -> catalog, HBaseTableCatalog.newTable -> "5"))
+      .format("org.apache.hadoop.hbase.spark")
+      .save()
+  }
+  test("rangeTable full query") {
+    val df = withCatalog(catalog)
+    df.show
+    assert(df.count() === 32)
+  }
+
+  /**
+    *expected result: only showing top 20 rows
+    *+-------+
+    *|intCol0|
+    *+-------+
+    *| -31   |
+    *| -29   |
+    *| -27   |
+    *| -25   |
+    *| -23   |
+    *| -21   |
+    *| -19   |
+    *| -17   |
+    *| -15   |
+    *| -13   |
+    *| -11   |
+    *|  -9   |
+    *|  -7   |
+    *|  -5   |
+    *|  -3   |
+    *|  -1   |
+    *+-------+
+    */
+  test("rangeTable rowkey less than 0") {
+    val sql = sqlContext
+    import sql.implicits._
+    val df = withCatalog(catalog)
+    val s = df.filter($"intCol0" < 0).select($"intCol0")
+    s.show
+    // filter results without going through dataframe
+    val expected = rawResult.filter(_.intCol0 < 0).map(_.intCol0).toSet
+    // filter results going through dataframe
+    val result = collectToSet[Int](s)
+    assert(expected === result)
+  }
+
+  /**
+    *expected result: only showing top 20 rows
+    *+-------+
+    *|intCol4|
+    *+-------+
+    *| -31   |
+    *| -29   |
+    *| -27   |
+    *| -25   |
+    *| -23   |
+    *| -21   |
+    *| -19   |
+    *| -17   |
+    *| -15   |
+    *| -13   |
+    *| -11   |
+    *|  -9   |
+    *|  -7   |
+    *|  -5   |
+    *|  -3   |
+    *|  -1   |
+    *+-------+
+    */
+  test("rangeTable int col less than 0") {
+    val sql = sqlContext
+    import sql.implicits._
+    val df = withCatalog(catalog)
+    val s = df.filter($"intCol4" < 0).select($"intCol4")
+    s.show
+    // filter results without going through dataframe
+    val expected = rawResult.filter(_.intCol4 < 0).map(_.intCol4).toSet
+    // filter results going through dataframe
+    val result = collectToSet[Int](s)
+    assert(expected === result)
+  }
+
+  /**
+    *expected result: only showing top 20 rows
+    *+-----------+
+    *| doubleCol2|
+    *+-----------+
+    *|  0.0      |
+    *|  2.0      |
+    *|-31.0      |
+    *|-29.0      |
+    *|-27.0      |
+    *|-25.0      |
+    *|-23.0      |
+    *|-21.0      |
+    *|-19.0      |
+    *|-17.0      |
+    *|-15.0      |
+    *|-13.0      |
+    *|-11.0      |
+    *| -9.0      |
+    *| -7.0      |
+    *| -5.0      |
+    *| -3.0      |
+    *| -1.0      |
+    *+-----------+
+    */
+  test("rangeTable double col less than 0") {
+    val sql = sqlContext
+    import sql.implicits._
+    val df = withCatalog(catalog)
+    val s = df.filter($"doubleCol2" < 3.0).select($"doubleCol2")
+    s.show
+    // filter results without going through dataframe
+    val expected = rawResult.filter(_.doubleCol2 < 3.0).map(_.doubleCol2).toSet
+    // filter results going through dataframe
+    val result = collectToSet[Double](s)
+    assert(expected === result)
+  }
+
+  /**
+    * expected result: only showing top 20 rows
+    *+-------+
+    *|intCol0|
+    *+-------+
+    *| -31   |
+    *| -29   |
+    *| -27   |
+    *| -25   |
+    *| -23   |
+    *| -21   |
+    *| -19   |
+    *| -17   |
+    *| -15   |
+    *| -13   |
+    *| -11   |
+    *+-------+
+    *
+    */
+  test("rangeTable lessequal than -10") {
+    val sql = sqlContext
+    import sql.implicits._
+    val df = withCatalog(catalog)
+    val s = df.filter($"intCol0" <= -10).select($"intCol0")
+    s.show
+    // filter results without going through dataframe
+    val expected = rawResult.filter(_.intCol0 <= -10).map(_.intCol0).toSet
+    // filter results going through dataframe
+    val result = collectToSet[Int](s)
+    assert(expected === result)
+  }
+
+  /**
+    *expected result: only showing top 20 rows
+    *+-------+
+    *|intCol0|
+    *+-------+
+    *| -31   |
+    *| -29   |
+    *| -27   |
+    *| -25   |
+    *| -23   |
+    *| -21   |
+    *| -19   |
+    *| -17   |
+    *| -15   |
+    *| -13   |
+    *| -11   |
+    *|  -9   |
+    *+-------+
+    */
+  test("rangeTable lessequal than -9") {
+    val sql = sqlContext
+    import sql.implicits._
+    val df = withCatalog(catalog)
+    val s = df.filter($"intCol0" <= -9).select($"intCol0")
+    s.show
+    // filter results without going through dataframe
+    val expected = rawResult.filter(_.intCol0 <= -9).map(_.intCol0).toSet
+    // filter results going through dataframe
+    val result = collectToSet[Int](s)
+    assert(expected === result)
+  }
+
+  /**
+    *expected result: only showing top 20 rows
+    *+-------+
+    *|intCol0|
+    *+-------+
+    *|   0   |
+    *|   2   |
+    *|   4   |
+    *|   6   |
+    *|   8   |
+    *|  10   |
+    *|  12   |
+    *|  14   |
+    *|  16   |
+    *|  18   |
+    *|  20   |
+    *|  22   |
+    *|  24   |
+    *|  26   |
+    *|  28   |
+    *|  30   |
+    *|  -9   |
+    *|  -7   |
+    *|  -5   |
+    *|  -3   |
+    *+-------+
+    */
+  test("rangeTable greaterequal than -9") {
+    val sql = sqlContext
+    import sql.implicits._
+    val df = withCatalog(catalog)
+    val s = df.filter($"intCol0" >= -9).select($"intCol0")
+    s.show
+    // filter results without going through dataframe
+    val expected = rawResult.filter(_.intCol0 >= -9).map(_.intCol0).toSet
+    // filter results going through dataframe
+    val result = collectToSet[Int](s)
+    assert(expected === result)
+  }
+
+  /**
+    *expected result: only showing top 20 rows
+    *+-------+
+    *|intCol0|
+    *+-------+
+    *|   0   |
+    *|   2   |
+    *|   4   |
+    *|   6   |
+    *|   8   |
+    *|  10   |
+    *|  12   |
+    *|  14   |
+    *|  16   |
+    *|  18   |
+    *|  20   |
+    *|  22   |
+    *|  24   |
+    *|  26   |
+    *|  28   |
+    *|  30   |
+    *+-------+
+    */
+  test("rangeTable greaterequal  than 0") {
+    val sql = sqlContext
+    import sql.implicits._
+    val df = withCatalog(catalog)
+    val s = df.filter($"intCol0" >= 0).select($"intCol0")
+    s.show
+    // filter results without going through dataframe
+    val expected = rawResult.filter(_.intCol0 >= 0).map(_.intCol0).toSet
+    // filter results going through dataframe
+    val result = collectToSet[Int](s)
+    assert(expected === result)
+  }
+
+  /**
+    *expected result: only showing top 20 rows
+    *+-------+
+    *|intCol0|
+    *+-------+
+    *|  12   |
+    *|  14   |
+    *|  16   |
+    *|  18   |
+    *|  20   |
+    *|  22   |
+    *|  24   |
+    *|  26   |
+    *|  28   |
+    *|  30   |
+    *+-------+
+    */
+  test("rangeTable greater than 10") {
+    val sql = sqlContext
+    import sql.implicits._
+    val df = withCatalog(catalog)
+    val s = df.filter($"intCol0" > 10).select($"intCol0")
+    s.show
+    // filter results without going through dataframe
+    val expected = rawResult.filter(_.intCol0 > 10).map(_.intCol0).toSet
+    // filter results going through dataframe
+    val result = collectToSet[Int](s)
+    assert(expected === result)
+  }
+
+  /**
+    *expected result: only showing top 20 rows
+    *+-------+
+    *|intCol0|
+    *+-------+
+    *|   0   |
+    *|   2   |
+    *|   4   |
+    *|   6   |
+    *|   8   |
+    *|  10   |
+    *|  -9   |
+    *|  -7   |
+    *|  -5   |
+    *|  -3   |
+    *|  -1   |
+    *+-------+
+    */
+  test("rangeTable and") {
+    val sql = sqlContext
+    import sql.implicits._
+    val df = withCatalog(catalog)
+    val s = df.filter($"intCol0" > -10 && $"intCol0" <= 10).select($"intCol0")
+    s.show
+    // filter results without going through dataframe
+    val expected = rawResult.filter(x => x.intCol0 > -10 && x.intCol0 <= 10 ).map(_.intCol0).toSet
+    // filter results going through dataframe
+    val result = collectToSet[Int](s)
+    assert(expected === result)
+  }
+
+  /**
+    *expected result: only showing top 20 rows
+    *+-------+
+    *|intCol0|
+    *+-------+
+    *|  12   |
+    *|  14   |
+    *|  16   |
+    *|  18   |
+    *|  20   |
+    *|  22   |
+    *|  24   |
+    *|  26   |
+    *|  28   |
+    *|  30   |
+    *| -31   |
+    *| -29   |
+    *| -27   |
+    *| -25   |
+    *| -23   |
+    *| -21   |
+    *| -19   |
+    *| -17   |
+    *| -15   |
+    *| -13   |
+    *+-------+
+    */
+
+  test("or") {
+    val sql = sqlContext
+    import sql.implicits._
+    val df = withCatalog(catalog)
+    val s = df.filter($"intCol0" <= -10 || $"intCol0" > 10).select($"intCol0")
+    s.show
+    // filter results without going through dataframe
+    val expected = rawResult.filter(x => x.intCol0 <= -10 || x.intCol0 > 10).map(_.intCol0).toSet
+    // filter results going through dataframe
+    val result = collectToSet[Int](s)
+    assert(expected === result)
+  }
+
+  /**
+    *expected result: only showing top 20 rows
+    *+-------+
+    *|intCol0|
+    *+-------+
+    *|   0   |
+    *|   2   |
+    *|   4   |
+    *|   6   |
+    *|   8   |
+    *|  10   |
+    *|  12   |
+    *|  14   |
+    *|  16   |
+    *|  18   |
+    *|  20   |
+    *|  22   |
+    *|  24   |
+    *|  26   |
+    *|  28   |
+    *|  30   |
+    *| -31   |
+    *| -29   |
+    *| -27   |
+    *| -25   |
+    *+-------+
+    */
+  test("rangeTable all") {
+    val sql = sqlContext
+    import sql.implicits._
+    val df = withCatalog(catalog)
+    val s = df.filter($"intCol0" >= -100).select($"intCol0")
+    s.show
+    // filter results without going through dataframe
+    val expected = rawResult.filter(_.intCol0 >= -100).map(_.intCol0).toSet
+    // filter results going through dataframe
+    val result = collectToSet[Int](s)
+    assert(expected === result)
+  }
+}
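
For reference, a minimal usage sketch (not part of the patch) of the pattern PartitionFilterSuite exercises:
reading an HBase table through the Spark SQL data source with a catalog JSON and letting the connector push
simple comparisons down to HBase. The SQLContext, the HBaseContext registration and the "rangeTable" catalog
string are assumed to be set up as in the suite above, and the HBaseTableCatalog package is assumed to match
the suite's imports.

  import org.apache.spark.sql.{DataFrame, SQLContext}
  import org.apache.hadoop.hbase.spark.datasources.HBaseTableCatalog

  // Load the DataFrame backed by the HBase table described by the catalog JSON.
  def loadRangeTable(sqlContext: SQLContext, catalog: String): DataFrame =
    sqlContext.read
      .options(Map(HBaseTableCatalog.tableCatalog -> catalog))
      .format("org.apache.hadoop.hbase.spark")
      .load()

  // The comparison on the row-key column (intCol0) can be pushed down by the connector
  // as a range scan rather than being evaluated only on the Spark side.
  def negativeRowKeys(sqlContext: SQLContext, catalog: String): Array[Int] = {
    val df = loadRangeTable(sqlContext, catalog)
    df.filter(df("intCol0") < 0).select(df("intCol0")).collect().map(_.getInt(0))
  }
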
diff --git a/spark/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/TableOutputFormatSuite.scala b/spark/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/TableOutputFormatSuite.scala
new file mode 100644
index 0000000..f64447d
--- /dev/null
+++ b/spark/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/TableOutputFormatSuite.scala
@@ -0,0 +1,131 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.spark
+
+
+import java.text.SimpleDateFormat
+import java.util.{Date, Locale}
+
+import org.apache.hadoop.hbase.mapreduce.TableOutputFormat
+import org.apache.hadoop.hbase.{HBaseTestingUtility, TableName, TableNotFoundException}
+import org.apache.hadoop.hbase.util.Bytes
+import org.apache.hadoop.mapreduce.{Job, TaskAttemptID, TaskType}
+import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl
+import org.apache.spark.{SparkConf, SparkContext}
+import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite}
+
+import scala.util.{Failure, Success, Try}
+
+
+// Unit tests for HBASE-20521: checkOutputSpecs now resolves its configuration (TableOutputFormat.conf)
+// from getConf first and only falls back to the jobContext.
+// The suite contains two tests. In the normal case (getConf returns null), a fresh TableOutputFormat is
+// created without initializing TableOutputFormat.conf, so the configuration used inside checkOutputSpecs
+// comes from the jobContext and the check passes.
+// In the other case (getConf returns a configuration), setConf is called explicitly to initialize
+// TableOutputFormat.conf; to make the outcome obvious, that configuration names a nonexistent table,
+// so checkOutputSpecs throws TableNotFoundException.
+class TableOutputFormatSuite extends FunSuite with
+  BeforeAndAfterEach with BeforeAndAfterAll with Logging {
+  @transient var sc: SparkContext = null
+  var TEST_UTIL = new HBaseTestingUtility
+
+  val tableName = "TableOutputFormatTest"
+  val tableNameTest = "NonExistentTable"
+  val columnFamily = "cf"
+
+  override protected def beforeAll(): Unit = {
+    TEST_UTIL.startMiniCluster
+
+    logInfo(" - minicluster started")
+    try {
+      TEST_UTIL.deleteTable(TableName.valueOf(tableName))
+    }
+    catch {
+      case e: Exception => logInfo(" - no table " + tableName + " found")
+    }
+
+    TEST_UTIL.createTable(TableName.valueOf(tableName), Bytes.toBytes(columnFamily))
+    logInfo(" - created table")
+
+    //set "validateOutputSpecs" true anyway, force to validate output spec
+    val sparkConf = new SparkConf()
+      .setMaster("local")
+      .setAppName("test")
+
+    sc = new SparkContext(sparkConf)
+  }
+
+  override protected def afterAll(): Unit = {
+    logInfo(" - delete table: " + tableName)
+    TEST_UTIL.deleteTable(TableName.valueOf(tableName))
+    logInfo(" - shutting down minicluster")
+    TEST_UTIL.shutdownMiniCluster()
+
+    TEST_UTIL.cleanupTestDir()
+    sc.stop()
+  }
+
+  def getJobContext() = {
+    val hConf = TEST_UTIL.getConfiguration
+    hConf.set(TableOutputFormat.OUTPUT_TABLE, tableName)
+    val job = Job.getInstance(hConf)
+    job.setOutputFormatClass(classOf[TableOutputFormat[String]])
+
+    val jobTrackerId = new SimpleDateFormat("yyyyMMddHHmmss", Locale.US).format(new Date())
+    val jobAttemptId = new TaskAttemptID(jobTrackerId, 1, TaskType.MAP, 0, 0)
+    new TaskAttemptContextImpl(job.getConfiguration, jobAttemptId)
+  }
+
+  // Mock up jobContext object and execute actions in "write" function
+  // from "org.apache.spark.internal.io.SparkHadoopMapReduceWriter"
+  // this case should run normally without any exceptions
+  test("TableOutputFormat.checkOutputSpecs test without setConf called, should return true and without exceptions") {
+    val jobContext = getJobContext()
+    val format = jobContext.getOutputFormatClass
+    val jobFormat = format.newInstance
+    Try {
+      jobFormat.checkOutputSpecs(jobContext)
+    } match {
+      case Success(_) => assert(true)
+      case Failure(_) => assert(false)
+    }
+  }
+
+  // Set the configuration externally via setConf; checkOutputSpecs should then use the configuration
+  // supplied to setConf rather than the one from the jobContext. Since that configuration names a
+  // nonexistent table, this case should throw TableNotFoundException.
+  test("TableOutputFormat.checkOutputSpecs test with setConf called, should throw TableNotFoundException") {
+    val jobContext = getJobContext()
+    val format = jobContext.getOutputFormatClass
+    val jobFormat = format.newInstance
+
+    val hConf = TEST_UTIL.getConfiguration
+    hConf.set(TableOutputFormat.OUTPUT_TABLE, tableNameTest)
+    jobFormat.asInstanceOf[TableOutputFormat[String]].setConf(hConf)
+    Try {
+      jobFormat.checkOutputSpecs(jobContext)
+    } match {
+      case Success(_) => assert(false)
+      case Failure(_: TableNotFoundException) => assert(true)
+      case Failure(_) => assert(false)
+    }
+  }
+
+}
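
For context, a hedged sketch (not part of the patch) of the write path this suite guards: saving a pair RDD
through TableOutputFormat with the new Hadoop API. checkOutputSpecs validates the target table before any
task writes; after HBASE-20521 it prefers a configuration supplied via setConf and falls back to the
jobContext configuration, which is what the two tests above assert. Table, family and qualifier names below
are illustrative only.

  import org.apache.hadoop.hbase.client.Put
  import org.apache.hadoop.hbase.io.ImmutableBytesWritable
  import org.apache.hadoop.hbase.mapreduce.TableOutputFormat
  import org.apache.hadoop.hbase.util.Bytes
  import org.apache.hadoop.mapreduce.Job
  import org.apache.spark.SparkContext

  def writeRows(sc: SparkContext, rows: Seq[(String, String)]): Unit = {
    val conf = sc.hadoopConfiguration
    // Name the output table; checkOutputSpecs verifies it exists before the write starts.
    conf.set(TableOutputFormat.OUTPUT_TABLE, "TableOutputFormatTest")
    val job = Job.getInstance(conf)
    job.setOutputFormatClass(classOf[TableOutputFormat[ImmutableBytesWritable]])
    job.setOutputKeyClass(classOf[ImmutableBytesWritable])
    job.setOutputValueClass(classOf[Put])

    sc.parallelize(rows)
      .map { case (rowKey, value) =>
        val put = new Put(Bytes.toBytes(rowKey))
        put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("col"), Bytes.toBytes(value))
        (new ImmutableBytesWritable(Bytes.toBytes(rowKey)), put)
      }
      .saveAsNewAPIHadoopDataset(job.getConfiguration)
  }
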
diff --git a/spark/pom.xml b/spark/pom.xml
new file mode 100644
index 0000000..fd61fd0
--- /dev/null
+++ b/spark/pom.xml
@@ -0,0 +1,120 @@
+<?xml version="1.0"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <!--
+  /**
+   * Licensed to the Apache Software Foundation (ASF) under one
+   * or more contributor license agreements.  See the NOTICE file
+   * distributed with this work for additional information
+   * regarding copyright ownership.  The ASF licenses this file
+   * to you under the Apache License, Version 2.0 (the
+   * "License"); you may not use this file except in compliance
+   * with the License.  You may obtain a copy of the License at
+   *
+   *     http://www.apache.org/licenses/LICENSE-2.0
+   *
+   * Unless required by applicable law or agreed to in writing, software
+   * distributed under the License is distributed on an "AS IS" BASIS,
+   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   * See the License for the specific language governing permissions and
+   * limitations under the License.
+   */
+  -->
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>org.apache.hbase.connectors</groupId>
+    <artifactId>hbase-connectors</artifactId>
+    <version>${revision}</version>
+    <relativePath>../</relativePath>
+  </parent>
+  <groupId>org.apache.hbase.connectors</groupId>
+  <artifactId>spark</artifactId>
+  <packaging>pom</packaging>
+  <version>${revision}</version>
+  <name>Apache HBase - Spark Connectors</name>
+  <description>Spark Connectors for Apache HBase</description>
+  <modules>
+    <module>hbase-spark</module>
+    <module>hbase-spark-it</module>
+  </modules>
+  <properties>
+    <protobuf.plugin.version>0.6.1</protobuf.plugin.version>
+    <hbase-thirdparty.version>2.1.0</hbase-thirdparty.version>
+    <jackson.version>2.9.2</jackson.version>
+  </properties>
+  <dependencyManagement>
+    <dependencies>
+      <dependency>
+        <groupId>org.apache.hbase.connectors.spark</groupId>
+        <artifactId>hbase-spark</artifactId>
+        <version>${project.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.hbase.thirdparty</groupId>
+        <artifactId>hbase-shaded-miscellaneous</artifactId>
+        <version>${hbase-thirdparty.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.hadoop</groupId>
+        <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
+        <version>${hadoop.version}</version>
+        <exclusions>
+          <exclusion>
+            <groupId>io.netty</groupId>
+            <artifactId>netty</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>javax.servlet</groupId>
+            <artifactId>servlet-api</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>xerces</groupId>
+            <artifactId>xercesImpl</artifactId>
+          </exclusion>
+          <exclusion>
+             <groupId>com.google.code.findbugs</groupId>
+             <artifactId>jsr305</artifactId>
+          </exclusion>
+        </exclusions>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.hadoop</groupId>
+        <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
+        <version>${hadoop.version}</version>
+        <type>test-jar</type>
+        <scope>test</scope>
+        <exclusions>
+          <exclusion>
+            <groupId>io.netty</groupId>
+            <artifactId>netty</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>javax.servlet</groupId>
+            <artifactId>servlet-api</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>xerces</groupId>
+            <artifactId>xercesImpl</artifactId>
+          </exclusion>
+        </exclusions>
+      </dependency>
+    </dependencies>
+  </dependencyManagement>
+  <build>
+    <pluginManagement>
+      <plugins>
+        <plugin>
+          <groupId>org.xolstice.maven.plugins</groupId>
+          <artifactId>protobuf-maven-plugin</artifactId>
+          <version>${protobuf.plugin.version}</version>
+          <configuration>
+            <protocArtifact>com.google.protobuf:protoc:${external.protobuf.version}:exe:${os.detected.classifier}</protocArtifact>
+            <protoSourceRoot>${basedir}/src/main/protobuf/</protoSourceRoot>
+            <clearOutputDirectory>false</clearOutputDirectory>
+            <checkStaleness>true</checkStaleness>
+          </configuration>
+        </plugin>
+      </plugins>
+    </pluginManagement>
+  </build>
+</project>


 
