You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by me...@apache.org on 2019/10/01 10:15:45 UTC
[hbase-connectors] branch master updated: HBASE-22817 Use
hbase-shaded dependencies in hbase-spark (#42)
This is an automated email from the ASF dual-hosted git repository.
meszibalu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hbase-connectors.git
The following commit(s) were added to refs/heads/master by this push:
new 4dafb92 HBASE-22817 Use hbase-shaded dependencies in hbase-spark (#42)
4dafb92 is described below
commit 4dafb92b44a11f1e9d7f86cb4e6a0ab53495151e
Author: Balazs Meszaros <me...@apache.org>
AuthorDate: Tue Oct 1 12:15:41 2019 +0200
HBASE-22817 Use hbase-shaded dependencies in hbase-spark (#42)
* HBASE-22817 Use hbase-shaded dependencies in hbase-spark
* hbase-spark-protocol: a new project for the protobuf files.
* hbase-spark-protocol-shaded: the shaded version of hbase-spark-protocol,
which is compatible with hbase thirdparty protobuf.
hbase-spark codebase now depends on only hbase-shaded-* stuff and
hbase-spark-protocol-shaded.
* update our use of the flatten plugin to avoid it trying to re-flatten an already flattened and then deleted pom.
* update personality to wrap the yetus plugin for maven so that we can work around our need for "package" in all maven invocations.
* set the shade plugin's version to clear a warning.
Co-authored-by: Sean Busbey <bu...@apache.org>
---
dev-support/hbase-personality.sh | 247 +++++++++++++++++-
pom.xml | 29 ++-
spark/hbase-spark-it/pom.xml | 54 +---
spark/hbase-spark-protocol-shaded/pom.xml | 89 +++++++
spark/hbase-spark-protocol/pom.xml | 76 ++++++
.../src/main/protobuf/SparkFilter.proto | 0
spark/hbase-spark/pom.xml | 277 +--------------------
.../hadoop/hbase/spark/SparkSQLPushDownFilter.java | 17 +-
.../hbase/spark/datasources/JavaBytesEncoder.scala | 7 +-
spark/pom.xml | 84 ++-----
10 files changed, 484 insertions(+), 396 deletions(-)
diff --git a/dev-support/hbase-personality.sh b/dev-support/hbase-personality.sh
index c629b36..4077949 100755
--- a/dev-support/hbase-personality.sh
+++ b/dev-support/hbase-personality.sh
@@ -16,26 +16,265 @@
personality_plugins "all"
+function personality_globals
+{
+ # See notes down by the definition
+ BUILDTOOL=hb_maven
+}
+
# customizing yetus build to run scaladoc plugin only on hbase-spark project
function personality_modules
{
local testtype="$2"
+ local extra=""
+ local MODULES=("${CHANGED_MODULES[@]}")
clear_personality_queue
+ # Always install at root.
if [[ ${testtype} == mvninstall ]]; then
# shellcheck disable=SC2086
- personality_enqueue_module .
+ personality_enqueue_module . "${extra}"
return
fi
- for m in "${CHANGED_MODULES[@]}"; do
+ # If root is in the list of changes, just do whatever test at root
+ if [[ "${MODULES[*]}" =~ \. ]]; then
+ MODULES=(.)
+ fi
+
+ # If we'll end up needing a plugin to run on the hbase-spark or
+ # hbase-spark-it modules, then we need to ensure a 'package' phase runs.
+ if [[ "${MODULES[*]}" =~ \. ]] || \
+ [[ "${MODULES[*]}" =~ "hbase-spark " ]] || \
+ [[ "${MODULES[*]}" =~ "hbase-spark-it" ]]; then
+ extra="${extra} package"
+ fi
+
+ for m in "${MODULES[@]}"; do
if [[ "$testtype" != "scaladoc" ]]; then
- personality_enqueue_module "${m}"
+ personality_enqueue_module "${m}" "${extra}"
else
if [[ "$m" == "spark/hbase-spark" ]]; then
- personality_enqueue_module spark/hbase-spark
+ personality_enqueue_module spark/hbase-spark "${extra}"
fi
fi
done
}
+
+# { Start workaround stuff caused by our need for a package phase to run
+# on hbase-spark-protocol and hbase-spark-protocol-shaded
+#
+# By default in Yetus any extra parameters given by a personality
+# go at the start of the build tool's execution. Unfortunately,
+# the order of maven phases matters for how maven chooses to run
+# those phases. For the specific plugins we have (i.e. the shade plugin)
+# we need the package phase to be ordered after any clean phase requested;
+# several of the invocations called by Yetus's maven plugin include a clean.
+#
+# To work around this, we define a new build tool 'hb_maven' that
+# mostly wraps the built in maven plugin. It differs when calling the
+# module worker function for specific tests. Instead of calling the
+# built in one from Yetus (that isn't replaceable) we instead call one of
+# our own making. This new module worker function puts additional
+# parameters from personalities after the normal executor args.
+add_build_tool hb_maven
+
+
+# we redefine the modules_workers method here as well as the maven build
+# tool's call so that we can make sure any "clean" phases happen
+# before phases added as extra params
+# copied from Apache Yetus 0.11.0 release test-patch.sh
+function hb_modules_workers {
+ declare repostatus=$1
+ declare testtype=$2
+ shift 2
+ declare modindex=0
+ declare fn
+ declare savestart=${TIMER}
+ declare savestop
+ declare repo
+ declare modulesuffix
+ declare jdk=""
+ declare jdkindex=0
+ declare statusjdk
+ declare result=0
+ declare argv
+ declare execvalue
+
+ if [[ "${BUILDMODE}" = full ]]; then
+ repo="the source"
+ elif [[ ${repostatus} == branch ]]; then
+ repo=${PATCH_BRANCH}
+ else
+ repo="the patch"
+ fi
+
+ modules_reset
+
+ if verify_multijdk_test "${testtype}"; then
+ jdk=$(report_jvm_version "${JAVA_HOME}")
+ statusjdk=" with JDK v${jdk}"
+ jdk="-jdk${jdk}"
+ jdk=${jdk// /}
+ yetus_debug "Starting MultiJDK mode${statusjdk} on ${testtype}"
+ fi
+
+ until [[ ${modindex} -eq ${#MODULE[@]} ]]; do
+ start_clock
+
+ fn=$(module_file_fragment "${MODULE[${modindex}]}")
+ fn="${fn}${jdk}"
+ modulesuffix=$(basename "${MODULE[${modindex}]}")
+ if [[ ${modulesuffix} = \. ]]; then
+ modulesuffix="root"
+ fi
+
+ if ! buildtool_cwd "${modindex}"; then
+ echo "${BASEDIR}/${MODULE[${modindex}]} no longer exists. Skipping."
+ ((modindex=modindex+1))
+ savestop=$(stop_clock)
+ MODULE_STATUS_TIMER[${modindex}]=${savestop}
+ continue
+ fi
+
+ argv=("${@//@@@MODULEFN@@@/${fn}}")
+ argv=("${argv[@]//@@@MODULEDIR@@@/${BASEDIR}/${MODULE[${modindex}]}}")
+
+ # XX this bit below is what's different from yetus
+ # the order of executor args between built-in and extra from personality
+ # are swapped
+ # shellcheck disable=2086,2046
+ echo_and_redirect "${PATCH_DIR}/${repostatus}-${testtype}-${fn}.txt" \
+ $("${BUILDTOOL}_executor" "${testtype}") \
+ "${argv[@]}" \
+ ${MODULEEXTRAPARAM[${modindex}]//@@@MODULEFN@@@/${fn}}
+ execvalue=$?
+ # XX end different bit
+
+ reaper_post_exec "${modulesuffix}" "${repostatus}-${testtype}-${fn}"
+ ((execvalue = execvalue + $? ))
+
+ if [[ ${execvalue} == 0 ]] ; then
+ module_status \
+ ${modindex} \
+ +1 \
+ "${repostatus}-${testtype}-${fn}.txt" \
+ "${modulesuffix} in ${repo} passed${statusjdk}."
+ else
+ module_status \
+ ${modindex} \
+ -1 \
+ "${repostatus}-${testtype}-${fn}.txt" \
+ "${modulesuffix} in ${repo} failed${statusjdk}."
+ ((result = result + 1))
+ fi
+
+ # compile is special
+ if [[ ${testtype} = compile ]]; then
+ MODULE_COMPILE_LOG[${modindex}]="${PATCH_DIR}/${repostatus}-${testtype}-${fn}.txt"
+ yetus_debug "Compile log set to ${MODULE_COMPILE_LOG[${modindex}]}"
+ fi
+
+ savestop=$(stop_clock)
+ MODULE_STATUS_TIMER[${modindex}]=${savestop}
+ # shellcheck disable=SC2086
+ echo "Elapsed: $(clock_display ${savestop})"
+ popd >/dev/null || return 1
+ ((modindex=modindex+1))
+ done
+
+ TIMER=${savestart}
+
+ if [[ ${result} -gt 0 ]]; then
+ return 1
+ fi
+ return 0
+}
+
+function hb_maven_initialize {
+ maven_initialize
+}
+
+function hb_maven_precheck {
+ maven_precheck
+}
+
+function hb_maven_filefilter {
+ maven_filefilter
+}
+
+function hb_maven_buildfile {
+ maven_buildfile
+}
+
+function hb_maven_executor {
+ maven_executor
+}
+
+function hb_maven_builtin_personality_modules {
+ maven_builtin_personality_modules
+}
+
+function hb_maven_builtin_personality_file_tests {
+ maven_builtin_personality_file_tests
+}
+
+function hb_maven_reorder_modules {
+ maven_reorder_modules
+}
+
+function hb_maven_docker_support {
+ maven_docker_support
+}
+
+# copied from Apache Yetus 0.11.0 maven.sh
+function hb_maven_modules_worker {
+ declare repostatus=$1
+ declare tst=$2
+ declare maven_unit_test_filter
+
+ maven_unit_test_filter="$(maven_unit_test_filter)"
+ # shellcheck disable=SC2034
+ UNSUPPORTED_TEST=false
+
+ case ${tst} in
+ findbugs)
+ hb_modules_workers "${repostatus}" findbugs test-compile findbugs:findbugs -DskipTests=true
+ ;;
+ compile)
+ hb_modules_workers "${repostatus}" compile clean test-compile -DskipTests=true
+ ;;
+ distclean)
+ hb_modules_workers "${repostatus}" distclean clean -DskipTests=true
+ ;;
+ javadoc)
+ hb_modules_workers "${repostatus}" javadoc clean javadoc:javadoc -DskipTests=true
+ ;;
+ scaladoc)
+ hb_modules_workers "${repostatus}" scaladoc clean scala:doc -DskipTests=true
+ ;;
+ spotbugs)
+ hb_modules_workers "${repostatus}" spotbugs test-compile spotbugs:spotbugs -DskipTests=true
+ ;;
+ unit)
+ if [[ -n "${maven_unit_test_filter}" ]]; then
+ hb_modules_workers "${repostatus}" unit clean test -fae "${maven_unit_test_filter}"
+ else
+ hb_modules_workers "${repostatus}" unit clean test -fae
+ fi
+ ;;
+ *)
+ # shellcheck disable=SC2034
+ UNSUPPORTED_TEST=true
+ if [[ ${repostatus} = patch ]]; then
+ add_footer_table "${tst}" "not supported by the ${BUILDTOOL} plugin"
+ fi
+ yetus_error "WARNING: ${tst} is unsupported by ${BUILDTOOL}"
+ return 1
+ ;;
+ esac
+}
+
+# } End workaround stuff caused by our need for a package phase to run
+# on hbase-spark-protocol and hbase-spark-protocol-shaded
diff --git a/pom.xml b/pom.xml
index cf2372d..aa0ea83 100755
--- a/pom.xml
+++ b/pom.xml
@@ -18,6 +18,19 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+ ON MVN COMPILE NOT WORKING
+
+ you can't rely on doing `mvn compile` or `mvn test-compile` directly
+ because in the spark/hbase-spark module we need to refer to the classes
+ from hbase-spark-protocol after they've been rewritten in
+ hbase-spark-protocol-shaded to use the protobuf version from the
+ hbase-thirdparty project.
+
+ instead you should include a "package" phase in any commands you run
+ that won't implicitly include a package phase (e.g. install and verify
+ will implicitly include package)
+
-->
<modelVersion>4.0.0</modelVersion>
<parent>
@@ -116,12 +129,12 @@
<compileSource>1.8</compileSource>
<java.min.version>${compileSource}</java.min.version>
<maven.min.version>3.5.0</maven.min.version>
- <hbase.version>2.2.0</hbase.version>
+ <hbase.version>2.2.1</hbase.version>
<maven.compiler.version>3.6.1</maven.compiler.version>
<exec.maven.version>1.6.0</exec.maven.version>
<audience-annotations.version>0.5.0</audience-annotations.version>
<junit.version>4.12</junit.version>
- <hbase-thirdparty.version>2.1.0</hbase-thirdparty.version>
+ <hbase-thirdparty.version>2.2.1</hbase-thirdparty.version>
<hadoop-two.version>2.8.5</hadoop-two.version>
<hadoop-three.version>3.0.3</hadoop-three.version>
<hadoop.version>${hadoop-two.version}</hadoop.version>
@@ -270,6 +283,11 @@
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-shaded-client</artifactId>
+ <version>${hbase.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
<artifactId>hbase-mapreduce</artifactId>
<version>${hbase.version}</version>
</dependency>
@@ -282,7 +300,7 @@
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
- <artifactId>hbase-testing-util</artifactId>
+ <artifactId>hbase-shaded-testing-util</artifactId>
<version>${hbase.version}</version>
<scope>test</scope>
</dependency>
@@ -325,6 +343,11 @@
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-shade-plugin</artifactId>
+ <version>3.2.1</version>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>${maven.compiler.version}</version>
<configuration>
diff --git a/spark/hbase-spark-it/pom.xml b/spark/hbase-spark-it/pom.xml
index 17b4b5c..c367f95 100644
--- a/spark/hbase-spark-it/pom.xml
+++ b/spark/hbase-spark-it/pom.xml
@@ -172,32 +172,19 @@
</build>
<dependencies>
- <!-- Intra-project dependencies -->
<dependency>
- <groupId>org.apache.hbase</groupId>
- <artifactId>hbase-common</artifactId>
- <type>jar</type>
- </dependency>
- <dependency>
- <groupId>org.apache.hbase</groupId>
- <artifactId>hbase-client</artifactId>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-api</artifactId>
</dependency>
+
<dependency>
<groupId>org.apache.hbase</groupId>
- <artifactId>hbase-server</artifactId>
- <type>jar</type>
+ <artifactId>hbase-shaded-testing-util</artifactId>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
- <artifactId>hbase-server</artifactId>
- <type>test-jar</type>
- <scope>test</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hbase</groupId>
- <artifactId>hbase-zookeeper</artifactId>
+ <artifactId>hbase-it</artifactId>
<type>test-jar</type>
- <scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.hbase.connectors.spark</groupId>
@@ -206,18 +193,9 @@
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
- <artifactId>hbase-it</artifactId>
- <type>test-jar</type>
- </dependency>
- <dependency>
- <groupId>org.apache.hbase</groupId>
<artifactId>${compat.module}</artifactId>
</dependency>
<dependency>
- <groupId>org.apache.hbase</groupId>
- <artifactId>hbase-testing-util</artifactId>
- </dependency>
- <dependency>
<groupId>org.apache.hbase.thirdparty</groupId>
<artifactId>hbase-shaded-miscellaneous</artifactId>
</dependency>
@@ -245,19 +223,11 @@
</exclusion>
</exclusions>
</dependency>
+
<dependency>
- <groupId>org.slf4j</groupId>
- <artifactId>slf4j-api</artifactId>
- </dependency>
- <dependency>
- <groupId>org.apache.commons</groupId>
- <artifactId>commons-lang3</artifactId>
- </dependency>
- <!-- Hadoop needs Netty 3.x at test scope for the minicluster -->
- <dependency>
- <groupId>io.netty</groupId>
- <artifactId>netty</artifactId>
- <version>${netty.hadoop.version}</version>
+ <groupId>com.fasterxml.jackson.module</groupId>
+ <artifactId>jackson-module-scala_${scala.binary.version}</artifactId>
+ <version>${jackson.version}</version>
<scope>test</scope>
</dependency>
<dependency>
@@ -314,12 +284,6 @@
<classifier>tests</classifier>
<scope>test</scope>
</dependency>
- <dependency>
- <groupId>com.fasterxml.jackson.module</groupId>
- <artifactId>jackson-module-scala_${scala.binary.version}</artifactId>
- <version>${jackson.version}</version>
- <scope>test</scope>
- </dependency>
<dependency>
<groupId>junit</groupId>
diff --git a/spark/hbase-spark-protocol-shaded/pom.xml b/spark/hbase-spark-protocol-shaded/pom.xml
new file mode 100644
index 0000000..a429f30
--- /dev/null
+++ b/spark/hbase-spark-protocol-shaded/pom.xml
@@ -0,0 +1,89 @@
+<?xml version="1.0"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+
+ <parent>
+ <groupId>org.apache.hbase.connectors</groupId>
+ <artifactId>spark</artifactId>
+ <version>${revision}</version>
+ <relativePath>../</relativePath>
+ </parent>
+
+ <groupId>org.apache.hbase.connectors.spark</groupId>
+ <artifactId>hbase-spark-protocol-shaded</artifactId>
+ <name>Apache HBase - Spark Protocol (Shaded)</name>
+
+ <dependencies>
+ <!-- marked as "optional" to keep it from being included
+ as a transitive dependency of this module. needed
+ because we don't make a dependency reduced pom after
+ shading.
+ -->
+ <dependency>
+ <groupId>org.apache.hbase.connectors.spark</groupId>
+ <artifactId>hbase-spark-protocol</artifactId>
+ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase.thirdparty</groupId>
+ <artifactId>hbase-shaded-protobuf</artifactId>
+ <version>${hbase-thirdparty.version}</version>
+ </dependency>
+ </dependencies>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-shade-plugin</artifactId>
+ <executions>
+ <execution>
+ <phase>package</phase>
+ <goals>
+ <goal>shade</goal>
+ </goals>
+ <configuration>
+ <shadeSourcesContent>true</shadeSourcesContent>
+ <createSourcesJar>true</createSourcesJar>
+ <!-- shade plugin is not compatible with flatten plugin
+ if we are generating a dependency reduced pom -->
+ <createDependencyReducedPom>false</createDependencyReducedPom>
+ <relocations>
+ <relocation>
+ <pattern>com.google.protobuf</pattern>
+ <shadedPattern>org.apache.hbase.thirdparty.com.google.protobuf</shadedPattern>
+ </relocation>
+ </relocations>
+ <artifactSet>
+ <excludes>
+ <exclude>com.google.protobuf:protobuf-java</exclude>
+ <exclude>org.apache.hbase.thirdparty:*</exclude>
+ </excludes>
+ </artifactSet>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+
+</project>
diff --git a/spark/hbase-spark-protocol/pom.xml b/spark/hbase-spark-protocol/pom.xml
new file mode 100644
index 0000000..374b814
--- /dev/null
+++ b/spark/hbase-spark-protocol/pom.xml
@@ -0,0 +1,76 @@
+<?xml version="1.0"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+
+ <parent>
+ <groupId>org.apache.hbase.connectors</groupId>
+ <artifactId>spark</artifactId>
+ <version>${revision}</version>
+ <relativePath>../</relativePath>
+ </parent>
+
+ <groupId>org.apache.hbase.connectors.spark</groupId>
+ <artifactId>hbase-spark-protocol</artifactId>
+ <name>Apache HBase - Spark Protocol</name>
+
+ <dependencies>
+ <dependency>
+ <groupId>com.google.protobuf</groupId>
+ <artifactId>protobuf-java</artifactId>
+ </dependency>
+ </dependencies>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-compiler-plugin</artifactId>
+ </plugin>
+ <plugin>
+ <groupId>org.xolstice.maven.plugins</groupId>
+ <artifactId>protobuf-maven-plugin</artifactId>
+ <executions>
+ <execution>
+ <id>compile-protoc</id>
+ <phase>generate-sources</phase>
+ <goals>
+ <goal>compile</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-source-plugin</artifactId>
+ <executions>
+ <execution>
+ <id>attach-sources</id>
+ <goals>
+ <goal>jar</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+
+</project>
diff --git a/spark/hbase-spark/src/main/protobuf/SparkFilter.proto b/spark/hbase-spark-protocol/src/main/protobuf/SparkFilter.proto
similarity index 100%
rename from spark/hbase-spark/src/main/protobuf/SparkFilter.proto
rename to spark/hbase-spark-protocol/src/main/protobuf/SparkFilter.proto
diff --git a/spark/hbase-spark/pom.xml b/spark/hbase-spark/pom.xml
index 73360aa..0db9245 100644
--- a/spark/hbase-spark/pom.xml
+++ b/spark/hbase-spark/pom.xml
@@ -20,16 +20,18 @@
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
+
<parent>
<groupId>org.apache.hbase.connectors</groupId>
<artifactId>spark</artifactId>
<version>${revision}</version>
<relativePath>../</relativePath>
</parent>
+
<groupId>org.apache.hbase.connectors.spark</groupId>
<artifactId>hbase-spark</artifactId>
<name>Apache HBase - Spark Connector</name>
- <properties />
+
<dependencies>
<dependency>
<groupId>org.slf4j</groupId>
@@ -127,223 +129,11 @@
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
- <artifactId>hbase-client</artifactId>
- <exclusions>
- <exclusion>
- <groupId>log4j</groupId>
- <artifactId>log4j</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.apache.thrift</groupId>
- <artifactId>thrift</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.slf4j</groupId>
- <artifactId>slf4j-log4j12</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.mortbay.jetty</groupId>
- <artifactId>jsp-2.1</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.mortbay.jetty</groupId>
- <artifactId>jsp-api-2.1</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.mortbay.jetty</groupId>
- <artifactId>servlet-api-2.5</artifactId>
- </exclusion>
- <exclusion>
- <groupId>com.sun.jersey</groupId>
- <artifactId>jersey-core</artifactId>
- </exclusion>
- <exclusion>
- <groupId>com.sun.jersey</groupId>
- <artifactId>jersey-json</artifactId>
- </exclusion>
- <exclusion>
- <groupId>com.sun.jersey</groupId>
- <artifactId>jersey-server</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.mortbay.jetty</groupId>
- <artifactId>jetty</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.mortbay.jetty</groupId>
- <artifactId>jetty-util</artifactId>
- </exclusion>
- <exclusion>
- <groupId>tomcat</groupId>
- <artifactId>jasper-runtime</artifactId>
- </exclusion>
- <exclusion>
- <groupId>tomcat</groupId>
- <artifactId>jasper-compiler</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.jboss.netty</groupId>
- <artifactId>netty</artifactId>
- </exclusion>
- <exclusion>
- <groupId>io.netty</groupId>
- <artifactId>netty</artifactId>
- </exclusion>
- </exclusions>
+ <artifactId>hbase-shaded-client</artifactId>
</dependency>
<dependency>
- <groupId>org.apache.hbase</groupId>
- <artifactId>hbase-hadoop-compat</artifactId>
- <scope>test</scope>
- <type>test-jar</type>
- <exclusions>
- <exclusion>
- <groupId>log4j</groupId>
- <artifactId>log4j</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.apache.thrift</groupId>
- <artifactId>thrift</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.slf4j</groupId>
- <artifactId>slf4j-log4j12</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.slf4j</groupId>
- <artifactId>slf4j-api</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.mortbay.jetty</groupId>
- <artifactId>jsp-2.1</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.mortbay.jetty</groupId>
- <artifactId>jsp-api-2.1</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.mortbay.jetty</groupId>
- <artifactId>servlet-api-2.5</artifactId>
- </exclusion>
- <exclusion>
- <groupId>com.sun.jersey</groupId>
- <artifactId>jersey-core</artifactId>
- </exclusion>
- <exclusion>
- <groupId>com.sun.jersey</groupId>
- <artifactId>jersey-json</artifactId>
- </exclusion>
- <exclusion>
- <groupId>com.sun.jersey</groupId>
- <artifactId>jersey-server</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.mortbay.jetty</groupId>
- <artifactId>jetty</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.mortbay.jetty</groupId>
- <artifactId>jetty-util</artifactId>
- </exclusion>
- <exclusion>
- <groupId>tomcat</groupId>
- <artifactId>jasper-runtime</artifactId>
- </exclusion>
- <exclusion>
- <groupId>tomcat</groupId>
- <artifactId>jasper-compiler</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.jboss.netty</groupId>
- <artifactId>netty</artifactId>
- </exclusion>
- <exclusion>
- <groupId>io.netty</groupId>
- <artifactId>netty</artifactId>
- </exclusion>
- </exclusions>
- </dependency>
- <dependency>
- <groupId>org.apache.hbase</groupId>
- <artifactId>hbase-hadoop2-compat</artifactId>
- <scope>test</scope>
- <type>test-jar</type>
- <exclusions>
- <exclusion>
- <groupId>log4j</groupId>
- <artifactId>log4j</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.apache.thrift</groupId>
- <artifactId>thrift</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.slf4j</groupId>
- <artifactId>slf4j-log4j12</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.mortbay.jetty</groupId>
- <artifactId>jsp-2.1</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.mortbay.jetty</groupId>
- <artifactId>jsp-api-2.1</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.mortbay.jetty</groupId>
- <artifactId>servlet-api-2.5</artifactId>
- </exclusion>
- <exclusion>
- <groupId>com.sun.jersey</groupId>
- <artifactId>jersey-core</artifactId>
- </exclusion>
- <exclusion>
- <groupId>com.sun.jersey</groupId>
- <artifactId>jersey-json</artifactId>
- </exclusion>
- <exclusion>
- <groupId>com.sun.jersey</groupId>
- <artifactId>jersey-server</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.mortbay.jetty</groupId>
- <artifactId>jetty</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.mortbay.jetty</groupId>
- <artifactId>jetty-util</artifactId>
- </exclusion>
- <exclusion>
- <groupId>tomcat</groupId>
- <artifactId>jasper-runtime</artifactId>
- </exclusion>
- <exclusion>
- <groupId>tomcat</groupId>
- <artifactId>jasper-compiler</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.jboss.netty</groupId>
- <artifactId>netty</artifactId>
- </exclusion>
- <exclusion>
- <groupId>io.netty</groupId>
- <artifactId>netty</artifactId>
- </exclusion>
- </exclusions>
- </dependency>
- <dependency>
- <!-- This is needed. Test does remove test dir
- which is in hbase zk testing utility which is
- in this package. mvn dependency:analyze can't
- see this. -->
- <groupId>org.apache.hbase</groupId>
- <artifactId>hbase-zookeeper</artifactId>
- <scope>test</scope>
- <type>test-jar</type>
- </dependency>
- <dependency>
- <groupId>org.apache.hbase</groupId>
- <artifactId>hbase-protocol</artifactId>
+ <groupId>org.apache.hbase.connectors.spark</groupId>
+ <artifactId>hbase-spark-protocol-shaded</artifactId>
</dependency>
<dependency>
<groupId>org.apache.yetus</groupId>
@@ -351,13 +141,7 @@
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
- <artifactId>hbase-common</artifactId>
- </dependency>
- <dependency>
- <groupId>org.apache.hbase</groupId>
- <artifactId>hbase-common</artifactId>
- <type>test-jar</type>
- <scope>test</scope>
+ <artifactId>hbase-shaded-testing-util</artifactId>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
@@ -367,35 +151,8 @@
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
- <artifactId>hbase-server</artifactId>
- </dependency>
- <dependency>
- <groupId>org.apache.hbase</groupId>
- <artifactId>hbase-server</artifactId>
- <scope>test</scope>
- <type>test-jar</type>
- </dependency>
- <dependency>
- <groupId>org.apache.hbase</groupId>
- <artifactId>hbase-mapreduce</artifactId>
- </dependency>
- <dependency>
- <groupId>com.google.protobuf</groupId>
- <artifactId>protobuf-java</artifactId>
- </dependency>
- <dependency>
- <groupId>commons-io</groupId>
- <artifactId>commons-io</artifactId>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
- <type>test-jar</type>
- <scope>test</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-mapreduce-client-core</artifactId>
+ <artifactId>hbase-shaded-mapreduce</artifactId>
+ <version>${hbase.version}</version>
</dependency>
<dependency>
<groupId>org.apache.avro</groupId>
@@ -417,6 +174,7 @@
<version>${spark.version}</version>
</dependency>
</dependencies>
+
<build>
<plugins>
<plugin>
@@ -458,19 +216,6 @@
</executions>
</plugin>
<plugin>
- <groupId>org.xolstice.maven.plugins</groupId>
- <artifactId>protobuf-maven-plugin</artifactId>
- <executions>
- <execution>
- <id>compile-protoc</id>
- <phase>generate-sources</phase>
- <goals>
- <goal>compile</goal>
- </goals>
- </execution>
- </executions>
- </plugin>
- <plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-enforcer-plugin</artifactId>
<executions>
@@ -516,6 +261,7 @@
</plugin>
</plugins>
</build>
+
<profiles>
<!-- Skip the tests in this module -->
<profile>
@@ -769,4 +515,5 @@
</build>
</profile>
</profiles>
+
</project>
diff --git a/spark/hbase-spark/src/main/java/org/apache/hadoop/hbase/spark/SparkSQLPushDownFilter.java b/spark/hbase-spark/src/main/java/org/apache/hadoop/hbase/spark/SparkSQLPushDownFilter.java
index a17d2e6..57fe22e 100644
--- a/spark/hbase-spark/src/main/java/org/apache/hadoop/hbase/spark/SparkSQLPushDownFilter.java
+++ b/spark/hbase-spark/src/main/java/org/apache/hadoop/hbase/spark/SparkSQLPushDownFilter.java
@@ -17,9 +17,6 @@
package org.apache.hadoop.hbase.spark;
-import com.google.protobuf.ByteString;
-import com.google.protobuf.InvalidProtocolBufferException;
-
import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
@@ -29,7 +26,6 @@ import java.util.Objects;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.exceptions.DeserializationException;
-import org.apache.hadoop.hbase.filter.Filter.ReturnCode;
import org.apache.hadoop.hbase.filter.FilterBase;
import org.apache.hadoop.hbase.spark.datasources.BytesEncoder;
import org.apache.hadoop.hbase.spark.datasources.Field;
@@ -41,6 +37,9 @@ import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import org.apache.hbase.thirdparty.com.google.protobuf.ByteString;
+import org.apache.hbase.thirdparty.com.google.protobuf.InvalidProtocolBufferException;
+
import scala.collection.mutable.MutableList;
/**
@@ -256,17 +255,17 @@ public class SparkSQLPushDownFilter extends FilterBase{
builder.setDynamicLogicExpression(dynamicLogicExpression.toExpressionString());
for (byte[] valueFromQuery: valueFromQueryArray) {
- builder.addValueFromQueryArray(ByteStringer.wrap(valueFromQuery));
+ builder.addValueFromQueryArray(ByteString.copyFrom(valueFromQuery));
}
for (Map.Entry<ByteArrayComparable, HashMap<ByteArrayComparable, String>>
- familyEntry : currentCellToColumnIndexMap.entrySet()) {
+ familyEntry : currentCellToColumnIndexMap.entrySet()) {
for (Map.Entry<ByteArrayComparable, String> qualifierEntry :
- familyEntry.getValue().entrySet()) {
+ familyEntry.getValue().entrySet()) {
columnMappingBuilder.setColumnFamily(
- ByteStringer.wrap(familyEntry.getKey().bytes()));
+ ByteString.copyFrom(familyEntry.getKey().bytes()));
columnMappingBuilder.setQualifier(
- ByteStringer.wrap(qualifierEntry.getKey().bytes()));
+ ByteString.copyFrom(qualifierEntry.getKey().bytes()));
columnMappingBuilder.setColumnName(qualifierEntry.getValue());
builder.addCellToColumnMapping(columnMappingBuilder.build());
}
diff --git a/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/JavaBytesEncoder.scala b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/JavaBytesEncoder.scala
index 95d4547..a8d804d 100644
--- a/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/JavaBytesEncoder.scala
+++ b/spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/JavaBytesEncoder.scala
@@ -17,12 +17,11 @@
package org.apache.hadoop.hbase.spark.datasources
-import org.apache.hadoop.hbase.HBaseInterfaceAudience;
+import org.apache.hadoop.hbase.HBaseInterfaceAudience
import org.apache.hadoop.hbase.spark.Logging
-import org.apache.yetus.audience.InterfaceAudience;
-import org.apache.yetus.audience.InterfaceStability;
import org.apache.hadoop.hbase.spark.datasources.JavaBytesEncoder.JavaBytesEncoder
-import org.apache.hadoop.hbase.util.Bytes
+import org.apache.yetus.audience.InterfaceAudience
+import org.apache.yetus.audience.InterfaceStability
import org.apache.spark.sql.types._
/**
diff --git a/spark/pom.xml b/spark/pom.xml
index 2b3b09d..917b376 100644
--- a/spark/pom.xml
+++ b/spark/pom.xml
@@ -21,22 +21,28 @@
*/
-->
<modelVersion>4.0.0</modelVersion>
+
<parent>
<groupId>org.apache.hbase.connectors</groupId>
<artifactId>hbase-connectors</artifactId>
<version>${revision}</version>
<relativePath>../</relativePath>
</parent>
+
<groupId>org.apache.hbase.connectors</groupId>
<artifactId>spark</artifactId>
<packaging>pom</packaging>
<version>${revision}</version>
<name>Apache HBase - Spark</name>
<description>Spark Connectors for Apache HBase</description>
+
<modules>
+ <module>hbase-spark-protocol</module>
+ <module>hbase-spark-protocol-shaded</module>
<module>hbase-spark</module>
<module>hbase-spark-it</module>
</modules>
+
<properties>
<protobuf.plugin.version>0.6.1</protobuf.plugin.version>
<hbase-thirdparty.version>2.1.0</hbase-thirdparty.version>
@@ -47,6 +53,7 @@
<scala.version>2.11.12</scala.version>
<scala.binary.version>2.11</scala.binary.version>
</properties>
+
<dependencyManagement>
<dependencies>
<dependency>
@@ -55,79 +62,23 @@
<version>${revision}</version>
</dependency>
<dependency>
- <groupId>org.apache.hbase.thirdparty</groupId>
- <artifactId>hbase-shaded-miscellaneous</artifactId>
- <version>${hbase-thirdparty.version}</version>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
- <version>${hadoop.version}</version>
- <exclusions>
- <exclusion>
- <groupId>io.netty</groupId>
- <artifactId>netty</artifactId>
- </exclusion>
- <exclusion>
- <groupId>javax.servlet</groupId>
- <artifactId>servlet-api</artifactId>
- </exclusion>
- <exclusion>
- <groupId>xerces</groupId>
- <artifactId>xercesImpl</artifactId>
- </exclusion>
- <exclusion>
- <groupId>com.google.code.findbugs</groupId>
- <artifactId>jsr305</artifactId>
- </exclusion>
- </exclusions>
+ <groupId>org.apache.hbase.connectors.spark</groupId>
+ <artifactId>hbase-spark-protocol</artifactId>
+ <version>${revision}</version>
</dependency>
<dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
- <version>${hadoop.version}</version>
- <type>test-jar</type>
- <scope>test</scope>
- <exclusions>
- <exclusion>
- <groupId>io.netty</groupId>
- <artifactId>netty</artifactId>
- </exclusion>
- <exclusion>
- <groupId>javax.servlet</groupId>
- <artifactId>servlet-api</artifactId>
- </exclusion>
- <exclusion>
- <groupId>xerces</groupId>
- <artifactId>xercesImpl</artifactId>
- </exclusion>
- </exclusions>
+ <groupId>org.apache.hbase.connectors.spark</groupId>
+ <artifactId>hbase-spark-protocol-shaded</artifactId>
+ <version>${revision}</version>
</dependency>
<dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-mapreduce-client-core</artifactId>
- <version>${hadoop.version}</version>
- <exclusions>
- <exclusion>
- <groupId>io.netty</groupId>
- <artifactId>netty</artifactId>
- </exclusion>
- <exclusion>
- <groupId>javax.servlet</groupId>
- <artifactId>servlet-api</artifactId>
- </exclusion>
- <exclusion>
- <groupId>xerces</groupId>
- <artifactId>xercesImpl</artifactId>
- </exclusion>
- <exclusion>
- <groupId>com.google.code.findbugs</groupId>
- <artifactId>jsr305</artifactId>
- </exclusion>
- </exclusions>
+ <groupId>org.apache.hbase.thirdparty</groupId>
+ <artifactId>hbase-shaded-miscellaneous</artifactId>
+ <version>${hbase-thirdparty.version}</version>
</dependency>
</dependencies>
</dependencyManagement>
+
<build>
<pluginManagement>
<plugins>
@@ -150,4 +101,5 @@
</plugins>
</pluginManagement>
</build>
+
</project>