You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@phoenix.apache.org by jm...@apache.org on 2015/12/21 16:37:27 UTC
phoenix git commit: PHOENIX-2503 Multiple Java NoClass/Method Errors
with Spark and Phoenix
Repository: phoenix
Updated Branches:
refs/heads/master 83344f1c5 -> 1c3e9495d
PHOENIX-2503 Multiple Java NoClass/Method Errors with Spark and Phoenix
The calcite dependency in the regular client JAR is pulling in a version
of com.fasterxml.jackson which is incompatible with the Spark runtime.
This patch creates a new assembly artifact, client-spark, which attempts
to include only the client JARs necessary for the Spark integration to
work.
Also made sure the Phoenix driver is explicitly registered in the PhoenixRDD
Project: http://git-wip-us.apache.org/repos/asf/phoenix/repo
Commit: http://git-wip-us.apache.org/repos/asf/phoenix/commit/1c3e9495
Tree: http://git-wip-us.apache.org/repos/asf/phoenix/tree/1c3e9495
Diff: http://git-wip-us.apache.org/repos/asf/phoenix/diff/1c3e9495
Branch: refs/heads/master
Commit: 1c3e9495d0e04d30719679907511cac21e81363c
Parents: 83344f1
Author: Josh Mahonin <jm...@gmail.com>
Authored: Mon Dec 21 10:30:36 2015 -0500
Committer: Josh Mahonin <jm...@gmail.com>
Committed: Mon Dec 21 10:35:03 2015 -0500
----------------------------------------------------------------------
phoenix-assembly/pom.xml | 17 ++++
phoenix-assembly/src/build/client-spark.xml | 87 ++++++++++++++++++++
.../org/apache/phoenix/spark/PhoenixRDD.scala | 6 ++
3 files changed, 110 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/phoenix/blob/1c3e9495/phoenix-assembly/pom.xml
----------------------------------------------------------------------
diff --git a/phoenix-assembly/pom.xml b/phoenix-assembly/pom.xml
index cdae90a..ab8f42b 100644
--- a/phoenix-assembly/pom.xml
+++ b/phoenix-assembly/pom.xml
@@ -64,6 +64,23 @@
</descriptors>
</configuration>
</execution>
+ <!-- Due to SPARK-8332 and Calcite's fasterxml dependency, we need a custom spark client -->
+ <execution>
+ <id>client-spark</id>
+ <phase>package</phase>
+ <goals>
+ <goal>single</goal>
+ </goals>
+ <configuration>
+ <finalName>phoenix-${project.version}</finalName>
+ <attach>false</attach>
+ <appendAssemblyId>true</appendAssemblyId>
+ <descriptors>
+ <!-- build the phoenix spark client jar -->
+ <descriptor>src/build/client-spark.xml</descriptor>
+ </descriptors>
+ </configuration>
+ </execution>
<execution>
<id>client-minimal</id>
<phase>package</phase>
http://git-wip-us.apache.org/repos/asf/phoenix/blob/1c3e9495/phoenix-assembly/src/build/client-spark.xml
----------------------------------------------------------------------
diff --git a/phoenix-assembly/src/build/client-spark.xml b/phoenix-assembly/src/build/client-spark.xml
new file mode 100644
index 0000000..748f1d8
--- /dev/null
+++ b/phoenix-assembly/src/build/client-spark.xml
@@ -0,0 +1,87 @@
+<?xml version='1.0'?>
+<!--
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+
+-->
+
+<!-- Due to SPARK-8332 and Calcite's fasterxml dependency, we need a custom spark client -->
+<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
+ <id>client-spark</id>
+ <!-- All the dependencies (unpacked) necessary to run phoenix from a single, stand-alone jar -->
+ <formats>
+ <format>jar</format>
+ </formats>
+ <includeBaseDirectory>false</includeBaseDirectory>
+
+ <containerDescriptorHandlers>
+ <containerDescriptorHandler>
+ <!--
+ aggregate SPI's so that things like HDFS FileSystem works in uberjar
+ http://docs.oracle.com/javase/tutorial/sound/SPI-intro.html
+ -->
+ <handlerName>metaInf-services</handlerName>
+ </containerDescriptorHandler>
+ </containerDescriptorHandlers>
+
+ <componentDescriptors>
+ <componentDescriptor>src/build/components-minimal.xml</componentDescriptor>
+ </componentDescriptors>
+
+ <dependencySets>
+ <dependencySet>
+ <!-- Unpack all the dependencies to class files, since java doesn't support
+ jar of jars for running -->
+ <unpack>true</unpack>
+ <!-- save these dependencies to the top-level -->
+ <outputDirectory>/</outputDirectory>
+ <includes>
+ <include>org.apache.hbase:hbase*</include>
+ <include>org.apache.htrace:htrace-core</include>
+ <include>io.netty:netty-all</include>
+ <include>commons-codec:commons-codec</include>
+ <include>co.cask.tephra:tephra*</include>
+ <include>org.apache.twill:twill*</include>
+ <include>org.apache.thrift:*</include>
+ <include>com.google.code.gson:gson*</include>
+ <!-- We use a newer version of guava than HBase - this might be an issue? -->
+ <include>com.google.guava:guava</include>
+ <!-- HBase also pulls in these dependencies on its own, should we include them? -->
+ <include>com.google.protobuf:protobuf-java</include>
+ <include>org.slf4j:slf4j-api</include>
+ <include>org.apache.zookeeper:zookeeper</include>
+ <include>log4j:log4j</include>
+ <include>org.apache.hadoop:hadoop*</include>
+ <include>commons-configuration:commons-configuration</include>
+ <include>commons-io:commons-io</include>
+ <include>commons-logging:commons-logging</include>
+ <include>commons-lang:commons-lang</include>
+ <include>commons-cli:commons-cli</include>
+ <include>org.apache.commons:commons-csv</include>
+ <include>org.codehaus.jackson:jackson-mapper-asl</include>
+ <include>org.codehaus.jackson:jackson-core-asl</include>
+ <include>commons-collections:commons-collections</include>
+ <include>joda-time:joda-time</include>
+ <include>org.jruby.joni:joni</include>
+ <include>org.jruby.jcodings:jcodings</include>
+ </includes>
+ </dependencySet>
+ </dependencySets>
+</assembly>
http://git-wip-us.apache.org/repos/asf/phoenix/blob/1c3e9495/phoenix-spark/src/main/scala/org/apache/phoenix/spark/PhoenixRDD.scala
----------------------------------------------------------------------
diff --git a/phoenix-spark/src/main/scala/org/apache/phoenix/spark/PhoenixRDD.scala b/phoenix-spark/src/main/scala/org/apache/phoenix/spark/PhoenixRDD.scala
index fa36a1f..d79189b 100644
--- a/phoenix-spark/src/main/scala/org/apache/phoenix/spark/PhoenixRDD.scala
+++ b/phoenix-spark/src/main/scala/org/apache/phoenix/spark/PhoenixRDD.scala
@@ -13,9 +13,12 @@
*/
package org.apache.phoenix.spark
+import java.sql.DriverManager
+
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.hbase.{HBaseConfiguration, HConstants}
import org.apache.hadoop.io.NullWritable
+import org.apache.phoenix.jdbc.PhoenixDriver
import org.apache.phoenix.mapreduce.PhoenixInputFormat
import org.apache.phoenix.mapreduce.util.PhoenixConfigurationUtil
import org.apache.phoenix.schema.types._
@@ -32,6 +35,9 @@ class PhoenixRDD(sc: SparkContext, table: String, columns: Seq[String],
@transient conf: Configuration)
extends RDD[PhoenixRecordWritable](sc, Nil) with Logging {
+ // Make sure to register the Phoenix driver
+ DriverManager.registerDriver(new PhoenixDriver)
+
@transient lazy val phoenixConf = {
getPhoenixConfiguration
}