You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@phoenix.apache.org by jm...@apache.org on 2015/12/21 16:37:27 UTC

phoenix git commit: PHOENIX-2503 Multiple Java NoClass/Method Errors with Spark and Phoenix

Repository: phoenix
Updated Branches:
  refs/heads/master 83344f1c5 -> 1c3e9495d


PHOENIX-2503 Multiple Java NoClass/Method Errors with Spark and Phoenix

The calcite dependency in the regular client JAR is pulling in a version
of com.fasterxml.jackson which is incompatible with the Spark runtime.

This patch creates a new assembly artifact, client-spark, which attempts
to include only the client JARs necessary for the Spark integration to
work.

Also made sure the Phoenix driver is explicitly registered in the PhoenixRDD


Project: http://git-wip-us.apache.org/repos/asf/phoenix/repo
Commit: http://git-wip-us.apache.org/repos/asf/phoenix/commit/1c3e9495
Tree: http://git-wip-us.apache.org/repos/asf/phoenix/tree/1c3e9495
Diff: http://git-wip-us.apache.org/repos/asf/phoenix/diff/1c3e9495

Branch: refs/heads/master
Commit: 1c3e9495d0e04d30719679907511cac21e81363c
Parents: 83344f1
Author: Josh Mahonin <jm...@gmail.com>
Authored: Mon Dec 21 10:30:36 2015 -0500
Committer: Josh Mahonin <jm...@gmail.com>
Committed: Mon Dec 21 10:35:03 2015 -0500

----------------------------------------------------------------------
 phoenix-assembly/pom.xml                        | 17 ++++
 phoenix-assembly/src/build/client-spark.xml     | 87 ++++++++++++++++++++
 .../org/apache/phoenix/spark/PhoenixRDD.scala   |  6 ++
 3 files changed, 110 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/phoenix/blob/1c3e9495/phoenix-assembly/pom.xml
----------------------------------------------------------------------
diff --git a/phoenix-assembly/pom.xml b/phoenix-assembly/pom.xml
index cdae90a..ab8f42b 100644
--- a/phoenix-assembly/pom.xml
+++ b/phoenix-assembly/pom.xml
@@ -64,6 +64,23 @@
               </descriptors>
             </configuration>
           </execution>
+          <!-- Due to SPARK-8332 and Calcite's fasterxml dependency, we need a custom spark client -->
+          <execution>
+            <id>client-spark</id>
+            <phase>package</phase>
+            <goals>
+              <goal>single</goal>
+            </goals>
+            <configuration>
+              <finalName>phoenix-${project.version}</finalName>
+              <attach>false</attach>
+              <appendAssemblyId>true</appendAssemblyId>
+              <descriptors>
+                <!-- build the phoenix spark client jar -->
+                <descriptor>src/build/client-spark.xml</descriptor>
+              </descriptors>
+            </configuration>
+          </execution>
           <execution>
             <id>client-minimal</id>
             <phase>package</phase>

http://git-wip-us.apache.org/repos/asf/phoenix/blob/1c3e9495/phoenix-assembly/src/build/client-spark.xml
----------------------------------------------------------------------
diff --git a/phoenix-assembly/src/build/client-spark.xml b/phoenix-assembly/src/build/client-spark.xml
new file mode 100644
index 0000000..748f1d8
--- /dev/null
+++ b/phoenix-assembly/src/build/client-spark.xml
@@ -0,0 +1,87 @@
+<?xml version='1.0'?>
+<!--
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements.  See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership.  The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied.  See the License for the
+ specific language governing permissions and limitations
+ under the License.
+
+-->
+
+<!-- Due to SPARK-8332 and Calcite's fasterxml dependency, we need a custom spark client -->
+<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
+  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
+  <id>client-spark</id>
+  <!-- All the dependencies (unpacked) necessary to run phoenix from a single, stand-alone jar -->
+  <formats>
+    <format>jar</format>
+  </formats>
+  <includeBaseDirectory>false</includeBaseDirectory>
+
+  <containerDescriptorHandlers>
+    <containerDescriptorHandler>
+      <!--
+          aggregate SPI's so that things like HDFS FileSystem works in uberjar
+          http://docs.oracle.com/javase/tutorial/sound/SPI-intro.html
+      -->
+      <handlerName>metaInf-services</handlerName>
+    </containerDescriptorHandler>
+  </containerDescriptorHandlers>
+
+  <componentDescriptors>
+    <componentDescriptor>src/build/components-minimal.xml</componentDescriptor>
+  </componentDescriptors>
+
+  <dependencySets>
+    <dependencySet>
+      <!-- Unpack all the dependencies to class files, since java doesn't support
+        jar of jars for running -->
+      <unpack>true</unpack>
+      <!-- save these dependencies to the top-level -->
+      <outputDirectory>/</outputDirectory>
+      <includes>
+        <include>org.apache.hbase:hbase*</include>
+        <include>org.apache.htrace:htrace-core</include>
+        <include>io.netty:netty-all</include>
+        <include>commons-codec:commons-codec</include>
+        <include>co.cask.tephra:tephra*</include>
+        <include>org.apache.twill:twill*</include>
+        <include>org.apache.thrift:*</include>
+        <include>com.google.code.gson:gson*</include>
+        <!-- We use a newer version of guava than HBase - this might be an issue? -->
+        <include>com.google.guava:guava</include>
+        <!-- HBase also pulls in these dependencies on its own, should we include them? -->
+        <include>com.google.protobuf:protobuf-java</include>
+        <include>org.slf4j:slf4j-api</include>
+        <include>org.apache.zookeeper:zookeeper</include>
+        <include>log4j:log4j</include>
+        <include>org.apache.hadoop:hadoop*</include>
+        <include>commons-configuration:commons-configuration</include>
+        <include>commons-io:commons-io</include>
+        <include>commons-logging:commons-logging</include>
+        <include>commons-lang:commons-lang</include>
+        <include>commons-cli:commons-cli</include>
+        <include>org.apache.commons:commons-csv</include>
+        <include>org.codehaus.jackson:jackson-mapper-asl</include>
+        <include>org.codehaus.jackson:jackson-core-asl</include>
+        <include>commons-collections:commons-collections</include>
+        <include>joda-time:joda-time</include>
+        <include>org.jruby.joni:joni</include>
+        <include>org.jruby.jcodings:jcodings</include>
+      </includes>
+    </dependencySet>
+  </dependencySets>
+</assembly>

http://git-wip-us.apache.org/repos/asf/phoenix/blob/1c3e9495/phoenix-spark/src/main/scala/org/apache/phoenix/spark/PhoenixRDD.scala
----------------------------------------------------------------------
diff --git a/phoenix-spark/src/main/scala/org/apache/phoenix/spark/PhoenixRDD.scala b/phoenix-spark/src/main/scala/org/apache/phoenix/spark/PhoenixRDD.scala
index fa36a1f..d79189b 100644
--- a/phoenix-spark/src/main/scala/org/apache/phoenix/spark/PhoenixRDD.scala
+++ b/phoenix-spark/src/main/scala/org/apache/phoenix/spark/PhoenixRDD.scala
@@ -13,9 +13,12 @@
  */
 package org.apache.phoenix.spark
 
+import java.sql.DriverManager
+
 import org.apache.hadoop.conf.Configuration
 import org.apache.hadoop.hbase.{HBaseConfiguration, HConstants}
 import org.apache.hadoop.io.NullWritable
+import org.apache.phoenix.jdbc.PhoenixDriver
 import org.apache.phoenix.mapreduce.PhoenixInputFormat
 import org.apache.phoenix.mapreduce.util.PhoenixConfigurationUtil
 import org.apache.phoenix.schema.types._
@@ -32,6 +35,9 @@ class PhoenixRDD(sc: SparkContext, table: String, columns: Seq[String],
                  @transient conf: Configuration)
   extends RDD[PhoenixRecordWritable](sc, Nil) with Logging {
 
+  // Make sure to register the Phoenix driver
+  DriverManager.registerDriver(new PhoenixDriver)
+
   @transient lazy val phoenixConf = {
     getPhoenixConfiguration
   }