You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@spark.apache.org by Rachana Srivastava <Ra...@markmonitor.com> on 2015/09/29 04:01:31 UTC
spark-submit classloader issue...
Hello all,
Goal: I want to use APIs from HttpClient library 4.4.1. I am using maven shaded plugin to generate JAR.
Findings: When I run my program as a Java application within Eclipse, everything works fine. But when I run the program using spark-submit, I am getting the following error:
URL content Could not initialize class org.apache.http.conn.ssl.SSLConnectionSocketFactory
java.lang.NoClassDefFoundError: Could not initialize class org.apache.http.conn.ssl.SSLConnectionSocketFactory
When I tried to get the referred JAR it is pointing to some Hadoop JAR, I am assuming this is something set in spark-submit.
ClassLoader classLoader = HttpEndPointClient.class.getClassLoader();
URL resource = classLoader.getResource("org/apache/http/message/BasicLineFormatter.class");
Prints following jar:
jar:file:/usr/lib/hadoop/lib/httpcore-4.2.5.jar!/org/apache/http/message/BasicLineFormatter.class
After research I found that I can override --conf spark.files.userClassPathFirst=true --conf spark.yarn.user.classpath.first=true
But when I do that I am getting the following error:
ERROR: org.apache.spark.executor.Executor - Exception in task 0.0 in stage 0.0 (TID 0)
java.io.InvalidClassException: org.apache.spark.scheduler.Task; local class incompatible: stream classdesc serialVersionUID = -4703555755588060120, local class serialVersionUID = -1589734467697262504
at java.io.ObjectStreamClass.initNonProxy(ObjectStreamClass.java:617)
at java.io.ObjectInputStream.readNonProxyDesc(ObjectInputStream.java:1622)
at java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1517)
at java.io.ObjectInputStream.readNonProxyDesc(ObjectInputStream.java:1622)
at java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1517)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1771)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1350)
at java.io.ObjectInputStream.readObject(ObjectInputStream.java:370)
at org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:68)
at org.apache.spark.serializer.JavaSerializerInstance.deserialize(JavaSerializer.scala:94)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:185)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
I am running on CDH 5.4 Here is my complete pom file.
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd<http://maven.apache.org/POM/4.0.0%20http:/maven.apache.org/xsd/maven-4.0.0.xsd>">
<modelVersion>4.0.0</modelVersion>
<groupId>test</groupId>
<artifactId>test</artifactId>
<version>0.0.1-SNAPSHOT</version>
<dependencies>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpcore</artifactId>
<version>4.4.1</version>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.4.1</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming-kafka_2.10</artifactId>
<version>1.5.0</version>
<exclusions>
<exclusion>
<artifactId>httpcore</artifactId>
<groupId>org.apache.httpcomponents</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming_2.10</artifactId>
<version>1.5.0</version>
<exclusions>
<exclusion>
<artifactId>httpcore</artifactId>
<groupId>org.apache.httpcomponents</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.10</artifactId>
<version>1.5.0</version>
<exclusions>
<exclusion>
<artifactId>httpcore</artifactId>
<groupId>org.apache.httpcomponents</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-mllib_2.10</artifactId>
<version>1.5.0</version>
<exclusions>
<exclusion>
<artifactId>httpcore</artifactId>
<groupId>org.apache.httpcomponents</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.json</groupId>
<artifactId>json</artifactId>
<version>20140107</version>
</dependency>
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.8.3</version>
</dependency>
</dependencies>
<build>
<sourceDirectory>src/main/java</sourceDirectory>
<testSourceDirectory>src/test/java</testSourceDirectory>
<resources>
<resource>
<directory>src/main/resources</directory>
</resource>
</resources>
<plugins>
<!-- download source code in Eclipse, best practice -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-eclipse-plugin</artifactId>
<version>2.9</version>
<configuration>
<downloadSources>true</downloadSources>
<downloadJavadocs>false</downloadJavadocs>
</configuration>
</plugin>
<!-- Set a compiler level -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>2.3.2</version>
</plugin>
<!-- Maven Shade Plugin -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>2.3</version>
<executions>
<!-- Run shade goal on package phase -->
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<filters>
<filter>
<artifact>*:*</artifact>
<excludes>
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
</excludes>
</filter>
</filters>
<transformers>
<transformer
implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
</transformer>
</transformers>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
Thanks,
Rachana
Re: spark-submit classloader issue...
Posted by Aniket Bhatnagar <an...@gmail.com>.
Hi Rachana
Can you just use the HttpClient provided via Spark's transitive dependencies
instead of excluding them?
The reason "user classpath first" is failing could be that you have Spark
artifacts in your assembly jar that don't match what is deployed
(a version mismatch, or you built the version yourself, etc.)
Thanks,
Aniket
On Tue, Sep 29, 2015, 7:31 AM Rachana Srivastava <
Rachana.Srivastava@markmonitor.com> wrote:
> Hello all,
>
> *Goal:* I want to use APIs from HttpClient library 4.4.1. I am using
> maven shaded plugin to generate JAR.
>
>
>
> *Findings:* When I run my program as a *java application within eclipse
> everything works fine*. But when I am running the program using
> *spark-submit* I am getting following error:
>
> URL content Could not initialize class
> org.apache.http.conn.ssl.SSLConnectionSocketFactory
>
> java.lang.NoClassDefFoundError: Could not initialize class
> org.apache.http.conn.ssl.SSLConnectionSocketFactory
>
>
>
> When I tried to get the referred JAR it is pointing to some Hadoop JAR, I
> am assuming this is something set in spark-submit.
>
>
>
> ClassLoader classLoader = HttpEndPointClient.class.getClassLoader();
>
> URL resource =
> classLoader.getResource("org/apache/http/message/BasicLineFormatter.class");
>
> Prints following jar:
>
>
> jar:file:/usr/lib/hadoop/lib/httpcore-4.2.5.jar!/org/apache/http/message/BasicLineFormatter.class
>
>
>
> After research I found that I can override *--conf
> spark.files.userClassPathFirst=true --conf
> spark.yarn.user.classpath.first=true*
>
>
>
> But when I do that I am getting following error:
>
> ERROR: org.apache.spark.executor.Executor - Exception in task 0.0 in stage
> 0.0 (TID 0)
>
> java.io.InvalidClassException: org.apache.spark.scheduler.Task; local
> class incompatible: stream classdesc serialVersionUID =
> -4703555755588060120, local class serialVersionUID = -1589734467697262504
>
> at
> java.io.ObjectStreamClass.initNonProxy(ObjectStreamClass.java:617)
>
> at
> java.io.ObjectInputStream.readNonProxyDesc(ObjectInputStream.java:1622)
>
> at
> java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1517)
>
> at
> java.io.ObjectInputStream.readNonProxyDesc(ObjectInputStream.java:1622)
>
> at
> java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1517)
>
> at
> java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1771)
>
> at
> java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1350)
>
> at
> java.io.ObjectInputStream.readObject(ObjectInputStream.java:370)
>
> at
> org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:68)
>
> at
> org.apache.spark.serializer.JavaSerializerInstance.deserialize(JavaSerializer.scala:94)
>
> at
> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:185)
>
> at
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
>
> at
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
>
> at java.lang.Thread.run(Thread.java:745)
>
>
>
> I am running on CDH 5.4 Here is my complete pom file.
>
>
>
> <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="
> http://www.w3.org/2001/XMLSchema-instance"
>
> xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
> http://maven.apache.org/xsd/maven-4.0.0.xsd
> <http://maven.apache.org/POM/4.0.0%20http:/maven.apache.org/xsd/maven-4.0.0.xsd>
> ">
>
> <modelVersion>4.0.0</modelVersion>
>
> <groupId>test</groupId>
>
> <artifactId>test</artifactId>
>
> <version>0.0.1-SNAPSHOT</version>
>
> <dependencies>
>
> <dependency>
>
>
> <groupId>org.apache.httpcomponents</groupId>
>
>
> <artifactId>httpcore</artifactId>
>
> <version>4.4.1</version>
>
> </dependency>
>
> <dependency>
>
>
> <groupId>org.apache.httpcomponents</groupId>
>
>
> <artifactId>httpclient</artifactId>
>
> <version>4.4.1</version>
>
> </dependency>
>
> <dependency>
>
>
> <groupId>org.apache.spark</groupId>
>
>
> <artifactId>spark-streaming-kafka_2.10</artifactId>
>
> <version>1.5.0</version>
>
> <exclusions>
>
> <exclusion>
>
> <artifactId>httpcore</artifactId>
>
> <groupId>org.apache.httpcomponents</groupId>
>
> </exclusion>
>
> </exclusions>
>
> </dependency>
>
> <dependency>
>
>
> <groupId>org.apache.spark</groupId>
>
>
> <artifactId>spark-streaming_2.10</artifactId>
>
> <version>1.5.0</version>
>
> <exclusions>
>
> <exclusion>
>
> <artifactId>httpcore</artifactId>
>
> <groupId>org.apache.httpcomponents</groupId>
>
> </exclusion>
>
> </exclusions>
>
> </dependency>
>
> <dependency>
>
>
> <groupId>org.apache.spark</groupId>
>
>
> <artifactId>spark-core_2.10</artifactId>
>
> <version>1.5.0</version>
>
> <exclusions>
>
> <exclusion>
>
> <artifactId>httpcore</artifactId>
>
> <groupId>org.apache.httpcomponents</groupId>
>
> </exclusion>
>
> </exclusions>
>
> </dependency>
>
> <dependency>
>
>
> <groupId>org.apache.spark</groupId>
>
>
> <artifactId>spark-mllib_2.10</artifactId>
>
> <version>1.5.0</version>
>
> <exclusions>
>
> <exclusion>
>
> <artifactId>httpcore</artifactId>
>
> <groupId>org.apache.httpcomponents</groupId>
>
> </exclusion>
>
> </exclusions>
>
> </dependency>
>
> <dependency>
>
> <groupId>org.json</groupId>
>
>
> <artifactId>json</artifactId>
>
> <version>20140107</version>
>
> </dependency>
>
> <dependency>
>
>
> <groupId>org.jsoup</groupId>
>
>
> <artifactId>jsoup</artifactId>
>
> <version>1.8.3</version>
>
> </dependency>
>
> </dependencies>
>
> <build>
>
>
> <sourceDirectory>src/main/java</sourceDirectory>
>
>
> <testSourceDirectory>src/test/java</testSourceDirectory>
>
> <resources>
>
> <resource>
>
>
> <directory>src/main/resources</directory>
>
> </resource>
>
> </resources>
>
> <plugins>
>
> <!-- download source code
> in Eclipse, best practice -->
>
> <plugin>
>
>
> <groupId>org.apache.maven.plugins</groupId>
>
>
> <artifactId>maven-eclipse-plugin</artifactId>
>
>
> <version>2.9</version>
>
>
> <configuration>
>
>
> <downloadSources>true</downloadSources>
>
>
> <downloadJavadocs>false</downloadJavadocs>
>
>
> </configuration>
>
> </plugin>
>
>
>
> <!-- Set a compiler level
> -->
>
> <plugin>
>
>
> <groupId>org.apache.maven.plugins</groupId>
>
>
> <artifactId>maven-compiler-plugin</artifactId>
>
>
> <version>2.3.2</version>
>
> </plugin>
>
> <!-- Maven Shade Plugin -->
>
> <plugin>
>
>
> <groupId>org.apache.maven.plugins</groupId>
>
>
> <artifactId>maven-shade-plugin</artifactId>
>
>
> <version>2.3</version>
>
>
> <executions>
>
>
> <!-- Run shade goal on package phase -->
>
>
> <execution>
>
>
> <phase>package</phase>
>
>
> <goals>
>
>
> <goal>shade</goal>
>
>
> </goals>
>
>
> <configuration>
>
>
> <filters>
>
>
>
> <filter>
>
>
> <artifact>*:*</artifact>
>
>
> <excludes>
>
>
> <exclude>META-INF/*.SF</exclude>
>
>
> <exclude>META-INF/*.DSA</exclude>
>
>
> <exclude>META-INF/*.RSA</exclude>
>
>
> </excludes>
>
>
> </filter>
>
>
> </filters>
>
>
> <transformers>
>
>
> <transformer
>
>
>
> implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
>
>
> </transformer>
>
>
> </transformers>
>
>
> </configuration>
>
>
> </execution>
>
>
> </executions>
>
> </plugin>
>
> </plugins>
>
> </build>
>
> </project>
>
>
>
> Thanks,
>
>
>
> Rachana
>
>
>