Posted to issues@spark.apache.org by "Jerry Chabot (JIRA)" <ji...@apache.org> on 2018/11/02 21:14:00 UTC

[jira] [Commented] (SPARK-25928) NoSuchMethodError net.jpountz.lz4.LZ4BlockInputStream.<init>(Ljava/io/InputStream;Z)V

    [ https://issues.apache.org/jira/browse/SPARK-25928?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16673680#comment-16673680 ] 

Jerry Chabot commented on SPARK-25928:
--------------------------------------

I noticed our uber jar had Spark classes in it, so I removed them. The job ran longer, but failed with the same error:

2018-11-02 20:59:30,212 [main] INFO org.apache.spark.deploy.yarn.Client - 
 client token: N/A
 diagnostics: User class threw exception: org.apache.spark.SparkException: Job aborted.
 at org.apache.spark.internal.io.SparkHadoopWriter$.write(SparkHadoopWriter.scala:96)
 at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1.apply$mcV$sp(PairRDDFunctions.scala:1096)
 at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1.apply(PairRDDFunctions.scala:1094)
 at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1.apply(PairRDDFunctions.scala:1094)
 at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
 at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
 at org.apache.spark.rdd.RDD.withScope(RDD.scala:363)
 at org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopDataset(PairRDDFunctions.scala:1094)
 at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$4.apply$mcV$sp(PairRDDFunctions.scala:1067)
 at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$4.apply(PairRDDFunctions.scala:1032)
 at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$4.apply(PairRDDFunctions.scala:1032)
 at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
 at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
 at org.apache.spark.rdd.RDD.withScope(RDD.scala:363)
 at org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopFile(PairRDDFunctions.scala:1032)
 at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$1.apply$mcV$sp(PairRDDFunctions.scala:958)
 at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$1.apply(PairRDDFunctions.scala:958)
 at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$1.apply(PairRDDFunctions.scala:958)
 at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
 at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
 at org.apache.spark.rdd.RDD.withScope(RDD.scala:363)
 at org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopFile(PairRDDFunctions.scala:957)
 at org.apache.spark.rdd.RDD$$anonfun$saveAsTextFile$1.apply$mcV$sp(RDD.scala:1493)
 at org.apache.spark.rdd.RDD$$anonfun$saveAsTextFile$1.apply(RDD.scala:1472)
 at org.apache.spark.rdd.RDD$$anonfun$saveAsTextFile$1.apply(RDD.scala:1472)
 at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
 at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
 at org.apache.spark.rdd.RDD.withScope(RDD.scala:363)
 at org.apache.spark.rdd.RDD.saveAsTextFile(RDD.scala:1472)
 at org.apache.spark.api.java.JavaRDDLike$class.saveAsTextFile(JavaRDDLike.scala:550)
 at org.apache.spark.api.java.AbstractJavaRDDLike.saveAsTextFile(JavaRDDLike.scala:45)
 at com.hpe.cmx.validate.blocks.ValidateBlocks.execute(ValidateBlocks.java:218)
 at com.hpe.cmx.validate.blocks.ValidateBlocks.main(ValidateBlocks.java:157)
 at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
 at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
 at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
 at java.lang.reflect.Method.invoke(Method.java:498)
 at org.apache.spark.deploy.yarn.ApplicationMaster$$anon$4.run(ApplicationMaster.scala:721)
Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: Task 2 in stage 14.0 failed 4 times, most recent failure: Lost task 2.3 in stage 14.0 (TID 196, ip-172-27-113-49.ec2.internal, executor 2): java.lang.NoSuchMethodError: net.jpountz.lz4.LZ4BlockInputStream.<init>(Ljava/io/InputStream;Z)V
 at org.apache.spark.io.LZ4CompressionCodec.compressedInputStream(CompressionCodec.scala:122)
 at org.apache.spark.broadcast.TorrentBroadcast$$anonfun$6.apply(TorrentBroadcast.scala:304)
 at org.apache.spark.broadcast.TorrentBroadcast$$anonfun$6.apply(TorrentBroadcast.scala:304)
 at scala.Option.map(Option.scala:146)
 at org.apache.spark.broadcast.TorrentBroadcast$.unBlockifyObject(TorrentBroadcast.scala:304)
 at org.apache.spark.broadcast.TorrentBroadcast$$anonfun$readBroadcastBlock$1$$anonfun$apply$2.apply(TorrentBroadcast.scala:235)
 at scala.Option.getOrElse(Option.scala:121)
 at org.apache.spark.broadcast.TorrentBroadcast$$anonfun$readBroadcastBlock$1.apply(TorrentBroadcast.scala:211)
 at org.apache.spark.util.Utils$.tryOrIOException(Utils.scala:1346)
 at org.apache.spark.broadcast.TorrentBroadcast.readBroadcastBlock(TorrentBroadcast.scala:207)
 at org.apache.spark.broadcast.TorrentBroadcast._value$lzycompute(TorrentBroadcast.scala:66)
 at org.apache.spark.broadcast.TorrentBroadcast._value(TorrentBroadcast.scala:66)
 at org.apache.spark.broadcast.TorrentBroadcast.getValue(TorrentBroadcast.scala:96)
 at org.apache.spark.broadcast.Broadcast.value(Broadcast.scala:70)
 at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:81)
 at org.apache.spark.scheduler.Task.run(Task.scala:109)
 at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)
 at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
 at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
 at java.lang.Thread.run(Thread.java:748)
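
One way to confirm which jar the class is actually resolved from at runtime is to log its code source. A minimal sketch using only standard JDK calls (the class name comes straight from the error; the wrapper class name is made up; run it on the driver and also inside a task closure, since the failure is on the executor):

    import java.security.CodeSource;
    import net.jpountz.lz4.LZ4BlockInputStream;

    public class WhichLz4 {
        public static void main(String[] args) {
            // Prints the jar this JVM loaded the class from,
            // e.g. file:/.../lz4-1.3.0.jar vs file:/.../lz4-java-1.4.0.jar
            CodeSource src = LZ4BlockInputStream.class
                    .getProtectionDomain().getCodeSource();
            System.out.println(src == null ? "(bootstrap)" : src.getLocation());
        }
    }

The same println can be dropped into a map() function to see what the executors load.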

> NoSuchMethodError net.jpountz.lz4.LZ4BlockInputStream.<init>(Ljava/io/InputStream;Z)V
> -------------------------------------------------------------------------------------
>
>                 Key: SPARK-25928
>                 URL: https://issues.apache.org/jira/browse/SPARK-25928
>             Project: Spark
>          Issue Type: Bug
>          Components: Spark Submit
>    Affects Versions: 2.3.1
>         Environment: EMR 5.17 which is using oozie 5.0.0 and spark 2.3.1
>            Reporter: Jerry Chabot
>            Priority: Major
>
> I am not sure whether this is an Oozie problem, a Spark problem, or a user error. It is blocking our upcoming release.
> We are upgrading from Amazon's EMR 5.7 to EMR 5.17. The version changes are:
>     Oozie 4.3.0 -> 5.0.0
>     Spark 2.1.1 -> 2.3.1
> All our Oozie/Spark jobs were working in EMR 5.7. After upgrading, some of our jobs which use a Spark action are failing with the NoSuchMethodError shown later in this description. It seems like conflicting classes.
> I noticed the spark sharelib directory has two versions of the LZ4 jar:
> sudo -u hdfs hadoop fs -ls /user/oozie/share/lib/lib_20181029182704/spark/*lz*
>  -rw-r--r--   3 oozie oozie      79845 2018-10-29 18:27 /user/oozie/share/lib/lib_20181029182704/spark/compress-lzf-1.0.3.jar
>  -rw-r--r--   3 hdfs  oozie     236880 2018-11-01 18:22 /user/oozie/share/lib/lib_20181029182704/spark/lz4-1.3.0.jar
>  -rw-r--r--   3 oozie oozie     370119 2018-10-29 18:27 /user/oozie/share/lib/lib_20181029182704/spark/lz4-java-1.4.0.jar
> Both of these jars have the one-argument constructor LZ4BlockInputStream(java.io.InputStream), but the method in the error is the two-argument constructor <init>(java.io.InputStream, boolean), which lz4-1.3.0 predates. The spark/jars directory has only lz4-java-1.4.0.jar; the sharelib seems to be getting it from /usr/lib/oozie/oozie-sharelib.tar.gz.
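> A quick way to double-check which constructors each jar really exposes is reflection. A minimal sketch (class name taken from the error above; jar paths are the ones listed; the helper class name is made up):
>
>     import java.lang.reflect.Constructor;
>
>     public class Lz4CtorCheck {
>         public static void main(String[] args) throws Exception {
>             Class<?> c = Class.forName("net.jpountz.lz4.LZ4BlockInputStream");
>             // Which jar on the classpath supplied the class
>             System.out.println(c.getProtectionDomain().getCodeSource().getLocation());
>             // The failing call is <init>(java.io.InputStream, boolean);
>             // whichever jar does not list it here is the stale one
>             for (Constructor<?> ctor : c.getDeclaredConstructors()) {
>                 System.out.println(ctor);
>             }
>         }
>     }
>
> Running it once per jar (java -cp lz4-1.3.0.jar:. Lz4CtorCheck, then the same with lz4-java-1.4.0.jar) shows which one is missing the two-argument constructor.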
> Unfortunately, the team member who knows the most about Spark is on vacation. Does anyone have suggestions on how best to troubleshoot this problem?
> Here is the stack trace.
>       diagnostics: User class threw exception: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 0.0 failed 4 times, most recent failure: Lost task 0.3 in stage 0.0 (TID 3, ip-172-27-113-49.ec2.internal, executor 2): java.lang.NoSuchMethodError: net.jpountz.lz4.LZ4BlockInputStream.<init>(Ljava/io/InputStream;Z)V
>         at org.apache.spark.io.LZ4CompressionCodec.compressedInputStream(CompressionCodec.scala:122)
>         at org.apache.spark.broadcast.TorrentBroadcast$$anonfun$6.apply(TorrentBroadcast.scala:304)
>         at org.apache.spark.broadcast.TorrentBroadcast$$anonfun$6.apply(TorrentBroadcast.scala:304)
>         at scala.Option.map(Option.scala:146)
>         at org.apache.spark.broadcast.TorrentBroadcast$.unBlockifyObject(TorrentBroadcast.scala:304)
>         at org.apache.spark.broadcast.TorrentBroadcast$$anonfun$readBroadcastBlock$1$$anonfun$apply$2.apply(TorrentBroadcast.scala:235)
>         at scala.Option.getOrElse(Option.scala:121)
>         at org.apache.spark.broadcast.TorrentBroadcast$$anonfun$readBroadcastBlock$1.apply(TorrentBroadcast.scala:211)
>         at org.apache.spark.util.Utils$.tryOrIOException(Utils.scala:1346)
>         at org.apache.spark.broadcast.TorrentBroadcast.readBroadcastBlock(TorrentBroadcast.scala:207)
>         at org.apache.spark.broadcast.TorrentBroadcast._value$lzycompute(TorrentBroadcast.scala:66)
>         at org.apache.spark.broadcast.TorrentBroadcast._value(TorrentBroadcast.scala:66)
>         at org.apache.spark.broadcast.TorrentBroadcast.getValue(TorrentBroadcast.scala:96)
>         at org.apache.spark.broadcast.Broadcast.value(Broadcast.scala:70)
>         at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:86)
>         at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53)
>         at org.apache.spark.scheduler.Task.run(Task.scala:109)
>         at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)
>         at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>         at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>         at java.lang.Thread.run(Thread.java:748)


