You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@spark.apache.org by Reinis Vicups <sp...@orbit-x.de> on 2014/12/01 10:50:19 UTC
Spark 1.1.0: weird spark-shell behavior
Hello,
I have two weird effects when working with spark-shell:
1. This code executed in spark-shell causes an exception below. At the
same time it works perfectly when submitted with spark-submit! :
import org.apache.hadoop.hbase.{HConstants, HBaseConfiguration}
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.client.Result
import org.apache.mahout.math.VectorWritable
import com.google.common.io.ByteStreams
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.SparkContext.rddToPairRDDFunctions
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
val hConf = HBaseConfiguration.create()
hConf.set("hbase.defaults.for.version.skip", "true")
hConf.set("hbase.defaults.for.version", "0.98.6-cdh5.2.0")
hConf.set(HConstants.ZOOKEEPER_QUORUM, "myserv")
hConf.set(HConstants.ZOOKEEPER_CLIENT_PORT, "2181")
hConf.set(TableInputFormat.INPUT_TABLE, "MyNS:MyTable")
val rdd = sc.newAPIHadoopRDD(hConf, classOf[TableInputFormat],
classOf[ImmutableBytesWritable], classOf[Result])
rdd.count()
--- Exception ---
14/12/01 10:45:24 ERROR ExecutorUncaughtExceptionHandler: Uncaught
exception in thread Thread[Executor task launch worker-0,5,main]
java.lang.ExceptionInInitializerError
at org.apache.hadoop.hbase.client.HTable.<init>(HTable.java:197)
at org.apache.hadoop.hbase.client.HTable.<init>(HTable.java:159)
at
org.apache.hadoop.hbase.mapreduce.TableInputFormat.setConf(TableInputFormat.java:101)
at
org.apache.spark.rdd.NewHadoopRDD$$anon$1.<init>(NewHadoopRDD.scala:113)
at org.apache.spark.rdd.NewHadoopRDD.compute(NewHadoopRDD.scala:104)
at org.apache.spark.rdd.NewHadoopRDD.compute(NewHadoopRDD.scala:66)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:62)
at org.apache.spark.scheduler.Task.run(Task.scala:54)
at
org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:180)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
Caused by: java.lang.RuntimeException: hbase-default.xml file seems to
be for and old version of HBase (null), this version is 0.98.6-cdh5.2.0
at
org.apache.hadoop.hbase.HBaseConfiguration.checkDefaultsVersion(HBaseConfiguration.java:73)
at
org.apache.hadoop.hbase.HBaseConfiguration.addHbaseResources(HBaseConfiguration.java:105)
at
org.apache.hadoop.hbase.HBaseConfiguration.create(HBaseConfiguration.java:116)
at
org.apache.hadoop.hbase.client.HConnectionManager.<clinit>(HConnectionManager.java:222)
... 14 more
We have already checked most of the trivial stuff with class paths and
existence of tables and column groups, enabled HBase specific settings to
avoid the version checking and so on. It appears that the supplied HBase
configuration is completely ignored by the context. We tried to solve this
issue by instantiating our own spark context and encountered the second
weird effect:
2. when attempting to instantiate own SparkContext we get an exception
below:
// imports block
...
val conf = new SparkConf().setAppName("Simple Application")
val sc = new SparkContext(conf)
--- Exception ---
2014-12-01 10:42:24,966 WARN o.e.j.u.c.AbstractLifeCycle - FAILED
SelectChannelConnector@0.0.0.0:4040: java.net.BindException: Die Adresse
wird bereits verwendet
java.net.BindException: Die Adresse wird bereits verwendet
[Translation: "Die Adresse wird bereits verwendet" is German for "The address is already in use".]
at sun.nio.ch.Net.bind0(Native Method)
at sun.nio.ch.Net.bind(Net.java:444)
at sun.nio.ch.Net.bind(Net.java:436)
at
sun.nio.ch.ServerSocketChannelImpl.bind(ServerSocketChannelImpl.java:214)
at sun.nio.ch.ServerSocketAdaptor.bind(ServerSocketAdaptor.java:74)
at
org.eclipse.jetty.server.nio.SelectChannelConnector.open(SelectChannelConnector.java:187)
at
org.eclipse.jetty.server.AbstractConnector.doStart(AbstractConnector.java:316)
at
org.eclipse.jetty.server.nio.SelectChannelConnector.doStart(SelectChannelConnector.java:265)
at
org.eclipse.jetty.util.component.AbstractLifeCycle.start(AbstractLifeCycle.java:64)
at org.eclipse.jetty.server.Server.doStart(Server.java:293)
at
org.eclipse.jetty.util.component.AbstractLifeCycle.start(AbstractLifeCycle.java:64)
at
org.apache.spark.ui.JettyUtils$.org$apache$spark$ui$JettyUtils$$connect$1(JettyUtils.scala:199)
at
org.apache.spark.ui.JettyUtils$$anonfun$4.apply(JettyUtils.scala:209)
at
org.apache.spark.ui.JettyUtils$$anonfun$4.apply(JettyUtils.scala:209)
at
org.apache.spark.util.Utils$$anonfun$startServiceOnPort$1.apply$mcVI$sp(Utils.scala:1449)
at scala.collection.immutable.Range.foreach$mVc$sp(Range.scala:141)
at
org.apache.spark.util.Utils$.startServiceOnPort(Utils.scala:1445)
at
org.apache.spark.ui.JettyUtils$.startJettyServer(JettyUtils.scala:209)
at org.apache.spark.ui.WebUI.bind(WebUI.scala:102)
at org.apache.spark.SparkContext.<init>(SparkContext.scala:224)
at $line22.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:24)
at $line22.$read$$iwC$$iwC$$iwC.<init>(<console>:29)
at $line22.$read$$iwC$$iwC.<init>(<console>:31)
at $line22.$read$$iwC.<init>(<console>:33)
at $line22.$read.<init>(<console>:35)
at $line22.$read$.<init>(<console>:39)
at $line22.$read$.<clinit>(<console>)
at $line22.$eval$.<init>(<console>:7)
at $line22.$eval$.<clinit>(<console>)
at $line22.$eval.$print(<console>)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at
org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:846)
at
org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1119)
at
org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:672)
at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:703)
at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:667)
at
org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:819)
at
org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:864)
at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:776)
at
org.apache.spark.repl.SparkILoop.processLine$1(SparkILoop.scala:619)
at
org.apache.spark.repl.SparkILoop.innerLoop$1(SparkILoop.scala:627)
at org.apache.spark.repl.SparkILoop.loop(SparkILoop.scala:632)
at
org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply$mcZ$sp(SparkILoop.scala:959)
at
org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply(SparkILoop.scala:907)
at
org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply(SparkILoop.scala:907)
at
scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135)
at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:907)
at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:1002)
at org.apache.spark.repl.Main$.main(Main.scala:31)
at org.apache.spark.repl.Main.main(Main.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at
org.apache.spark.deploy.SparkSubmit$.launch(SparkSubmit.scala:331)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:75)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Has anyone already encountered these things? I recall that in earlier
spark-shell versions there was no issue with instantiating our own spark
contexts — is this new to Spark 1.1.0?
Thank you for your help and kind regards
reinis
|