You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@spark.apache.org by Simon Hebert <si...@gmail.com> on 2015/10/06 17:12:02 UTC
Trying PCA on spark but serialization is error thrown
Hi,
I tried to used the PCA object in one of my project but end up receiving a
serialization error. Any help would be appreciated. Example taken from
https://spark.apache.org/docs/latest/mllib-feature-extraction.html#pca
My Code:
val selector = new PCA(20)
val transformer = selector.fit(discretizedData.map(_.features))
val filteredData = discretizedData.map(lp => lp.copy(features =
transformer.transform(lp.features)))
Stack trace:
scala> val filteredData = discretizedData.map(lp => lp.copy(features =
transformer.transform(lp.features)))
org.apache.spark.SparkException: Task not serializable
at
org.apache.spark.util.ClosureCleaner$.ensureSerializable(ClosureCleaner.scala:304)
at
org.apache.spark.util.ClosureCleaner$.org$apache$spark$util$ClosureCleaner$$clean(ClosureCleaner.scala:294)
at
org.apache.spark.util.ClosureCleaner$.clean(ClosureCleaner.scala:122)
at org.apache.spark.SparkContext.clean(SparkContext.scala:2021)
at org.apache.spark.rdd.RDD$$anonfun$map$1.apply(RDD.scala:314)
at org.apache.spark.rdd.RDD$$anonfun$map$1.apply(RDD.scala:313)
at
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147)
at
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:108)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:306)
at org.apache.spark.rdd.RDD.map(RDD.scala:313)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:40)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:45)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:47)
at $iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:49)
at $iwC$$iwC$$iwC$$iwC.<init>(<console>:51)
at $iwC$$iwC$$iwC.<init>(<console>:53)
at $iwC$$iwC.<init>(<console>:55)
at $iwC.<init>(<console>:57)
at <init>(<console>:59)
at .<init>(<console>:63)
at .<clinit>(<console>)
at .<init>(<console>:7)
at .<clinit>(<console>)
at $print(<console>)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at
org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065)
at
org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1340)
at
org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:840)
at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:871)
at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:819)
at
org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:857)
at
org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:902)
at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:814)
at
org.apache.spark.repl.SparkILoop.processLine$1(SparkILoop.scala:657)
at
org.apache.spark.repl.SparkILoop.innerLoop$1(SparkILoop.scala:665)
at org.apache.spark.repl.SparkILoop.org
$apache$spark$repl$SparkILoop$$loop(SparkILoop.scala:670)
at
org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply$mcZ$sp(SparkILoop.scala:997)
at
org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
at
org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
at
scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135)
at org.apache.spark.repl.SparkILoop.org
$apache$spark$repl$SparkILoop$$process(SparkILoop.scala:945)
at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:1059)
at org.apache.spark.repl.Main$.main(Main.scala:31)
at org.apache.spark.repl.Main.main(Main.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at
org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:672)
at
org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:180)
at
org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:205)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:120)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.io.NotSerializableException:
org.apache.spark.mllib.feature.PCA
Serialization stack:
- object not serializable (class:
org.apache.spark.mllib.feature.PCA, value:
org.apache.spark.mllib.feature.PCA@51148636)
- field (class: $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC, name:
selector, type: class org.apache.spark.mllib.feature.PCA)
- object (class $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC,
$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC@31a954d)
- field (class: $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC, name: $iw,
type: class $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC)
- object (class $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC,
$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC@44c5b581)
- field (class: $iwC$$iwC$$iwC$$iwC$$iwC$$iwC, name: $iw, type:
class $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC)
- object (class $iwC$$iwC$$iwC$$iwC$$iwC$$iwC,
$iwC$$iwC$$iwC$$iwC$$iwC$$iwC@e9bd4c6)
- field (class: $iwC$$iwC$$iwC$$iwC$$iwC, name: $iw, type: class
$iwC$$iwC$$iwC$$iwC$$iwC$$iwC)
- object (class $iwC$$iwC$$iwC$$iwC$$iwC,
$iwC$$iwC$$iwC$$iwC$$iwC@7086d8af)
- field (class: $iwC$$iwC$$iwC$$iwC, name: $iw, type: class
$iwC$$iwC$$iwC$$iwC$$iwC)
- object (class $iwC$$iwC$$iwC$$iwC, $iwC$$iwC$$iwC$$iwC@6b416213)
- field (class: $iwC$$iwC$$iwC, name: $iw, type: class
$iwC$$iwC$$iwC$$iwC)
- object (class $iwC$$iwC$$iwC, $iwC$$iwC$$iwC@4a7c586b)
- field (class: $iwC$$iwC, name: $iw, type: class $iwC$$iwC$$iwC)
- object (class $iwC$$iwC, $iwC$$iwC@33584ccf)
- field (class: $iwC, name: $iw, type: class $iwC$$iwC)
- object (class $iwC, $iwC@4a23a27c)
- field (class: $line29.$read, name: $iw, type: class $iwC)
- object (class $line29.$read, $line29.$read@c385fa9)
- field (class: $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC, name: $VAL167,
type: class $line29.$read)
- object (class $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC,
$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC@5286994e)
- field (class: $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC, name:
$outer, type: class $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC)
- object (class $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC,
$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC@523f572b)
- field (class: $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1,
name: $outer, type: class $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC)
- object (class $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1,
<function1>)
at
org.apache.spark.serializer.SerializationDebugger$.improveException(SerializationDebugger.scala:40)
at
org.apache.spark.serializer.JavaSerializationStream.writeObject(JavaSerializer.scala:47)
at
org.apache.spark.serializer.JavaSerializerInstance.serialize(JavaSerializer.scala:84)
at
org.apache.spark.util.ClosureCleaner$.ensureSerializable(ClosureCleaner.scala:301)
... 55 more
best Regards,
Simon