You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@spark.apache.org by Simon Hebert <si...@gmail.com> on 2015/10/06 17:12:02 UTC

Trying PCA on Spark, but a serialization error is thrown

Hi,

I tried to use the PCA object in one of my projects but ended up receiving a
serialization error. Any help would be appreciated. The example is taken from
https://spark.apache.org/docs/latest/mllib-feature-extraction.html#pca

My Code:
val selector = new PCA(20)
val transformer = selector.fit(discretizedData.map(_.features))
val filteredData = discretizedData.map(lp => lp.copy(features =
transformer.transform(lp.features)))

Stack trace:
scala> val filteredData = discretizedData.map(lp => lp.copy(features =
transformer.transform(lp.features)))
org.apache.spark.SparkException: Task not serializable
        at
org.apache.spark.util.ClosureCleaner$.ensureSerializable(ClosureCleaner.scala:304)
        at
org.apache.spark.util.ClosureCleaner$.org$apache$spark$util$ClosureCleaner$$clean(ClosureCleaner.scala:294)
        at
org.apache.spark.util.ClosureCleaner$.clean(ClosureCleaner.scala:122)
        at org.apache.spark.SparkContext.clean(SparkContext.scala:2021)
        at org.apache.spark.rdd.RDD$$anonfun$map$1.apply(RDD.scala:314)
        at org.apache.spark.rdd.RDD$$anonfun$map$1.apply(RDD.scala:313)
        at
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147)
        at
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:108)
        at org.apache.spark.rdd.RDD.withScope(RDD.scala:306)
        at org.apache.spark.rdd.RDD.map(RDD.scala:313)
        at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:40)
        at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:45)
        at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:47)
        at $iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:49)
        at $iwC$$iwC$$iwC$$iwC.<init>(<console>:51)
        at $iwC$$iwC$$iwC.<init>(<console>:53)
        at $iwC$$iwC.<init>(<console>:55)
        at $iwC.<init>(<console>:57)
        at <init>(<console>:59)
        at .<init>(<console>:63)
        at .<clinit>(<console>)
        at .<init>(<console>:7)
        at .<clinit>(<console>)
        at $print(<console>)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
        at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:606)
        at
org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065)
        at
org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1340)
        at
org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:840)
        at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:871)
        at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:819)
        at
org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:857)
        at
org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:902)
        at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:814)
        at
org.apache.spark.repl.SparkILoop.processLine$1(SparkILoop.scala:657)
        at
org.apache.spark.repl.SparkILoop.innerLoop$1(SparkILoop.scala:665)
        at org.apache.spark.repl.SparkILoop.org
$apache$spark$repl$SparkILoop$$loop(SparkILoop.scala:670)
        at
org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply$mcZ$sp(SparkILoop.scala:997)
        at
org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
        at
org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
        at
scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135)
        at org.apache.spark.repl.SparkILoop.org
$apache$spark$repl$SparkILoop$$process(SparkILoop.scala:945)
        at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:1059)
        at org.apache.spark.repl.Main$.main(Main.scala:31)
        at org.apache.spark.repl.Main.main(Main.scala)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
        at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:606)
        at
org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:672)
        at
org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:180)
        at
org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:205)
        at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:120)
        at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.io.NotSerializableException:
org.apache.spark.mllib.feature.PCA
Serialization stack:
        - object not serializable (class:
org.apache.spark.mllib.feature.PCA, value:
org.apache.spark.mllib.feature.PCA@51148636)
        - field (class: $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC, name:
selector, type: class org.apache.spark.mllib.feature.PCA)
        - object (class $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC,
$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC@31a954d)
        - field (class: $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC, name: $iw,
type: class $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC)
        - object (class $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC,
$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC@44c5b581)
        - field (class: $iwC$$iwC$$iwC$$iwC$$iwC$$iwC, name: $iw, type:
class $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC)
        - object (class $iwC$$iwC$$iwC$$iwC$$iwC$$iwC,
$iwC$$iwC$$iwC$$iwC$$iwC$$iwC@e9bd4c6)
        - field (class: $iwC$$iwC$$iwC$$iwC$$iwC, name: $iw, type: class
$iwC$$iwC$$iwC$$iwC$$iwC$$iwC)
        - object (class $iwC$$iwC$$iwC$$iwC$$iwC,
$iwC$$iwC$$iwC$$iwC$$iwC@7086d8af)
        - field (class: $iwC$$iwC$$iwC$$iwC, name: $iw, type: class
$iwC$$iwC$$iwC$$iwC$$iwC)
        - object (class $iwC$$iwC$$iwC$$iwC, $iwC$$iwC$$iwC$$iwC@6b416213)
        - field (class: $iwC$$iwC$$iwC, name: $iw, type: class
$iwC$$iwC$$iwC$$iwC)
        - object (class $iwC$$iwC$$iwC, $iwC$$iwC$$iwC@4a7c586b)
        - field (class: $iwC$$iwC, name: $iw, type: class $iwC$$iwC$$iwC)
        - object (class $iwC$$iwC, $iwC$$iwC@33584ccf)
        - field (class: $iwC, name: $iw, type: class $iwC$$iwC)
        - object (class $iwC, $iwC@4a23a27c)
        - field (class: $line29.$read, name: $iw, type: class $iwC)
        - object (class $line29.$read, $line29.$read@c385fa9)
        - field (class: $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC, name: $VAL167,
type: class $line29.$read)
        - object (class $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC,
$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC@5286994e)
        - field (class: $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC, name:
$outer, type: class $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC)
        - object (class $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC,
$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC@523f572b)
        - field (class: $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1,
name: $outer, type: class $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC)
        - object (class $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1,
<function1>)
        at
org.apache.spark.serializer.SerializationDebugger$.improveException(SerializationDebugger.scala:40)
        at
org.apache.spark.serializer.JavaSerializationStream.writeObject(JavaSerializer.scala:47)
        at
org.apache.spark.serializer.JavaSerializerInstance.serialize(JavaSerializer.scala:84)
        at
org.apache.spark.util.ClosureCleaner$.ensureSerializable(ClosureCleaner.scala:301)
        ... 55 more


Best regards,
Simon