Posted to user@spark.apache.org by Jishnu Prathap <ji...@wipro.com> on 2014/12/10 08:52:51 UTC

Re: Spark SQL Stackoverflow error

Hi,

Unfortunately I am also getting the same error.
Has anybody solved it?
Exception in thread "main" java.lang.StackOverflowError
        at scala.util.parsing.combinator.Parsers$$anon$3.apply(Parsers.scala:222)
        at scala.util.parsing.combinator.Parsers$Parser$$anonfun$append$1.apply(Parsers.scala:254)
        at scala.util.parsing.combinator.Parsers$$anon$3.apply(Parsers.scala:222)
        at scala.util.parsing.combinator.Parsers$Parser$$anonfun$append$1.apply(Parsers.scala:254)
        at scala.util.parsing.combinator.Parsers$Parser$$anonfun$append$1.apply(Parsers.scala:254)
        at scala.util.parsing.combinator.Parsers$$anon$3.apply(Parsers.scala:222)

The exact line where the exception occurs:

sqlContext.sql("SELECT text FROM tweetTable LIMIT 10").collect().foreach(println)

The complete code is from GitHub:

https://github.com/databricks/reference-apps/blob/master/twitter_classifier/scala/src/main/scala/com/databricks/apps/twitter_classifier/ExamineAndTrain.scala

import com.google.gson.{GsonBuilder, JsonParser}
import org.apache.spark.mllib.clustering.KMeans
import org.apache.spark.sql.SQLContext
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Examines the collected tweets and trains a model based on them.
 */
object ExamineAndTrain {
  val jsonParser = new JsonParser()
  val gson = new GsonBuilder().setPrettyPrinting().create()

  def main(args: Array[String]) {
    // Process program arguments and set properties
    /*
    if (args.length < 3) {
      System.err.println("Usage: " + this.getClass.getSimpleName +
        " <tweetInput> <outputModelDir> <numClusters> <numIterations>")
      System.exit(1)
    }
    */
    val outputModelDir = "C:\\MLModel"
    val tweetInput = "C:\\MLInput"
    val numClusters = 10
    val numIterations = 20

    // val Array(tweetInput, outputModelDir, Utils.IntParam(numClusters),
    //   Utils.IntParam(numIterations)) = args

    val conf = new SparkConf().setAppName(this.getClass.getSimpleName).setMaster("local[4]")
    val sc = new SparkContext(conf)
    val sqlContext = new SQLContext(sc)
    // Pretty print some of the tweets.
    val tweets = sc.textFile(tweetInput)
    println("------------Sample JSON Tweets-------")
    for (tweet <- tweets.take(5)) {
      println(gson.toJson(jsonParser.parse(tweet)))
    }

    val tweetTable = sqlContext.jsonFile(tweetInput).cache()
    tweetTable.registerTempTable("tweetTable")
    println("------Tweet table Schema---")
    tweetTable.printSchema()

    println("----Sample Tweet Text-----")
    sqlContext.sql("SELECT text FROM tweetTable LIMIT 10").collect().foreach(println)
println("------Sample Lang, Name, text---")
sqlContext.sql("SELECT user.lang, user.name, text FROM tweetTable LIMIT
1000").collect().foreach(println)
println("------Total count by languages Lang, count(*)---")
sqlContext.sql("SELECT user.lang, COUNT(*) as cnt FROM tweetTable GROUP BY
user.lang ORDER BY cnt DESC LIMIT 25").collect.foreach(println)
println("--- Training the model and persist it")
val texts = sqlContext.sql("SELECT text from
tweetTable").map(_.head.toString)
// Cache the vectors RDD since it will be used for all the KMeans
iterations.
val vectors = texts.map(Utils.featurize).cache()
vectors.count() // Calls an action on the RDD to populate the vectors cache.
val model = KMeans.train(vectors, numClusters, numIterations)
sc.makeRDD(model.clusterCenters,
numClusters).saveAsObjectFile(outputModelDir)
val some_tweets = texts.take(100)
println("----Example tweets from the clusters")
for (i <- 0 until numClusters) {
println(s"\nCLUSTER $i:")
some_tweets.foreach { t =>
if (model.predict(Utils.featurize(t)) == i) {
println(t)
}
}
}
}
}
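
For reference, the code above also depends on Utils.featurize from the reference
app's Utils.scala, which I did not paste here. From memory (so treat this as an
approximate sketch, not the exact source), it hashes character bigrams of the tweet
text into a fixed-size term-frequency vector:

    import org.apache.spark.mllib.feature.HashingTF
    import org.apache.spark.mllib.linalg.Vector

    object Utils {
      val numFeatures = 1000
      val tf = new HashingTF(numFeatures)

      // Turn a tweet into character bigrams and hash them into a
      // length-1000 term-frequency vector for MLlib.
      def featurize(s: String): Vector = {
        tf.transform(s.sliding(2).toSeq)
      }
    }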
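
And if anyone wants to sanity-check the persisted model, here is a minimal sketch,
assuming the cluster centers were saved with saveAsObjectFile as above and the same
featurization is applied at prediction time:

    import org.apache.spark.mllib.clustering.KMeansModel
    import org.apache.spark.mllib.linalg.Vector

    // Read the saved cluster centers back, rebuild the model, and score a tweet.
    val centers = sc.objectFile[Vector](outputModelDir).collect()
    val loadedModel = new KMeansModel(centers)
    println("Predicted cluster: " + loadedModel.predict(Utils.featurize("some tweet text")))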



--
View this message in context: http://apache-spark-user-list.1001560.n3.nabble.com/Spark-SQL-Stackoverflow-error-tp12086p20605.html
Sent from the Apache Spark User List mailing list archive at Nabble.com.

---------------------------------------------------------------------
To unsubscribe, e-mail: user-unsubscribe@spark.apache.org
For additional commands, e-mail: user-help@spark.apache.org