You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@spark.apache.org by "Raj (JIRA)" <ji...@apache.org> on 2018/10/16 17:40:00 UTC
[jira] [Updated] (SPARK-25749) Exception thrown while reading avro
file with large schema
[ https://issues.apache.org/jira/browse/SPARK-25749?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Raj updated SPARK-25749:
------------------------
Attachment: MainCC.scala
EncoderExample.scala
> Exception thrown while reading avro file with large schema
> ----------------------------------------------------------
>
> Key: SPARK-25749
> URL: https://issues.apache.org/jira/browse/SPARK-25749
> Project: Spark
> Issue Type: Bug
> Components: SQL
> Affects Versions: 2.3.0, 2.3.1, 2.3.2
> Reporter: Raj
> Priority: Blocker
> Attachments: EncoderExample.scala, MainCC.scala
>
>
> Hi, we are migrating our jobs from Spark 2.2.0 to Spark 2.3.1. One of the jobs reads an Avro source that has a large nested schema. The job fails on Spark 2.3.1 (I have also tested Spark 2.3.0 and Spark 2.3.2, and the job fails there as well). I am able to reproduce this with some sample data. Please find below the code, build file, and exception log.
> *Code (EncoderExample.scala)*
>
> package com.rj.enc
> import com.rj.logger.CustomLogger
> import org.apache.log4j.Logger
> import com.rj.sc.SparkUtil
> import org.apache.spark.sql.catalyst.ScalaReflection
> import org.apache.spark.sql.types.StructType
> import org.apache.spark.sql.Encoders
> object EncoderExample { // Reproduction driver: writes sample Avro data, then reads it back, maps it and writes Parquet.
> // The logger name is this object's class name with its last character dropped (presumably the trailing '$' -- confirm).
> val log: Logger = CustomLogger.getLogger(this.getClass.getName.dropRight(1))
> val user = "xxx"
> val sourcePath = s"file:///Users/$user/del/avrodata"
> val resultPath = s"file:///Users/$user/del/pqdata"
> // Entry point: writeData() runs first because readData() reads from the sourcePath that writeData() writes to.
> def main(args: Array[String]): Unit = {
> writeData() // Create sample data
> readData() // Read, Process & write back the results (App fails in this method for spark 2.3.1)
> }
> // readData: loads the Avro files at sourcePath as MainCC, logs schema and count, maps each row to ResultCC, writes Parquet to resultPath.
> def readData(): Unit = {
> log.info("sourcePath -> " + sourcePath)
> val ss = SparkUtil.getSparkSession(this.getClass.getName)
> val schema = ScalaReflection.schemaFor[MainCC].dataType.asInstanceOf[StructType] // schema derived by reflection from the MainCC case class
> import com.databricks.spark.avro._
> import ss.implicits._
> val ds = ss.sqlContext.read.schema(schema).option("basePath", sourcePath).
> avro(this.sourcePath).as[MainCC] // read with the explicit schema, then view the rows as MainCC
> log.info("Schema -> " + ds.schema.treeString)
> log.info("Count x -> " + ds.count)
> val encr = Encoders.product[ResultCC] // encoder passed explicitly to map below
> val res = ds.map{ x =>
> val es: Long = x.header.tamp // only header.tamp is carried into the result
> ResultCC(es = es)
> }(encr)
> res.write.parquet(this.resultPath)
> }
> // writeData: builds a two-row DataFrame from default MainCC() instances, logs its count, and writes it as Avro to sourcePath.
> def writeData(): Unit = {
> val ss = SparkUtil.getSparkSession(this.getClass.getName)
> import ss.implicits._
> val ds = ss.sparkContext.parallelize(Seq(MainCC(), MainCC())).toDF//.as[MainCC]
> log.info("source count 5 -> " + ds.count) // NOTE(review): the label says "5" but the Seq above holds two elements
> import com.databricks.spark.avro._
> ds.write.avro(this.sourcePath)
> log.info("Written")
> }
>
> }
> /** Output row of the reproduction job: a single Long column named `es`. */
> final case class ResultCC(es: Long)
> *Case Class (Schema of source avro data)*
> package com.rj.enc
>
> /** Record header; both fields have defaults, so `Header()` yields a populated sample value. */
> case class Header(
>   tamp: Long = 12,
>   xy: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> )
>
> /** Record key holding one optional string, defaulted to a sample value. */
> case class Key(
>   hi: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> )
>
> /** Nested record with two optional doubles followed by five optional strings; every field defaults to `Some(...)`. */
> case class L30(
>   l1: Option[Double] = Some(123d),
>   l2: Option[Double] = Some(123d),
>   l3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
>   l4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
>   l5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
>   l6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
>   l7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> )
>
> /** Nested record with two optional strings, both defaulted to a sample value. */
> case class C45(
>   r1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
>   r2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> )
>
> /** Nested record: optional string, optional int (default 123), optional string. */
> case class B45(
>   e1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
>   e2: Option[Int] = Some(123),
>   e3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> )
>
> /** Nested record with a single optional string field `t1`. */
> case class D45(
>   t1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> )
>
> /** Groups one optional B45, C45 and D45, each defaulted to that class's default instance. */
> case class M30(
>   b1: Option[B45] = Some(B45()),
>   b2: Option[C45] = Some(C45()),
>   b3: Option[D45] = Some(D45())
> )
>
> /** Nested record with six optional strings, all defaulted to a sample value. */
> case class Y50(
>   g1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
>   g2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
>   g3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
>   g4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
>   g5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
>   g6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> )
>
> /** Nested record with four optional strings, all defaulted to a sample value. */
> case class X50(
>   c1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
>   c2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
>   c3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
>   c4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> )
>
> /** Nested record: five optional strings plus an optional Y50 (`u5`) and an optional X50 (`u6`), all with defaults. */
> case class L10(
>   u1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
>   u2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
>   u3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
>   u4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
>   u5: Option[Y50] = Some(Y50()),
>   u6: Option[X50] = Some(X50()),
>   u7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> )
>
> /** Nested record with a single optional string field `m1`. */
> case class Z10(
>   m1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> )
>
> /** Nested record mixing optional strings, an optional L30, an optional M30 and an optional flag (default false). */
> case class X10(
>   i1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
>   i2: Option[L30] = Some(L30()),
>   i3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
>   i4: Option[M30] = Some(M30()),
>   i5: Option[Boolean] = Some(false)
> )
>
> /** Nested record: an optional long, eleven optional strings and an optional string-to-optional-string map (default empty). */
> case class R10(
>   t1: Option[Long] = Some(123L),
>   t2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
>   t3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
>   t4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
>   t5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
>   t6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
>   t7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
>   t8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
>   t9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
>   u1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
>   u2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
>   u3: Option[Map[String, Option[String]]] = Some(Map.empty),
>   u4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> )
>
> /** Aggregates one optional R10, X10, L10 and Z10, each defaulted to that class's default instance. */
> case class A15(
>   h1: Option[R10] = Some(R10()),
>   h2: Option[X10] = Some(X10()),
>   h3: Option[L10] = Some(L10()),
>   h4: Option[Z10] = Some(Z10())
> )
>
> /** Flat record of thirteen optional strings (`m1`..`m9`, `n1`..`n4`), each defaulted to a sample value. */
> case class B15(
>   m1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
>   m2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
>   m3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
>   m4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
>   m5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
>   m6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
>   m7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
>   m8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
>   m9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
>   n1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
>   n2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
>   n3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
>   n4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ) {
>   /**
>    * Flattens this record into a map keyed by field name.
>    *
>    * Every field appears under its own name; the previous version stored `n2` under "n3" and
>    * `n3` under "n4", so `n4` was never exported.
>    *
>    * @return a 13-entry map; a field that is `None` maps to `null` (via `orNull`), which keeps
>    *         the original `Map[String, String]` return type.
>    */
>   def toMap: Map[String, String] =
>     Map(
>       "m1" -> m1,
>       "m2" -> m2,
>       "m3" -> m3,
>       "m4" -> m4,
>       "m5" -> m5,
>       "m6" -> m6,
>       "m7" -> m7,
>       "m8" -> m8,
>       "m9" -> m9,
>       "n1" -> n1,
>       "n2" -> n2,
>       "n3" -> n3,
>       "n4" -> n4
>     ).map { case (name, value) => (name, value.orNull) }
> }
>
> case class Value ( // Main payload record: every field is an Option with a Some(...) default; n7 and n8 nest A15 and B15.
> a1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,a2: Option[Long] = Some(123l)
> ,a3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,a4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,a5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,a6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,a7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,a8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,a9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,b1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,b2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,b3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,b4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,b5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,b6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,b7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,b8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,b9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,c1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,c2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,c3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,c4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,c5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,c6: Option[Double] = Some(1.23d)
> ,c7: Option[Double] = Some(1.1d)
> ,c8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,c9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,d1: Option[Int] = Some(123)
> ,d2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,d3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,d4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,d5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,d6: Option[Long] = Some(123) // NOTE(review): literal has no long suffix, unlike a2 and h1 which use 123l
> ,d7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,d9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") // NOTE(review): names jump from d7 to d9; there is no d8
> ,e1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,e2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,e3: Option[Int] = Some(123)
> ,e4: Option[Int] = Some(234)
> ,e5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,e6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,e7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,e8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,e9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,f1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,f2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,f3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,f4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,f5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,f6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,f7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,f8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,f9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,g1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,g2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,g3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,g4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,g5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,g6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,g7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,g8: Option[Int] = Some(123)
> ,g9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,h1: Option[Long] = Some(123l)
> ,h2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,h3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,h4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,h5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,h6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,h7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,h8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,h9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,i1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,i2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,i3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,i4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,i5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,i6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,i7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,i8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,i9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,j1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,j2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,j3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,j4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,j5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,j6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,j7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,j8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,j9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,k1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,k2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,k3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,k4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,k5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,k6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,k7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,k8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,k9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,l1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,l2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,l3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,l4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,l5: Option[Int] = Some(123)
> ,l6: Option[Int] = Some(123)
> ,l7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,l8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,l9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,m1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,m2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,m3: Option[Map[String, Option[String]]] = Some(Map.empty)
> ,m4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,m5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,m6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,m7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,m8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,m9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,n1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,n2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,n3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,n4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,n5: Option[Boolean] = Some(true)
> ,n6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,n7: Option[A15] = Some(A15())
> ,n8: Option[B15] = Some(B15())
> ,n9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> )
>
> /** Top-level row type of the sample Avro data: an integer date plus header, value and key records, all defaulted. */
> final case class MainCC(
>   date: Int = 20181008,
>   header: Header = Header(),
>   value: Value = Value(),
>   key: Key = Key()
> )
>
> +*build.sbt*+
> // sbt build for the reproduction project (Scala 2.11, Spark 2.3.1 provided by the runtime).
> name := "sparkutil"
> version := "5.0"
> scalaVersion := "2.11.8"
> EclipseKeys.withSource := true
> scalacOptions ++= Seq(
>   "-Ywarn-dead-code",
>   "-Ywarn-unused"
> )
> val sparkVer = "2.3.1"
> // spark-avro was listed twice with identical coordinates; it is declared once here.
> // NOTE(review): scalatest is on the compile classpath (no Test scope) -- confirm this is intended.
> libraryDependencies ++= Seq(
>   "org.apache.spark" %% "spark-core" % sparkVer % "provided",
>   "org.apache.spark" %% "spark-sql" % sparkVer % "provided",
>   "org.apache.spark" %% "spark-hive" % sparkVer % "provided",
>   "com.databricks" %% "spark-avro" % "4.0.0",
>   "log4j" % "log4j" % "1.2.17",
>   "com.github.scopt" %% "scopt" % "3.6.0",
>   "com.googlecode.json-simple" % "json-simple" % "1.1.1",
>   "com.google.cloud" % "google-cloud-bigquery" % "0.17.1-beta",
>   "org.scalatest" %% "scalatest" % "3.0.5",
>   "com.fasterxml.jackson.dataformat" % "jackson-dataformat-yaml" % "2.8.3"
> )
> // Assembly conflict resolution: the last copy wins for Google and Avro classes and for
> // project.properties, META-INF is dropped, and the first copy wins for everything else.
> assemblyMergeStrategy in assembly := {
>   case PathList("com", "google", xs @ _*) => MergeStrategy.last
>   case PathList("org", "apache", "avro", xs @ _*) => MergeStrategy.last
>   case "project.properties" => MergeStrategy.last
>   // The original matched on the lower-cased entry names (with a stray "\{" markup escape),
>   // but every branch discarded, so all META-INF entries are discarded directly.
>   case PathList("META-INF", xs @ _*) => MergeStrategy.discard
>   case _ => MergeStrategy.first
> }
> +*Exception Thrown*+ (PS: I have also increased the heap size in Eclipse, but that does not solve the issue)
> 18/10/16 12:28:40 || ERROR || org.apache.spark.util.Utils || logError() || 91 || Aborting task
> java.lang.OutOfMemoryError: Java heap space
> at java.util.Arrays.copyOf(Arrays.java:3332)
> at java.lang.AbstractStringBuilder.ensureCapacityInternal(AbstractStringBuilder.java:124)
> at java.lang.AbstractStringBuilder.append(AbstractStringBuilder.java:448)
> at java.lang.StringBuilder.append(StringBuilder.java:136)
> at scala.collection.mutable.StringBuilder.append(StringBuilder.scala:200)
> at scala.collection.TraversableOnce$$anonfun$addString$1.apply(TraversableOnce.scala:364)
> at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
> at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
> at scala.collection.TraversableOnce$class.addString(TraversableOnce.scala:357)
> at scala.collection.AbstractTraversable.addString(Traversable.scala:104)
> at scala.collection.TraversableOnce$class.mkString(TraversableOnce.scala:323)
> at scala.collection.AbstractTraversable.mkString(Traversable.scala:104)
> at scala.collection.TraversableOnce$class.mkString(TraversableOnce.scala:325)
> at scala.collection.AbstractTraversable.mkString(Traversable.scala:104)
> at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$5.apply(RuleExecutor.scala:137)
> at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$5.apply(RuleExecutor.scala:138)
> at org.apache.spark.internal.Logging$class.logDebug(Logging.scala:58)
> at org.apache.spark.sql.catalyst.rules.RuleExecutor.logDebug(RuleExecutor.scala:40)
> at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:134)
> at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:76)
> at scala.collection.immutable.List.foreach(List.scala:381)
> at org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:76)
> at org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection$$anonfun$canonicalize$1.apply(GenerateUnsafeProjection.scala:354)
> at org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection$$anonfun$canonicalize$1.apply(GenerateUnsafeProjection.scala:354)
> at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
> at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
> at scala.collection.immutable.List.foreach(List.scala:381)
> at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
> at scala.collection.immutable.List.map(List.scala:285)
> at org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection$.canonicalize(GenerateUnsafeProjection.scala:354)
> at org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection$.canonicalize(GenerateUnsafeProjection.scala:32)
> at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator.generate(CodeGenerator.scala:1321)
> 18/10/16 12:28:40 || ERROR || org.apache.spark.sql.execution.datasources.FileFormatWriter || logError() || 70 || Job job_20181016122823_0005 aborted.
> 18/10/16 12:28:40 || ERROR || org.apache.spark.executor.Executor || logError() || 91 || Exception in task 0.0 in stage 5.0 (TID 5)
> org.apache.spark.SparkException: Task failed while writing rows.
> at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:285)
> at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply(FileFormatWriter.scala:197)
>
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)
---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org