You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@spark.apache.org by "Raj (JIRA)" <ji...@apache.org> on 2018/10/16 17:39:00 UTC

[jira] [Created] (SPARK-25749) Exception thrown while reading avro file with large schema

Raj created SPARK-25749:
---------------------------

             Summary: Exception thrown while reading avro file with large schema
                 Key: SPARK-25749
                 URL: https://issues.apache.org/jira/browse/SPARK-25749
             Project: Spark
          Issue Type: Bug
          Components: SQL
    Affects Versions: 2.3.2, 2.3.1, 2.3.0
            Reporter: Raj


Hi, We are migrating our jobs from Spark 2.2.0 to Spark 2.3.1. One of the job reads avro source that has large nested schema. The job fails for Spark 2.3.1(Have tested in Spark 2.3.0 & Spark 2.3.2 and the job fails in this case also). I am able to replicate this with some sample data. Please find below the code, build file & exception log

*Code (EncoderExample.scala)*

 

package com.rj.enc

import com.rj.logger.CustomLogger
import org.apache.log4j.Logger
import com.rj.sc.SparkUtil
import org.apache.spark.sql.catalyst.ScalaReflection
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.Encoders


object EncoderExample {
 
 val log: Logger = CustomLogger.getLogger(this.getClass.getName.dropRight(1))
 val user = "xxx"
 val sourcePath = s"file:///Users/$user/del/avrodata"
 val resultPath = s"file:///Users/$user/del/pqdata"
 
 def main(args: Array[String]): Unit = {
 writeData() // Create sample data 
 readData() // Read, Process & write back the results (App fails in this method for spark 2.3.1)
 }
 
 def readData(): Unit = {
 log.info("sourcePath -> " + sourcePath)
 val ss = SparkUtil.getSparkSession(this.getClass.getName)
 val schema = ScalaReflection.schemaFor[MainCC].dataType.asInstanceOf[StructType]
 import com.databricks.spark.avro._
 import ss.implicits._
 val ds = ss.sqlContext.read.schema(schema).option("basePath", sourcePath).
 avro(this.sourcePath).as[MainCC]
 log.info("Schema -> " + ds.schema.treeString)
 log.info("Count x -> " + ds.count)
 val encr = Encoders.product[ResultCC]
 val res = ds.map{ x =>
 val es: Long = x.header.tamp
 ResultCC(es = es)
 }(encr)
 res.write.parquet(this.resultPath)
 }
 
 def writeData(): Unit = {
 val ss = SparkUtil.getSparkSession(this.getClass.getName)
 import ss.implicits._
 val ds = ss.sparkContext.parallelize(Seq(MainCC(), MainCC())).toDF//.as[MainCC]
 log.info("source count 5 -> " + ds.count)
 import com.databricks.spark.avro._
 ds.write.avro(this.sourcePath)
 log.info("Written")
 }
 
}

final case class ResultCC(
 es: Long)

*Case Class (Schema of source avro data)*

package com.rj.enc

 

case class Header(tamp: Long = 12, xy: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"))

 

case class Key(hi: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"))

 

case class L30 (

  l1: Option[Double] = Some(123d)

,l2: Option[Double] = Some(123d)

 ,l3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

 ,l4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

 ,l5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

 ,l6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

 ,l7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

)

 

case class C45 (

 r1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

,r2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

)

 

case class B45 (

  e1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

,e2: Option[Int] = Some(123)

 ,e3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

)

 

case class D45 (`t1`: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"))

 

case class M30 (

b1: Option[B45] = Some(B45())

,b2: Option[C45] = Some(C45())

,b3: Option[D45] = Some(D45())

)

 

case class Y50 (

   g1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,g2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,g3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,g4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,g5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,g6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

)

 

case class X50 (

  c1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

 ,c2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

 ,c3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

 ,c4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

)

 

case class L10 (

  u1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

 ,u2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

 ,u3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

 ,u4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

,u5: Option[Y50] = Some(Y50())

,u6: Option[X50] = Some(X50())

 ,u7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

)

 

case class Z10 (

  m1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

)

 

case class X10(

  i1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

,i2: Option[L30] = Some(L30())

 ,i3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

,i4: Option[M30] = Some(M30())

,i5: Option[Boolean] = Some(false)

)

 

case class R10 (

   t1: Option[Long] = Some(123l)

  ,t2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,t3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,t4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,t5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,t6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,t7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,t8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,t9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,u1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,u2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,u3: Option[Map[String, Option[String]]] = Some(Map.empty)

  ,u4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

)

 

case class A15(

   h1: Option[R10] = Some(R10())

  ,h2: Option[X10] = Some(X10())

  ,h3: Option[L10] = Some(L10())

  ,h4: Option[Z10] = Some(Z10())

)

 

case class B15(

   m1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),

   m2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),

   m3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),

   m4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),

   m5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),

   m6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),

   m7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),

   m8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),

   m9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),

   n1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),

   n2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),

   n3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),

   n4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

){

  def toMap: Map[String,String]={

    Map(

      ("m1", this.m1),

      ("m2", this.m2),

      ("m3", this.m3),

      ("m4", this.m4),

      ("m5", this.m5),

      ("m6", this.m6),

      ("m7", this.m7),

      ("m8", this.m8),

      ("m9", this.m9),

      ("n1", this.n1),

      ("n2", this.n2),

      ("n3", this.n2),

      ("n4", this.n3)

    ).map(tup => {

      val (k,v) = tup

      (k, v.orNull)

    })

  }

}

 

case class Value (

   a1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,a2: Option[Long] = Some(123l)

  ,a3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,a4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,a5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,a6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,a7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,a8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,a9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,b1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,b2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,b3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,b4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,b5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,b6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,b7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,b8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,b9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,c1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,c2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,c3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,c4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,c5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,c6: Option[Double] = Some(1.23d)

  ,c7: Option[Double] = Some(1.1d)

  ,c8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,c9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,d1: Option[Int] = Some(123)

  ,d2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,d3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,d4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,d5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,d6: Option[Long] = Some(123)

  ,d7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,d9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,e1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,e2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,e3: Option[Int] = Some(123)

  ,e4: Option[Int] = Some(234)

  ,e5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,e6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,e7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,e8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,e9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,f1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,f2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,f3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,f4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,f5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,f6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,f7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,f8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,f9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,g1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,g2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,g3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,g4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,g5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,g6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,g7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,g8: Option[Int] = Some(123)

  ,g9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,h1: Option[Long] = Some(123l)

  ,h2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,h3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,h4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,h5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,h6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,h7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,h8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,h9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,i1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,i2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,i3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,i4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,i5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,i6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,i7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,i8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,i9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,j1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,j2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,j3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,j4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,j5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,j6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,j7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,j8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,j9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,k1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,k2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,k3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,k4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,k5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,k6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,k7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,k8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,k9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,l1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,l2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,l3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,l4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,l5: Option[Int] = Some(123)

  ,l6: Option[Int] = Some(123)

  ,l7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,l8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,l9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,m1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,m2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,m3: Option[Map[String, Option[String]]] = Some(Map.empty)

  ,m4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,m5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,m6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,m7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,m8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,m9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,n1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,n2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,n3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,n4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,n5: Option[Boolean] = Some(true)

  ,n6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

  ,n7: Option[A15] = Some(A15())

  ,n8: Option[B15] = Some(B15())

  ,n9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")

)

 

final case class MainCC(date: Int = 20181008,

    header: Header = Header(), value: Value = Value(), key: Key = Key())

 

+*build.sbt*+

name := "sparkutil"

version := "5.0"

scalaVersion := "2.11.8"

EclipseKeys.withSource := true

scalacOptions ++= Seq(

  "-Ywarn-dead-code",

  "-Ywarn-unused"

)

val sparkVer = "2.3.1"

libraryDependencies ++= Seq(

  "org.apache.spark" %% "spark-core" % sparkVer % "provided",

  "org.apache.spark" %% "spark-sql" % sparkVer % "provided",

  "org.apache.spark" %% "spark-hive" % sparkVer % "provided", 

"com.databricks" %% "spark-avro" % "4.0.0",

  "log4j" % "log4j" % "1.2.17",

  "com.github.scopt" %% "scopt" % "3.6.0",

  "com.googlecode.json-simple" % "json-simple" % "1.1.1",

"com.google.cloud" % "google-cloud-bigquery" % "0.17.1-beta",

  "com.databricks" %% "spark-avro" % "4.0.0",

  "org.scalatest" %% "scalatest" % "3.0.5",

  "com.fasterxml.jackson.dataformat" % "jackson-dataformat-yaml" % "2.8.3"

)

 assemblyMergeStrategy in assembly := {

  case PathList("com", "google", xs @ _*) => MergeStrategy.last

case PathList("org", "apache", "avro", xs @ _*) => MergeStrategy.last

  case "project.properties" => MergeStrategy.last

case PathList("META-INF", xs @ _*) =>

    (xs map \{_.toLowerCase}) match {

      case ("manifest.mf" :: Nil) | ("index.list" :: Nil) | ("dependencies" :: Nil) => MergeStrategy.discard

      case _ => MergeStrategy.discard

    }

  case _ =>

    MergeStrategy.first

}

+*Exception Thrown*+ (PS: I have also increase the heap size in eclipse, but that does not solve the issue)

18/10/16 12:28:40 || ERROR || org.apache.spark.util.Utils || logError() || 91 || Aborting task

java.lang.OutOfMemoryError: Java heap space

at java.util.Arrays.copyOf(Arrays.java:3332)

at java.lang.AbstractStringBuilder.ensureCapacityInternal(AbstractStringBuilder.java:124)

at java.lang.AbstractStringBuilder.append(AbstractStringBuilder.java:448)

at java.lang.StringBuilder.append(StringBuilder.java:136)

at scala.collection.mutable.StringBuilder.append(StringBuilder.scala:200)

at scala.collection.TraversableOnce$$anonfun$addString$1.apply(TraversableOnce.scala:364)

at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)

at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)

at scala.collection.TraversableOnce$class.addString(TraversableOnce.scala:357)

at scala.collection.AbstractTraversable.addString(Traversable.scala:104)

at scala.collection.TraversableOnce$class.mkString(TraversableOnce.scala:323)

at scala.collection.AbstractTraversable.mkString(Traversable.scala:104)

at scala.collection.TraversableOnce$class.mkString(TraversableOnce.scala:325)

at scala.collection.AbstractTraversable.mkString(Traversable.scala:104)

at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$5.apply(RuleExecutor.scala:137)

at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$5.apply(RuleExecutor.scala:138)

at org.apache.spark.internal.Logging$class.logDebug(Logging.scala:58)

at org.apache.spark.sql.catalyst.rules.RuleExecutor.logDebug(RuleExecutor.scala:40)

at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:134)

at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:76)

at scala.collection.immutable.List.foreach(List.scala:381)

at org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:76)

at org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection$$anonfun$canonicalize$1.apply(GenerateUnsafeProjection.scala:354)

at org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection$$anonfun$canonicalize$1.apply(GenerateUnsafeProjection.scala:354)

at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)

at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)

at scala.collection.immutable.List.foreach(List.scala:381)

at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)

at scala.collection.immutable.List.map(List.scala:285)

at org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection$.canonicalize(GenerateUnsafeProjection.scala:354)

at org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection$.canonicalize(GenerateUnsafeProjection.scala:32)

at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator.generate(CodeGenerator.scala:1321)

18/10/16 12:28:40 || ERROR || org.apache.spark.sql.execution.datasources.FileFormatWriter || logError() || 70 || Job job_20181016122823_0005 aborted.

18/10/16 12:28:40 || ERROR || org.apache.spark.executor.Executor || logError() || 91 || Exception in task 0.0 in stage 5.0 (TID 5)

org.apache.spark.SparkException: Task failed while writing rows.

at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:285)

at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply(FileFormatWriter.scala:197)

 



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org