You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@spark.apache.org by "Raj (JIRA)" <ji...@apache.org> on 2018/10/16 17:39:00 UTC
[jira] [Created] (SPARK-25749) Exception thrown while reading avro
file with large schema
Raj created SPARK-25749:
---------------------------
Summary: Exception thrown while reading avro file with large schema
Key: SPARK-25749
URL: https://issues.apache.org/jira/browse/SPARK-25749
Project: Spark
Issue Type: Bug
Components: SQL
Affects Versions: 2.3.2, 2.3.1, 2.3.0
Reporter: Raj
Hi, we are migrating our jobs from Spark 2.2.0 to Spark 2.3.1. One of the jobs reads an Avro source that has a large nested schema. The job fails on Spark 2.3.1 (I have also tested Spark 2.3.0 and Spark 2.3.2, and the job fails on those versions as well). I am able to reproduce this with some sample data. Please find the code, build file, and exception log below.
*Code (EncoderExample.scala)*
package com.rj.enc
import com.rj.logger.CustomLogger
import org.apache.log4j.Logger
import com.rj.sc.SparkUtil
import org.apache.spark.sql.catalyst.ScalaReflection
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.Encoders
/**
 * Reproduction driver for the reported failure.
 *
 * `main` first writes two default `MainCC` records as Avro under `sourcePath`, then reads
 * them back as a typed Dataset, maps every record to a `ResultCC`, and writes the result
 * as Parquet under `resultPath`.
 */
object EncoderExample {
// Logger named after this object's class name with its last character dropped
// (presumably the trailing `$` of a Scala object class name — confirm against CustomLogger).
val log: Logger = CustomLogger.getLogger(this.getClass.getName.dropRight(1))
// Placeholder user name; it is interpolated into both local file paths below.
val user = "xxx"
// Location where the sample Avro data is written and later read from.
val sourcePath = s"file:///Users/$user/del/avrodata"
// Location where the mapped result is written as Parquet.
val resultPath = s"file:///Users/$user/del/pqdata"
/** Entry point: generates the sample data, then runs the read/transform/write step. */
def main(args: Array[String]): Unit = {
writeData() // Create sample data
readData() // Read, Process & write back the results (App fails in this method for spark 2.3.1)
}
/**
 * Reads the Avro data at `sourcePath` as a `Dataset[MainCC]`, logs its schema and row
 * count, projects each row to `ResultCC(es = header.tamp)` and writes the projection to
 * `resultPath` as Parquet.
 */
def readData(): Unit = {
log.info("sourcePath -> " + sourcePath)
val ss = SparkUtil.getSparkSession(this.getClass.getName)
// Derive the read schema from the MainCC case class via reflection instead of
// inferring it from the Avro files.
val schema = ScalaReflection.schemaFor[MainCC].dataType.asInstanceOf[StructType]
import com.databricks.spark.avro._
import ss.implicits._
val ds = ss.sqlContext.read.schema(schema).option("basePath", sourcePath).
avro(this.sourcePath).as[MainCC]
log.info("Schema -> " + ds.schema.treeString)
log.info("Count x -> " + ds.count)
// The encoder for the result type is built explicitly and passed to `map` by hand
// rather than being resolved implicitly.
val encr = Encoders.product[ResultCC]
val res = ds.map{ x =>
val es: Long = x.header.tamp
ResultCC(es = es)
}(encr)
res.write.parquet(this.resultPath)
}
/**
 * Creates the sample input: two `MainCC` instances built entirely from default values,
 * written as Avro to `sourcePath`.
 */
def writeData(): Unit = {
val ss = SparkUtil.getSparkSession(this.getClass.getName)
import ss.implicits._
val ds = ss.sparkContext.parallelize(Seq(MainCC(), MainCC())).toDF//.as[MainCC]
// NOTE(review): the "5" below is literal message text, not the number of records;
// only two records are created above.
log.info("source count 5 -> " + ds.count)
import com.databricks.spark.avro._
ds.write.avro(this.sourcePath)
log.info("Written")
}
}
/** Output record of the job: carries the single `es` value taken from each input row. */
final case class ResultCC(es: Long)
*Case Class (Schema of source avro data)*
package com.rj.enc
/** Header section of a source record: a `tamp` long plus an optional string `xy`. */
case class Header(
  tamp: Long = 12,
  xy: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
)
/** Key section of a source record: a single optional string `hi`. */
case class Key(
  hi: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
)
/** Nested schema record with two optional doubles (`l1`, `l2`) and five optional strings. */
case class L30(
  l1: Option[Double] = Some(123d),
  l2: Option[Double] = Some(123d),
  l3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  l4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  l5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  l6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  l7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
)
/** Nested schema record holding two optional strings. */
case class C45(
  r1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  r2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
)
/** Nested schema record: optional string, optional int, optional string. */
case class B45(
  e1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  e2: Option[Int] = Some(123),
  e3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
)
/** Nested schema record with a single optional string `t1`. */
case class D45(
  t1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
)
/** Groups one optional `B45`, `C45` and `D45` record, each defaulting to its all-defaults instance. */
case class M30(
  b1: Option[B45] = Some(B45()),
  b2: Option[C45] = Some(C45()),
  b3: Option[D45] = Some(D45())
)
/** Nested schema record holding six optional strings. */
case class Y50(
  g1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  g2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  g3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  g4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  g5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  g6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
)
/** Nested schema record holding four optional strings. */
case class X50(
  c1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  c2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  c3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  c4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
)
/** Nested schema record: five optional strings plus an optional `Y50` (`u5`) and `X50` (`u6`). */
case class L10(
  u1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  u2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  u3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  u4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  u5: Option[Y50] = Some(Y50()),
  u6: Option[X50] = Some(X50()),
  u7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
)
/** Nested schema record with a single optional string `m1`. */
case class Z10(m1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"))
/** Nested schema record mixing optional strings, an optional `L30`, an optional `M30` and an optional flag. */
case class X10(
  i1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  i2: Option[L30] = Some(L30()),
  i3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  i4: Option[M30] = Some(M30()),
  i5: Option[Boolean] = Some(false)
)
/**
 * Nested schema record: an optional long (`t1`), eleven optional strings and an
 * optional string-to-optional-string map (`u3`) that defaults to an empty map.
 */
case class R10(
  t1: Option[Long] = Some(123L),
  t2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  t3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  t4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  t5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  t6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  t7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  t8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  t9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  u1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  u2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  u3: Option[Map[String, Option[String]]] = Some(Map.empty),
  u4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
)
/** Aggregates one optional `R10`, `X10`, `L10` and `Z10` record, each defaulting to its all-defaults instance. */
case class A15(
  h1: Option[R10] = Some(R10()),
  h2: Option[X10] = Some(X10()),
  h3: Option[L10] = Some(L10()),
  h4: Option[Z10] = Some(Z10())
)
/**
 * Nested schema record holding thirteen optional strings (`m1`..`m9`, `n1`..`n4`).
 */
case class B15(
  m1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  m2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  m3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  m4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  m5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  m6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  m7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  m8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  m9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  n1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  n2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  n3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  n4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
) {

  /**
   * Flattens this record into a map keyed by field name.
   *
   * Each key now maps to the field of the same name. Previously "n3" was filled
   * from `n2` and "n4" from `n3`, so `n4` was never exported.
   *
   * @return a map with exactly thirteen entries; a field that is `None` is
   *         represented by a `null` value (kept for compatibility with callers).
   */
  def toMap: Map[String, String] =
    Map(
      "m1" -> m1,
      "m2" -> m2,
      "m3" -> m3,
      "m4" -> m4,
      "m5" -> m5,
      "m6" -> m6,
      "m7" -> m7,
      "m8" -> m8,
      "m9" -> m9,
      "n1" -> n1,
      "n2" -> n2,
      "n3" -> n3,
      "n4" -> n4
    ).map { case (field, value) => (field, value.orNull) }
}
/**
 * Value section of a source record: a wide, flat list of optional fields (mostly strings,
 * with a few longs, ints, doubles, a boolean and a map) plus the nested `A15` (`n7`) and
 * `B15` (`n8`) records. Field names run `a1`..`n9`; note that there is no `d8`.
 */
case class Value(
  a1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  a2: Option[Long] = Some(123L),
  a3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  a4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  a5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  a6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  a7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  a8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  a9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  b1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  b2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  b3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  b4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  b5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  b6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  b7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  b8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  b9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  c1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  c2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  c3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  c4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  c5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  c6: Option[Double] = Some(1.23d),
  c7: Option[Double] = Some(1.1d),
  c8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  c9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  d1: Option[Int] = Some(123),
  d2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  d3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  d4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  d5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  d6: Option[Long] = Some(123),
  d7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  d9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  e1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  e2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  e3: Option[Int] = Some(123),
  e4: Option[Int] = Some(234),
  e5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  e6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  e7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  e8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  e9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  f1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  f2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  f3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  f4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  f5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  f6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  f7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  f8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  f9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  g1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  g2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  g3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  g4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  g5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  g6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  g7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  g8: Option[Int] = Some(123),
  g9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  h1: Option[Long] = Some(123L),
  h2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  h3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  h4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  h5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  h6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  h7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  h8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  h9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  i1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  i2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  i3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  i4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  i5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  i6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  i7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  i8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  i9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  j1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  j2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  j3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  j4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  j5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  j6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  j7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  j8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  j9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  k1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  k2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  k3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  k4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  k5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  k6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  k7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  k8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  k9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  l1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  l2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  l3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  l4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  l5: Option[Int] = Some(123),
  l6: Option[Int] = Some(123),
  l7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  l8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  l9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  m1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  m2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  m3: Option[Map[String, Option[String]]] = Some(Map.empty),
  m4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  m5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  m6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  m7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  m8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  m9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  n1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  n2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  n3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  n4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  n5: Option[Boolean] = Some(true),
  n6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
  n7: Option[A15] = Some(A15()),
  n8: Option[B15] = Some(B15()),
  n9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
)
/**
 * Top-level source record: an integer `date` plus the `header`, `value` and `key`
 * sections, every one of which has a default so `MainCC()` builds a full sample row.
 */
final case class MainCC(
  date: Int = 20181008,
  header: Header = Header(),
  value: Value = Value(),
  key: Key = Key()
)
+*build.sbt*+
// Project identity and Scala version.
name := "sparkutil"
version := "5.0"
scalaVersion := "2.11.8"

// Eclipse plugin setting.
EclipseKeys.withSource := true

// Extra compiler warnings.
scalacOptions ++= Seq("-Ywarn-dead-code", "-Ywarn-unused")
// Spark version shared by the three Spark modules below.
val sparkVer = "2.3.1"

// The Spark modules are "provided", so they are not bundled into the assembly.
// spark-avro was previously listed twice with the same version; it is declared once here.
// NOTE(review): scalatest is on the compile classpath — consider `% Test` if it is only
// used from tests.
libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-core" % sparkVer % "provided",
  "org.apache.spark" %% "spark-sql" % sparkVer % "provided",
  "org.apache.spark" %% "spark-hive" % sparkVer % "provided",
  "com.databricks" %% "spark-avro" % "4.0.0",
  "log4j" % "log4j" % "1.2.17",
  "com.github.scopt" %% "scopt" % "3.6.0",
  "com.googlecode.json-simple" % "json-simple" % "1.1.1",
  "com.google.cloud" % "google-cloud-bigquery" % "0.17.1-beta",
  "org.scalatest" %% "scalatest" % "3.0.5",
  "com.fasterxml.jackson.dataformat" % "jackson-dataformat-yaml" % "2.8.3"
)
// Conflict resolution for files that appear in more than one jar when building the assembly.
assemblyMergeStrategy in assembly := {
  case PathList("com", "google", _*) => MergeStrategy.last
  case PathList("org", "apache", "avro", _*) => MergeStrategy.last
  case "project.properties" => MergeStrategy.last
  // Every META-INF entry is discarded. The earlier inner match lower-cased the path
  // and then chose MergeStrategy.discard in both its specific cases and its fallback,
  // so it is collapsed into this single case (which also removes the stray `\{`
  // markup escape that was not valid Scala).
  case PathList("META-INF", _*) => MergeStrategy.discard
  case _ => MergeStrategy.first
}
+*Exception Thrown*+ (PS: I have also increased the heap size in Eclipse, but that does not solve the issue)
18/10/16 12:28:40 || ERROR || org.apache.spark.util.Utils || logError() || 91 || Aborting task
java.lang.OutOfMemoryError: Java heap space
at java.util.Arrays.copyOf(Arrays.java:3332)
at java.lang.AbstractStringBuilder.ensureCapacityInternal(AbstractStringBuilder.java:124)
at java.lang.AbstractStringBuilder.append(AbstractStringBuilder.java:448)
at java.lang.StringBuilder.append(StringBuilder.java:136)
at scala.collection.mutable.StringBuilder.append(StringBuilder.scala:200)
at scala.collection.TraversableOnce$$anonfun$addString$1.apply(TraversableOnce.scala:364)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
at scala.collection.TraversableOnce$class.addString(TraversableOnce.scala:357)
at scala.collection.AbstractTraversable.addString(Traversable.scala:104)
at scala.collection.TraversableOnce$class.mkString(TraversableOnce.scala:323)
at scala.collection.AbstractTraversable.mkString(Traversable.scala:104)
at scala.collection.TraversableOnce$class.mkString(TraversableOnce.scala:325)
at scala.collection.AbstractTraversable.mkString(Traversable.scala:104)
at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$5.apply(RuleExecutor.scala:137)
at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$5.apply(RuleExecutor.scala:138)
at org.apache.spark.internal.Logging$class.logDebug(Logging.scala:58)
at org.apache.spark.sql.catalyst.rules.RuleExecutor.logDebug(RuleExecutor.scala:40)
at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:134)
at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:76)
at scala.collection.immutable.List.foreach(List.scala:381)
at org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:76)
at org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection$$anonfun$canonicalize$1.apply(GenerateUnsafeProjection.scala:354)
at org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection$$anonfun$canonicalize$1.apply(GenerateUnsafeProjection.scala:354)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
at scala.collection.immutable.List.foreach(List.scala:381)
at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
at scala.collection.immutable.List.map(List.scala:285)
at org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection$.canonicalize(GenerateUnsafeProjection.scala:354)
at org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection$.canonicalize(GenerateUnsafeProjection.scala:32)
at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator.generate(CodeGenerator.scala:1321)
18/10/16 12:28:40 || ERROR || org.apache.spark.sql.execution.datasources.FileFormatWriter || logError() || 70 || Job job_20181016122823_0005 aborted.
18/10/16 12:28:40 || ERROR || org.apache.spark.executor.Executor || logError() || 91 || Exception in task 0.0 in stage 5.0 (TID 5)
org.apache.spark.SparkException: Task failed while writing rows.
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:285)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply(FileFormatWriter.scala:197)
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)
---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org