You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@spark.apache.org by Brandon White <bw...@gmail.com> on 2015/07/19 21:37:31 UTC

DataFrame Union not passing optimizer assertion

Hello! I am taking the union of two DataFrames that have the same schema but
different numbers of rows. However, the query fails with an assertion error
during optimization. I think the failing assertion is the one in
UnionPushdown.buildRewrites here
<https://github.com/apache/spark/blob/master/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala>
but I am not sure. Any ideas why this assertion isn't passing?

java.lang.AssertionError: assertion failed
  at scala.Predef$.assert(Predef.scala:165)
  at org.apache.spark.sql.catalyst.optimizer.UnionPushdown$.buildRewrites(Optimizer.scala:72)
  at org.apache.spark.sql.catalyst.optimizer.UnionPushdown$$anonfun$apply$1.applyOrElse(Optimizer.scala:102)
  at org.apache.spark.sql.catalyst.optimizer.UnionPushdown$$anonfun$apply$1.applyOrElse(Optimizer.scala:92)
  at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$3.apply(TreeNode.scala:188)
  at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$3.apply(TreeNode.scala:188)
  at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:51)
  at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:187)
  at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:208)
  at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
  at scala.collection.Iterator$class.foreach(Iterator.scala:727)
  at scala.collection.AbstractIterator.foreach(Iterator.scala:1157)
  at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:48)
  at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:103)
  at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:47)
  at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:273)
  at scala.collection.AbstractIterator.to(Iterator.scala:1157)
  at scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:265)
  at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1157)
  at scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:252)
  at scala.collection.AbstractIterator.toArray(Iterator.scala:1157)
  at org.apache.spark.sql.catalyst.trees.TreeNode.transformChildrenDown(TreeNode.scala:238)
  at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:193)
  at org.apache.spark.sql.catalyst.trees.TreeNode.transform(TreeNode.scala:178)
  at org.apache.spark.sql.catalyst.optimizer.UnionPushdown$.apply(Optimizer.scala:92)
  at org.apache.spark.sql.catalyst.optimizer.UnionPushdown$.apply(Optimizer.scala:66)
  at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$apply$1$$anonfun$apply$2.apply(RuleExecutor.scala:61)
  at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$apply$1$$anonfun$apply$2.apply(RuleExecutor.scala:59)
  at scala.collection.IndexedSeqOptimized$class.foldl(IndexedSeqOptimized.scala:51)
  at scala.collection.IndexedSeqOptimized$class.foldLeft(IndexedSeqOptimized.scala:60)
  at scala.collection.mutable.WrappedArray.foldLeft(WrappedArray.scala:34)
  at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$apply$1.apply(RuleExecutor.scala:59)
  at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$apply$1.apply(RuleExecutor.scala:51)
  at scala.collection.immutable.List.foreach(List.scala:318)
  at org.apache.spark.sql.catalyst.rules.RuleExecutor.apply(RuleExecutor.scala:51)
  at org.apache.spark.sql.SQLContext$QueryExecution.optimizedPlan$lzycompute(SQLContext.scala:1087)
  at org.apache.spark.sql.SQLContext$QueryExecution.optimizedPlan(SQLContext.scala:1087)
  at org.apache.spark.sql.SQLContext$QueryExecution.sparkPlan$lzycompute(SQLContext.scala:1092)
  at org.apache.spark.sql.SQLContext$QueryExecution.sparkPlan(SQLContext.scala:1090)
  at org.apache.spark.sql.SQLContext$QueryExecution.executedPlan$lzycompute(SQLContext.scala:1096)
  at org.apache.spark.sql.SQLContext$QueryExecution.executedPlan(SQLContext.scala:1096)
  at org.apache.spark.sql.DataFrame.collect(DataFrame.scala:887)