Posted to issues@spark.apache.org by "Jungtaek Lim (Jira)" <ji...@apache.org> on 2019/09/18 09:27:00 UTC

[jira] [Commented] (SPARK-29140) Flaky test: org.apache.spark.sql.hive.execution.ObjectHashAggregateSuite.randomized aggregation test - [with partial + unsafe, with distinct] - with grouping keys

    [ https://issues.apache.org/jira/browse/SPARK-29140?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16932238#comment-16932238 ] 

Jungtaek Lim commented on SPARK-29140:
--------------------------------------

I'm looking into this.
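
For context while I dig in: the failure is not in the aggregation results themselves but in whole-stage codegen. Janino rejects the generated Java with "IDENTIFIER expected instead of '['" (see the innermost cause in the trace below). The plan boils down to a group-by on (id % 10) with max(c1) plus a distinct count of c3, followed by a sort on the grouping key. A minimal sketch that mirrors that shape (my own approximation, not the suite's randomized test code; it assumes a live SparkSession named spark):

{code:scala}
import org.apache.spark.sql.functions._

// Columns named after the plan (id, c0..c3); the values are placeholders,
// since the suite generates randomized data.
val df = spark.range(0, 100).select(
  col("id"),
  lit("x").as("c0"),
  rand().as("c1"),
  lit(0L).as("c2"),
  (col("id") % 3).as("c3"))

// Group by (id % 10), aggregate max(c1) and count(distinct c3), then sort
// on the grouping key and collect, matching the failing plan's
// HashAggregate / Exchange / Sort chain.
df.groupBy((col("id") % 10).as("group"))
  .agg(max("c1"), countDistinct("c3"))
  .orderBy("group")
  .collect()
{code}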

> Flaky test: org.apache.spark.sql.hive.execution.ObjectHashAggregateSuite.randomized aggregation test - [with partial + unsafe, with distinct] - with grouping keys
> ------------------------------------------------------------------------------------------------------------------------------------------------------------------
>
>                 Key: SPARK-29140
>                 URL: https://issues.apache.org/jira/browse/SPARK-29140
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL, Tests
>    Affects Versions: 3.0.0
>            Reporter: Jungtaek Lim
>            Priority: Major
>
> [https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/110840/testReport/]
> {code:java}
> sbt.ForkMain$ForkError: org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree:
> Exchange rangepartitioning(group#100846 ASC NULLS FIRST, 5), true, [id=#110477]
> +- *(3) HashAggregate(keys=[(id#100836 % 10)#100858], functions=[max(c1#100838), count(distinct c3#100840)], output=[group#100846, max(c1)#100852, count(c3)#100853L])
>    +- Exchange hashpartitioning((id#100836 % 10)#100858, 5), true, [id=#110473]
>       +- *(2) HashAggregate(keys=[(id#100836 % 10)#100858], functions=[merge_max(c1#100838), partial_count(distinct c3#100840)], output=[(id#100836 % 10)#100858, max#100860, count#100863L])
>          +- *(2) HashAggregate(keys=[(id#100836 % 10)#100858, c3#100840], functions=[merge_max(c1#100838)], output=[(id#100836 % 10)#100858, c3#100840, max#100860])
>             +- Exchange hashpartitioning((id#100836 % 10)#100858, c3#100840, 5), true, [id=#110468]
>                +- *(1) HashAggregate(keys=[(id#100836 % 10) AS (id#100836 % 10)#100858, c3#100840], functions=[partial_max(c1#100838)], output=[(id#100836 % 10)#100858, c3#100840, max#100860])
>                   +- *(1) Project [id#100836, c1#100838, c3#100840]
>                      +- *(1) Scan ExistingRDD[id#100836,c0#100837,c1#100838,c2#100839,c3#100840]
> 	at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:56)
> 	at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.doExecute(ShuffleExchangeExec.scala:90)
> 	at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:189)
> 	at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:227)
> 	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
> 	at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:224)
> 	at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:185)
> 	at org.apache.spark.sql.execution.InputAdapter.inputRDD(WholeStageCodegenExec.scala:524)
> 	at org.apache.spark.sql.execution.InputRDDCodegen.inputRDDs(WholeStageCodegenExec.scala:452)
> 	at org.apache.spark.sql.execution.InputRDDCodegen.inputRDDs$(WholeStageCodegenExec.scala:451)
> 	at org.apache.spark.sql.execution.InputAdapter.inputRDDs(WholeStageCodegenExec.scala:495)
> 	at org.apache.spark.sql.execution.SortExec.inputRDDs(SortExec.scala:124)
> 	at org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:717)
> 	at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:189)
> 	at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:227)
> 	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
> 	at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:224)
> 	at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:185)
> 	at org.apache.spark.sql.execution.SparkPlan.getByteArrayRdd(SparkPlan.scala:329)
> 	at org.apache.spark.sql.execution.SparkPlan.executeCollect(SparkPlan.scala:378)
> 	at org.apache.spark.sql.Dataset.collectFromPlan(Dataset.scala:3382)
> 	at org.apache.spark.sql.Dataset.$anonfun$collect$1(Dataset.scala:2740)
> 	at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3372)
> 	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$4(SQLExecution.scala:100)
> 	at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:160)
> 	at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:87)
> 	at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3368)
> 	at org.apache.spark.sql.Dataset.collect(Dataset.scala:2740)
> 	at org.apache.spark.sql.hive.execution.ObjectHashAggregateSuite.$anonfun$makeRandomizedTests$8(ObjectHashAggregateSuite.scala:352)
> 	at org.apache.spark.sql.catalyst.plans.SQLHelper.withSQLConf(SQLHelper.scala:47)
> 	at org.apache.spark.sql.catalyst.plans.SQLHelper.withSQLConf$(SQLHelper.scala:31)
> 	at org.apache.spark.sql.hive.execution.ObjectHashAggregateSuite.org$apache$spark$sql$test$SQLTestUtilsBase$$super$withSQLConf(ObjectHashAggregateSuite.scala:37)
> 	at org.apache.spark.sql.test.SQLTestUtilsBase.withSQLConf(SQLTestUtils.scala:231)
> 	at org.apache.spark.sql.test.SQLTestUtilsBase.withSQLConf$(SQLTestUtils.scala:229)
> 	at org.apache.spark.sql.hive.execution.ObjectHashAggregateSuite.withSQLConf(ObjectHashAggregateSuite.scala:37)
> 	at org.apache.spark.sql.hive.execution.ObjectHashAggregateSuite.$anonfun$makeRandomizedTests$7(ObjectHashAggregateSuite.scala:339)
> 	at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
> 	at org.scalatest.OutcomeOf.outcomeOf(OutcomeOf.scala:85)
> 	at org.scalatest.OutcomeOf.outcomeOf$(OutcomeOf.scala:83)
> 	at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104)
> 	at org.scalatest.Transformer.apply(Transformer.scala:22)
> 	at org.scalatest.Transformer.apply(Transformer.scala:20)
> 	at org.scalatest.FunSuiteLike$$anon$1.apply(FunSuiteLike.scala:186)
> 	at org.apache.spark.SparkFunSuite.withFixture(SparkFunSuite.scala:149)
> 	at org.scalatest.FunSuiteLike.invokeWithFixture$1(FunSuiteLike.scala:184)
> 	at org.scalatest.FunSuiteLike.$anonfun$runTest$1(FunSuiteLike.scala:196)
> 	at org.scalatest.SuperEngine.runTestImpl(Engine.scala:289)
> 	at org.scalatest.FunSuiteLike.runTest(FunSuiteLike.scala:196)
> 	at org.scalatest.FunSuiteLike.runTest$(FunSuiteLike.scala:178)
> 	at org.apache.spark.SparkFunSuite.org$scalatest$BeforeAndAfterEach$$super$runTest(SparkFunSuite.scala:56)
> 	at org.scalatest.BeforeAndAfterEach.runTest(BeforeAndAfterEach.scala:221)
> 	at org.scalatest.BeforeAndAfterEach.runTest$(BeforeAndAfterEach.scala:214)
> 	at org.apache.spark.SparkFunSuite.runTest(SparkFunSuite.scala:56)
> 	at org.scalatest.FunSuiteLike.$anonfun$runTests$1(FunSuiteLike.scala:229)
> 	at org.scalatest.SuperEngine.$anonfun$runTestsInBranch$1(Engine.scala:396)
> 	at scala.collection.immutable.List.foreach(List.scala:392)
> 	at org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:384)
> 	at org.scalatest.SuperEngine.runTestsInBranch(Engine.scala:379)
> 	at org.scalatest.SuperEngine.runTestsImpl(Engine.scala:461)
> 	at org.scalatest.FunSuiteLike.runTests(FunSuiteLike.scala:229)
> 	at org.scalatest.FunSuiteLike.runTests$(FunSuiteLike.scala:228)
> 	at org.scalatest.FunSuite.runTests(FunSuite.scala:1560)
> 	at org.scalatest.Suite.run(Suite.scala:1147)
> 	at org.scalatest.Suite.run$(Suite.scala:1129)
> 	at org.scalatest.FunSuite.org$scalatest$FunSuiteLike$$super$run(FunSuite.scala:1560)
> 	at org.scalatest.FunSuiteLike.$anonfun$run$1(FunSuiteLike.scala:233)
> 	at org.scalatest.SuperEngine.runImpl(Engine.scala:521)
> 	at org.scalatest.FunSuiteLike.run(FunSuiteLike.scala:233)
> 	at org.scalatest.FunSuiteLike.run$(FunSuiteLike.scala:232)
> 	at org.apache.spark.SparkFunSuite.org$scalatest$BeforeAndAfterAll$$super$run(SparkFunSuite.scala:56)
> 	at org.scalatest.BeforeAndAfterAll.liftedTree1$1(BeforeAndAfterAll.scala:213)
> 	at org.scalatest.BeforeAndAfterAll.run(BeforeAndAfterAll.scala:210)
> 	at org.scalatest.BeforeAndAfterAll.run$(BeforeAndAfterAll.scala:208)
> 	at org.apache.spark.SparkFunSuite.run(SparkFunSuite.scala:56)
> 	at org.scalatest.tools.Framework.org$scalatest$tools$Framework$$runSuite(Framework.scala:314)
> 	at org.scalatest.tools.Framework$ScalaTestTask.execute(Framework.scala:507)
> 	at sbt.ForkMain$Run$2.call(ForkMain.java:296)
> 	at sbt.ForkMain$Run$2.call(ForkMain.java:286)
> 	at java.util.concurrent.FutureTask.run(FutureTask.java:266)
> 	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
> 	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
> 	at java.lang.Thread.run(Thread.java:748)
> Caused by: sbt.ForkMain$ForkError: org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree:
> Exchange hashpartitioning((id#100836 % 10)#100858, 5), true, [id=#110473]
> +- *(2) HashAggregate(keys=[(id#100836 % 10)#100858], functions=[merge_max(c1#100838), partial_count(distinct c3#100840)], output=[(id#100836 % 10)#100858, max#100860, count#100863L])
>    +- *(2) HashAggregate(keys=[(id#100836 % 10)#100858, c3#100840], functions=[merge_max(c1#100838)], output=[(id#100836 % 10)#100858, c3#100840, max#100860])
>       +- Exchange hashpartitioning((id#100836 % 10)#100858, c3#100840, 5), true, [id=#110468]
>          +- *(1) HashAggregate(keys=[(id#100836 % 10) AS (id#100836 % 10)#100858, c3#100840], functions=[partial_max(c1#100838)], output=[(id#100836 % 10)#100858, c3#100840, max#100860])
>             +- *(1) Project [id#100836, c1#100838, c3#100840]
>                +- *(1) Scan ExistingRDD[id#100836,c0#100837,c1#100838,c2#100839,c3#100840]
> 	at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:56)
> 	at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.doExecute(ShuffleExchangeExec.scala:90)
> 	at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:189)
> 	at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:227)
> 	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
> 	at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:224)
> 	at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:185)
> 	at org.apache.spark.sql.execution.InputAdapter.inputRDD(WholeStageCodegenExec.scala:524)
> 	at org.apache.spark.sql.execution.InputRDDCodegen.inputRDDs(WholeStageCodegenExec.scala:452)
> 	at org.apache.spark.sql.execution.InputRDDCodegen.inputRDDs$(WholeStageCodegenExec.scala:451)
> 	at org.apache.spark.sql.execution.InputAdapter.inputRDDs(WholeStageCodegenExec.scala:495)
> 	at org.apache.spark.sql.execution.aggregate.HashAggregateExec.inputRDDs(HashAggregateExec.scala:160)
> 	at org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:717)
> 	at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:189)
> 	at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:227)
> 	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
> 	at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:224)
> 	at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:185)
> 	at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.inputRDD$lzycompute(ShuffleExchangeExec.scala:64)
> 	at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.inputRDD(ShuffleExchangeExec.scala:64)
> 	at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.shuffleDependency$lzycompute(ShuffleExchangeExec.scala:74)
> 	at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.shuffleDependency(ShuffleExchangeExec.scala:72)
> 	at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.createShuffledRDD(ShuffleExchangeExec.scala:82)
> 	at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.$anonfun$doExecute$1(ShuffleExchangeExec.scala:93)
> 	at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52)
> 	... 81 more
> Caused by: sbt.ForkMain$ForkError: java.util.concurrent.ExecutionException: org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 380, Column 38: failed to compile: org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 380, Column 38: IDENTIFIER expected instead of '['
> 	at com.google.common.util.concurrent.AbstractFuture$Sync.getValue(AbstractFuture.java:306)
> 	at com.google.common.util.concurrent.AbstractFuture$Sync.get(AbstractFuture.java:293)
> 	at com.google.common.util.concurrent.AbstractFuture.get(AbstractFuture.java:116)
> 	at com.google.common.util.concurrent.Uninterruptibles.getUninterruptibly(Uninterruptibles.java:135)
> 	at com.google.common.cache.LocalCache$Segment.getAndRecordStats(LocalCache.java:2410)
> 	at com.google.common.cache.LocalCache$Segment.loadSync(LocalCache.java:2380)
> 	at com.google.common.cache.LocalCache$Segment.lockedGetOrLoad(LocalCache.java:2342)
> 	at com.google.common.cache.LocalCache$Segment.get(LocalCache.java:2257)
> 	at com.google.common.cache.LocalCache.get(LocalCache.java:4000)
> 	at com.google.common.cache.LocalCache.getOrLoad(LocalCache.java:4004)
> 	at com.google.common.cache.LocalCache$LocalLoadingCache.get(LocalCache.java:4874)
> 	at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$.compile(CodeGenerator.scala:1314)
> 	at org.apache.spark.sql.execution.WholeStageCodegenExec.liftedTree1$1(WholeStageCodegenExec.scala:692)
> 	at org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:691)
> 	at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:189)
> 	at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:227)
> 	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
> 	at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:224)
> 	at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:185)
> 	at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.inputRDD$lzycompute(ShuffleExchangeExec.scala:64)
> 	at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.inputRDD(ShuffleExchangeExec.scala:64)
> 	at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.shuffleDependency$lzycompute(ShuffleExchangeExec.scala:74)
> 	at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.shuffleDependency(ShuffleExchangeExec.scala:72)
> 	at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.createShuffledRDD(ShuffleExchangeExec.scala:82)
> 	at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.$anonfun$doExecute$1(ShuffleExchangeExec.scala:93)
> 	at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52)
> 	... 105 more
> Caused by: sbt.ForkMain$ForkError: org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 380, Column 38: failed to compile: org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 380, Column 38: IDENTIFIER expected instead of '['
> 	at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$.org$apache$spark$sql$catalyst$expressions$codegen$CodeGenerator$$doCompile(CodeGenerator.scala:1380)
> 	at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$$anon$1.load(CodeGenerator.scala:1456)
> 	at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$$anon$1.load(CodeGenerator.scala:1453)
> 	at com.google.common.cache.LocalCache$LoadingValueReference.loadFuture(LocalCache.java:3599)
> 	at com.google.common.cache.LocalCache$Segment.loadSync(LocalCache.java:2379)
> 	... 125 more
> {code}
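
One triage step worth noting (my suggestion, not something already tried in this ticket): rerun the same aggregation with whole-stage codegen disabled. If it then passes, the bug is confined to the generated Java rather than the aggregation logic, which narrows the search to the codegen path.

{code:scala}
import org.apache.spark.sql.functions._

// Continues the sketch in my comment above (reuses df). Turning off
// "spark.sql.codegen.wholeStage" falls back to the non-fused iterator
// path, so a whole-stage-compile-only bug should no longer reproduce.
spark.conf.set("spark.sql.codegen.wholeStage", "false")
df.groupBy((col("id") % 10).as("group"))
  .agg(max("c1"), countDistinct("c3"))
  .orderBy("group")
  .collect()
{code}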


