You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@parquet.apache.org by "Yuming Wang (Jira)" <ji...@apache.org> on 2021/01/20 16:01:00 UTC

[jira] [Commented] (PARQUET-1746) Changed the data order after DataFrame reuse

    [ https://issues.apache.org/jira/browse/PARQUET-1746?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17268661#comment-17268661 ] 

Yuming Wang commented on PARQUET-1746:
--------------------------------------

https://github.com/apache/spark/pull/26804#discussion_r561044576

> Changed the data order after DataFrame reuse
> --------------------------------------------
>
>                 Key: PARQUET-1746
>                 URL: https://issues.apache.org/jira/browse/PARQUET-1746
>             Project: Parquet
>          Issue Type: Sub-task
>          Components: parquet-mr
>    Affects Versions: 1.11.0
>            Reporter: Yuming Wang
>            Priority: Major
>
> How to reproduce:
> {code:sh}
> git clone https://github.com/apache/spark.git && cd spark
> git fetch origin pull/26804/head:PARQUET-1746
> git checkout PARQUET-1746
> build/sbt "sql/test-only *StreamSuite"
> {code}
> Output:
> {noformat}
> sbt.ForkMain$ForkError: org.scalatest.exceptions.TestFailedException: 
> Decoded objects do not match expected objects:
> expected: WrappedArray(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
> actual:   WrappedArray(0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 2)
> assertnotnull(upcast(getcolumnbyordinal(0, LongType), LongType, - root class: "scala.Long"))
> +- upcast(getcolumnbyordinal(0, LongType), LongType, - root class: "scala.Long")
>    +- getcolumnbyordinal(0, LongType)
>          
> 	at org.scalatest.Assertions.newAssertionFailedException(Assertions.scala:530)
> 	at org.scalatest.Assertions.newAssertionFailedException$(Assertions.scala:529)
> 	at org.scalatest.FunSuite.newAssertionFailedException(FunSuite.scala:1560)
> 	at org.scalatest.Assertions.fail(Assertions.scala:1091)
> 	at org.scalatest.Assertions.fail$(Assertions.scala:1087)
> 	at org.scalatest.FunSuite.fail(FunSuite.scala:1560)
> 	at org.apache.spark.sql.QueryTest.checkDataset(QueryTest.scala:73)
> 	at org.apache.spark.sql.streaming.StreamSuite.$anonfun$new$22(StreamSuite.scala:215)
> 	at org.apache.spark.sql.streaming.StreamSuite.$anonfun$new$22$adapted(StreamSuite.scala:208)
> 	at org.apache.spark.sql.test.SQLTestUtils.$anonfun$withTempDir$1(SQLTestUtils.scala:76)
> 	at org.apache.spark.sql.test.SQLTestUtils.$anonfun$withTempDir$1$adapted(SQLTestUtils.scala:75)
> 	at org.apache.spark.SparkFunSuite.withTempDir(SparkFunSuite.scala:161)
> 	at org.apache.spark.sql.streaming.StreamSuite.org$apache$spark$sql$test$SQLTestUtils$$super$withTempDir(StreamSuite.scala:51)
> 	at org.apache.spark.sql.test.SQLTestUtils.withTempDir(SQLTestUtils.scala:75)
> 	at org.apache.spark.sql.test.SQLTestUtils.withTempDir$(SQLTestUtils.scala:74)
> 	at org.apache.spark.sql.streaming.StreamSuite.withTempDir(StreamSuite.scala:51)
> 	at org.apache.spark.sql.streaming.StreamSuite.$anonfun$new$21(StreamSuite.scala:208)
> 	at org.apache.spark.sql.streaming.StreamSuite.$anonfun$new$21$adapted(StreamSuite.scala:207)
> 	at org.apache.spark.sql.test.SQLTestUtils.$anonfun$withTempDir$1(SQLTestUtils.scala:76)
> 	at org.apache.spark.sql.test.SQLTestUtils.$anonfun$withTempDir$1$adapted(SQLTestUtils.scala:75)
> 	at org.apache.spark.SparkFunSuite.withTempDir(SparkFunSuite.scala:161)
> 	at org.apache.spark.sql.streaming.StreamSuite.org$apache$spark$sql$test$SQLTestUtils$$super$withTempDir(StreamSuite.scala:51)
> 	at org.apache.spark.sql.test.SQLTestUtils.withTempDir(SQLTestUtils.scala:75)
> 	at org.apache.spark.sql.test.SQLTestUtils.withTempDir$(SQLTestUtils.scala:74)
> 	at org.apache.spark.sql.streaming.StreamSuite.withTempDir(StreamSuite.scala:51)
> 	at org.apache.spark.sql.streaming.StreamSuite.assertDF$1(StreamSuite.scala:207)
> 	at org.apache.spark.sql.streaming.StreamSuite.$anonfun$new$25(StreamSuite.scala:226)
> 	at org.apache.spark.sql.catalyst.plans.SQLHelper.withSQLConf(SQLHelper.scala:52)
> 	at org.apache.spark.sql.catalyst.plans.SQLHelper.withSQLConf$(SQLHelper.scala:36)
> 	at org.apache.spark.sql.streaming.StreamSuite.org$apache$spark$sql$test$SQLTestUtilsBase$$super$withSQLConf(StreamSuite.scala:51)
> 	at org.apache.spark.sql.test.SQLTestUtilsBase.withSQLConf(SQLTestUtils.scala:231)
> 	at org.apache.spark.sql.test.SQLTestUtilsBase.withSQLConf$(SQLTestUtils.scala:229)
> 	at org.apache.spark.sql.streaming.StreamSuite.withSQLConf(StreamSuite.scala:51)
> 	at org.apache.spark.sql.streaming.StreamSuite.$anonfun$new$24(StreamSuite.scala:225)
> 	at org.apache.spark.sql.streaming.StreamSuite.$anonfun$new$24$adapted(StreamSuite.scala:224)
> 	at scala.collection.immutable.List.foreach(List.scala:392)
> 	at org.apache.spark.sql.streaming.StreamSuite.$anonfun$new$20(StreamSuite.scala:224)
> 	at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
> 	at org.scalatest.OutcomeOf.outcomeOf(OutcomeOf.scala:85)
> 	at org.scalatest.OutcomeOf.outcomeOf$(OutcomeOf.scala:83)
> 	at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104)
> 	at org.scalatest.Transformer.apply(Transformer.scala:22)
> 	at org.scalatest.Transformer.apply(Transformer.scala:20)
> 	at org.scalatest.FunSuiteLike$$anon$1.apply(FunSuiteLike.scala:186)
> 	at org.apache.spark.SparkFunSuite.withFixture(SparkFunSuite.scala:149)
> 	at org.scalatest.FunSuiteLike.invokeWithFixture$1(FunSuiteLike.scala:184)
> 	at org.scalatest.FunSuiteLike.$anonfun$runTest$1(FunSuiteLike.scala:196)
> 	at org.scalatest.SuperEngine.runTestImpl(Engine.scala:286)
> 	at org.scalatest.FunSuiteLike.runTest(FunSuiteLike.scala:196)
> 	at org.scalatest.FunSuiteLike.runTest$(FunSuiteLike.scala:178)
> 	at org.apache.spark.SparkFunSuite.org$scalatest$BeforeAndAfterEach$$super$runTest(SparkFunSuite.scala:56)
> 	at org.scalatest.BeforeAndAfterEach.runTest(BeforeAndAfterEach.scala:221)
> 	at org.scalatest.BeforeAndAfterEach.runTest$(BeforeAndAfterEach.scala:214)
> 	at org.apache.spark.SparkFunSuite.runTest(SparkFunSuite.scala:56)
> 	at org.scalatest.FunSuiteLike.$anonfun$runTests$1(FunSuiteLike.scala:229)
> 	at org.scalatest.SuperEngine.$anonfun$runTestsInBranch$1(Engine.scala:393)
> 	at scala.collection.immutable.List.foreach(List.scala:392)
> 	at org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:381)
> 	at org.scalatest.SuperEngine.runTestsInBranch(Engine.scala:376)
> 	at org.scalatest.SuperEngine.runTestsImpl(Engine.scala:458)
> 	at org.scalatest.FunSuiteLike.runTests(FunSuiteLike.scala:229)
> 	at org.scalatest.FunSuiteLike.runTests$(FunSuiteLike.scala:228)
> 	at org.scalatest.FunSuite.runTests(FunSuite.scala:1560)
> 	at org.scalatest.Suite.run(Suite.scala:1124)
> 	at org.scalatest.Suite.run$(Suite.scala:1106)
> 	at org.scalatest.FunSuite.org$scalatest$FunSuiteLike$$super$run(FunSuite.scala:1560)
> 	at org.scalatest.FunSuiteLike.$anonfun$run$1(FunSuiteLike.scala:233)
> 	at org.scalatest.SuperEngine.runImpl(Engine.scala:518)
> 	at org.scalatest.FunSuiteLike.run(FunSuiteLike.scala:233)
> 	at org.scalatest.FunSuiteLike.run$(FunSuiteLike.scala:232)
> 	at org.apache.spark.SparkFunSuite.org$scalatest$BeforeAndAfterAll$$super$run(SparkFunSuite.scala:56)
> 	at org.scalatest.BeforeAndAfterAll.liftedTree1$1(BeforeAndAfterAll.scala:213)
> 	at org.scalatest.BeforeAndAfterAll.run(BeforeAndAfterAll.scala:210)
> 	at org.scalatest.BeforeAndAfterAll.run$(BeforeAndAfterAll.scala:208)
> 	at org.apache.spark.SparkFunSuite.run(SparkFunSuite.scala:56)
> 	at org.scalatest.tools.Framework.org$scalatest$tools$Framework$$runSuite(Framework.scala:317)
> 	at org.scalatest.tools.Framework$ScalaTestTask.execute(Framework.scala:510)
> 	at sbt.ForkMain$Run$2.call(ForkMain.java:296)
> 	at sbt.ForkMain$Run$2.call(ForkMain.java:286)
> 	at java.util.concurrent.FutureTask.run(FutureTask.java:266)
> 	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
> 	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
> 	at java.lang.Thread.run(Thread.java:748)
> {noformat}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)