You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@parquet.apache.org by "Yuming Wang (Jira)" <ji...@apache.org> on 2020/01/10 10:17:00 UTC

[jira] [Updated] (PARQUET-1745) No result for partition key included in Parquet file

     [ https://issues.apache.org/jira/browse/PARQUET-1745?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Yuming Wang updated PARQUET-1745:
---------------------------------
    Attachment: FilterByColumnIndex.png

> No result for partition key included in Parquet file
> ----------------------------------------------------
>
>                 Key: PARQUET-1745
>                 URL: https://issues.apache.org/jira/browse/PARQUET-1745
>             Project: Parquet
>          Issue Type: Sub-task
>          Components: parquet-mr
>    Affects Versions: 1.11.0
>            Reporter: Yuming Wang
>            Priority: Major
>         Attachments: FilterByColumnIndex.png
>
>
> How to reproduce:
> {code:sh}
> git clone https://github.com/apache/spark.git && cd spark
> git fetch origin pull/26804/head:PARQUET-1745
> git checkout PARQUET-1745
> build/sbt "sql/test-only *ParquetV2PartitionDiscoverySuite"
> {code}
> Output (the query filtering on pi = 1 returns 0 rows instead of the expected 20):
> {noformat}
> [info] - read partitioned table - partition key included in Parquet file *** FAILED *** (1 second, 57 milliseconds)
> [info]   Results do not match for query:
> [info]   Timezone: sun.util.calendar.ZoneInfo[id="America/Los_Angeles",offset=-28800000,dstSavings=3600000,useDaylight=true,transitions=185,lastRule=java.util.SimpleTimeZone[id=America/Los_Angeles,offset=-28800000,dstSavings=3600000,useDaylight=true,startYear=0,startMode=3,startMonth=2,startDay=8,startDayOfWeek=1,startTime=7200000,startTimeMode=0,endMode=3,endMonth=10,endDay=1,endDayOfWeek=1,endTime=7200000,endTimeMode=0]]
> [info]   Timezone Env:
> [info]
> [info]   == Parsed Logical Plan ==
> [info]   'Project [*]
> [info]   +- 'Filter ('pi = 1)
> [info]      +- 'UnresolvedRelation [t]
> [info]
> [info]   == Analyzed Logical Plan ==
> [info]   intField: int, stringField: string, pi: int, ps: string
> [info]   Project [intField#1788, stringField#1789, pi#1790, ps#1791]
> [info]   +- Filter (pi#1790 = 1)
> [info]      +- SubqueryAlias `t`
> [info]         +- RelationV2[intField#1788, stringField#1789, pi#1790, ps#1791] parquet file:/root/opensource/apache-spark/target/tmp/spark-c7e85130-3e1f-4137-ac7c-32f48be3b74a
> [info]
> [info]   == Optimized Logical Plan ==
> [info]   Filter (isnotnull(pi#1790) AND (pi#1790 = 1))
> [info]   +- RelationV2[intField#1788, stringField#1789, pi#1790, ps#1791] parquet file:/root/opensource/apache-spark/target/tmp/spark-c7e85130-3e1f-4137-ac7c-32f48be3b74a
> [info]
> [info]   == Physical Plan ==
> [info]   *(1) Project [intField#1788, stringField#1789, pi#1790, ps#1791]
> [info]   +- *(1) Filter (isnotnull(pi#1790) AND (pi#1790 = 1))
> [info]      +- *(1) ColumnarToRow
> [info]         +- BatchScan[intField#1788, stringField#1789, pi#1790, ps#1791] ParquetScan Location: InMemoryFileIndex[file:/root/opensource/apache-spark/target/tmp/spark-c7e85130-3e1f-4137-ac7c-32f..., ReadSchema: struct<intField:int,stringField:string>, PushedFilters: [IsNotNull(pi), EqualTo(pi,1)]
> [info]
> [info]   == Results ==
> [info]
> [info]   == Results ==
> [info]   !== Correct Answer - 20 ==   == Spark Answer - 0 ==
> [info]    struct<>                    struct<>
> [info]   ![1,1,1,bar]
> [info]   ![1,1,1,foo]
> [info]   ![10,10,1,bar]
> [info]   ![10,10,1,foo]
> [info]   ![2,2,1,bar]
> [info]   ![2,2,1,foo]
> [info]   ![3,3,1,bar]
> [info]   ![3,3,1,foo]
> [info]   ![4,4,1,bar]
> [info]   ![4,4,1,foo]
> [info]   ![5,5,1,bar]
> [info]   ![5,5,1,foo]
> [info]   ![6,6,1,bar]
> [info]   ![6,6,1,foo]
> [info]   ![7,7,1,bar]
> [info]   ![7,7,1,foo]
> [info]   ![8,8,1,bar]
> [info]   ![8,8,1,foo]
> [info]   ![9,9,1,bar]
> [info]   ![9,9,1,foo] (QueryTest.scala:248)
> [info]   org.scalatest.exceptions.TestFailedException:
> [info]   at org.scalatest.Assertions.newAssertionFailedException(Assertions.scala:530)
> [info]   at org.scalatest.Assertions.newAssertionFailedException$(Assertions.scala:529)
> [info]   at org.apache.spark.sql.QueryTest$.newAssertionFailedException(QueryTest.scala:238)
> [info]   at org.scalatest.Assertions.fail(Assertions.scala:1091)
> [info]   at org.scalatest.Assertions.fail$(Assertions.scala:1087)
> [info]   at org.apache.spark.sql.QueryTest$.fail(QueryTest.scala:238)
> [info]   at org.apache.spark.sql.QueryTest$.checkAnswer(QueryTest.scala:248)
> [info]   at org.apache.spark.sql.QueryTest.checkAnswer(QueryTest.scala:156)
> [info]   at org.apache.spark.sql.execution.datasources.parquet.ParquetV2PartitionDiscoverySuite.$anonfun$new$194(ParquetPartitionDiscoverySuite.scala:1232)
> [info]   at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
> [info]   at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377)
> [info]   at org.apache.spark.sql.test.SQLTestUtilsBase.withTempView(SQLTestUtils.scala:260)
> [info]   at org.apache.spark.sql.test.SQLTestUtilsBase.withTempView$(SQLTestUtils.scala:258)
> [info]   at org.apache.spark.sql.execution.datasources.parquet.ParquetPartitionDiscoverySuite.withTempView(ParquetPartitionDiscoverySuite.scala:53)
> [info]   at org.apache.spark.sql.execution.datasources.parquet.ParquetV2PartitionDiscoverySuite.$anonfun$new$190(ParquetPartitionDiscoverySuite.scala:1212)
> [info]   at org.apache.spark.sql.execution.datasources.parquet.ParquetV2PartitionDiscoverySuite.$anonfun$new$190$adapted(ParquetPartitionDiscoverySuite.scala:1200)
> [info]   at org.apache.spark.sql.test.SQLTestUtils.$anonfun$withTempDir$1(SQLTestUtils.scala:76)
> [info]   at org.apache.spark.sql.test.SQLTestUtils.$anonfun$withTempDir$1$adapted(SQLTestUtils.scala:75)
> [info]   at org.apache.spark.SparkFunSuite.withTempDir(SparkFunSuite.scala:161)
> [info]   at org.apache.spark.sql.execution.datasources.parquet.ParquetPartitionDiscoverySuite.org$apache$spark$sql$test$SQLTestUtils$$super$withTempDir(ParquetPartitionDiscoverySuite.scala:53)
> [info]   at org.apache.spark.sql.test.SQLTestUtils.withTempDir(SQLTestUtils.scala:75)
> [info]   at org.apache.spark.sql.test.SQLTestUtils.withTempDir$(SQLTestUtils.scala:74)
> [info]   at org.apache.spark.sql.execution.datasources.parquet.ParquetPartitionDiscoverySuite.withTempDir(ParquetPartitionDiscoverySuite.scala:53)
> [info]   at org.apache.spark.sql.execution.datasources.parquet.ParquetV2PartitionDiscoverySuite.$anonfun$new$189(ParquetPartitionDiscoverySuite.scala:1200)
> [info]   at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
> [info]   at org.scalatest.OutcomeOf.outcomeOf(OutcomeOf.scala:85)
> [info]   at org.scalatest.OutcomeOf.outcomeOf$(OutcomeOf.scala:83)
> [info]   at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104)
> [info]   at org.scalatest.Transformer.apply(Transformer.scala:22)
> [info]   at org.scalatest.Transformer.apply(Transformer.scala:20)
> [info]   at org.scalatest.FunSuiteLike$$anon$1.apply(FunSuiteLike.scala:186)
> [info]   at org.apache.spark.SparkFunSuite.withFixture(SparkFunSuite.scala:149)
> [info]   at org.scalatest.FunSuiteLike.invokeWithFixture$1(FunSuiteLike.scala:184)
> [info]   at org.scalatest.FunSuiteLike.$anonfun$runTest$1(FunSuiteLike.scala:196)
> [info]   at org.scalatest.SuperEngine.runTestImpl(Engine.scala:286)
> [info]   at org.scalatest.FunSuiteLike.runTest(FunSuiteLike.scala:196)
> [info]   at org.scalatest.FunSuiteLike.runTest$(FunSuiteLike.scala:178)
> [info]   at org.apache.spark.SparkFunSuite.org$scalatest$BeforeAndAfterEach$$super$runTest(SparkFunSuite.scala:56)
> [info]   at org.scalatest.BeforeAndAfterEach.runTest(BeforeAndAfterEach.scala:221)
> [info]   at org.scalatest.BeforeAndAfterEach.runTest$(BeforeAndAfterEach.scala:214)
> [info]   at org.apache.spark.SparkFunSuite.runTest(SparkFunSuite.scala:56)
> [info]   at org.scalatest.FunSuiteLike.$anonfun$runTests$1(FunSuiteLike.scala:229)
> [info]   at org.scalatest.SuperEngine.$anonfun$runTestsInBranch$1(Engine.scala:393)
> [info]   at scala.collection.immutable.List.foreach(List.scala:392)
> [info]   at org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:381)
> [info]   at org.scalatest.SuperEngine.runTestsInBranch(Engine.scala:376)
> [info]   at org.scalatest.SuperEngine.runTestsImpl(Engine.scala:458)
> [info]   at org.scalatest.FunSuiteLike.runTests(FunSuiteLike.scala:229)
> [info]   at org.scalatest.FunSuiteLike.runTests$(FunSuiteLike.scala:228)
> [info]   at org.scalatest.FunSuite.runTests(FunSuite.scala:1560)
> [info]   at org.scalatest.Suite.run(Suite.scala:1124)
> [info]   at org.scalatest.Suite.run$(Suite.scala:1106)
> [info]   at org.scalatest.FunSuite.org$scalatest$FunSuiteLike$$super$run(FunSuite.scala:1560)
> [info]   at org.scalatest.FunSuiteLike.$anonfun$run$1(FunSuiteLike.scala:233)
> [info]   at org.scalatest.SuperEngine.runImpl(Engine.scala:518)
> [info]   at org.scalatest.FunSuiteLike.run(FunSuiteLike.scala:233)
> [info]   at org.scalatest.FunSuiteLike.run$(FunSuiteLike.scala:232)
> [info]   at org.apache.spark.SparkFunSuite.org$scalatest$BeforeAndAfterAll$$super$run(SparkFunSuite.scala:56)
> [info]   at org.scalatest.BeforeAndAfterAll.liftedTree1$1(BeforeAndAfterAll.scala:213)
> [info]   at org.scalatest.BeforeAndAfterAll.run(BeforeAndAfterAll.scala:210)
> [info]   at org.scalatest.BeforeAndAfterAll.run$(BeforeAndAfterAll.scala:208)
> [info]   at org.apache.spark.SparkFunSuite.run(SparkFunSuite.scala:56)
> [info]   at org.scalatest.tools.Framework.org$scalatest$tools$Framework$$runSuite(Framework.scala:317)
> [info]   at org.scalatest.tools.Framework$ScalaTestTask.execute(Framework.scala:510)
> [info]   at sbt.ForkMain$Run$2.call(ForkMain.java:296)
> [info]   at sbt.ForkMain$Run$2.call(ForkMain.java:286)
> [info]   at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:264)
> [info]   at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
> [info]   at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
> [info]   at java.base/java.lang.Thread.run(Thread.java:834)
> {noformat}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)