You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by GitBox <gi...@apache.org> on 2022/11/03 08:06:42 UTC

[GitHub] [spark] HyukjinKwon commented on a diff in pull request #38277: [SPARK-40815][SQL] Add `DelegateSymlinkTextInputFormat` to workaround `SymlinkTextInputSplit` bug

HyukjinKwon commented on code in PR #38277:
URL: https://github.com/apache/spark/pull/38277#discussion_r1012592088


##########
sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeReadWriteSuite.scala:
##########
@@ -218,4 +225,86 @@ class HiveSerDeReadWriteSuite extends QueryTest with SQLTestUtils with TestHiveS
       checkAnswer(spark.table("t1"), Seq(Row(Array("SPARK-34512", "HIVE-24797"))))
     }
   }
+
+  test("SPARK-40815: DelegateSymlinkTextInputFormat serialization") {

Review Comment:
   A test added in this PR fails on JDK 11 and 17 😢 (per the log below, the failing case is `SPARK-40815: Read SymlinkTextInputFormat`):
   https://github.com/apache/spark/actions/runs/3379157338/jobs/5610899432
   https://github.com/apache/spark/actions/runs/3381461270/jobs/5615405153
   
   ```
   [info] - SPARK-40815: Read SymlinkTextInputFormat *** FAILED *** (587 milliseconds)
   [info]   Results do not match for query:
   [info]   Timezone: sun.util.calendar.ZoneInfo[id="America/Los_Angeles",offset=-28800000,dstSavings=3600000,useDaylight=true,transitions=185,lastRule=java.util.SimpleTimeZone[id=America/Los_Angeles,offset=-28800000,dstSavings=3600000,useDaylight=true,startYear=0,startMode=3,startMonth=2,startDay=8,startDayOfWeek=1,startTime=7200000,startTimeMode=0,endMode=3,endMonth=10,endDay=1,endDayOfWeek=1,endTime=7200000,endTimeMode=0]]
   [info]   Timezone Env: 
   [info]   
   [info]   == Parsed Logical Plan ==
   [info]   'Sort ['id ASC NULLS FIRST], true
   [info]   +- 'Project ['id]
   [info]      +- 'UnresolvedRelation [t], [], false
   [info]   
   [info]   == Analyzed Logical Plan ==
   [info]   id: bigint
   [info]   Sort [id#175602L ASC NULLS FIRST], true
   [info]   +- Project [id#175602L]
   [info]      +- SubqueryAlias spark_catalog.default.t
   [info]         +- HiveTableRelation [`spark_catalog`.`default`.`t`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, Data Cols: [id#175602L], Partition Cols: []]
   [info]   
   [info]   == Optimized Logical Plan ==
   [info]   Sort [id#175602L ASC NULLS FIRST], true
   [info]   +- HiveTableRelation [`spark_catalog`.`default`.`t`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, Data Cols: [id#175602L], Partition Cols: []]
   [info]   
   [info]   == Physical Plan ==
   [info]   AdaptiveSparkPlan isFinalPlan=true
   [info]   +- == Final Plan ==
   [info]      LocalTableScan <empty>, [id#175602L]
   [info]   +- == Initial Plan ==
   [info]      Sort [id#175602L ASC NULLS FIRST], true, 0
   [info]      +- Exchange rangepartitioning(id#175602L ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [plan_id=176717]
   [info]         +- Scan hive spark_catalog.default.t [id#175602L], HiveTableRelation [`spark_catalog`.`default`.`t`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, Data Cols: [id#175602L], Partition Cols: []]
   [info]   
   [info]   == Results ==
   [info]   
   [info]   == Results ==
   [info]   !== Correct Answer - 10 ==   == Spark Answer - 0 ==
   [info]    struct<>                    struct<>
   [info]   ![0]                         
   [info]   ![1]                         
   [info]   ![2]                         
   [info]   ![3]                         
   [info]   ![4]                         
   [info]   ![5]                         
   [info]   ![6]                         
   [info]   ![7]                         
   [info]   ![8]                         
   [info]   ![9] (QueryTest.scala:243)
   [info]   org.scalatest.exceptions.TestFailedException:
   [info]   at org.scalatest.Assertions.newAssertionFailedException(Assertions.scala:472)
   [info]   at org.scalatest.Assertions.newAssertionFailedException$(Assertions.scala:471)
   [info]   at org.apache.spark.sql.QueryTest$.newAssertionFailedException(QueryTest.scala:233)
   [info]   at org.scalatest.Assertions.fail(Assertions.scala:933)
   [info]   at org.scalatest.Assertions.fail$(Assertions.scala:929)
   [info]   at org.apache.spark.sql.QueryTest$.fail(QueryTest.scala:233)
   [info]   at org.apache.spark.sql.QueryTest$.checkAnswer(QueryTest.scala:243)
   [info]   at org.apache.spark.sql.QueryTest.checkAnswer(QueryTest.scala:150)
   [info]   at org.apache.spark.sql.hive.execution.HiveSerDeReadWriteSuite.$anonfun$new$9(HiveSerDeReadWriteSuite.scala:293)
   [info]   at org.apache.spark.sql.hive.execution.HiveSerDeReadWriteSuite.$anonfun$new$9$adapted(HiveSerDeReadWriteSuite.scala:266)
   [info]   at org.apache.spark.sql.test.SQLTestUtils.$anonfun$withTempDir$1(SQLTestUtils.scala:79)
   [info]   at org.apache.spark.sql.test.SQLTestUtils.$anonfun$withTempDir$1$adapted(SQLTestUtils.scala:78)
   [info]   at org.apache.spark.SparkFunSuite.withTempDir(SparkFunSuite.scala:225)
   [info]   at org.apache.spark.sql.hive.execution.HiveSerDeReadWriteSuite.org$apache$spark$sql$test$SQLTestUtils$$super$withTempDir(HiveSerDeReadWriteSuite.scala:37)
   [info]   at org.apache.spark.sql.test.SQLTestUtils.withTempDir(SQLTestUtils.scala:78)
   [info]   at org.apache.spark.sql.test.SQLTestUtils.withTempDir$(SQLTestUtils.scala:77)
   [info]   at org.apache.spark.sql.hive.execution.HiveSerDeReadWriteSuite.withTempDir(HiveSerDeReadWriteSuite.scala:37)
   [info]   at org.apache.spark.sql.hive.execution.HiveSerDeReadWriteSuite.$anonfun$new$8(HiveSerDeReadWriteSuite.scala:266)
   [info]   at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
   [info]   at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1491)
   [info]   at org.apache.spark.sql.test.SQLTestUtilsBase.withTable(SQLTestUtils.scala:306)
   [info]   at org.apache.spark.sql.test.SQLTestUtilsBase.withTable$(SQLTestUtils.scala:304)
   [info]   at org.apache.spark.sql.hive.execution.HiveSerDeReadWriteSuite.withTable(HiveSerDeReadWriteSuite.scala:37)
   [info]   at org.apache.spark.sql.hive.execution.HiveSerDeReadWriteSuite.$anonfun$new$7(HiveSerDeReadWriteSuite.scala:266)
   [info]   at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
   [info]   at org.scalatest.OutcomeOf.outcomeOf(OutcomeOf.scala:85)
   [info]   at org.scalatest.OutcomeOf.outcomeOf$(OutcomeOf.scala:83)
   [info]   at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104)
   [info]   at org.scalatest.Transformer.apply(Transformer.scala:22)
   [info]   at org.scalatest.Transformer.apply(Transformer.scala:20)
   [info]   at org.scalatest.funsuite.AnyFunSuiteLike$$anon$1.apply(AnyFunSuiteLike.scala:226)
   [info]   at org.apache.spark.SparkFunSuite.withFixture(SparkFunSuite.scala:207)
   [info]   at org.scalatest.funsuite.AnyFunSuiteLike.invokeWithFixture$1(AnyFunSuiteLike.scala:224)
   [info]   at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$runTest$1(AnyFunSuiteLike.scala:236)
   [info]   at org.scalatest.SuperEngine.runTestImpl(Engine.scala:306)
   [info]   at org.scalatest.funsuite.AnyFunSuiteLike.runTest(AnyFunSuiteLike.scala:236)
   [info]   at org.scalatest.funsuite.AnyFunSuiteLike.runTest$(AnyFunSuiteLike.scala:218)
   [info]   at org.apache.spark.SparkFunSuite.org$scalatest$BeforeAndAfterEach$$super$runTest(SparkFunSuite.scala:66)
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org