You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by GitBox <gi...@apache.org> on 2020/09/16 12:09:34 UTC

[GitHub] [spark] peter-toth commented on a change in pull request #29737: [SPARK-32864][SQL] Support ORC forced positional evolution

peter-toth commented on a change in pull request #29737:
URL: https://github.com/apache/spark/pull/29737#discussion_r489386143



##########
File path: sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala
##########
@@ -316,4 +317,24 @@ class HiveOrcQuerySuite extends OrcQueryTest with TestHiveSingleton {
       }
     }
   }
+
+  test("SPARK-32864: Support ORC forced positional evolution") {
+    Seq("native", "hive").foreach { orcImpl =>
+      withSQLConf(SQLConf.ORC_IMPLEMENTATION.key -> orcImpl,
+        OrcConf.FORCE_POSITIONAL_EVOLUTION.getAttribute -> "true") {
+        withTempPath { f =>
+          val path = f.getCanonicalPath
+          Seq(1 -> 2).toDF("c1", "c2").write.orc(path)
+          checkAnswer(spark.read.orc(path), Row(1, 2))
+
+          withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> "true") { // default since 2.3.0
+            withTable("t") {
+              sql(s"CREATE EXTERNAL TABLE t(c3 INT, c4 INT) STORED AS ORC LOCATION '$path'")
+              checkAnswer(spark.table("t"), Row(1, 2))

Review comment:
       Thanks. I've extended the test with that case.

##########
File path: sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcUtils.scala
##########
@@ -142,11 +142,12 @@ object OrcUtils extends Logging {
       reader: Reader,
       conf: Configuration): Option[(Array[Int], Boolean)] = {
     val orcFieldNames = reader.getSchema.getFieldNames.asScala
+    val forcePositionalEvolution = OrcConf.FORCE_POSITIONAL_EVOLUTION.getBoolean(conf)
     if (orcFieldNames.isEmpty) {
       // SPARK-8501: Some old empty ORC files always have an empty schema stored in their footer.
       None
     } else {
-      if (orcFieldNames.forall(_.startsWith("_col"))) {
+      if (forcePositionalEvolution || orcFieldNames.forall(_.startsWith("_col"))) {
         // This is a ORC file written by Hive, no field names in the physical schema, assume the

Review comment:
       Done.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org