You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by dongjoon-hyun <gi...@git.apache.org> on 2017/10/12 19:27:09 UTC

[GitHub] spark pull request #19470: [SPARK-14387][SPARK-18355][SQL] Use Spark schema ...

GitHub user dongjoon-hyun commented on a diff in the pull request:

    https://github.com/apache/spark/pull/19470#discussion_r144387023
  
    --- Diff: sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala ---
    @@ -2050,4 +2050,80 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
           }
         }
       }
    +
    +  test("SPARK-18355 Use Spark schema to read ORC table instead of ORC file schema") {
    +    val client = spark.sharedState.externalCatalog.asInstanceOf[HiveExternalCatalog].client
    +
    +    Seq("true", "false").foreach { value =>
    +      withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> value) {
    +        withTempDatabase { db =>
    +          client.runSqlHive(
    +            s"""
    +               |CREATE TABLE $db.t(
    +               |  click_id string,
    +               |  search_id string,
    +               |  uid bigint)
    +               |PARTITIONED BY (
    +               |  ts string,
    +               |  hour string)
    +               |STORED AS ORC
    +             """.stripMargin)
    +
    +          client.runSqlHive(
    +            s"""
    +               |INSERT INTO TABLE $db.t
    +               |PARTITION (ts = '98765', hour = '01')
    +               |VALUES (12, 2, 12345)
    +             """.stripMargin
    +          )
    +
    +          checkAnswer(
    +            sql(s"SELECT * FROM $db.t"),
    +            Row("12", "2", 12345, "98765", "01"))
    +
    +          client.runSqlHive(s"ALTER TABLE $db.t ADD COLUMNS (dummy string)")
    +
    +          checkAnswer(
    +            sql(s"SELECT click_id, search_id FROM $db.t"),
    +            Row("12", "2"))
    +
    +          checkAnswer(
    +            sql(s"SELECT search_id, click_id FROM $db.t"),
    +            Row("2", "12"))
    +
    +          checkAnswer(
    +            sql(s"SELECT search_id FROM $db.t"),
    +            Row("2"))
    +
    +          checkAnswer(
    +            sql(s"SELECT dummy, click_id FROM $db.t"),
    +            Row(null, "12"))
    +
    +          checkAnswer(
    +            sql(s"SELECT * FROM $db.t"),
    +            Row("12", "2", 12345, null, "98765", "01"))
    +        }
    +      }
    +    }
    +  }
    +
    +  // This test case is added to prevent regression.
    +  test("SPARK-22267 Spark SQL incorrectly reads ORC files when column order is different") {
    --- End diff --
    
    This test case was added to prevent regressions, as you requested, @gatorsmile.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org