You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by clockfly <gi...@git.apache.org> on 2016/07/06 23:35:22 UTC

[GitHub] spark pull request #13494: [SPARK-15752] [SQL] Optimize metadata only query ...

Github user clockfly commented on a diff in the pull request:

    https://github.com/apache/spark/pull/13494#discussion_r69830577
  
    --- Diff: sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala ---
    @@ -1689,4 +1689,86 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
           )
         }
       }
    +
    +  test("spark-15752 optimize metadata only query for hive table") {
    +    withSQLConf(SQLConf.OPTIMIZER_METADATA_ONLY.key -> "true") {
    +      withTable("data_15752", "srcpart_15752", "srctext_15752") {
    +        val df = Seq((1, "2"), (3, "4")).toDF("key", "value")
    +        df.createOrReplaceTempView("data_15752")
    +        sql(
    +          """
    +            |CREATE TABLE srcpart_15752 (col1 INT, col2 STRING)
    +            |PARTITIONED BY (partcol1 INT, partcol2 STRING) STORED AS parquet
    +          """.stripMargin)
    +        for (partcol1 <- Seq(11, 12); partcol2 <- Seq("a", "b")) {
    +          sql(
    +            s"""
    +              |INSERT OVERWRITE TABLE srcpart_15752
    +              |PARTITION (partcol1='$partcol1', partcol2='$partcol2')
    +              |select key, value from data_15752
    +            """.stripMargin)
    +        }
    +        checkAnswer(
    +          sql("select partcol1 from srcpart_15752 where partcol1 = 11 group by partcol1"),
    +          Row(11))
    +        checkAnswer(sql("select max(partcol1) from srcpart_15752"), Row(12))
    +        checkAnswer(sql("select max(partcol1) from srcpart_15752 where partcol1 = 11"), Row(11))
    +        checkAnswer(
    +          sql("select max(partcol1) from (select partcol1 from srcpart_15752) t"),
    +          Row(12))
    +        checkAnswer(
    +          sql("select max(col) from (select partcol1 + 1 as col from srcpart_15752 " +
    +            "where partcol1 = 12) t"),
    +          Row(13))
    +        checkAnswer(sql("select distinct partcol1 from srcpart_15752"), Row(11) :: Row(12) :: Nil)
    +        checkAnswer(sql("select distinct partcol1 from srcpart_15752 where partcol1 = 11"), Row(11))
    +        checkAnswer(
    +          sql("select distinct col from (select partcol1 + 1 as col from srcpart_15752 " +
    +            "where partcol1 = 12) t"),
    +          Row(13))
    +
    +        // Now donot support metadata only optimizer
    --- End diff --
    
    `Now donot support metadata only optimizer`
    
    What does this mean?


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org