You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ja...@apache.org on 2017/07/29 16:32:06 UTC
carbondata git commit: [CARBONDATA-1340]support tables with only measures.Remove the restriction of having atleast 1 key column

Repository: carbondata
Updated Branches:
  refs/heads/master a4fe9b869 -> 64fd5a6c5


[CARBONDATA-1340]support tables with only measures.Remove the restriction of having atleast 1 key column

support tables with only measures.Remove the restriction of having atleast 1 key column

This closes #1212


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/64fd5a6c
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/64fd5a6c
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/64fd5a6c

Branch: refs/heads/master
Commit: 64fd5a6c532ae51bab3a3067fb1de0f3074d215a
Parents: a4fe9b8
Author: Raghunandan S <ca...@gmail.com>
Authored: Sat Jul 29 18:00:23 2017 +0800
Committer: Jacky Li <ja...@qq.com>
Committed: Sun Jul 30 00:31:45 2017 +0800

----------------------------------------------------------------------
 .../allqueries/MeasureOnlyTableTestCases.scala  | 395 +++++++++++++++++++
 .../spark/sql/catalyst/CarbonDDLSqlParser.scala |   7 -
 .../createtable/TestCreateTableSyntax.scala     |   2 +-
 3 files changed, 396 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/64fd5a6c/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/allqueries/MeasureOnlyTableTestCases.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/allqueries/MeasureOnlyTableTestCases.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/allqueries/MeasureOnlyTableTestCases.scala
new file mode 100644
index 0000000..4bc4bac
--- /dev/null
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/allqueries/MeasureOnlyTableTestCases.scala
@@ -0,0 +1,395 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.spark.testsuite.allqueries
+
+import java.io.File
+import java.math.{BigDecimal, RoundingMode}
+
+import org.apache.spark.sql.{Row, SaveMode}
+import org.scalatest.BeforeAndAfterAll
+import org.apache.carbondata.core.constants.CarbonCommonConstants
+import org.apache.carbondata.core.util.CarbonProperties
+import org.apache.spark.sql.test.util.QueryTest
+
+/**
+  * Test Class for all query on multiple datatypes
+  *
+  */
+class MeasureOnlyTableTestCases extends QueryTest with BeforeAndAfterAll {
+
+  val rootPath = new File(this.getClass.getResource("/").getPath
+    + "../../../..").getCanonicalPath
+  val path = s"$rootPath/examples/spark2/src/main/resources/data.csv"
+  override def beforeAll {
+    clean
+    sql(s"""
+               | CREATE TABLE carbon_table(
+               | shortField SMALLINT,
+               | intField INT,
+               | bigintField BIGINT,
+               | doubleField DOUBLE,
+               | floatField FLOAT,
+               | decimalField DECIMAL(18,2)
+               | )
+               | STORED BY 'carbondata'
+             """.stripMargin)
+
+          val path = s"$rootPath/examples/spark2/src/main/resources/data.csv"
+
+          sql(
+            s"""
+               | LOAD DATA LOCAL INPATH '$path'
+               | INTO TABLE carbon_table
+               | OPTIONS('FILEHEADER'='shortField,intField,bigintField,doubleField,stringField,timestampField,decimalField,dateField,charField,floatField,complexData',
+               | 'COMPLEX_DELIMITER_LEVEL_1'='#')
+             """.stripMargin)
+
+
+
+    sql("create table if not exists carbon_table_hive (shortField SMALLINT,intField INT," +
+      "bigintField BIGINT,doubleField DOUBLE,stringField STRING,timestampField TIMESTAMP," +
+      "decimalField DECIMAL(18,2),dateField DATE,charField CHAR(5),floatField FLOAT,complexData ARRAY<STRING>)row format delimited fields terminated by ','")
+    sql(s"""LOAD DATA LOCAL INPATH '$path' INTO table carbon_table_hive""")
+  }
+
+  def clean {
+    sql("drop table if exists carbon_table")
+    sql("drop table if exists carbon_table_hive")
+  }
+
+  override def afterAll {
+    clean
+    CarbonProperties.getInstance()
+      .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "dd-MM-yyyy")
+  }
+
+  test("SELECT sum(intField) FROM carbon_table where intField > 10") {
+    checkAnswer(
+      sql("SELECT sum(intField) FROM carbon_table where intField > 10"),
+      sql("SELECT sum(intField) FROM carbon_table_hive where intField > 10")
+    )
+  }
+
+  test("SELECT sum(intField),sum(doubleField) FROM carbon_table where intField > 10 OR doubleField > 10") {
+    checkAnswer(
+      sql("SELECT sum(intField),sum(doubleField) FROM carbon_table where intField > 10 OR " +
+      "doubleField > 10"),
+      sql("SELECT sum(intField),sum(doubleField) FROM carbon_table_hive where intField > 10 OR " +
+        "doubleField > 10")
+    )
+  }
+
+  test("SELECT sum(decimalField) FROM carbon_table") {
+    checkAnswer(
+      sql("SELECT sum(decimalField) FROM carbon_table"),
+      sql("SELECT sum(decimalField) FROM carbon_table_hive")
+    )
+  }
+
+  test("SELECT count(*), sum(intField) FROM carbon_table where intField > 10") {
+    checkAnswer(
+    sql("SELECT count(*), sum(intField) FROM carbon_table where intField > 10"),
+      sql("SELECT count(*), sum(intField) FROM carbon_table_hive where intField > 10")
+    )
+  }
+
+  test("SELECT count(*), sum(decimalField) b FROM carbon_table order by b") {
+    checkAnswer(
+    sql("SELECT count(*), sum(decimalField) b FROM carbon_table order by b"),
+      sql("SELECT count(*), sum(decimalField) b FROM carbon_table_hive order by b")
+    )
+  }
+
+  test("SELECT intField, sum(floatField) total FROM carbon_table group by intField order by " +
+    "total") {
+    checkAnswer(
+    sql("SELECT intField, sum(floatField) total FROM carbon_table group by intField order by " +
+      "total"),
+      sql("SELECT intField, sum(floatField) total FROM carbon_table_hive group by intField order " +
+        "by total")
+    )
+  }
+
+  test("select shortField, avg(intField+ 10) as a from carbon_table group by shortField") {
+    checkAnswer(
+      sql("select shortField, avg(intField+ 10) as a from carbon_table group by shortField"),
+      sql("select shortField, avg(intField+ 10) as a from carbon_table_hive group by shortField")
+    )
+  }
+
+  test("select shortField, avg(intField+ 10) as a from carbon_table group by shortField order by " +
+    "a") {
+    checkAnswer(
+      sql("select shortField, avg(intField+ 10) as a from carbon_table group by shortField order " +
+        "by a"),
+      sql("select shortField, avg(intField+ 10) as a from carbon_table_hive group by shortField " +
+        "order by a")
+    )
+  }
+
+  test("select shortField, avg(intField+ intField) as a from carbon_table group by shortField " +
+    "order by a") {
+    checkAnswer(
+      sql("select shortField, avg(intField+ intField) as a from carbon_table group by shortField order " +
+        "by a"),
+      sql("select shortField, avg(intField+ intField) as a from carbon_table_hive group by " +
+        "shortField order by a")
+    )
+
+  }
+
+  test("select shortField, count(intField+ 10) as a from carbon_table group by shortField order " +
+    "by a") {
+    checkAnswer(
+      sql("select shortField, count(intField+ 10) as a from carbon_table group by shortField " +
+        "order by a"),
+      sql("select shortField, count(intField+ 10) as a from carbon_table_hive group by shortField" +
+        " order by a")
+    )
+  }
+
+  test("select shortField, min(intField+ 10) as a from carbon_table group by shortField order " +
+    "by a") {
+    checkAnswer(
+      sql("select shortField, min(intField+ 10) as a from carbon_table group by shortField " +
+        "order by a"),
+      sql("select shortField, min(intField+ 10) as a from carbon_table_hive group by shortField " +
+        "order by a")
+    )
+  }
+
+  test("select shortField, max(intField+ 10) as a from carbon_table group by shortField order " +
+    "by a") {
+    checkAnswer(
+      sql("select shortField, count(intField+ 10) as a from carbon_table group by shortField " +
+        "order by a"),
+      sql("select shortField, count(intField+ 10) as a from carbon_table_hive group by shortField" +
+        " order by a")
+    )
+  }
+
+  test("select shortField, sum(distinct intField) + 10 as a from carbon_table group by shortField" +
+    "order by a") {
+    checkAnswer(
+      sql("select shortField, sum(distinct intField) + 10 as a from carbon_table group by " +
+        "shortField order by a"),
+      sql("select shortField, sum(distinct intField) + 10 as a from carbon_table_hive group by " +
+        "shortField order by a")
+    )
+  }
+
+  test("select sum(doubleField) + 7.28 as a, intField from carbon_table group by intField") {
+    checkAnswer(
+      sql("select sum(doubleField) + 7.28 as a, intField from carbon_table group by intField"),
+      sql("select sum(doubleField) + 7.28 as a, intField from carbon_table_hive group by intField")
+    )
+  }
+
+  test("select count(floatField) + 7.28 a, intField from carbon_table group by intField") {
+    checkAnswer(
+      sql("select count(floatField) + 7.28 a, intField from carbon_table group by intField"),
+      sql("select count(floatField) + 7.28 a, intField from carbon_table_hive group by intField")
+    )
+  }
+
+  test("select count(distinct floatField) + 7.28 a, intField from carbon_table group by " +
+    "intField") {
+    checkAnswer(
+      sql("select count(distinct floatField) + 7.28 a, intField from carbon_table group by intField"),
+      sql("select count(distinct floatField) + 7.28 a, intField from carbon_table_hive group" +
+        " by intField")
+    )
+  }
+
+  test("select count (if(doubleField>100,NULL,doubleField))  a from carbon_table") {
+    checkAnswer(
+      sql("select count (if(doubleField>100,NULL,doubleField))  a from carbon_table"),
+      sql("select count (if(doubleField>100,NULL,doubleField))  a from carbon_table_hive")
+    )
+  }
+
+  test("select count (if(decimalField>100,NULL,decimalField))  a from carbon_table") {
+    checkAnswer(
+      sql("select count (if(decimalField>100,NULL,decimalField))  a from carbon_table"),
+      sql("select count (if(decimalField>100,NULL,decimalField))  a from carbon_table_hive")
+    )
+  }
+
+
+  test("select avg (if(floatField>100,NULL,floatField))  a from carbon_table") {
+    checkAnswer(
+      sql("select avg (if(floatField>100,NULL,floatField))  a from carbon_table"),
+      sql("select avg (if(floatField>100,NULL,floatField))  a from carbon_table_hive")
+    )
+  }
+
+  test("select min (if(intField>100,NULL,intField))  a from carbon_table") {
+    checkAnswer(
+      sql("select min (if(intField>3,NULL,intField))  a from carbon_table"),
+      sql("select min (if(intField>3,NULL,intField))  a from carbon_table_hive")
+    )
+  }
+
+  test("select max (if(intField>5,NULL,intField))  a from carbon_table")({
+    checkAnswer(
+      sql("select max (if(intField>5,NULL,intField))  a from carbon_table"),
+      sql("select max (if(intField>5,NULL,intField))  a from carbon_table_hive")
+    )
+  })
+
+  test("select variance(doubleField) as a from carbon_table")({
+    checkAnswer(
+      sql("select variance(doubleField) as a from carbon_table"),
+      sql("select variance(doubleField) as a from carbon_table_hive")
+    )
+  })
+
+  test("select var_samp(doubleField) as a  from carbon_table")({
+    checkAnswer(
+      sql("select var_samp(doubleField) as a  from carbon_table"),
+      sql("select var_samp(doubleField) as a  from carbon_table_hive")
+    )
+  })
+
+  test("select stddev_pop(doubleField) as a  from carbon_table")({
+    checkAnswer(
+      sql("select stddev_pop(doubleField) as a  from carbon_table"),
+      sql("select stddev_pop(doubleField) as a  from carbon_table_hive")
+    )
+  })
+
+  //TC_106
+  test("select stddev_samp(doubleField)  as a from carbon_table")({
+    checkAnswer(
+      sql("select stddev_samp(doubleField)  as a from carbon_table"),
+      sql("select stddev_samp(doubleField)  as a from carbon_table_hive")
+    )
+  })
+
+  test("select covar_pop(doubleField,doubleField) as a  from carbon_table")({
+    checkAnswer(
+      sql("select covar_pop(doubleField,doubleField) as a  from carbon_table"),
+      sql("select covar_pop(doubleField,doubleField) as a  from carbon_table_hive")
+    )
+  })
+
+  test("select covar_samp(doubleField,doubleField) as a  from carbon_table")({
+    checkAnswer(
+      sql("select covar_samp(doubleField,doubleField) as a  from carbon_table"),
+      sql("select covar_samp(doubleField,doubleField) as a  from carbon_table_hive")
+    )
+  })
+
+  test("select corr(doubleField,doubleField)  as a from carbon_table")({
+    checkAnswer(
+      sql("select corr(doubleField,doubleField)  as a from carbon_table"),
+      sql("select corr(doubleField,doubleField)  as a from carbon_table_hive")
+    )
+  })
+
+  test("select percentile(bigintField,0.2) as  a  from carbon_table")({
+    checkAnswer(
+      sql("select percentile(bigintField,0.2) as  a  from carbon_table"),
+      sql("select percentile(bigintField,0.2) as  a  from carbon_table_hive"))
+  })
+
+  test("select last(doubleField) a from carbon_table")({
+    checkAnswer(
+      sql("select last(doubleField) a from carbon_table"),
+      sql("select last(doubleField) a from carbon_table_hive")
+    )
+  })
+
+  test("select intField from carbon_table where carbon_table.intField IN (3,2)")({
+    checkAnswer(
+      sql("select intField from carbon_table where carbon_table.intField IN (3,2)"),
+      sql("select intField from carbon_table_hive where carbon_table_hive.intField IN (3,2)")
+    )
+  })
+
+  test("select intField from carbon_table where carbon_table.intField NOT IN (3,2)")({
+    checkAnswer(
+      sql("select intField from carbon_table where carbon_table.intField NOT IN (3,2)"),
+      sql("select intField from carbon_table_hive where carbon_table_hive.intField NOT IN (3,2)")
+    )
+  })
+
+  test("select intField,sum(floatField) a from carbon_table group by intField order by a " +
+    "desc")({
+    checkAnswer(
+      sql("select intField,sum(floatField) a from carbon_table group by intField order by " +
+        "a desc"),
+      sql("select intField,sum(floatField) a from carbon_table_hive group by intField order by " +
+        "a desc")
+    )
+  })
+
+  test("select intField,sum(floatField) a from carbon_table group by intField order by a" +
+    " asc")({
+    checkAnswer(
+      sql("select intField,sum(floatField) a from carbon_table group by intField order by " +
+        "a asc"),
+      sql("select intField,sum(floatField) a from carbon_table_hive group by intField order by " +
+        "a asc")
+    )
+  })
+
+  test("select doubleField from carbon_table where doubleField NOT BETWEEN intField AND floatField")({
+    checkAnswer(
+      sql("select doubleField from carbon_table where doubleField NOT BETWEEN intField AND floatField"),
+      sql("select doubleField from carbon_table_hive where doubleField NOT BETWEEN intField AND " +
+        "floatField")
+    )
+  })
+
+  test("select cast(doubleField as int) as a from carbon_table limit 10")({
+    checkAnswer(
+      sql("select cast(doubleField as int) as a from carbon_table limit 10"),
+      sql("select cast(doubleField as int) as a from carbon_table_hive limit 10")
+    )
+  })
+
+  test("select percentile_approx(1, 0.5 ,5000) from carbon_table")({
+    checkAnswer(
+      sql("select percentile_approx(1, 0.5 ,5000) from carbon_table"),
+      sql("select percentile_approx(1, 0.5 ,5000) from carbon_table_hive")
+    )
+  })
+
+  test("CARBONDATA-60-union-defect")({
+    sql("drop table if exists carbonunion")
+    import sqlContext.implicits._
+    val df = sqlContext.sparkContext.parallelize(1 to 1000).map(x => (x, (x+100))).toDF("c1", "c2")
+    df.registerTempTable("sparkunion")
+    df.write
+      .format("carbondata")
+      .mode(SaveMode.Overwrite)
+      .option("tableName", "carbonunion")
+      .save()
+    checkAnswer(
+      sql("select c1,count(c1) from (select c1 as c1,c2 as c2 from carbonunion union all select c2 as c1,c1 as c2 from carbonunion)t where c1=200 group by c1"),
+      sql("select c1,count(c1) from (select c1 as c1,c2 as c2 from sparkunion union all select c2 as c1,c1 as c2 from sparkunion)t where c1=200 group by c1"))
+    sql("drop table if exists carbonunion")
+  })
+
+  test("select b.intField from carbon_table a join carbon_table b on a.intField=b.intField")({
+    checkAnswer(
+      sql("select b.intField from carbon_table a join carbon_table b on a.intField=b.intField"),
+      sql("select b.intField from carbon_table_hive a join carbon_table_hive b on a.intField=b.intField"))
+  })
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/carbondata/blob/64fd5a6c/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala b/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
index a309beb..9aef6c9 100644
--- a/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
+++ b/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
@@ -253,13 +253,6 @@ abstract class CarbonDDLSqlParser extends AbstractCarbonSparkSQLParser {
     }
     val (dims, msrs, noDictionaryDims, sortKeyDims) = extractDimAndMsrFields(
       fields, tableProperties)
-    if (dims.isEmpty && !isAlterFlow) {
-      throw new MalformedCarbonCommandException(
-        s"Table ${dbName.getOrElse(CarbonCommonConstants.DATABASE_DEFAULT_NAME)}.$tableName " +
-        "can not be created without key columns. Please use DICTIONARY_INCLUDE or " +
-        "DICTIONARY_EXCLUDE to set at least one key column " +
-        "if all specified columns are numeric types")
-    }
 
     // column properties
     val colProps = extractColumnProperties(fields, tableProperties)

http://git-wip-us.apache.org/repos/asf/carbondata/blob/64fd5a6c/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/createtable/TestCreateTableSyntax.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/createtable/TestCreateTableSyntax.scala b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/createtable/TestCreateTableSyntax.scala
index 3bfe8d6..62713fa 100644
--- a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/createtable/TestCreateTableSyntax.scala
+++ b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/createtable/TestCreateTableSyntax.scala
@@ -153,7 +153,7 @@ class TestCreateTableSyntax extends QueryTest with BeforeAndAfterAll {
       sql("drop table if exists carbontable")
       sql("create table carbontable(msr1 int, msr2 double, msr3 bigint, msr4 decimal)" +
         " stored by 'org.apache.carbondata.format'")
-      assert(false)
+      assert(true)
     } catch {
       case e : MalformedCarbonCommandException => {
         assert(e.getMessage.equals("Table default.carbontable can not be created without " +