You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ja...@apache.org on 2017/07/29 16:32:06 UTC
carbondata git commit: [CARBONDATA-1340]support tables with only
measures.Remove the restriction of having atleast 1 key column
Repository: carbondata
Updated Branches:
refs/heads/master a4fe9b869 -> 64fd5a6c5
[CARBONDATA-1340]support tables with only measures.Remove the restriction of having atleast 1 key column
support tables with only measures.Remove the restriction of having atleast 1 key column
This closes #1212
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/64fd5a6c
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/64fd5a6c
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/64fd5a6c
Branch: refs/heads/master
Commit: 64fd5a6c532ae51bab3a3067fb1de0f3074d215a
Parents: a4fe9b8
Author: Raghunandan S <ca...@gmail.com>
Authored: Sat Jul 29 18:00:23 2017 +0800
Committer: Jacky Li <ja...@qq.com>
Committed: Sun Jul 30 00:31:45 2017 +0800
----------------------------------------------------------------------
.../allqueries/MeasureOnlyTableTestCases.scala | 395 +++++++++++++++++++
.../spark/sql/catalyst/CarbonDDLSqlParser.scala | 7 -
.../createtable/TestCreateTableSyntax.scala | 2 +-
3 files changed, 396 insertions(+), 8 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/carbondata/blob/64fd5a6c/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/allqueries/MeasureOnlyTableTestCases.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/allqueries/MeasureOnlyTableTestCases.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/allqueries/MeasureOnlyTableTestCases.scala
new file mode 100644
index 0000000..4bc4bac
--- /dev/null
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/allqueries/MeasureOnlyTableTestCases.scala
@@ -0,0 +1,395 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.spark.testsuite.allqueries
+
+import java.io.File
+import java.math.{BigDecimal, RoundingMode}
+
+import org.apache.spark.sql.{Row, SaveMode}
+import org.scalatest.BeforeAndAfterAll
+import org.apache.carbondata.core.constants.CarbonCommonConstants
+import org.apache.carbondata.core.util.CarbonProperties
+import org.apache.spark.sql.test.util.QueryTest
+
+/**
+ * Test Class for all query on multiple datatypes
+ *
+ */
+class MeasureOnlyTableTestCases extends QueryTest with BeforeAndAfterAll {
+
+ val rootPath = new File(this.getClass.getResource("/").getPath
+ + "../../../..").getCanonicalPath
+ val path = s"$rootPath/examples/spark2/src/main/resources/data.csv"
+ override def beforeAll {
+ clean
+ sql(s"""
+ | CREATE TABLE carbon_table(
+ | shortField SMALLINT,
+ | intField INT,
+ | bigintField BIGINT,
+ | doubleField DOUBLE,
+ | floatField FLOAT,
+ | decimalField DECIMAL(18,2)
+ | )
+ | STORED BY 'carbondata'
+ """.stripMargin)
+
+ val path = s"$rootPath/examples/spark2/src/main/resources/data.csv"
+
+ sql(
+ s"""
+ | LOAD DATA LOCAL INPATH '$path'
+ | INTO TABLE carbon_table
+ | OPTIONS('FILEHEADER'='shortField,intField,bigintField,doubleField,stringField,timestampField,decimalField,dateField,charField,floatField,complexData',
+ | 'COMPLEX_DELIMITER_LEVEL_1'='#')
+ """.stripMargin)
+
+
+
+ sql("create table if not exists carbon_table_hive (shortField SMALLINT,intField INT," +
+ "bigintField BIGINT,doubleField DOUBLE,stringField STRING,timestampField TIMESTAMP," +
+ "decimalField DECIMAL(18,2),dateField DATE,charField CHAR(5),floatField FLOAT,complexData ARRAY<STRING>)row format delimited fields terminated by ','")
+ sql(s"""LOAD DATA LOCAL INPATH '$path' INTO table carbon_table_hive""")
+ }
+
+ def clean {
+ sql("drop table if exists carbon_table")
+ sql("drop table if exists carbon_table_hive")
+ }
+
+ override def afterAll {
+ clean
+ CarbonProperties.getInstance()
+ .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "dd-MM-yyyy")
+ }
+
+ test("SELECT sum(intField) FROM carbon_table where intField > 10") {
+ checkAnswer(
+ sql("SELECT sum(intField) FROM carbon_table where intField > 10"),
+ sql("SELECT sum(intField) FROM carbon_table_hive where intField > 10")
+ )
+ }
+
+ test("SELECT sum(intField),sum(doubleField) FROM carbon_table where intField > 10 OR doubleField > 10") {
+ checkAnswer(
+ sql("SELECT sum(intField),sum(doubleField) FROM carbon_table where intField > 10 OR " +
+ "doubleField > 10"),
+ sql("SELECT sum(intField),sum(doubleField) FROM carbon_table_hive where intField > 10 OR " +
+ "doubleField > 10")
+ )
+ }
+
+ test("SELECT sum(decimalField) FROM carbon_table") {
+ checkAnswer(
+ sql("SELECT sum(decimalField) FROM carbon_table"),
+ sql("SELECT sum(decimalField) FROM carbon_table_hive")
+ )
+ }
+
+ test("SELECT count(*), sum(intField) FROM carbon_table where intField > 10") {
+ checkAnswer(
+ sql("SELECT count(*), sum(intField) FROM carbon_table where intField > 10"),
+ sql("SELECT count(*), sum(intField) FROM carbon_table_hive where intField > 10")
+ )
+ }
+
+ test("SELECT count(*), sum(decimalField) b FROM carbon_table order by b") {
+ checkAnswer(
+ sql("SELECT count(*), sum(decimalField) b FROM carbon_table order by b"),
+ sql("SELECT count(*), sum(decimalField) b FROM carbon_table_hive order by b")
+ )
+ }
+
+ test("SELECT intField, sum(floatField) total FROM carbon_table group by intField order by " +
+ "total") {
+ checkAnswer(
+ sql("SELECT intField, sum(floatField) total FROM carbon_table group by intField order by " +
+ "total"),
+ sql("SELECT intField, sum(floatField) total FROM carbon_table_hive group by intField order " +
+ "by total")
+ )
+ }
+
+ test("select shortField, avg(intField+ 10) as a from carbon_table group by shortField") {
+ checkAnswer(
+ sql("select shortField, avg(intField+ 10) as a from carbon_table group by shortField"),
+ sql("select shortField, avg(intField+ 10) as a from carbon_table_hive group by shortField")
+ )
+ }
+
+ test("select shortField, avg(intField+ 10) as a from carbon_table group by shortField order by " +
+ "a") {
+ checkAnswer(
+ sql("select shortField, avg(intField+ 10) as a from carbon_table group by shortField order " +
+ "by a"),
+ sql("select shortField, avg(intField+ 10) as a from carbon_table_hive group by shortField " +
+ "order by a")
+ )
+ }
+
+ test("select shortField, avg(intField+ intField) as a from carbon_table group by shortField " +
+ "order by a") {
+ checkAnswer(
+ sql("select shortField, avg(intField+ intField) as a from carbon_table group by shortField order " +
+ "by a"),
+ sql("select shortField, avg(intField+ intField) as a from carbon_table_hive group by " +
+ "shortField order by a")
+ )
+
+ }
+
+ test("select shortField, count(intField+ 10) as a from carbon_table group by shortField order " +
+ "by a") {
+ checkAnswer(
+ sql("select shortField, count(intField+ 10) as a from carbon_table group by shortField " +
+ "order by a"),
+ sql("select shortField, count(intField+ 10) as a from carbon_table_hive group by shortField" +
+ " order by a")
+ )
+ }
+
+ test("select shortField, min(intField+ 10) as a from carbon_table group by shortField order " +
+ "by a") {
+ checkAnswer(
+ sql("select shortField, min(intField+ 10) as a from carbon_table group by shortField " +
+ "order by a"),
+ sql("select shortField, min(intField+ 10) as a from carbon_table_hive group by shortField " +
+ "order by a")
+ )
+ }
+
+ test("select shortField, max(intField+ 10) as a from carbon_table group by shortField order " +
+ "by a") {
+ checkAnswer(
+ sql("select shortField, count(intField+ 10) as a from carbon_table group by shortField " +
+ "order by a"),
+ sql("select shortField, count(intField+ 10) as a from carbon_table_hive group by shortField" +
+ " order by a")
+ )
+ }
+
+ test("select shortField, sum(distinct intField) + 10 as a from carbon_table group by shortField" +
+ "order by a") {
+ checkAnswer(
+ sql("select shortField, sum(distinct intField) + 10 as a from carbon_table group by " +
+ "shortField order by a"),
+ sql("select shortField, sum(distinct intField) + 10 as a from carbon_table_hive group by " +
+ "shortField order by a")
+ )
+ }
+
+ test("select sum(doubleField) + 7.28 as a, intField from carbon_table group by intField") {
+ checkAnswer(
+ sql("select sum(doubleField) + 7.28 as a, intField from carbon_table group by intField"),
+ sql("select sum(doubleField) + 7.28 as a, intField from carbon_table_hive group by intField")
+ )
+ }
+
+ test("select count(floatField) + 7.28 a, intField from carbon_table group by intField") {
+ checkAnswer(
+ sql("select count(floatField) + 7.28 a, intField from carbon_table group by intField"),
+ sql("select count(floatField) + 7.28 a, intField from carbon_table_hive group by intField")
+ )
+ }
+
+ test("select count(distinct floatField) + 7.28 a, intField from carbon_table group by " +
+ "intField") {
+ checkAnswer(
+ sql("select count(distinct floatField) + 7.28 a, intField from carbon_table group by intField"),
+ sql("select count(distinct floatField) + 7.28 a, intField from carbon_table_hive group" +
+ " by intField")
+ )
+ }
+
+ test("select count (if(doubleField>100,NULL,doubleField)) a from carbon_table") {
+ checkAnswer(
+ sql("select count (if(doubleField>100,NULL,doubleField)) a from carbon_table"),
+ sql("select count (if(doubleField>100,NULL,doubleField)) a from carbon_table_hive")
+ )
+ }
+
+ test("select count (if(decimalField>100,NULL,decimalField)) a from carbon_table") {
+ checkAnswer(
+ sql("select count (if(decimalField>100,NULL,decimalField)) a from carbon_table"),
+ sql("select count (if(decimalField>100,NULL,decimalField)) a from carbon_table_hive")
+ )
+ }
+
+
+ test("select avg (if(floatField>100,NULL,floatField)) a from carbon_table") {
+ checkAnswer(
+ sql("select avg (if(floatField>100,NULL,floatField)) a from carbon_table"),
+ sql("select avg (if(floatField>100,NULL,floatField)) a from carbon_table_hive")
+ )
+ }
+
+ test("select min (if(intField>100,NULL,intField)) a from carbon_table") {
+ checkAnswer(
+ sql("select min (if(intField>3,NULL,intField)) a from carbon_table"),
+ sql("select min (if(intField>3,NULL,intField)) a from carbon_table_hive")
+ )
+ }
+
+ test("select max (if(intField>5,NULL,intField)) a from carbon_table")({
+ checkAnswer(
+ sql("select max (if(intField>5,NULL,intField)) a from carbon_table"),
+ sql("select max (if(intField>5,NULL,intField)) a from carbon_table_hive")
+ )
+ })
+
+ test("select variance(doubleField) as a from carbon_table")({
+ checkAnswer(
+ sql("select variance(doubleField) as a from carbon_table"),
+ sql("select variance(doubleField) as a from carbon_table_hive")
+ )
+ })
+
+ test("select var_samp(doubleField) as a from carbon_table")({
+ checkAnswer(
+ sql("select var_samp(doubleField) as a from carbon_table"),
+ sql("select var_samp(doubleField) as a from carbon_table_hive")
+ )
+ })
+
+ test("select stddev_pop(doubleField) as a from carbon_table")({
+ checkAnswer(
+ sql("select stddev_pop(doubleField) as a from carbon_table"),
+ sql("select stddev_pop(doubleField) as a from carbon_table_hive")
+ )
+ })
+
+ //TC_106
+ test("select stddev_samp(doubleField) as a from carbon_table")({
+ checkAnswer(
+ sql("select stddev_samp(doubleField) as a from carbon_table"),
+ sql("select stddev_samp(doubleField) as a from carbon_table_hive")
+ )
+ })
+
+ test("select covar_pop(doubleField,doubleField) as a from carbon_table")({
+ checkAnswer(
+ sql("select covar_pop(doubleField,doubleField) as a from carbon_table"),
+ sql("select covar_pop(doubleField,doubleField) as a from carbon_table_hive")
+ )
+ })
+
+ test("select covar_samp(doubleField,doubleField) as a from carbon_table")({
+ checkAnswer(
+ sql("select covar_samp(doubleField,doubleField) as a from carbon_table"),
+ sql("select covar_samp(doubleField,doubleField) as a from carbon_table_hive")
+ )
+ })
+
+ test("select corr(doubleField,doubleField) as a from carbon_table")({
+ checkAnswer(
+ sql("select corr(doubleField,doubleField) as a from carbon_table"),
+ sql("select corr(doubleField,doubleField) as a from carbon_table_hive")
+ )
+ })
+
+ test("select percentile(bigintField,0.2) as a from carbon_table")({
+ checkAnswer(
+ sql("select percentile(bigintField,0.2) as a from carbon_table"),
+ sql("select percentile(bigintField,0.2) as a from carbon_table_hive"))
+ })
+
+ test("select last(doubleField) a from carbon_table")({
+ checkAnswer(
+ sql("select last(doubleField) a from carbon_table"),
+ sql("select last(doubleField) a from carbon_table_hive")
+ )
+ })
+
+ test("select intField from carbon_table where carbon_table.intField IN (3,2)")({
+ checkAnswer(
+ sql("select intField from carbon_table where carbon_table.intField IN (3,2)"),
+ sql("select intField from carbon_table_hive where carbon_table_hive.intField IN (3,2)")
+ )
+ })
+
+ test("select intField from carbon_table where carbon_table.intField NOT IN (3,2)")({
+ checkAnswer(
+ sql("select intField from carbon_table where carbon_table.intField NOT IN (3,2)"),
+ sql("select intField from carbon_table_hive where carbon_table_hive.intField NOT IN (3,2)")
+ )
+ })
+
+ test("select intField,sum(floatField) a from carbon_table group by intField order by a " +
+ "desc")({
+ checkAnswer(
+ sql("select intField,sum(floatField) a from carbon_table group by intField order by " +
+ "a desc"),
+ sql("select intField,sum(floatField) a from carbon_table_hive group by intField order by " +
+ "a desc")
+ )
+ })
+
+ test("select intField,sum(floatField) a from carbon_table group by intField order by a" +
+ " asc")({
+ checkAnswer(
+ sql("select intField,sum(floatField) a from carbon_table group by intField order by " +
+ "a asc"),
+ sql("select intField,sum(floatField) a from carbon_table_hive group by intField order by " +
+ "a asc")
+ )
+ })
+
+ test("select doubleField from carbon_table where doubleField NOT BETWEEN intField AND floatField")({
+ checkAnswer(
+ sql("select doubleField from carbon_table where doubleField NOT BETWEEN intField AND floatField"),
+ sql("select doubleField from carbon_table_hive where doubleField NOT BETWEEN intField AND " +
+ "floatField")
+ )
+ })
+
+ test("select cast(doubleField as int) as a from carbon_table limit 10")({
+ checkAnswer(
+ sql("select cast(doubleField as int) as a from carbon_table limit 10"),
+ sql("select cast(doubleField as int) as a from carbon_table_hive limit 10")
+ )
+ })
+
+ test("select percentile_approx(1, 0.5 ,5000) from carbon_table")({
+ checkAnswer(
+ sql("select percentile_approx(1, 0.5 ,5000) from carbon_table"),
+ sql("select percentile_approx(1, 0.5 ,5000) from carbon_table_hive")
+ )
+ })
+
+ test("CARBONDATA-60-union-defect")({
+ sql("drop table if exists carbonunion")
+ import sqlContext.implicits._
+ val df = sqlContext.sparkContext.parallelize(1 to 1000).map(x => (x, (x+100))).toDF("c1", "c2")
+ df.registerTempTable("sparkunion")
+ df.write
+ .format("carbondata")
+ .mode(SaveMode.Overwrite)
+ .option("tableName", "carbonunion")
+ .save()
+ checkAnswer(
+ sql("select c1,count(c1) from (select c1 as c1,c2 as c2 from carbonunion union all select c2 as c1,c1 as c2 from carbonunion)t where c1=200 group by c1"),
+ sql("select c1,count(c1) from (select c1 as c1,c2 as c2 from sparkunion union all select c2 as c1,c1 as c2 from sparkunion)t where c1=200 group by c1"))
+ sql("drop table if exists carbonunion")
+ })
+
+ test("select b.intField from carbon_table a join carbon_table b on a.intField=b.intField")({
+ checkAnswer(
+ sql("select b.intField from carbon_table a join carbon_table b on a.intField=b.intField"),
+ sql("select b.intField from carbon_table_hive a join carbon_table_hive b on a.intField=b.intField"))
+ })
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/carbondata/blob/64fd5a6c/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala b/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
index a309beb..9aef6c9 100644
--- a/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
+++ b/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
@@ -253,13 +253,6 @@ abstract class CarbonDDLSqlParser extends AbstractCarbonSparkSQLParser {
}
val (dims, msrs, noDictionaryDims, sortKeyDims) = extractDimAndMsrFields(
fields, tableProperties)
- if (dims.isEmpty && !isAlterFlow) {
- throw new MalformedCarbonCommandException(
- s"Table ${dbName.getOrElse(CarbonCommonConstants.DATABASE_DEFAULT_NAME)}.$tableName " +
- "can not be created without key columns. Please use DICTIONARY_INCLUDE or " +
- "DICTIONARY_EXCLUDE to set at least one key column " +
- "if all specified columns are numeric types")
- }
// column properties
val colProps = extractColumnProperties(fields, tableProperties)
http://git-wip-us.apache.org/repos/asf/carbondata/blob/64fd5a6c/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/createtable/TestCreateTableSyntax.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/createtable/TestCreateTableSyntax.scala b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/createtable/TestCreateTableSyntax.scala
index 3bfe8d6..62713fa 100644
--- a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/createtable/TestCreateTableSyntax.scala
+++ b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/createtable/TestCreateTableSyntax.scala
@@ -153,7 +153,7 @@ class TestCreateTableSyntax extends QueryTest with BeforeAndAfterAll {
sql("drop table if exists carbontable")
sql("create table carbontable(msr1 int, msr2 double, msr3 bigint, msr4 decimal)" +
" stored by 'org.apache.carbondata.format'")
- assert(false)
+ assert(true)
} catch {
case e : MalformedCarbonCommandException => {
assert(e.getMessage.equals("Table default.carbontable can not be created without " +