You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@carbondata.apache.org by xubo245 <gi...@git.apache.org> on 2018/02/08 09:44:59 UTC
[GitHub] carbondata pull request #1857: [WIP][CARBONDATA-2073][CARBONDATA-1516][Tests...
Github user xubo245 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1857#discussion_r166269467
--- Diff: integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/preaggregate/TestPreAggregateLoad.scala ---
@@ -412,8 +430,467 @@ test("check load and select for avg double datatype") {
sql(s"LOAD DATA LOCAL INPATH '$testData' into table maintable")
sql(s"LOAD DATA LOCAL INPATH '$testData' into table maintable")
val rows = sql("select age,avg(age) from maintable group by age").collect()
- sql("create datamap maintbl_douoble on table maintable using 'preaggregate' as select avg(age) from maintable group by age")
+ sql("create datamap maintbl_double on table maintable using 'preaggregate' as select avg(age) from maintable group by age")
checkAnswer(sql("select age,avg(age) from maintable group by age"), rows)
+ sql("drop table if exists maintable ")
+ }
+
+ def testFunction(): Unit = {
+ // check answer
+ checkAnswer(sql(s"SELECT * FROM main_table_preagg_sum"),
+ Seq(Row(1, 31), Row(2, 27), Row(3, 70), Row(4, 55)))
+ checkAnswer(sql(s"SELECT * FROM main_table_preagg_avg"),
+ Seq(Row(1, 31, 1), Row(2, 27, 1), Row(3, 70, 2), Row(4, 55, 2)))
+ checkAnswer(sql(s"SELECT * FROM main_table_preagg_count"),
+ Seq(Row(1, 1), Row(2, 1), Row(3, 2), Row(4, 2)))
+ checkAnswer(sql(s"SELECT * FROM main_table_preagg_min"),
+ Seq(Row(1, 31), Row(2, 27), Row(3, 35), Row(4, 26)))
+ checkAnswer(sql(s"SELECT * FROM main_table_preagg_max"),
+ Seq(Row(1, 31), Row(2, 27), Row(3, 35), Row(4, 29)))
+
+ // check select and match or not match pre-aggregate table
+ checkPreAggTable(sql("SELECT id, SUM(age) FROM main_table GROUP BY id"),
+ true, "main_table_preagg_sum")
+ checkPreAggTable(sql("SELECT id, SUM(age) FROM main_table GROUP BY id"),
+ false, "main_table_preagg_avg", "main_table")
+
+ checkPreAggTable(sql("SELECT id, AVG(age) FROM main_table GROUP BY id"),
+ true, "main_table_preagg_avg")
+ checkPreAggTable(sql("SELECT id, AVG(age) from main_table GROUP BY id"),
+ false, "main_table_preagg_sum", "main_table")
+
+ checkPreAggTable(sql("SELECT id, COUNT(age) FROM main_table GROUP BY id"),
+ true, "main_table_preagg_count")
+ checkPreAggTable(sql("SELECT id, COUNT(age) FROM main_table GROUP BY id"),
+ false, "main_table_preagg_sum", "main_table")
+
+ checkPreAggTable(sql("SELECT id, MIN(age) FROM main_table GROUP BY id"),
+ true, "main_table_preagg_min")
+ checkPreAggTable(sql("SELECT id, MIN(age) FROM main_table GROUP BY id"),
+ false, "main_table_preagg_sum", "main_table")
+
+ checkPreAggTable(sql("SELECT id, MAX(age) FROM main_table GROUP BY id"),
+ true, "main_table_preagg_max")
+ checkPreAggTable(sql("SELECT id, MAX(age) FROM main_table GROUP BY id"),
+ false, "main_table_preagg_sum", "main_table")
+
+ // sub query should match pre-aggregate table
+ checkPreAggTable(sql("SELECT SUM(age) FROM main_table"),
+ true, "main_table_preagg_sum")
+ checkPreAggTable(sql("SELECT SUM(age) FROM main_table"),
+ false, "main_table_preagg_avg", "main_table")
+
+ checkPreAggTable(sql("SELECT AVG(age) FROM main_table GROUP BY id"),
+ true, "main_table_preagg_avg")
+ checkPreAggTable(sql("SELECT AVG(age) from main_table GROUP BY id"),
+ false, "main_table_preagg_sum", "main_table")
+
+ checkPreAggTable(sql("SELECT COUNT(age) FROM main_table GROUP BY id"),
+ true, "main_table_preagg_count")
+ checkPreAggTable(sql("SELECT COUNT(age) FROM main_table GROUP BY id"),
+ false, "main_table_preagg_sum", "main_table")
+
+ checkPreAggTable(sql("SELECT MIN(age) FROM main_table GROUP BY id"),
+ true, "main_table_preagg_min")
+ checkPreAggTable(sql("SELECT MIN(age) FROM main_table GROUP BY id"),
+ false, "main_table_preagg_sum", "main_table")
+
+ checkPreAggTable(sql("SELECT MAX(age) FROM main_table GROUP BY id"),
+ true, "main_table_preagg_max")
+ checkPreAggTable(sql("SELECT MAX(age) FROM main_table GROUP BY id"),
+ false, "main_table_preagg_sum", "main_table")
+ }
+
+ test("test load into main table with pre-aggregate table: double") {
+ sql(
+ """
+ | CREATE TABLE main_table(
+ | id INT,
+ | name STRING,
+ | city STRING,
+ | age DOUBLE)
+ | STORED BY 'org.apache.carbondata.format'
+ """.stripMargin)
+
+ createAllAggregateTables("main_table")
+ sql(s"LOAD DATA LOCAL INPATH '$testData' INTO TABLE main_table")
+
+ testFunction()
+ }
+
+ test("test load into main table with pre-aggregate table: short") {
+ sql(
+ """
+ | CREATE TABLE main_table(
+ | id INT,
+ | name STRING,
+ | city STRING,
+ | age SHORT)
+ | STORED BY 'org.apache.carbondata.format'
+ """.stripMargin)
+ createAllAggregateTables("main_table")
+ sql(s"LOAD DATA LOCAL INPATH '$testData' INTO TABLE main_table")
+
+ testFunction()
+ }
+
+ test("test load into main table with pre-aggregate table: float") {
+ sql(
+ """
+ | CREATE TABLE main_table(
+ | id INT,
+ | name STRING,
+ | city STRING,
+ | age FLOAT)
+ | STORED BY 'org.apache.carbondata.format'
+ """.stripMargin)
+ createAllAggregateTables("main_table")
+ sql(s"LOAD DATA LOCAL INPATH '$testData' INTO TABLE main_table")
+
+ testFunction()
+ }
+
+ test("test load into main table with pre-aggregate table: bigint") {
+ sql(
+ """
+ | CREATE TABLE main_table(
+ | id INT,
+ | name STRING,
+ | city STRING,
+ | age BIGINT)
+ | STORED BY 'org.apache.carbondata.format'
+ """.stripMargin)
+ createAllAggregateTables("main_table")
+ sql(s"LOAD DATA LOCAL INPATH '$testData' INTO TABLE main_table")
+
+ testFunction()
+
+ }
+
+ test("test load into main table with pre-aggregate table: string") {
+ sql(
+ """
+ | CREATE TABLE main_table(
+ | id INT,
+ | name STRING,
+ | city STRING,
+ | age STRING)
+ | STORED BY 'org.apache.carbondata.format'
+ """.stripMargin)
+ createAllAggregateTables("main_table")
+
+ sql(s"LOAD DATA LOCAL INPATH '$testData' INTO TABLE main_table")
+ checkAnswer(sql(s"SELECT * FROM main_table_preagg_sum"),
+ Seq(Row(1, 31), Row(2, 27), Row(3, 70), Row(4, 55)))
+ checkAnswer(sql(s"SELECT * FROM main_table_preagg_avg"),
+ Seq(Row(1, 31, 1), Row(2, 27, 1), Row(3, 70, 2), Row(4, 55, 2)))
+ checkAnswer(sql(s"SELECT * FROM main_table_preagg_count"),
+ Seq(Row(1, 1), Row(2, 1), Row(3, 2), Row(4, 2)))
+ checkAnswer(sql(s"SELECT * FROM main_table_preagg_min"),
+ Seq(Row(1, "31"), Row(2, "27"), Row(3, "35"), Row(4, "26")))
+ checkAnswer(sql(s"SELECT * FROM main_table_preagg_max"),
+ Seq(Row(1, "31"), Row(2, "27"), Row(3, "35"), Row(4, "29")))
+
+ // check select and match or not match pre-aggregate table
+ checkPreAggTable(sql("SELECT id, SUM(age) FROM main_table GROUP BY id"),
+ true, "main_table_preagg_sum")
+ checkPreAggTable(sql("SELECT id, SUM(age) FROM main_table GROUP BY id"),
+ false, "main_table_preagg_avg", "main_table")
+
+ checkPreAggTable(sql("SELECT id, AVG(age) FROM main_table GROUP BY id"),
+ true, "main_table_preagg_avg")
+ checkPreAggTable(sql("SELECT id, AVG(age) from main_table GROUP BY id"),
+ false, "main_table_preagg_sum", "main_table")
+
+ checkPreAggTable(sql("SELECT id, COUNT(age) FROM main_table GROUP BY id"),
+ true, "main_table_preagg_count")
+ checkPreAggTable(sql("SELECT id, COUNT(age) FROM main_table GROUP BY id"),
+ false, "main_table_preagg_sum", "main_table")
+
+ checkPreAggTable(sql("SELECT id, MIN(age) FROM main_table GROUP BY id"),
+ true, "main_table_preagg_min")
+ checkPreAggTable(sql("SELECT id, MIN(age) FROM main_table GROUP BY id"),
+ false, "main_table_preagg_sum", "main_table")
+
+ checkPreAggTable(sql("SELECT id, MAX(age) FROM main_table GROUP BY id"),
+ true, "main_table_preagg_max")
+ checkPreAggTable(sql("SELECT id, MAX(age) FROM main_table GROUP BY id"),
+ false, "main_table_preagg_sum", "main_table")
+
+ // sub query should match pre-aggregate table
+ checkPreAggTable(sql("SELECT SUM(age) FROM main_table"),
+ true, "main_table_preagg_sum")
+ checkPreAggTable(sql("SELECT SUM(age) FROM main_table"),
+ false, "main_table_preagg_avg", "main_table")
+
+ checkPreAggTable(sql("SELECT AVG(age) FROM main_table GROUP BY id"),
+ true, "main_table_preagg_avg")
+ checkPreAggTable(sql("SELECT AVG(age) from main_table GROUP BY id"),
+ false, "main_table_preagg_sum", "main_table")
+
+ checkPreAggTable(sql("SELECT COUNT(age) FROM main_table GROUP BY id"),
+ true, "main_table_preagg_count")
+ checkPreAggTable(sql("SELECT COUNT(age) FROM main_table GROUP BY id"),
+ false, "main_table_preagg_sum", "main_table")
+
+ checkPreAggTable(sql("SELECT MIN(age) FROM main_table GROUP BY id"),
+ true, "main_table_preagg_min")
+ checkPreAggTable(sql("SELECT MIN(age) FROM main_table GROUP BY id"),
+ false, "main_table_preagg_sum", "main_table")
+
+ checkPreAggTable(sql("SELECT MAX(age) FROM main_table GROUP BY id"),
+ true, "main_table_preagg_max")
+ checkPreAggTable(sql("SELECT MAX(age) FROM main_table GROUP BY id"),
+ false, "main_table_preagg_sum", "main_table")
+ }
+
+ test("test load into main table with pre-aggregate table: sum string column") {
+ sql(
+ """
+ | CREATE TABLE main_table(
+ | id INT,
+ | name STRING,
+ | city STRING,
+ | age STRING)
+ | STORED BY 'org.apache.carbondata.format'
+ """.stripMargin)
+ createAllAggregateTables("main_table", "name")
+ sql(s"LOAD DATA LOCAL INPATH '$testData' INTO TABLE main_table")
+
+ checkAnswer(sql(s"SELECT * FROM main_table_preagg_sum"),
+ Seq(Row(1, null), Row(2, null), Row(3, null), Row(4, null)))
+ checkAnswer(sql(s"SELECT * FROM main_table_preagg_avg"),
+ Seq(Row(1, null, 0), Row(2, null, 0), Row(3, null, 0), Row(4, null, 0)))
+ checkAnswer(sql(s"SELECT * FROM main_table_preagg_count"),
+ Seq(Row(1, 1), Row(2, 1), Row(3, 2), Row(4, 2)))
+ checkAnswer(sql(s"SELECT * FROM main_table_preagg_min"),
+ Seq(Row(1, "david"), Row(2, "eason"), Row(3, "jarry"), Row(4, "kunal")))
+ checkAnswer(sql(s"SELECT * FROM main_table_preagg_max"),
+ Seq(Row(1, "david"), Row(2, "eason"), Row(3, "jarry"), Row(4, "vishal")))
+
+ // check select and match or not match pre-aggregate table
+ checkPreAggTable(sql("SELECT id, SUM(name) FROM main_table GROUP BY id"),
+ true, "main_table_preagg_sum")
+ checkPreAggTable(sql("SELECT id, SUM(name) FROM main_table GROUP BY id"),
+ false, "main_table_preagg_avg", "main_table")
+
+ checkPreAggTable(sql("SELECT id, AVG(name) FROM main_table GROUP BY id"),
+ true, "main_table_preagg_avg")
+ checkPreAggTable(sql("SELECT id, AVG(name) from main_table GROUP BY id"),
+ false, "main_table_preagg_sum", "main_table")
+
+ checkPreAggTable(sql("SELECT id, COUNT(name) FROM main_table GROUP BY id"),
+ true, "main_table_preagg_count")
+ checkPreAggTable(sql("SELECT id, COUNT(name) FROM main_table GROUP BY id"),
+ false, "main_table_preagg_sum", "main_table")
+
+ checkPreAggTable(sql("SELECT id, MIN(name) FROM main_table GROUP BY id"),
+ true, "main_table_preagg_min")
+ checkPreAggTable(sql("SELECT id, MIN(name) FROM main_table GROUP BY id"),
+ false, "main_table_preagg_sum", "main_table")
+
+ checkPreAggTable(sql("SELECT id, MAX(name) FROM main_table GROUP BY id"),
+ true, "main_table_preagg_max")
+ checkPreAggTable(sql("SELECT id, MAX(name) FROM main_table GROUP BY id"),
+ false, "main_table_preagg_sum", "main_table")
+
+ // sub query should match pre-aggregate table
+ checkPreAggTable(sql("SELECT SUM(name) FROM main_table"),
+ true, "main_table_preagg_sum")
+ checkPreAggTable(sql("SELECT SUM(name) FROM main_table"),
+ false, "main_table_preagg_avg", "main_table")
+
+ checkPreAggTable(sql("SELECT AVG(name) FROM main_table GROUP BY id"),
+ true, "main_table_preagg_avg")
+ checkPreAggTable(sql("SELECT AVG(name) from main_table GROUP BY id"),
+ false, "main_table_preagg_sum", "main_table")
+
+ checkPreAggTable(sql("SELECT COUNT(name) FROM main_table GROUP BY id"),
+ true, "main_table_preagg_count")
+ checkPreAggTable(sql("SELECT COUNT(name) FROM main_table GROUP BY id"),
+ false, "main_table_preagg_sum", "main_table")
+
+ checkPreAggTable(sql("SELECT MIN(name) FROM main_table GROUP BY id"),
+ true, "main_table_preagg_min")
+ checkPreAggTable(sql("SELECT MIN(name) FROM main_table GROUP BY id"),
+ false, "main_table_preagg_sum", "main_table")
+
+ checkPreAggTable(sql("SELECT MAX(name) FROM main_table GROUP BY id"),
+ true, "main_table_preagg_max")
+ checkPreAggTable(sql("SELECT MAX(name) FROM main_table GROUP BY id"),
+ false, "main_table_preagg_sum", "main_table")
+ }
+
+ test("test whether all segments are loaded into pre-aggregate table if segments are set on main table 2") {
+ sql("DROP TABLE IF EXISTS segmaintable")
+ sql(
+ """
+ | CREATE TABLE segmaintable(
+ | id INT,
+ | name STRING,
+ | city STRING,
+ | age INT)
+ | STORED BY 'org.apache.carbondata.format'
+ """.stripMargin)
+ sql(s"INSERT INTO segmaintable VALUES(1, 'xyz', 'bengaluru', 26)")
+
+ sql("set carbon.input.segments.default.segmaintable=0")
+ checkAnswer(sql(s"SELECT id, SUM(age) FROM segmaintable GROUP BY id"),
+ Seq(Row(1, 26)))
+ sql(
+ s"""
+ | CREATE DATAMAP preagg_sum
+ | ON TABLE segmaintable
+ | USING 'preaggregate'
+ | AS SELECT id, SUM(age)
+ | FROM segmaintable
+ | GROUP BY id
+ """.stripMargin)
+ checkPreAggTable(sql("SELECT id, SUM(age) FROM segmaintable GROUP BY id"),
+ false, "segmaintable_preagg_sum")
+
+ sql("reset")
+ checkAnswer(sql("SELECT * FROM segmaintable_preagg_sum"), Seq(Row(1, 26)))
+ checkPreAggTable(sql("SELECT id, SUM(age) FROM segmaintable GROUP BY id"),
+ true, "segmaintable_preagg_sum")
+ }
+
+
+ test("test whether all segments are loaded into pre-aggregate table if segments are set on main table 3") {
+ sql("DROP TABLE IF EXISTS segmaintable")
+ sql(
+ """
+ | CREATE TABLE segmaintable(
+ | id INT,
+ | name STRING,
+ | city STRING,
+ | age INT)
+ | STORED BY 'org.apache.carbondata.format'
+ """.stripMargin)
+ sql(s"INSERT INTO segmaintable VALUES(1, 'xyz', 'bengaluru', 26)")
+
+ sql("set carbon.input.segments.default.segmaintable=0")
+ checkAnswer(sql(s"SELECT id, SUM(age) FROM segmaintable GROUP BY id"),
+ Seq(Row(1, 26)))
+ sql(
+ s"""
+ | CREATE DATAMAP preagg_sum
+ | ON TABLE segmaintable
+ | USING 'preaggregate'
+ | AS SELECT id, SUM(age)
+ | FROM segmaintable
+ | GROUP BY id
+ """.stripMargin)
+ sql(s"INSERT INTO segmaintable VALUES(1, 'xyz', 'bengaluru', 26)")
+
+ sql("reset")
+ checkAnswer(sql("SELECT * FROM segmaintable_preagg_sum"), Seq(Row(1, 26), Row(1, 26)))
+ checkPreAggTable(sql("SELECT id, SUM(age) FROM segmaintable GROUP BY id"),
+ true, "segmaintable_preagg_sum")
+ }
+
+ test("test whether all segments are loaded into pre-aggregate table if segments are set on main table 4") {
+ sql("DROP TABLE IF EXISTS segmaintable")
+ sql(
+ """
+ | CREATE TABLE segmaintable(
+ | id INT,
+ | name STRING,
+ | city STRING,
+ | age INT)
+ | STORED BY 'org.apache.carbondata.format'
+ """.stripMargin)
+ sql(s"INSERT INTO segmaintable VALUES(1, 'xyz', 'bengaluru', 26)")
+ sql(s"INSERT INTO segmaintable VALUES(1, 'xyz', 'bengaluru', 26)")
+
+ // check value before set segments
+ checkAnswer(sql(s"SELECT id, SUM(age) FROM segmaintable GROUP BY id"),
+ Seq(Row(1, 52)))
+
+ sql("set carbon.input.segments.default.segmaintable=0")
+ // check value after set segments
+ checkAnswer(sql(s"SELECT id, SUM(age) FROM segmaintable GROUP BY id"),
+ Seq(Row(1, 26)))
+
+ sql(
+ s"""
+ | CREATE DATAMAP preagg_sum
+ | ON TABLE segmaintable
+ | USING 'preaggregate'
+ | AS SELECT id, SUM(age)
+ | FROM segmaintable
+ | GROUP BY id
+ """.stripMargin)
+ sql(s"INSERT INTO segmaintable VALUES(1, 'xyz', 'bengaluru', 26)")
+
+ checkAnswer(sql("SELECT * FROM segmaintable_preagg_sum"), Seq(Row(1, 52), Row(1, 26)))
+ checkAnswer(sql(s"SELECT id, SUM(age) FROM segmaintable GROUP BY id"),
+ Seq(Row(1, 26)))
+ checkPreAggTable(sql("SELECT id, SUM(age) FROM segmaintable GROUP BY id"),
+ false, "segmaintable_preagg_sum")
+
+ // reset
+ sql("reset")
+ checkAnswer(sql(s"SELECT id, SUM(age) FROM segmaintable GROUP BY id"),
+ Seq(Row(1, 78)))
+ checkPreAggTable(sql("SELECT id, SUM(age) FROM segmaintable GROUP BY id"),
+ true, "segmaintable_preagg_sum")
+ }
+
+
+ test("test whether all segments are loaded into pre-aggregate table if segments are set on main table 5") {
+ sql("DROP TABLE IF EXISTS segmaintable")
+ sql(
+ """
+ | CREATE TABLE segmaintable(
+ | id INT,
+ | name STRING,
+ | city STRING,
+ | age INT)
+ | STORED BY 'org.apache.carbondata.format'
+ """.stripMargin)
+ sql(s"INSERT INTO segmaintable VALUES(1, 'xyz', 'bengaluru', 26)")
+ sql(s"INSERT INTO segmaintable VALUES(1, 'xyz', 'bengaluru', 26)")
+
+ // check value before set segments
+ checkAnswer(sql(s"SELECT id, SUM(age) FROM segmaintable GROUP BY id"),
+ Seq(Row(1, 52)))
+
+ sql("set carbon.input.segments.default.segmaintable=0")
+ // check value after set segments
+ checkAnswer(sql(s"SELECT id, SUM(age) FROM segmaintable GROUP BY id"),
+ Seq(Row(1, 26)))
+
+ sql(
+ s"""
+ | CREATE DATAMAP preagg_sum
+ | ON TABLE segmaintable
+ | USING 'preaggregate'
+ | AS SELECT id, SUM(age)
+ | FROM segmaintable
+ | GROUP BY id
+ """.stripMargin)
+ sql(s"INSERT INTO segmaintable VALUES(1, 'xyz', 'bengaluru', 26)")
+
+ checkAnswer(sql("SELECT * FROM segmaintable_preagg_sum"), Seq(Row(1, 52), Row(1, 26)))
+ checkAnswer(sql(s"SELECT id, SUM(age) FROM segmaintable GROUP BY id"),
+ Seq(Row(1, 26)))
+ checkPreAggTable(sql("SELECT id, SUM(age) FROM segmaintable GROUP BY id"),
+ false, "segmaintable_preagg_sum")
+
+ // set *
+ sql("set carbon.input.segments.default.segmaintable=*")
+ checkAnswer(sql(s"SELECT id, SUM(age) FROM segmaintable GROUP BY id"),
+ Seq(Row(1, 78)))
+ checkPreAggTable(sql("SELECT id, SUM(age) FROM segmaintable GROUP BY id"),
+ false, "segmaintable_preagg_sum")
--- End diff --
we should support match pre aggregate table when sql("set carbon.input.segments.default.segmaintable=*")
---