You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@carbondata.apache.org by xubo245 <gi...@git.apache.org> on 2018/02/08 09:44:59 UTC

[GitHub] carbondata pull request #1857: [WIP][CARBONDATA-2073][CARBONDATA-1516][Tests...

Github user xubo245 commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/1857#discussion_r166269467
  
    --- Diff: integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/preaggregate/TestPreAggregateLoad.scala ---
    @@ -412,8 +430,467 @@ test("check load and select for avg double datatype") {
         sql(s"LOAD DATA LOCAL INPATH '$testData' into table maintable")
         sql(s"LOAD DATA LOCAL INPATH '$testData' into table maintable")
         val rows = sql("select age,avg(age) from maintable group by age").collect()
    -    sql("create datamap maintbl_douoble on table maintable using 'preaggregate' as select avg(age) from maintable group by age")
    +    sql("create datamap maintbl_double on table maintable using 'preaggregate' as select avg(age) from maintable group by age")
         checkAnswer(sql("select age,avg(age) from maintable group by age"), rows)
    +    sql("drop table if exists maintable ")
    +  }
    +
    +  def testFunction(): Unit = {
    +    // check answer
    +    checkAnswer(sql(s"SELECT * FROM main_table_preagg_sum"),
    +      Seq(Row(1, 31), Row(2, 27), Row(3, 70), Row(4, 55)))
    +    checkAnswer(sql(s"SELECT * FROM main_table_preagg_avg"),
    +      Seq(Row(1, 31, 1), Row(2, 27, 1), Row(3, 70, 2), Row(4, 55, 2)))
    +    checkAnswer(sql(s"SELECT * FROM main_table_preagg_count"),
    +      Seq(Row(1, 1), Row(2, 1), Row(3, 2), Row(4, 2)))
    +    checkAnswer(sql(s"SELECT * FROM main_table_preagg_min"),
    +      Seq(Row(1, 31), Row(2, 27), Row(3, 35), Row(4, 26)))
    +    checkAnswer(sql(s"SELECT * FROM main_table_preagg_max"),
    +      Seq(Row(1, 31), Row(2, 27), Row(3, 35), Row(4, 29)))
    +
    +    // check select and match or not match pre-aggregate table
    +    checkPreAggTable(sql("SELECT id, SUM(age) FROM main_table GROUP BY id"),
    +      true, "main_table_preagg_sum")
    +    checkPreAggTable(sql("SELECT id, SUM(age) FROM main_table GROUP BY id"),
    +      false, "main_table_preagg_avg", "main_table")
    +
    +    checkPreAggTable(sql("SELECT id, AVG(age) FROM main_table GROUP BY id"),
    +      true, "main_table_preagg_avg")
    +    checkPreAggTable(sql("SELECT id, AVG(age) from main_table GROUP BY id"),
    +      false, "main_table_preagg_sum", "main_table")
    +
    +    checkPreAggTable(sql("SELECT id, COUNT(age) FROM main_table GROUP BY id"),
    +      true, "main_table_preagg_count")
    +    checkPreAggTable(sql("SELECT id, COUNT(age) FROM main_table GROUP BY id"),
    +      false, "main_table_preagg_sum", "main_table")
    +
    +    checkPreAggTable(sql("SELECT id, MIN(age) FROM main_table GROUP BY id"),
    +      true, "main_table_preagg_min")
    +    checkPreAggTable(sql("SELECT id, MIN(age) FROM main_table GROUP BY id"),
    +      false, "main_table_preagg_sum", "main_table")
    +
    +    checkPreAggTable(sql("SELECT id, MAX(age) FROM main_table GROUP BY id"),
    +      true, "main_table_preagg_max")
    +    checkPreAggTable(sql("SELECT id, MAX(age) FROM main_table GROUP BY id"),
    +      false, "main_table_preagg_sum", "main_table")
    +
    +    // sub query should match pre-aggregate table
    +    checkPreAggTable(sql("SELECT SUM(age) FROM main_table"),
    +      true, "main_table_preagg_sum")
    +    checkPreAggTable(sql("SELECT SUM(age) FROM main_table"),
    +      false, "main_table_preagg_avg", "main_table")
    +
    +    checkPreAggTable(sql("SELECT AVG(age) FROM main_table GROUP BY id"),
    +      true, "main_table_preagg_avg")
    +    checkPreAggTable(sql("SELECT AVG(age) from main_table GROUP BY id"),
    +      false, "main_table_preagg_sum", "main_table")
    +
    +    checkPreAggTable(sql("SELECT COUNT(age) FROM main_table GROUP BY id"),
    +      true, "main_table_preagg_count")
    +    checkPreAggTable(sql("SELECT COUNT(age) FROM main_table GROUP BY id"),
    +      false, "main_table_preagg_sum", "main_table")
    +
    +    checkPreAggTable(sql("SELECT MIN(age) FROM main_table GROUP BY id"),
    +      true, "main_table_preagg_min")
    +    checkPreAggTable(sql("SELECT MIN(age) FROM main_table GROUP BY id"),
    +      false, "main_table_preagg_sum", "main_table")
    +
    +    checkPreAggTable(sql("SELECT MAX(age) FROM main_table GROUP BY id"),
    +      true, "main_table_preagg_max")
    +    checkPreAggTable(sql("SELECT MAX(age) FROM main_table GROUP BY id"),
    +      false, "main_table_preagg_sum", "main_table")
    +  }
    +
    +  test("test load into main table with pre-aggregate table: double") {
    +    sql(
    +      """
    +        | CREATE TABLE main_table(
    +        |     id INT,
    +        |     name STRING,
    +        |     city STRING,
    +        |     age DOUBLE)
    +        | STORED BY 'org.apache.carbondata.format'
    +      """.stripMargin)
    +
    +    createAllAggregateTables("main_table")
    +    sql(s"LOAD DATA LOCAL INPATH '$testData' INTO TABLE main_table")
    +
    +    testFunction()
    +  }
    +
    +  test("test load into main table with pre-aggregate table: short") {
    +    sql(
    +      """
    +        | CREATE TABLE main_table(
    +        |     id INT,
    +        |     name STRING,
    +        |     city STRING,
    +        |     age SHORT)
    +        | STORED BY 'org.apache.carbondata.format'
    +      """.stripMargin)
    +    createAllAggregateTables("main_table")
    +    sql(s"LOAD DATA LOCAL INPATH '$testData' INTO TABLE main_table")
    +
    +    testFunction()
    +  }
    +
    +  test("test load into main table with pre-aggregate table: float") {
    +    sql(
    +      """
    +        | CREATE TABLE main_table(
    +        |     id INT,
    +        |     name STRING,
    +        |     city STRING,
    +        |     age FLOAT)
    +        | STORED BY 'org.apache.carbondata.format'
    +      """.stripMargin)
    +    createAllAggregateTables("main_table")
    +    sql(s"LOAD DATA LOCAL INPATH '$testData' INTO TABLE main_table")
    +
    +    testFunction()
    +  }
    +
    +  test("test load into main table with pre-aggregate table: bigint") {
    +    sql(
    +      """
    +        | CREATE TABLE main_table(
    +        |     id INT,
    +        |     name STRING,
    +        |     city STRING,
    +        |     age BIGINT)
    +        | STORED BY 'org.apache.carbondata.format'
    +      """.stripMargin)
    +    createAllAggregateTables("main_table")
    +    sql(s"LOAD DATA LOCAL INPATH '$testData' INTO TABLE main_table")
    +
    +    testFunction()
    +
    +  }
    +
    +  test("test load into main table with pre-aggregate table: string") {
    +    sql(
    +      """
    +        | CREATE TABLE main_table(
    +        |     id INT,
    +        |     name STRING,
    +        |     city STRING,
    +        |     age STRING)
    +        | STORED BY 'org.apache.carbondata.format'
    +      """.stripMargin)
    +    createAllAggregateTables("main_table")
    +
    +    sql(s"LOAD DATA LOCAL INPATH '$testData' INTO TABLE main_table")
    +    checkAnswer(sql(s"SELECT * FROM main_table_preagg_sum"),
    +      Seq(Row(1, 31), Row(2, 27), Row(3, 70), Row(4, 55)))
    +    checkAnswer(sql(s"SELECT * FROM main_table_preagg_avg"),
    +      Seq(Row(1, 31, 1), Row(2, 27, 1), Row(3, 70, 2), Row(4, 55, 2)))
    +    checkAnswer(sql(s"SELECT * FROM main_table_preagg_count"),
    +      Seq(Row(1, 1), Row(2, 1), Row(3, 2), Row(4, 2)))
    +    checkAnswer(sql(s"SELECT * FROM main_table_preagg_min"),
    +      Seq(Row(1, "31"), Row(2, "27"), Row(3, "35"), Row(4, "26")))
    +    checkAnswer(sql(s"SELECT * FROM main_table_preagg_max"),
    +      Seq(Row(1, "31"), Row(2, "27"), Row(3, "35"), Row(4, "29")))
    +
    +    // check select and match or not match pre-aggregate table
    +    checkPreAggTable(sql("SELECT id, SUM(age) FROM main_table GROUP BY id"),
    +      true, "main_table_preagg_sum")
    +    checkPreAggTable(sql("SELECT id, SUM(age) FROM main_table GROUP BY id"),
    +      false, "main_table_preagg_avg", "main_table")
    +
    +    checkPreAggTable(sql("SELECT id, AVG(age) FROM main_table GROUP BY id"),
    +      true, "main_table_preagg_avg")
    +    checkPreAggTable(sql("SELECT id, AVG(age) from main_table GROUP BY id"),
    +      false, "main_table_preagg_sum", "main_table")
    +
    +    checkPreAggTable(sql("SELECT id, COUNT(age) FROM main_table GROUP BY id"),
    +      true, "main_table_preagg_count")
    +    checkPreAggTable(sql("SELECT id, COUNT(age) FROM main_table GROUP BY id"),
    +      false, "main_table_preagg_sum", "main_table")
    +
    +    checkPreAggTable(sql("SELECT id, MIN(age) FROM main_table GROUP BY id"),
    +      true, "main_table_preagg_min")
    +    checkPreAggTable(sql("SELECT id, MIN(age) FROM main_table GROUP BY id"),
    +      false, "main_table_preagg_sum", "main_table")
    +
    +    checkPreAggTable(sql("SELECT id, MAX(age) FROM main_table GROUP BY id"),
    +      true, "main_table_preagg_max")
    +    checkPreAggTable(sql("SELECT id, MAX(age) FROM main_table GROUP BY id"),
    +      false, "main_table_preagg_sum", "main_table")
    +
    +    // sub query should match pre-aggregate table
    +    checkPreAggTable(sql("SELECT SUM(age) FROM main_table"),
    +      true, "main_table_preagg_sum")
    +    checkPreAggTable(sql("SELECT SUM(age) FROM main_table"),
    +      false, "main_table_preagg_avg", "main_table")
    +
    +    checkPreAggTable(sql("SELECT AVG(age) FROM main_table GROUP BY id"),
    +      true, "main_table_preagg_avg")
    +    checkPreAggTable(sql("SELECT AVG(age) from main_table GROUP BY id"),
    +      false, "main_table_preagg_sum", "main_table")
    +
    +    checkPreAggTable(sql("SELECT COUNT(age) FROM main_table GROUP BY id"),
    +      true, "main_table_preagg_count")
    +    checkPreAggTable(sql("SELECT COUNT(age) FROM main_table GROUP BY id"),
    +      false, "main_table_preagg_sum", "main_table")
    +
    +    checkPreAggTable(sql("SELECT MIN(age) FROM main_table GROUP BY id"),
    +      true, "main_table_preagg_min")
    +    checkPreAggTable(sql("SELECT MIN(age) FROM main_table GROUP BY id"),
    +      false, "main_table_preagg_sum", "main_table")
    +
    +    checkPreAggTable(sql("SELECT MAX(age) FROM main_table GROUP BY id"),
    +      true, "main_table_preagg_max")
    +    checkPreAggTable(sql("SELECT MAX(age) FROM main_table GROUP BY id"),
    +      false, "main_table_preagg_sum", "main_table")
    +  }
    +
    +  test("test load into main table with pre-aggregate table: sum string column") {
    +    sql(
    +      """
    +        | CREATE TABLE main_table(
    +        |     id INT,
    +        |     name STRING,
    +        |     city STRING,
    +        |     age STRING)
    +        | STORED BY 'org.apache.carbondata.format'
    +      """.stripMargin)
    +    createAllAggregateTables("main_table", "name")
    +    sql(s"LOAD DATA LOCAL INPATH '$testData' INTO TABLE main_table")
    +
    +    checkAnswer(sql(s"SELECT * FROM main_table_preagg_sum"),
    +      Seq(Row(1, null), Row(2, null), Row(3, null), Row(4, null)))
    +    checkAnswer(sql(s"SELECT * FROM main_table_preagg_avg"),
    +      Seq(Row(1, null, 0), Row(2, null, 0), Row(3, null, 0), Row(4, null, 0)))
    +    checkAnswer(sql(s"SELECT * FROM main_table_preagg_count"),
    +      Seq(Row(1, 1), Row(2, 1), Row(3, 2), Row(4, 2)))
    +    checkAnswer(sql(s"SELECT * FROM main_table_preagg_min"),
    +      Seq(Row(1, "david"), Row(2, "eason"), Row(3, "jarry"), Row(4, "kunal")))
    +    checkAnswer(sql(s"SELECT * FROM main_table_preagg_max"),
    +      Seq(Row(1, "david"), Row(2, "eason"), Row(3, "jarry"), Row(4, "vishal")))
    +
    +    // check select and match or not match pre-aggregate table
    +    checkPreAggTable(sql("SELECT id, SUM(name) FROM main_table GROUP BY id"),
    +      true, "main_table_preagg_sum")
    +    checkPreAggTable(sql("SELECT id, SUM(name) FROM main_table GROUP BY id"),
    +      false, "main_table_preagg_avg", "main_table")
    +
    +    checkPreAggTable(sql("SELECT id, AVG(name) FROM main_table GROUP BY id"),
    +      true, "main_table_preagg_avg")
    +    checkPreAggTable(sql("SELECT id, AVG(name) from main_table GROUP BY id"),
    +      false, "main_table_preagg_sum", "main_table")
    +
    +    checkPreAggTable(sql("SELECT id, COUNT(name) FROM main_table GROUP BY id"),
    +      true, "main_table_preagg_count")
    +    checkPreAggTable(sql("SELECT id, COUNT(name) FROM main_table GROUP BY id"),
    +      false, "main_table_preagg_sum", "main_table")
    +
    +    checkPreAggTable(sql("SELECT id, MIN(name) FROM main_table GROUP BY id"),
    +      true, "main_table_preagg_min")
    +    checkPreAggTable(sql("SELECT id, MIN(name) FROM main_table GROUP BY id"),
    +      false, "main_table_preagg_sum", "main_table")
    +
    +    checkPreAggTable(sql("SELECT id, MAX(name) FROM main_table GROUP BY id"),
    +      true, "main_table_preagg_max")
    +    checkPreAggTable(sql("SELECT id, MAX(name) FROM main_table GROUP BY id"),
    +      false, "main_table_preagg_sum", "main_table")
    +
    +    // sub query should match pre-aggregate table
    +    checkPreAggTable(sql("SELECT SUM(name) FROM main_table"),
    +      true, "main_table_preagg_sum")
    +    checkPreAggTable(sql("SELECT SUM(name) FROM main_table"),
    +      false, "main_table_preagg_avg", "main_table")
    +
    +    checkPreAggTable(sql("SELECT AVG(name) FROM main_table GROUP BY id"),
    +      true, "main_table_preagg_avg")
    +    checkPreAggTable(sql("SELECT AVG(name) from main_table GROUP BY id"),
    +      false, "main_table_preagg_sum", "main_table")
    +
    +    checkPreAggTable(sql("SELECT COUNT(name) FROM main_table GROUP BY id"),
    +      true, "main_table_preagg_count")
    +    checkPreAggTable(sql("SELECT COUNT(name) FROM main_table GROUP BY id"),
    +      false, "main_table_preagg_sum", "main_table")
    +
    +    checkPreAggTable(sql("SELECT MIN(name) FROM main_table GROUP BY id"),
    +      true, "main_table_preagg_min")
    +    checkPreAggTable(sql("SELECT MIN(name) FROM main_table GROUP BY id"),
    +      false, "main_table_preagg_sum", "main_table")
    +
    +    checkPreAggTable(sql("SELECT MAX(name) FROM main_table GROUP BY id"),
    +      true, "main_table_preagg_max")
    +    checkPreAggTable(sql("SELECT MAX(name) FROM main_table GROUP BY id"),
    +      false, "main_table_preagg_sum", "main_table")
    +  }
    +
    +  test("test whether all segments are loaded into pre-aggregate table if segments are set on main table 2") {
    +    sql("DROP TABLE IF EXISTS segmaintable")
    +    sql(
    +      """
    +        | CREATE TABLE segmaintable(
    +        |     id INT,
    +        |     name STRING,
    +        |     city STRING,
    +        |     age INT)
    +        | STORED BY 'org.apache.carbondata.format'
    +      """.stripMargin)
    +    sql(s"INSERT INTO segmaintable VALUES(1, 'xyz', 'bengaluru', 26)")
    +
    +    sql("set carbon.input.segments.default.segmaintable=0")
    +    checkAnswer(sql(s"SELECT id, SUM(age) FROM segmaintable GROUP BY id"),
    +      Seq(Row(1, 26)))
    +    sql(
    +      s"""
    +         | CREATE DATAMAP preagg_sum
    +         | ON TABLE segmaintable
    +         | USING 'preaggregate'
    +         | AS SELECT id, SUM(age)
    +         | FROM segmaintable
    +         | GROUP BY id
    +       """.stripMargin)
    +    checkPreAggTable(sql("SELECT id, SUM(age) FROM segmaintable GROUP BY id"),
    +      false, "segmaintable_preagg_sum")
    +
    +    sql("reset")
    +    checkAnswer(sql("SELECT * FROM segmaintable_preagg_sum"), Seq(Row(1, 26)))
    +    checkPreAggTable(sql("SELECT id, SUM(age) FROM segmaintable GROUP BY id"),
    +      true, "segmaintable_preagg_sum")
    +  }
    +
    +
    +  test("test whether all segments are loaded into pre-aggregate table if segments are set on main table 3") {
    +    sql("DROP TABLE IF EXISTS segmaintable")
    +    sql(
    +      """
    +        | CREATE TABLE segmaintable(
    +        |     id INT,
    +        |     name STRING,
    +        |     city STRING,
    +        |     age INT)
    +        | STORED BY 'org.apache.carbondata.format'
    +      """.stripMargin)
    +    sql(s"INSERT INTO segmaintable VALUES(1, 'xyz', 'bengaluru', 26)")
    +
    +    sql("set carbon.input.segments.default.segmaintable=0")
    +    checkAnswer(sql(s"SELECT id, SUM(age) FROM segmaintable GROUP BY id"),
    +      Seq(Row(1, 26)))
    +    sql(
    +      s"""
    +         | CREATE DATAMAP preagg_sum
    +         | ON TABLE segmaintable
    +         | USING 'preaggregate'
    +         | AS SELECT id, SUM(age)
    +         | FROM segmaintable
    +         | GROUP BY id
    +       """.stripMargin)
    +    sql(s"INSERT INTO segmaintable VALUES(1, 'xyz', 'bengaluru', 26)")
    +
    +    sql("reset")
    +    checkAnswer(sql("SELECT * FROM segmaintable_preagg_sum"), Seq(Row(1, 26), Row(1, 26)))
    +    checkPreAggTable(sql("SELECT id, SUM(age) FROM segmaintable GROUP BY id"),
    +      true, "segmaintable_preagg_sum")
    +  }
    +
    +  test("test whether all segments are loaded into pre-aggregate table if segments are set on main table 4") {
    +    sql("DROP TABLE IF EXISTS segmaintable")
    +    sql(
    +      """
    +        | CREATE TABLE segmaintable(
    +        |     id INT,
    +        |     name STRING,
    +        |     city STRING,
    +        |     age INT)
    +        | STORED BY 'org.apache.carbondata.format'
    +      """.stripMargin)
    +    sql(s"INSERT INTO segmaintable VALUES(1, 'xyz', 'bengaluru', 26)")
    +    sql(s"INSERT INTO segmaintable VALUES(1, 'xyz', 'bengaluru', 26)")
    +
    +    //  check value before set segments
    +    checkAnswer(sql(s"SELECT id, SUM(age) FROM segmaintable GROUP BY id"),
    +      Seq(Row(1, 52)))
    +
    +    sql("set carbon.input.segments.default.segmaintable=0")
    +    //  check value after set segments
    +    checkAnswer(sql(s"SELECT id, SUM(age) FROM segmaintable GROUP BY id"),
    +      Seq(Row(1, 26)))
    +
    +    sql(
    +      s"""
    +         | CREATE DATAMAP preagg_sum
    +         | ON TABLE segmaintable
    +         | USING 'preaggregate'
    +         | AS SELECT id, SUM(age)
    +         | FROM segmaintable
    +         | GROUP BY id
    +       """.stripMargin)
    +    sql(s"INSERT INTO segmaintable VALUES(1, 'xyz', 'bengaluru', 26)")
    +
    +    checkAnswer(sql("SELECT * FROM segmaintable_preagg_sum"), Seq(Row(1, 52), Row(1, 26)))
    +    checkAnswer(sql(s"SELECT id, SUM(age) FROM segmaintable GROUP BY id"),
    +      Seq(Row(1, 26)))
    +    checkPreAggTable(sql("SELECT id, SUM(age) FROM segmaintable GROUP BY id"),
    +      false, "segmaintable_preagg_sum")
    +
    +    // reset
    +    sql("reset")
    +    checkAnswer(sql(s"SELECT id, SUM(age) FROM segmaintable GROUP BY id"),
    +      Seq(Row(1, 78)))
    +    checkPreAggTable(sql("SELECT id, SUM(age) FROM segmaintable GROUP BY id"),
    +      true, "segmaintable_preagg_sum")
    +  }
    +
    +
    +  test("test whether all segments are loaded into pre-aggregate table if segments are set on main table 5") {
    +    sql("DROP TABLE IF EXISTS segmaintable")
    +    sql(
    +      """
    +        | CREATE TABLE segmaintable(
    +        |     id INT,
    +        |     name STRING,
    +        |     city STRING,
    +        |     age INT)
    +        | STORED BY 'org.apache.carbondata.format'
    +      """.stripMargin)
    +    sql(s"INSERT INTO segmaintable VALUES(1, 'xyz', 'bengaluru', 26)")
    +    sql(s"INSERT INTO segmaintable VALUES(1, 'xyz', 'bengaluru', 26)")
    +
    +    //  check value before set segments
    +    checkAnswer(sql(s"SELECT id, SUM(age) FROM segmaintable GROUP BY id"),
    +      Seq(Row(1, 52)))
    +
    +    sql("set carbon.input.segments.default.segmaintable=0")
    +    //  check value after set segments
    +    checkAnswer(sql(s"SELECT id, SUM(age) FROM segmaintable GROUP BY id"),
    +      Seq(Row(1, 26)))
    +
    +    sql(
    +      s"""
    +         | CREATE DATAMAP preagg_sum
    +         | ON TABLE segmaintable
    +         | USING 'preaggregate'
    +         | AS SELECT id, SUM(age)
    +         | FROM segmaintable
    +         | GROUP BY id
    +       """.stripMargin)
    +    sql(s"INSERT INTO segmaintable VALUES(1, 'xyz', 'bengaluru', 26)")
    +
    +    checkAnswer(sql("SELECT * FROM segmaintable_preagg_sum"), Seq(Row(1, 52), Row(1, 26)))
    +    checkAnswer(sql(s"SELECT id, SUM(age) FROM segmaintable GROUP BY id"),
    +      Seq(Row(1, 26)))
    +    checkPreAggTable(sql("SELECT id, SUM(age) FROM segmaintable GROUP BY id"),
    +      false, "segmaintable_preagg_sum")
    +
    +    // set *
    +    sql("set carbon.input.segments.default.segmaintable=*")
    +    checkAnswer(sql(s"SELECT id, SUM(age) FROM segmaintable GROUP BY id"),
    +      Seq(Row(1, 78)))
    +    checkPreAggTable(sql("SELECT id, SUM(age) FROM segmaintable GROUP BY id"),
    +      false, "segmaintable_preagg_sum")
    --- End diff --
    
    we should support match pre aggregate table when sql("set carbon.input.segments.default.segmaintable=*")


---