You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ra...@apache.org on 2017/05/04 06:15:07 UTC

[1/2] incubator-carbondata git commit: added check for null value in expression result

Repository: incubator-carbondata
Updated Branches:
  refs/heads/master 35a286950 -> ccd8175f8


added check for null value in expression result


Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/58751fd5
Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/58751fd5
Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/58751fd5

Branch: refs/heads/master
Commit: 58751fd573be920193f4256c6a204b4c4986aabf
Parents: 35a2869
Author: kunal642 <ku...@knoldus.in>
Authored: Tue May 2 17:17:28 2017 +0530
Committer: ravipesala <ra...@gmail.com>
Committed: Thu May 4 11:43:21 2017 +0530

----------------------------------------------------------------------
 .../expression/conditional/InExpression.java    |  14 +-
 .../expression/conditional/NotInExpression.java |  12 +-
 .../src/test/resources/filter/datawithnull.csv  |   3 +
 .../test/resources/filter/datawithoutnull.csv   |   3 +
 .../ExpressionWithNullTestCase.scala            | 150 +++++++++++++++++++
 5 files changed, 180 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/58751fd5/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/InExpression.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/InExpression.java b/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/InExpression.java
index fe0cb13..130063d 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/InExpression.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/InExpression.java
@@ -82,7 +82,19 @@ public class InExpression extends BinaryConditionalExpression {
         setOfExprResult.add(val);
       }
     }
-    leftRsult.set(DataType.BOOLEAN, setOfExprResult.contains(leftRsult));
+    // Only left result needs to be checked for null because InExpression is basically
+    // an OR Operation on the list of predicates that are provided.
+    // Example: x in (1,2,null) would be converted to x=1 OR x=2 OR x=null.
+    // If any of the predicates is null then the result is unknown thus we will return false
+    // for x=null.
+    // Left check will cover both the cases when left and right is null therefore no need
+    // for a check on the right result.
+    // Example: (null==null) -> Left null return false, (1==null) would automatically be false.
+    if (leftRsult.isNull()) {
+      leftRsult.set(DataType.BOOLEAN, false);
+    } else {
+      leftRsult.set(DataType.BOOLEAN, setOfExprResult.contains(leftRsult));
+    }
     return leftRsult;
   }
 

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/58751fd5/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/NotInExpression.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/NotInExpression.java b/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/NotInExpression.java
index 2c86724..2552f96 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/NotInExpression.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/NotInExpression.java
@@ -80,8 +80,18 @@ public class NotInExpression extends BinaryConditionalExpression {
         setOfExprResult.add(val);
       }
     }
+    // Both left and right results need to be checked for null because NotInExpression is basically
+    // an And Operation on the list of predicates that are provided.
+    // Example: x in (1,2,null) would be converted to x=1 AND x=2 AND x=null.
+    // If any of the predicates is null then the result is unknown for all the predicates thus
+    // we will return false for each of them.
+    for (ExpressionResult expressionResult: setOfExprResult) {
+      if (expressionResult.isNull() || leftRsult.isNull()) {
+        leftRsult.set(DataType.BOOLEAN, false);
+        return leftRsult;
+      }
+    }
     leftRsult.set(DataType.BOOLEAN, !setOfExprResult.contains(leftRsult));
-
     return leftRsult;
   }
 

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/58751fd5/integration/spark-common-test/src/test/resources/filter/datawithnull.csv
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/resources/filter/datawithnull.csv b/integration/spark-common-test/src/test/resources/filter/datawithnull.csv
new file mode 100644
index 0000000..cf9f0b7
--- /dev/null
+++ b/integration/spark-common-test/src/test/resources/filter/datawithnull.csv
@@ -0,0 +1,3 @@
+1,emp1,1234
+2,emp2,4321
+3,emp3,xyz
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/58751fd5/integration/spark-common-test/src/test/resources/filter/datawithoutnull.csv
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/resources/filter/datawithoutnull.csv b/integration/spark-common-test/src/test/resources/filter/datawithoutnull.csv
new file mode 100644
index 0000000..e4ec272
--- /dev/null
+++ b/integration/spark-common-test/src/test/resources/filter/datawithoutnull.csv
@@ -0,0 +1,3 @@
+1,emp1,1234
+2,emp2,4321
+3,emp3,22
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/58751fd5/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/detailquery/ExpressionWithNullTestCase.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/detailquery/ExpressionWithNullTestCase.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/detailquery/ExpressionWithNullTestCase.scala
new file mode 100644
index 0000000..a421c7e
--- /dev/null
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/detailquery/ExpressionWithNullTestCase.scala
@@ -0,0 +1,150 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.carbondata.spark.testsuite.detailquery
+
+import org.apache.spark.sql.common.util.QueryTest
+import org.scalatest.BeforeAndAfterAll
+
+class ExpressionWithNullTestCase extends QueryTest with BeforeAndAfterAll {
+
+  override def beforeAll() {
+    sql("drop table if exists expression_test")
+    sql("drop table if exists expression_test_hive")
+    sql("drop table if exists expression")
+    sql("drop table if exists expression_hive")
+    sql("create table expression_test (id int, name string, number int) stored by 'carbondata'")
+    sql(s"load data local inpath '$resourcesPath/filter/datawithnull.csv' into table expression_test options('FILEHEADER'='id,name,number')")
+    sql("""create table expression_test_hive (id int, name string, number int) row format delimited fields terminated by ','""")
+    sql(s"load data local inpath '$resourcesPath/filter/datawithnull.csv' into table expression_test_hive")
+
+    sql("""create table expression (id int, name string, number int) row format delimited fields terminated by ','""")
+    sql(s"load data local inpath '$resourcesPath/filter/datawithoutnull.csv' into table expression")
+    sql("""create table expression_hive (id int, name string, number int) row format delimited fields terminated by ','""")
+    sql(s"load data local inpath '$resourcesPath/filter/datawithoutnull.csv' into table expression_hive")
+  }
+
+  test("test to check in expression with null values") {
+    checkAnswer(sql("select * from expression_test where id in (1,2,'')"), sql("select * from expression_test_hive where id in (1,2,'')"))
+    checkAnswer(sql("select * from expression_test where id in ('')"), sql("select * from expression_test_hive where id in ('')"))
+    checkAnswer(sql("select * from expression_test where number in (null)"), sql("select * from expression_test_hive where number in (null)"))
+    checkAnswer(sql("select * from expression_test where number in (2)"), sql("select * from expression_test_hive where number in (2)"))
+    checkAnswer(sql("select * from expression_test where number in (1,null)"), sql("select * from expression_test_hive where number in (1,null)"))
+    checkAnswer(sql("select * from expression where number in (1,null)"), sql("select * from expression_hive where number in (1,null)"))
+    checkAnswer(sql("select * from expression where id in (3)"), sql("select * from expression_hive where id in (3)"))
+    checkAnswer(sql("select * from expression where id in ('2')"), sql("select * from expression_hive where id in ('2')"))
+    checkAnswer(sql("select * from expression where id in (cast('2' as int))"), sql("select * from expression_hive where id in (cast('2' as int))"))
+    checkAnswer(sql("select * from expression_test where id in (3)"), sql("select * from expression_test_hive where id in (3)"))
+    checkAnswer(sql("select * from expression_test where id in ('2')"), sql("select * from expression_test_hive where id in ('2')"))
+    checkAnswer(sql("select * from expression_test where id in (cast('2' as int))"), sql("select * from expression_test_hive where id in (cast('2' as int))"))
+    checkAnswer(sql("select * from expression_test where id in (cast('null' as int))"), sql("select * from expression_test_hive where id in (cast('null' as int))"))
+  }
+
+  test("test to check not in expression with null values") {
+    checkAnswer(sql("select * from expression_test where id not in (1,2,'')"), sql("select * from expression_test_hive where id not in (1,2,'')"))
+    checkAnswer(sql("select * from expression_test where id not in ('')"), sql("select * from expression_test_hive where id not in ('')"))
+    checkAnswer(sql("select * from expression_test where number not in (null)"), sql("select * from expression_test_hive where number not in (null)"))
+    checkAnswer(sql("select * from expression_test where number not in (1,null)"), sql("select * from expression_test_hive where number not in (1,null)"))
+    checkAnswer(sql("select * from expression where number not in (1,null)"), sql("select * from expression_hive where number not in (1,null)"))
+    checkAnswer(sql("select * from expression where id not in (3)"), sql("select * from expression_hive where id not in (3)"))
+    checkAnswer(sql("select * from expression where id not in ('2')"), sql("select * from expression_hive where id not in ('2')"))
+    checkAnswer(sql("select * from expression where id not in (cast('2' as int))"), sql("select * from expression_hive where id not in (cast('2' as int))"))
+    checkAnswer(sql("select * from expression_test where id not in (3)"), sql("select * from expression_test_hive where id not in (3)"))
+    checkAnswer(sql("select * from expression_test where id not in ('2')"), sql("select * from expression_test_hive where id not in ('2')"))
+    checkAnswer(sql("select * from expression_test where id not in (cast('2' as int))"), sql("select * from expression_test_hive where id not in (cast('2' as int))"))
+    checkAnswer(sql("select * from expression_test where id not in (cast('null' as int))"), sql("select * from expression_test_hive where id not in (cast('null' as int))"))
+  }
+
+  test("test to check equals expression with null values") {
+    checkAnswer(sql("select * from expression_test where id=''"), sql("select * from expression_test_hive where id=''"))
+    checkAnswer(sql("select * from expression_test where id=' '"), sql("select * from expression_test_hive where id=' '"))
+    checkAnswer(sql("select * from expression_test where number=null"), sql("select * from expression_test_hive where number=null"))
+    checkAnswer(sql("select * from expression_test where id=3"), sql("select * from expression_test_hive where id=3"))
+    checkAnswer(sql("select * from expression where number=null"), sql("select * from expression_hive where number=null"))
+    checkAnswer(sql("select * from expression where id=2"), sql("select * from expression_hive where id=2"))
+    checkAnswer(sql("select * from expression_test where id='2'"), sql("select * from expression_test_hive where id='2'"))
+    checkAnswer(sql("select * from expression_test where cast(id as int)='2'"), sql("select * from expression_test_hive where cast(id as int)='2'"))
+    checkAnswer(sql("select * from expression where id='2'"), sql("select * from expression_hive where id='2'"))
+    checkAnswer(sql("select * from expression where cast(id as int)='null'"), sql("select * from expression_hive where cast(id as int)='null'"))
+  }
+
+  test("test to check not equals expression with null values") {
+    checkAnswer(sql("select * from expression_test where name != ''"), sql("select * from expression_test_hive where name != ''"))
+    checkAnswer(sql("select * from expression_test where name != ' '"), sql("select * from expression_test_hive where name != ' '"))
+    checkAnswer(sql("select * from expression_test where id=3"), sql("select * from expression_test_hive where id=3"))
+    checkAnswer(sql("select * from expression_test where number is not null"), sql("select * from expression_test_hive where number is not null"))
+    checkAnswer(sql("select * from expression where number is not null"), sql("select * from expression_hive where number is not null"))
+    checkAnswer(sql("select * from expression where id!=2"), sql("select * from expression_hive where id!=2"))
+    checkAnswer(sql("select * from expression_test where id!='2'"), sql("select * from expression_test_hive where id!='2'"))
+    checkAnswer(sql("select * from expression_test where cast(id as int)!='2'"), sql("select * from expression_test_hive where cast(id as int)!='2'"))
+    checkAnswer(sql("select * from expression where id!='2'"), sql("select * from expression_hive where id!='2'"))
+    checkAnswer(sql("select * from expression where cast(id as int)!='2'"), sql("select * from expression_hive where cast(id as int)!='2'"))
+    checkAnswer(sql("select * from expression where cast(id as int)!='null'"), sql("select * from expression_hive where cast(id as int)!='null'"))
+  }
+
+  test("test to check greater than equals to expression with null values") {
+    checkAnswer(sql("select * from expression_test where id >= ''"), sql("select * from expression_test_hive where id >= ''"))
+    checkAnswer(sql("select * from expression_test where id >= ' '"), sql("select * from expression_test_hive where id >= ' '"))
+    checkAnswer(sql("select * from expression_test where number >= null"), sql("select * from expression_test_hive where number >= null"))
+    checkAnswer(sql("select * from expression where number >= null"), sql("select * from expression_hive where number >= null"))
+    checkAnswer(sql("select * from expression where id>=2"), sql("select * from expression_hive where id>=2"))
+    checkAnswer(sql("select * from expression_test where id>='2'"), sql("select * from expression_test_hive where id>='2'"))
+    checkAnswer(sql("select * from expression_test where cast(id as int)>='2'"), sql("select * from expression_test_hive where cast(id as int)>='2'"))
+    checkAnswer(sql("select * from expression where id>='2'"), sql("select * from expression_hive where id>='2'"))
+    checkAnswer(sql("select * from expression where cast(id as int)>='2'"), sql("select * from expression_hive where cast(id as int)>='2'"))
+    checkAnswer(sql("select * from expression where cast(id as int)>='null'"), sql("select * from expression_hive where cast(id as int)>='null'"))
+  }
+
+  test("test to check less than equals to expression with null values") {
+    checkAnswer(sql("select * from expression_test where id <= ''"), sql("select * from expression_test_hive where id <= ''"))
+    checkAnswer(sql("select * from expression_test where id <= ' '"), sql("select * from expression_test_hive where id <= ' '"))
+    checkAnswer(sql("select * from expression_test where number <= null"), sql("select * from expression_test_hive where number <= null"))
+    checkAnswer(sql("select * from expression where number <= null"), sql("select * from expression_hive where number <= null"))
+    checkAnswer(sql("select * from expression where id<=2"), sql("select * from expression_hive where id<=2"))
+    checkAnswer(sql("select * from expression_test where id<='2'"), sql("select * from expression_test_hive where id<='2'"))
+    checkAnswer(sql("select * from expression_test where cast(id as int)<='2'"), sql("select * from expression_test_hive where cast(id as int)<='2'"))
+    checkAnswer(sql("select * from expression where id<='2'"), sql("select * from expression_hive where id<='2'"))
+    checkAnswer(sql("select * from expression where cast(id as int)<='2'"), sql("select * from expression_hive where cast(id as int)<='2'"))
+    checkAnswer(sql("select * from expression where cast(id as int)<='null'"), sql("select * from expression_hive where cast(id as int)<='null'"))
+  }
+
+  test("test to check greater than expression with null values") {
+    checkAnswer(sql("select * from expression_test where id > ''"), sql("select * from expression_test_hive where id > ''"))
+    checkAnswer(sql("select * from expression_test where id > ' '"), sql("select * from expression_test_hive where id > ' '"))
+    checkAnswer(sql("select * from expression_test where number > null"), sql("select * from expression_test_hive where number > null"))
+    checkAnswer(sql("select * from expression where number > null"), sql("select * from expression_hive where number > null"))
+    checkAnswer(sql("select * from expression where id>2"), sql("select * from expression_hive where id>2"))
+    checkAnswer(sql("select * from expression_test where id>'2'"), sql("select * from expression_test_hive where id>'2'"))
+    checkAnswer(sql("select * from expression_test where cast(id as int)>'2'"), sql("select * from expression_test_hive where cast(id as int)>'2'"))
+    checkAnswer(sql("select * from expression where id>'2'"), sql("select * from expression_hive where id>'2'"))
+    checkAnswer(sql("select * from expression where cast(id as int)>'2'"), sql("select * from expression_hive where cast(id as int)>'2'"))
+    checkAnswer(sql("select * from expression where cast(id as int)>'null'"), sql("select * from expression_hive where cast(id as int)>'null'"))
+  }
+
+  test("test to check less than expression with null values") {
+    checkAnswer(sql("select * from expression_test where id < ''"), sql("select * from expression_test_hive where id < ''"))
+    checkAnswer(sql("select * from expression_test where id < ' '"), sql("select * from expression_test_hive where id < ' '"))
+    checkAnswer(sql("select * from expression_test where number < null"), sql("select * from expression_test_hive where number < null"))
+    checkAnswer(sql("select * from expression where number < null"), sql("select * from expression_hive where number < null"))
+    checkAnswer(sql("select * from expression where id<2"), sql("select * from expression_hive where id<2"))
+    checkAnswer(sql("select * from expression_test where id<'2'"), sql("select * from expression_test_hive where id<'2'"))
+    checkAnswer(sql("select * from expression_test where cast(id as int)<'2'"), sql("select * from expression_test_hive where cast(id as int)<'2'"))
+    checkAnswer(sql("select * from expression where id<'2'"), sql("select * from expression_hive where id<'2'"))
+    checkAnswer(sql("select * from expression where cast(id as int)<'2'"), sql("select * from expression_hive where cast(id as int)<'2'"))
+    checkAnswer(sql("select * from expression where cast(id as int)<'null'"), sql("select * from expression_hive where cast(id as int)<'null'"))
+  }
+
+}


[2/2] incubator-carbondata git commit: [CARBONDATA-1009] Added check for null value in expression result. This closes #873

Posted by ra...@apache.org.
[CARBONDATA-1009] Added check for null value in expression result. This closes #873


Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/ccd8175f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/ccd8175f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/ccd8175f

Branch: refs/heads/master
Commit: ccd8175f868f6ce2b854f9d74614f51a8c035c82
Parents: 35a2869 58751fd
Author: ravipesala <ra...@gmail.com>
Authored: Thu May 4 11:44:21 2017 +0530
Committer: ravipesala <ra...@gmail.com>
Committed: Thu May 4 11:44:21 2017 +0530

----------------------------------------------------------------------
 .../expression/conditional/InExpression.java    |  14 +-
 .../expression/conditional/NotInExpression.java |  12 +-
 .../src/test/resources/filter/datawithnull.csv  |   3 +
 .../test/resources/filter/datawithoutnull.csv   |   3 +
 .../ExpressionWithNullTestCase.scala            | 150 +++++++++++++++++++
 5 files changed, 180 insertions(+), 2 deletions(-)
----------------------------------------------------------------------