You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ra...@apache.org on 2016/09/22 05:36:28 UTC

[30/50] [abbrv] incubator-carbondata git commit: [CARBONDATA-250] Filter result is not proper when Double data type values with 0.0 and -0.0 will be used.

[CARBONDATA-250] Filter result is incorrect when Double data type values 0.0 and -0.0 are used.


Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/c083264a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/c083264a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/c083264a

Branch: refs/heads/branch-0.1
Commit: c083264a730506bc44fc6387e378d08cab8cc334
Parents: 8b6429a
Author: sujith71955 <su...@gmail.com>
Authored: Sun Sep 18 04:01:10 2016 +0530
Committer: ravipesala <ra...@gmail.com>
Committed: Thu Sep 22 10:02:11 2016 +0530

----------------------------------------------------------------------
 .../conditional/EqualToExpression.java          |  3 ++-
 .../carbondata/scan/filter/FilterUtil.java      | 21 +++++++++++++++++++-
 .../test/resources/Test_Data1_Logrithmic.csv    |  3 +++
 .../GrtLtFilterProcessorTestCase.scala          | 11 ++++++++++
 4 files changed, 36 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/c083264a/core/src/main/java/org/apache/carbondata/scan/expression/conditional/EqualToExpression.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/scan/expression/conditional/EqualToExpression.java b/core/src/main/java/org/apache/carbondata/scan/expression/conditional/EqualToExpression.java
index 12a3e32..8f7fa0a 100644
--- a/core/src/main/java/org/apache/carbondata/scan/expression/conditional/EqualToExpression.java
+++ b/core/src/main/java/org/apache/carbondata/scan/expression/conditional/EqualToExpression.java
@@ -24,6 +24,7 @@ import org.apache.carbondata.scan.expression.Expression;
 import org.apache.carbondata.scan.expression.ExpressionResult;
 import org.apache.carbondata.scan.expression.exception.FilterIllegalMemberException;
 import org.apache.carbondata.scan.expression.exception.FilterUnsupportedException;
+import org.apache.carbondata.scan.filter.FilterUtil;
 import org.apache.carbondata.scan.filter.intf.ExpressionType;
 import org.apache.carbondata.scan.filter.intf.RowIntf;
 
@@ -78,7 +79,7 @@ public class EqualToExpression extends BinaryConditionalExpression {
         result = val1.getInt().equals(val2.getInt());
         break;
       case DOUBLE:
-        result = val1.getDouble().equals(val2.getDouble());
+        result = FilterUtil.nanSafeEqualsDoubles(val1.getDouble(), val2.getDouble());
         break;
       case TIMESTAMP:
         result = val1.getTime().equals(val2.getTime());

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/c083264a/core/src/main/java/org/apache/carbondata/scan/filter/FilterUtil.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/scan/filter/FilterUtil.java b/core/src/main/java/org/apache/carbondata/scan/filter/FilterUtil.java
index 71ac1bf..b7cacb1 100644
--- a/core/src/main/java/org/apache/carbondata/scan/filter/FilterUtil.java
+++ b/core/src/main/java/org/apache/carbondata/scan/filter/FilterUtil.java
@@ -1390,6 +1390,26 @@ public final class FilterUtil {
   }
 
   /**
+   * This method will compare double values for its equality and also it will preserve
+   * the -0.0 and 0.0 equality as per == ,also preserve NaN equality check as per
+   * java.lang.Double.equals()
+   *
+   * @param d1 double value for equality check
+   * @param d2 double value for equality check
+   * @return boolean after comparing two double values.
+   */
+  public static boolean nanSafeEqualsDoubles(Double d1, Double d2) {
+    Boolean xIsNan = Double.isNaN(d1);
+    Boolean yIsNan = Double.isNaN(d2);
+    if ((xIsNan && yIsNan) || (d1.doubleValue() == d2.doubleValue())) {
+
+      return true;
+    }
+    return false;
+
+  }
+
+  /**
    * This method will prepare the list with all unknown expressions
    *
    * @param expression
@@ -1406,5 +1426,4 @@ public final class FilterUtil {
       getUnknownExpressionsList(child, lst);
     }
   }
-
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/c083264a/integration/spark/src/test/resources/Test_Data1_Logrithmic.csv
----------------------------------------------------------------------
diff --git a/integration/spark/src/test/resources/Test_Data1_Logrithmic.csv b/integration/spark/src/test/resources/Test_Data1_Logrithmic.csv
new file mode 100644
index 0000000..0f0312d
--- /dev/null
+++ b/integration/spark/src/test/resources/Test_Data1_Logrithmic.csv
@@ -0,0 +1,3 @@
+c1_int,c2_Bigint,c3_Decimal,c4_double,c5_string,c6_Timestamp,c7_Datatype_Desc
+2147483646,9223372036854775807,0.12345678900987654321123456789012345638,1.7976931348623157E308,SQL Capability Carbon is fully compliant with Spark SQL and supports all SQL queries which can run directly on Spark SQL Easy Table Definition: Carbon supports easy to use DDL Data Definition Language statements to define and create tables. Carbon DDL is highly flexible and is very easy to use as well as powerful enough to define complex tables Easy Data anagement  Carbon supports a variety of data management functions for loading data to table and maintaining the data in table. Carbon supports bulkloading historical data as well as incrementally loading new data. Loaded data can be deleted based on load time or a specific load can be undone Carbon file format is a columnar store in HDFS  it has many features that a modern columnar format hasz such as splittablez compression schema zcomplex data type and so on. Carbon has following unique features Stores data along with index: it can signi
 ficantly accelerate query performance and reduces the scans and CPU resources where there are filters in the query. Carbon index consists of multiple level of indicesz a processing framework can leverage this index to reduce the task it needs to schedule and processz and it can also do skip scan in more finer grain unit in task side scanning instead of scanning the whole file.Operable encoded data Through supporting efficient compression and global encoding schemes  can query on compressed encoded dataz the data can be converted to encoded data just before returning the results to the usersz which is Column group Allow multiple columns to form a column group that would be stored as row format. This reduces the row reconstruction cost at query time Supports for various use cases with one single Data format  like interactive OLAP-style queryz Sequential Access SQL Capability Carbon is fully compliant with Spark SQL and supports all SQL queries which can run directly on Spark SQL Easy 
 Table Definition: Carbon supports easy to use DDL Data Definition Language statements to define and create tables. Carbon DDL is highly flexible and is very easy to use as well as powerful enough to define complex tables Easy Data anagement  Carbon supports a variety of data management functions for loading data to table and maintaining the data in table. Carbon supports bulkloading historical data as well as incrementally loading new data. Loaded data can be deleted based on load time or a specific load can be undone  it can significantly accelerate query performance and reduces the scans and CPU resources where there are filters in the query. Carbon index consists of multiple level of indicesz a processing framework can leverage this index to reduce the task it needs to schedule and processz and it can also do skip scan in more finer grain unit in task side scanning instead of scanning the whole file Operable encoded data Through supporting efficient compression and global encodin
 g schemes  can query on compressed encoded dataz the data can be converted to encoded data just before returning the results to the usersz which is Column group Allow multiple columns to form a column group that would be stored as row format This reduces the row reconstruction cost at query time Supports for various use cases with one single  Sequential Access Carbon has following unique features Stores data along with index: it can significantly accelerate query performance and reduces the scans and CPU resources where there are filters in the query. Carbon index consists of multiple level of indicesz a processing framework can leverage this index to reduce the task it needs to schedule and processz and it can also do skip scan in more finer grain unit in task side scanning instead of scanning the whole file Operable encoded data Through supporting efficient compression and global encoding schemes  can query on compressed encoded dataz the data can be converted to encoded data just
  before returning the results to the users which is Column group Allow multiple columns to form a column group that would be stored as row format Allow multiple columns to form format Allow carbon is fu,2017-07-01 12:07:28,Max_range_values-1
+2147483646,9223372036854775807,12345678900987654321123456789012345678,1.7976931348623157E308,SQL Capability Carbon is fully compliant with Spark SQL and supports all SQL queries which can run directly on Spark SQL Easy Table Definition: Carbon supports easy to use DDL Data Definition Language statements to define and create tables. Carbon DDL is highly flexible and is very easy to use as well as powerful enough to define complex tables Easy Data anagement  Carbon supports a variety of data management functions for loading data to table and maintaining the data in table. Carbon supports bulkloading historical data as well as incrementally loading new data. Loaded data can be deleted based on load time or a specific load can be undone Carbon file format is a columnar store in HDFS  it has many features that a modern columnar format hasz such as splittablez compression schema zcomplex data type and so on. Carbon has following unique features Stores data along with index: it can signifi
 cantly accelerate query performance and reduces the scans and CPU resources where there are filters in the query. Carbon index consists of multiple level of indicesz a processing framework can leverage this index to reduce the task it needs to schedule and processz and it can also do skip scan in more finer grain unit in task side scanning instead of scanning the whole file.Operable encoded data Through supporting efficient compression and global encoding schemes  can query on compressed encoded dataz the data can be converted to encoded data just before returning the results to the usersz which is Column group Allow multiple columns to form a column group that would be stored as row format. This reduces the row reconstruction cost at query time Supports for various use cases with one single Data format  like interactive OLAP-style queryz Sequential Access SQL Capability Carbon is fully compliant with Spark SQL and supports all SQL queries which can run directly on Spark SQL Easy Ta
 ble Definition: Carbon supports easy to use DDL Data Definition Language statements to define and create tables. Carbon DDL is highly flexible and is very easy to use as well as powerful enough to define complex tables Easy Data anagement  Carbon supports a variety of data management functions for loading data to table and maintaining the data in table. Carbon supports bulkloading historical data as well as incrementally loading new data. Loaded data can be deleted based on load time or a specific load can be undone  it can significantly accelerate query performance and reduces the scans and CPU resources where there are filters in the query. Carbon index consists of multiple level of indicesz a processing framework can leverage this index to reduce the task it needs to schedule and processz and it can also do skip scan in more finer grain unit in task side scanning instead of scanning the whole file Operable encoded data Through supporting efficient compression and global encoding 
 schemes  can query on compressed encoded dataz the data can be converted to encoded data just before returning the results to the usersz which is Column group Allow multiple columns to form a column group that would be stored as row format This reduces the row reconstruction cost at query time Supports for various use cases with one single  Sequential Access Carbon has following unique features Stores data along with index: it can significantly accelerate query performance and reduces the scans and CPU resources where there are filters in the query. Carbon index consists of multiple level of indicesz a processing framework can leverage this index to reduce the task it needs to schedule and processz and it can also do skip scan in more finer grain unit in task side scanning instead of scanning the whole file Operable encoded data Through supporting efficient compression and global encoding schemes  can query on compressed encoded dataz the data can be converted to encoded data just b
 efore returning the results to the users which is Column group Allow multiple columns to form a column group that would be stored as row format Allow multiple columns to form format Allow carbon is f,2017-07-01 12:07:28,Max_range_values-2

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/c083264a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/filterexpr/GrtLtFilterProcessorTestCase.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/filterexpr/GrtLtFilterProcessorTestCase.scala b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/filterexpr/GrtLtFilterProcessorTestCase.scala
index 5278344..b33b65f 100644
--- a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/filterexpr/GrtLtFilterProcessorTestCase.scala
+++ b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/filterexpr/GrtLtFilterProcessorTestCase.scala
@@ -38,6 +38,7 @@ class GrtLtFilterProcessorTestCase extends QueryTest with BeforeAndAfterAll {
     sql("drop table if exists a12")
     sql("drop table if exists a12_allnull")
     sql("drop table if exists a12_no_null")
+     sql("drop table if exists Test_Boundary1")
 
     sql(
       "create table a12(empid String,ename String,sal double,deptno int,mgr string,gender string," +
@@ -53,6 +54,7 @@ class GrtLtFilterProcessorTestCase extends QueryTest with BeforeAndAfterAll {
         " string," +
         "dob timestamp,comm decimal(4,2),desc string) stored by 'org.apache.carbondata.format'"
     )
+    sql("create table Test_Boundary1 (c1_int int,c2_Bigint Bigint,c3_Decimal Decimal(38,38),c4_double double,c5_string string,c6_Timestamp Timestamp,c7_Datatype_Desc string) STORED BY 'org.apache.carbondata.format'")
     CarbonProperties.getInstance()
       .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "yyyy-MM-dd HH:mm:ss")
     val basePath = new File(this.getClass.getResource("/").getPath + "/../../")
@@ -77,6 +79,9 @@ class GrtLtFilterProcessorTestCase extends QueryTest with BeforeAndAfterAll {
          'QUOTECHAR'='"')"""
         .stripMargin
     )
+    
+    sql(
+      s"LOAD DATA INPATH './src/test/resources/Test_Data1_Logrithmic.csv' INTO table Test_Boundary1 OPTIONS('DELIMITER'=',','QUOTECHAR'='','FILEHEADER'='')")
   }
   //mixed value test
   test("Less Than Filter") {
@@ -99,6 +104,12 @@ class GrtLtFilterProcessorTestCase extends QueryTest with BeforeAndAfterAll {
       Seq(Row(3))
     )
   }
+  test("0.0 and -0.0 equality check for double data type applying log function") {
+    // Regression test for CARBONDATA-250: log(c4_double, 1) produces a signed
+    // zero for the loaded rows, and the equal-to filter on -0.0 must match it
+    // (i.e. 0.0 and -0.0 compare equal), returning both rows as 0.0.
+    checkAnswer(
+      sql("select log(c4_double,1) from Test_Boundary1 where log(c4_double,1)= -0.0"),
+      Seq(Row(0.0),Row(0.0))
+    )
+  }
 
   test("Greater Than equal to Filter") {
     sql("select count (empid) from a12 where dob >= '2014-07-01 12:07:28'").show()