You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by am...@apache.org on 2018/02/24 01:14:34 UTC

[4/5] drill git commit: DRILL-6154: NaN, Infinity issues

DRILL-6154: NaN, Infinity issues

- changed comparison rules for NaN, Infinity values. For now NaN is the biggest value, Infinity - second biggest value
- fixed min, max, trunc functions for NaN, Infinity values
- made drill use original sqrt function instead of power(x,0.5) substitution

close apache/drill#1123


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/bcd358b3
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/bcd358b3
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/bcd358b3

Branch: refs/heads/master
Commit: bcd358b3c73f34381b543789dfc368bfaefa4ea9
Parents: b4d2a77
Author: vladimir tkach <vo...@gmail.com>
Authored: Fri Feb 16 16:36:49 2018 +0200
Committer: Aman Sinha <as...@maprtech.com>
Committed: Fri Feb 23 14:18:15 2018 -0800

----------------------------------------------------------------------
 .../codegen/templates/AggrTypeFunctions1.java   | 23 ++++++--
 .../codegen/templates/ComparisonFunctions.java  | 13 ++++-
 .../main/codegen/templates/MathFunctions.java   | 16 ++++--
 .../exec/planner/sql/DrillConvertletTable.java  | 15 ++++++
 .../org/apache/drill/TestExampleQueries.java    |  1 -
 .../fn/impl/TestMathFunctionsWithNanInf.java    | 22 +++++---
 .../vector/complex/writer/TestJsonNanInf.java   | 57 ++++++++++++++++++++
 7 files changed, 129 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/bcd358b3/exec/java-exec/src/main/codegen/templates/AggrTypeFunctions1.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/codegen/templates/AggrTypeFunctions1.java b/exec/java-exec/src/main/codegen/templates/AggrTypeFunctions1.java
index b363cd1..fa4e8f1 100644
--- a/exec/java-exec/src/main/codegen/templates/AggrTypeFunctions1.java
+++ b/exec/java-exec/src/main/codegen/templates/AggrTypeFunctions1.java
@@ -71,9 +71,9 @@ public static class ${type.inputType}${aggrtype.className} implements DrillAggFu
 	  <#elseif type.runningType?starts_with("BigInt")>
 	    value.value = Long.MAX_VALUE;
 	  <#elseif type.runningType?starts_with("Float4")>
-		value.value = Float.MAX_VALUE;
+		value.value = Float.NaN;
 	  <#elseif type.runningType?starts_with("Float8")>
-		value.value = Double.MAX_VALUE;	    
+		value.value = Double.NaN;
 	  </#if>
 	<#elseif aggrtype.funcName == "max">
     <#if type.runningType?starts_with("Bit")>
@@ -101,8 +101,21 @@ public static class ${type.inputType}${aggrtype.className} implements DrillAggFu
 	    }
 	  </#if>
     nonNullCount.value = 1;
+	// For min/max functions: NaN is the biggest value,
+    // Infinity is the second biggest value
+    // -Infinity is the smallest  value
 	  <#if aggrtype.funcName == "min">
-	    value.value = Math.min(value.value, in.value);
+	    <#if type.inputType?contains("Float4")>
+		if(!Float.isNaN(in.value)) {
+		  value.value = Float.isNaN(value.value) ? in.value : Math.min(value.value, in.value);
+		}
+	    <#elseif type.inputType?contains("Float8")>
+	    if(!Double.isNaN(in.value)) {
+	      value.value = Double.isNaN(value.value) ? in.value : Math.min(value.value, in.value);
+	    }
+        <#else>
+		value.value = Math.min(value.value, in.value);
+		</#if>
 	  <#elseif aggrtype.funcName == "max">
 	    value.value = Math.max(value.value,  in.value);
 	  <#elseif aggrtype.funcName == "sum">
@@ -138,9 +151,9 @@ public static class ${type.inputType}${aggrtype.className} implements DrillAggFu
 	  <#elseif type.runningType?starts_with("BigInt")>
 	    value.value = Long.MAX_VALUE;
 	  <#elseif type.runningType?starts_with("Float4")>
-		value.value = Float.MAX_VALUE;
+		value.value = Float.NaN;
 	  <#elseif type.runningType?starts_with("Float8")>
-		value.value = Double.MAX_VALUE;	    
+		value.value = Double.NaN;
 	  </#if>
 	<#elseif aggrtype.funcName == "max">
 	  <#if type.runningType?starts_with("Int")>

http://git-wip-us.apache.org/repos/asf/drill/blob/bcd358b3/exec/java-exec/src/main/codegen/templates/ComparisonFunctions.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/codegen/templates/ComparisonFunctions.java b/exec/java-exec/src/main/codegen/templates/ComparisonFunctions.java
index 633bb56..12a4ef2 100644
--- a/exec/java-exec/src/main/codegen/templates/ComparisonFunctions.java
+++ b/exec/java-exec/src/main/codegen/templates/ComparisonFunctions.java
@@ -118,9 +118,18 @@
       {
         <@compareNullsSubblock leftType=leftType rightType=rightType
           output="out.value" breakTarget="outside" nullCompare=true nullComparesHigh=nullComparesHigh />
-
+    // NaN is the biggest possible value, and NaN == NaN
     <#if mode == "primitive">
-      ${output} = left.value < right.value ? -1 : (left.value == right.value ? 0 : 1);
+      if(Double.isNaN(left.value) && Double.isNaN(right.value)) {
+        ${output} = 0;
+      } else if (!Double.isNaN(left.value) && Double.isNaN(right.value)) {
+        ${output} = -1;
+      } else if (Double.isNaN(left.value) && !Double.isNaN(right.value)) {
+        ${output} = 1;
+      } else {
+        ${output} = left.value < right.value ? -1 : (left.value == right.value ? 0 : 1);
+      }
+
     <#elseif mode == "varString">
       ${output} = org.apache.drill.exec.expr.fn.impl.ByteFunctionHelpers.compare(
           left.buffer, left.start, left.end, right.buffer, right.start, right.end );

http://git-wip-us.apache.org/repos/asf/drill/blob/bcd358b3/exec/java-exec/src/main/codegen/templates/MathFunctions.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/codegen/templates/MathFunctions.java b/exec/java-exec/src/main/codegen/templates/MathFunctions.java
index 548c10e..0745aee 100644
--- a/exec/java-exec/src/main/codegen/templates/MathFunctions.java
+++ b/exec/java-exec/src/main/codegen/templates/MathFunctions.java
@@ -67,7 +67,11 @@ public class GMathFunctions{
   public static class ${func.className}${type.input} implements DrillSimpleFunc {
 
     @Param ${type.input}Holder in;
+  <#if func.funcName == 'sqrt'>
+    @Output Float8Holder out;
+  <#else>
     @Output ${type.outputType}Holder out;
+  </#if>
 
     public void setup() {
     }
@@ -76,12 +80,16 @@ public class GMathFunctions{
 
       <#if func.funcName=='trunc'>
       <#if type.roundingRequired ??>
-      java.math.BigDecimal bd = java.math.BigDecimal.valueOf(in.value).setScale(0, java.math.BigDecimal.ROUND_DOWN);
-      out.value = <#if type.extraCast ??>(${type.extraCast})</#if>bd.${type.castType}Value();
+      if (Double.isInfinite(in.value) || Double.isNaN(in.value)){
+        out.value = <#if type.input='Float8'> Double.NaN <#else> Float.NaN </#if>;
+      } else {
+        java.math.BigDecimal bd = java.math.BigDecimal.valueOf(in.value).setScale(0, java.math.BigDecimal.ROUND_DOWN);
+        out.value = <#if type.extraCast ??>(${type.extraCast})</#if>bd.${type.castType}Value();
+      }
       <#else>
-      out.value = (${type.castType}) (in.value);</#if>
+      out.value = <#if func.funcName!='sqrt'>(${type.castType})</#if> (in.value);</#if>
       <#else>
-      out.value = (${type.castType}) ${func.javaFunc}(in.value);
+      out.value =  <#if func.funcName!='sqrt'>(${type.castType})</#if> ${func.javaFunc}(in.value);
       </#if>
     }
   }

http://git-wip-us.apache.org/repos/asf/drill/blob/bcd358b3/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/DrillConvertletTable.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/DrillConvertletTable.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/DrillConvertletTable.java
index 7b66d12..bacf5c2 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/DrillConvertletTable.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/DrillConvertletTable.java
@@ -19,11 +19,17 @@ package org.apache.drill.exec.planner.sql;
 
 import java.util.HashMap;
 
+import org.apache.calcite.rex.RexCall;
+import org.apache.calcite.rex.RexNode;
 import org.apache.calcite.sql.SqlBasicCall;
 import org.apache.calcite.sql.SqlCall;
 import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.SqlLiteral;
+import org.apache.calcite.sql.SqlNode;
 import org.apache.calcite.sql.SqlOperator;
 import org.apache.calcite.sql.fun.SqlStdOperatorTable;
+import org.apache.calcite.sql.parser.SqlParserPos;
+import org.apache.calcite.sql2rel.SqlRexContext;
 import org.apache.calcite.sql2rel.SqlRexConvertlet;
 import org.apache.calcite.sql2rel.SqlRexConvertletTable;
 import org.apache.calcite.sql2rel.StandardConvertletTable;
@@ -34,10 +40,19 @@ public class DrillConvertletTable implements SqlRexConvertletTable{
   public static HashMap<SqlOperator, SqlRexConvertlet> map = new HashMap<>();
 
   public static SqlRexConvertletTable INSTANCE = new DrillConvertletTable();
+  private static SqlRexConvertlet sqrtConvertlet = new SqlRexConvertlet() {
+    public RexNode convertCall(SqlRexContext cx, SqlCall call) {
+      RexNode operand = cx.convertExpression(call.operand(0));
+      return cx.getRexBuilder().makeCall(SqlStdOperatorTable.SQRT, operand);
+    }
+  };
 
   static {
     // Use custom convertlet for extract function
     map.put(SqlStdOperatorTable.EXTRACT, DrillExtractConvertlet.INSTANCE);
+    // sqrt needs it's own convertlet because calcite overrides it to power(x,0.5)
+    // which is not suitable for Infinity value case
+    map.put(SqlStdOperatorTable.SQRT, sqrtConvertlet);
     map.put(SqlStdOperatorTable.AVG, new DrillAvgVarianceConvertlet(SqlKind.AVG));
     map.put(SqlStdOperatorTable.STDDEV_POP, new DrillAvgVarianceConvertlet(SqlKind.STDDEV_POP));
     map.put(SqlStdOperatorTable.STDDEV_SAMP, new DrillAvgVarianceConvertlet(SqlKind.STDDEV_SAMP));

http://git-wip-us.apache.org/repos/asf/drill/blob/bcd358b3/exec/java-exec/src/test/java/org/apache/drill/TestExampleQueries.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/TestExampleQueries.java b/exec/java-exec/src/test/java/org/apache/drill/TestExampleQueries.java
index 7de64c9..732884c 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/TestExampleQueries.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/TestExampleQueries.java
@@ -28,7 +28,6 @@ import org.apache.drill.categories.PlannerTest;
 import org.apache.drill.categories.SqlFunctionTest;
 import org.apache.drill.categories.UnlikelyTest;
 import org.apache.drill.common.types.TypeProtos;
-import org.apache.drill.common.util.DrillFileUtils;
 import org.apache.drill.exec.ExecConstants;
 import org.apache.drill.test.BaseTestQuery;
 import org.junit.BeforeClass;

http://git-wip-us.apache.org/repos/asf/drill/blob/bcd358b3/exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestMathFunctionsWithNanInf.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestMathFunctionsWithNanInf.java b/exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestMathFunctionsWithNanInf.java
index 4003bbc..b692c9f 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestMathFunctionsWithNanInf.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestMathFunctionsWithNanInf.java
@@ -299,13 +299,23 @@ public class TestMathFunctionsWithNanInf extends BaseTestQuery {
     @Test
     public void testSqrtFunction() throws Exception {
       String table_name = "nan_test.json";
-      String json = "{\"nan_col\":NaN, \"inf_col\":Infinity}";
-      String query = String.format("select sqrt(nan_col) as nan_col, sqrt(inf_col) as inf_col from dfs.`%s`", table_name);
-      String[] columns = {"nan_col", "inf_col"};
-      Object[] values = {Double.NaN, Double.POSITIVE_INFINITY};
+      String json = "{\"nan_col\":NaN, \"inf_col\":-Infinity, \"pos_inf_col\":Infinity}";
+      String query = String.format("select sqrt(nan_col) as nan_col, sqrt(inf_col) as inf_col, sqrt(pos_inf_col) as pos_inf from dfs.`%s`", table_name);
+      String[] columns = {"nan_col", "inf_col", "pos_inf"};
+      Object[] values = {Double.NaN, Double.NaN, Double.POSITIVE_INFINITY};
       evalTest(table_name, json, query, columns, values);
     }
 
+  @Test
+  public void testSqrtNestedFunction() throws Exception {
+    String table_name = "nan_test.json";
+    String json = "{\"nan_col\":NaN, \"inf_col\":-Infinity, \"pos_inf_col\":Infinity}";
+    String query = String.format("select sqrt(sin(nan_col)) as nan_col, sqrt(sin(inf_col)) as inf_col, sqrt(sin(pos_inf_col)) as pos_inf from dfs.`%s`", table_name);
+    String[] columns = {"nan_col", "inf_col", "pos_inf"};
+    Object[] values = {Double.NaN, Double.NaN, Double.NaN};
+    evalTest(table_name, json, query, columns, values);
+  }
+
     @Test
     public void testCeilFunction() throws Exception {
       String table_name = "nan_test.json";
@@ -390,7 +400,7 @@ public class TestMathFunctionsWithNanInf extends BaseTestQuery {
       public void testTruncFunction() throws Exception {
       String table_name = "nan_test.json";
       String json = "{\"nan_col\":NaN, \"inf_col\":Infinity}";
-      String query = String.format("select trunc(nan_col,3) as nan_col, trunc(inf_col,3) as inf_col from dfs.`%s`", table_name);
+      String query = String.format("select trunc(nan_col,3) as nan_col, trunc(inf_col) as inf_col from dfs.`%s`", table_name);
       String[] columns = {"nan_col", "inf_col"};
       Object[] values = {Double.NaN, Double.NaN};
       evalTest(table_name, json, query, columns, values);
@@ -496,7 +506,7 @@ public class TestMathFunctionsWithNanInf extends BaseTestQuery {
               "{\"nan_col\":5.0, \"inf_col\":5.0}]";
       String query = String.format("select min(nan_col) as nan_col, min(inf_col) as inf_col from dfs.`%s`", table_name);
       String[] columns = {"nan_col", "inf_col"};
-      Object[] values = {Double.NaN, 5.0};
+      Object[] values = {5.0, 5.0};
       evalTest(table_name, json, query, columns, values);
     }
 

http://git-wip-us.apache.org/repos/asf/drill/blob/bcd358b3/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonNanInf.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonNanInf.java b/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonNanInf.java
index 2d19c17..95848c4 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonNanInf.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonNanInf.java
@@ -274,4 +274,61 @@ public class TestJsonNanInf extends BaseTestQuery {
           .run();
   }
 
+  @Test
+  public void testOrderByWithNaN() throws Exception {
+    String table_name = "nan_test.json";
+    String json = "{\"name\":\"obj1\", \"attr1\":1, \"attr2\":2, \"attr3\":3, \"attr4\":NaN}\n" +
+        "{\"name\":\"obj1\", \"attr1\":1, \"attr2\":2, \"attr3\":4, \"attr4\":Infinity}\n" +
+        "{\"name\":\"obj2\", \"attr1\":1, \"attr2\":2, \"attr3\":5, \"attr4\":-Infinity}\n" +
+        "{\"name\":\"obj2\", \"attr1\":1, \"attr2\":2, \"attr3\":3, \"attr4\":NaN}";
+    String query = String.format("SELECT name, attr4 from dfs.`%s` order by name, attr4 ", table_name);
+
+    File file = new File(dirTestWatcher.getRootDir(), table_name);
+    try {
+      FileUtils.writeStringToFile(file, json);
+      test("alter session set `%s` = true", ExecConstants.JSON_READ_NUMBERS_AS_DOUBLE);
+      testBuilder()
+          .sqlQuery(query)
+          .ordered()
+          .baselineColumns("name", "attr4")
+          .baselineValues("obj1", Double.POSITIVE_INFINITY)
+          .baselineValues("obj1", Double.NaN)
+          .baselineValues("obj2", Double.NEGATIVE_INFINITY)
+          .baselineValues("obj2", Double.NaN)
+          .build()
+          .run();
+    } finally {
+      test("alter session set `%s` = false", ExecConstants.JSON_READ_NUMBERS_AS_DOUBLE);
+      FileUtils.deleteQuietly(file);
+    }
+  }
+
+  @Test
+  public void testInnerJoinWithNaN() throws Exception {
+    String table_name = "nan_test.json";
+    String json = "{\"name\":\"obj1\", \"attr1\":1, \"attr2\":2, \"attr3\":3, \"attr4\":NaN}\n" +
+        "{\"name\":\"obj1\", \"attr1\":1, \"attr2\":2, \"attr3\":4, \"attr4\":Infinity}\n" +
+        "{\"name\":\"obj2\", \"attr1\":1, \"attr2\":2, \"attr3\":5, \"attr4\":-Infinity}\n" +
+        "{\"name\":\"obj2\", \"attr1\":1, \"attr2\":2, \"attr3\":3, \"attr4\":NaN}";
+    String query = String.format("select distinct t.name from dfs.`%s` t inner join dfs.`%s` " +
+        " tt on t.attr4 = tt.attr4 ", table_name, table_name);
+
+    File file = new File(dirTestWatcher.getRootDir(), table_name);
+    try {
+      FileUtils.writeStringToFile(file, json);
+      test("alter session set `%s` = true", ExecConstants.JSON_READ_NUMBERS_AS_DOUBLE);
+      testBuilder()
+          .sqlQuery(query)
+          .ordered()
+          .baselineColumns("name")
+          .baselineValues("obj1")
+          .baselineValues("obj2")
+          .build()
+          .run();
+    } finally {
+      test("alter session set `%s` = false", ExecConstants.JSON_READ_NUMBERS_AS_DOUBLE);
+      FileUtils.deleteQuietly(file);
+    }
+  }
+
 }