You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by am...@apache.org on 2019/04/03 02:23:24 UTC

[drill] branch master updated: DRILL-7152: During histogram creation handle the case when all values of a column are NULLs.

This is an automated email from the ASF dual-hosted git repository.

amansinha pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git


The following commit(s) were added to refs/heads/master by this push:
     new 54384a9  DRILL-7152: During histogram creation handle the case when all values of a column are NULLs.
54384a9 is described below

commit 54384a992a0742aeab23afa82c7b7f4adcd388d3
Author: Aman Sinha <as...@maprtech.com>
AuthorDate: Tue Apr 2 14:30:26 2019 -0700

    DRILL-7152: During histogram creation handle the case when all values of a column are NULLs.
    
    close apache/drill#1730
---
 .../drill/exec/expr/fn/impl/TDigestFunctions.java  | 320 ++++++++++++---------
 .../org/apache/drill/exec/sql/TestAnalyze.java     |  36 +++
 2 files changed, 228 insertions(+), 128 deletions(-)

diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/TDigestFunctions.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/TDigestFunctions.java
index 3be70c2..041543b 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/TDigestFunctions.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/TDigestFunctions.java
@@ -83,14 +83,18 @@ public class TDigestFunctions {
       if (work.obj != null) {
         com.clearspring.analytics.stream.quantile.TDigest tdigest = (com.clearspring.analytics.stream.quantile.TDigest) work.obj;
         try {
-          int size = tdigest.smallByteSize();
-          java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
-          tdigest.asSmallBytes(byteBuf);
-          out.buffer = buffer.reallocIfNeeded(size);
-          out.start = 0;
-          out.end = size;
-          out.buffer.setBytes(0, byteBuf.array());
-          out.isSet = 1;
+          if (tdigest.size() > 0) {
+            int size = tdigest.smallByteSize();
+            java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
+            tdigest.asSmallBytes(byteBuf);
+            out.buffer = buffer.reallocIfNeeded(size);
+            out.start = 0;
+            out.end = size;
+            out.buffer.setBytes(0, byteBuf.array());
+            out.isSet = 1;
+          } else {
+            out.isSet = 0;
+          }
         } catch (Exception e) {
           throw new org.apache.drill.common.exceptions.DrillRuntimeException("Failed to get TDigest output", e);
         }
@@ -138,14 +142,18 @@ public class TDigestFunctions {
       if (work.obj != null) {
         com.clearspring.analytics.stream.quantile.TDigest tdigest = (com.clearspring.analytics.stream.quantile.TDigest) work.obj;
         try {
-          int size = tdigest.smallByteSize();
-          java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
-          tdigest.asSmallBytes(byteBuf);
-          out.buffer = buffer.reallocIfNeeded(size);
-          out.start = 0;
-          out.end = size;
-          out.buffer.setBytes(0, byteBuf.array());
-          out.isSet = 1;
+          if (tdigest.size() > 0) {
+            int size = tdigest.smallByteSize();
+            java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
+            tdigest.asSmallBytes(byteBuf);
+            out.buffer = buffer.reallocIfNeeded(size);
+            out.start = 0;
+            out.end = size;
+            out.buffer.setBytes(0, byteBuf.array());
+            out.isSet = 1;
+          } else {
+            out.isSet = 0;
+          }
         } catch (Exception e) {
           throw new org.apache.drill.common.exceptions.DrillRuntimeException("Failed to get TDigest output", e);
         }
@@ -189,14 +197,18 @@ public class TDigestFunctions {
       if (work.obj != null) {
         com.clearspring.analytics.stream.quantile.TDigest tdigest = (com.clearspring.analytics.stream.quantile.TDigest) work.obj;
         try {
-          int size = tdigest.smallByteSize();
-          java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
-          tdigest.asSmallBytes(byteBuf);
-          out.buffer = buffer.reallocIfNeeded(size);
-          out.start = 0;
-          out.end = size;
-          out.buffer.setBytes(0, byteBuf.array());
-          out.isSet = 1;
+          if (tdigest.size() > 0) {
+            int size = tdigest.smallByteSize();
+            java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
+            tdigest.asSmallBytes(byteBuf);
+            out.buffer = buffer.reallocIfNeeded(size);
+            out.start = 0;
+            out.end = size;
+            out.buffer.setBytes(0, byteBuf.array());
+            out.isSet = 1;
+          } else {
+            out.isSet = 0;
+          }
         } catch (Exception e) {
           throw new org.apache.drill.common.exceptions.DrillRuntimeException("Failed to get TDigest output", e);
         }
@@ -244,14 +256,18 @@ public class TDigestFunctions {
       if (work.obj != null) {
         com.clearspring.analytics.stream.quantile.TDigest tdigest = (com.clearspring.analytics.stream.quantile.TDigest) work.obj;
         try {
-          int size = tdigest.smallByteSize();
-          java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
-          tdigest.asSmallBytes(byteBuf);
-          out.buffer = buffer.reallocIfNeeded(size);
-          out.start = 0;
-          out.end = size;
-          out.buffer.setBytes(0, byteBuf.array());
-          out.isSet = 1;
+          if (tdigest.size() > 0) {
+            int size = tdigest.smallByteSize();
+            java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
+            tdigest.asSmallBytes(byteBuf);
+            out.buffer = buffer.reallocIfNeeded(size);
+            out.start = 0;
+            out.end = size;
+            out.buffer.setBytes(0, byteBuf.array());
+            out.isSet = 1;
+          } else {
+            out.isSet = 0;
+          }
         } catch (Exception e) {
           throw new org.apache.drill.common.exceptions.DrillRuntimeException("Failed to get TDigest output", e);
         }
@@ -295,14 +311,18 @@ public class TDigestFunctions {
       if (work.obj != null) {
         com.clearspring.analytics.stream.quantile.TDigest tdigest = (com.clearspring.analytics.stream.quantile.TDigest) work.obj;
         try {
-          int size = tdigest.smallByteSize();
-          java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
-          tdigest.asSmallBytes(byteBuf);
-          out.buffer = buffer.reallocIfNeeded(size);
-          out.start = 0;
-          out.end = size;
-          out.buffer.setBytes(0, byteBuf.array());
-          out.isSet = 1;
+          if (tdigest.size() > 0) {
+            int size = tdigest.smallByteSize();
+            java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
+            tdigest.asSmallBytes(byteBuf);
+            out.buffer = buffer.reallocIfNeeded(size);
+            out.start = 0;
+            out.end = size;
+            out.buffer.setBytes(0, byteBuf.array());
+            out.isSet = 1;
+          } else {
+            out.isSet = 0;
+          }
         } catch (Exception e) {
           throw new org.apache.drill.common.exceptions.DrillRuntimeException("Failed to get TDigest output", e);
         }
@@ -350,14 +370,18 @@ public class TDigestFunctions {
       if (work.obj != null) {
         com.clearspring.analytics.stream.quantile.TDigest tdigest = (com.clearspring.analytics.stream.quantile.TDigest) work.obj;
         try {
-          int size = tdigest.smallByteSize();
-          java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
-          tdigest.asSmallBytes(byteBuf);
-          out.buffer = buffer.reallocIfNeeded(size);
-          out.start = 0;
-          out.end = size;
-          out.buffer.setBytes(0, byteBuf.array());
-          out.isSet = 1;
+          if (tdigest.size() > 0) {
+            int size = tdigest.smallByteSize();
+            java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
+            tdigest.asSmallBytes(byteBuf);
+            out.buffer = buffer.reallocIfNeeded(size);
+            out.start = 0;
+            out.end = size;
+            out.buffer.setBytes(0, byteBuf.array());
+            out.isSet = 1;
+          } else {
+            out.isSet = 0;
+          }
         } catch (Exception e) {
           throw new org.apache.drill.common.exceptions.DrillRuntimeException("Failed to get TDigest output", e);
         }
@@ -401,14 +425,18 @@ public class TDigestFunctions {
       if (work.obj != null) {
         com.clearspring.analytics.stream.quantile.TDigest tdigest = (com.clearspring.analytics.stream.quantile.TDigest) work.obj;
         try {
-          int size = tdigest.smallByteSize();
-          java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
-          tdigest.asSmallBytes(byteBuf);
-          out.buffer = buffer.reallocIfNeeded(size);
-          out.start = 0;
-          out.end = size;
-          out.buffer.setBytes(0, byteBuf.array());
-          out.isSet = 1;
+          if (tdigest.size() > 0) {
+            int size = tdigest.smallByteSize();
+            java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
+            tdigest.asSmallBytes(byteBuf);
+            out.buffer = buffer.reallocIfNeeded(size);
+            out.start = 0;
+            out.end = size;
+            out.buffer.setBytes(0, byteBuf.array());
+            out.isSet = 1;
+          } else {
+            out.isSet = 0;
+          }
         } catch (Exception e) {
           throw new org.apache.drill.common.exceptions.DrillRuntimeException("Failed to get TDigest output", e);
         }
@@ -456,14 +484,18 @@ public class TDigestFunctions {
       if (work.obj != null) {
         com.clearspring.analytics.stream.quantile.TDigest tdigest = (com.clearspring.analytics.stream.quantile.TDigest) work.obj;
         try {
-          int size = tdigest.smallByteSize();
-          java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
-          tdigest.asSmallBytes(byteBuf);
-          out.buffer = buffer.reallocIfNeeded(size);
-          out.start = 0;
-          out.end = size;
-          out.buffer.setBytes(0, byteBuf.array());
-          out.isSet = 1;
+          if (tdigest.size() > 0) {
+            int size = tdigest.smallByteSize();
+            java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
+            tdigest.asSmallBytes(byteBuf);
+            out.buffer = buffer.reallocIfNeeded(size);
+            out.start = 0;
+            out.end = size;
+            out.buffer.setBytes(0, byteBuf.array());
+            out.isSet = 1;
+          } else {
+            out.isSet = 0;
+          }
         } catch (Exception e) {
           throw new org.apache.drill.common.exceptions.DrillRuntimeException("Failed to get TDigest output", e);
         }
@@ -507,14 +539,18 @@ public class TDigestFunctions {
       if (work.obj != null) {
         com.clearspring.analytics.stream.quantile.TDigest tdigest = (com.clearspring.analytics.stream.quantile.TDigest) work.obj;
         try {
-          int size = tdigest.smallByteSize();
-          java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
-          tdigest.asSmallBytes(byteBuf);
-          out.buffer = buffer.reallocIfNeeded(size);
-          out.start = 0;
-          out.end = size;
-          out.buffer.setBytes(0, byteBuf.array());
-          out.isSet = 1;
+          if (tdigest.size() > 0) {
+            int size = tdigest.smallByteSize();
+            java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
+            tdigest.asSmallBytes(byteBuf);
+            out.buffer = buffer.reallocIfNeeded(size);
+            out.start = 0;
+            out.end = size;
+            out.buffer.setBytes(0, byteBuf.array());
+            out.isSet = 1;
+          } else {
+            out.isSet = 0;
+          }
         } catch (Exception e) {
           throw new org.apache.drill.common.exceptions.DrillRuntimeException("Failed to get TDigest output", e);
         }
@@ -562,14 +598,18 @@ public class TDigestFunctions {
       if (work.obj != null) {
         com.clearspring.analytics.stream.quantile.TDigest tdigest = (com.clearspring.analytics.stream.quantile.TDigest) work.obj;
         try {
-          int size = tdigest.smallByteSize();
-          java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
-          tdigest.asSmallBytes(byteBuf);
-          out.buffer = buffer.reallocIfNeeded(size);
-          out.start = 0;
-          out.end = size;
-          out.buffer.setBytes(0, byteBuf.array());
-          out.isSet = 1;
+          if (tdigest.size() > 0) {
+            int size = tdigest.smallByteSize();
+            java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
+            tdigest.asSmallBytes(byteBuf);
+            out.buffer = buffer.reallocIfNeeded(size);
+            out.start = 0;
+            out.end = size;
+            out.buffer.setBytes(0, byteBuf.array());
+            out.isSet = 1;
+          } else {
+            out.isSet = 0;
+          }
         } catch (Exception e) {
           throw new org.apache.drill.common.exceptions.DrillRuntimeException("Failed to get TDigest output", e);
         }
@@ -613,14 +653,18 @@ public class TDigestFunctions {
       if (work.obj != null) {
         com.clearspring.analytics.stream.quantile.TDigest tdigest = (com.clearspring.analytics.stream.quantile.TDigest) work.obj;
         try {
-          int size = tdigest.smallByteSize();
-          java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
-          tdigest.asSmallBytes(byteBuf);
-          out.buffer = buffer.reallocIfNeeded(size);
-          out.start = 0;
-          out.end = size;
-          out.buffer.setBytes(0, byteBuf.array());
-          out.isSet = 1;
+          if (tdigest.size() > 0) {
+            int size = tdigest.smallByteSize();
+            java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
+            tdigest.asSmallBytes(byteBuf);
+            out.buffer = buffer.reallocIfNeeded(size);
+            out.start = 0;
+            out.end = size;
+            out.buffer.setBytes(0, byteBuf.array());
+            out.isSet = 1;
+          } else {
+            out.isSet = 0;
+          }
         } catch (Exception e) {
           throw new org.apache.drill.common.exceptions.DrillRuntimeException("Failed to get TDigest output", e);
         }
@@ -668,14 +712,18 @@ public class TDigestFunctions {
       if (work.obj != null) {
         com.clearspring.analytics.stream.quantile.TDigest tdigest = (com.clearspring.analytics.stream.quantile.TDigest) work.obj;
         try {
-          int size = tdigest.smallByteSize();
-          java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
-          tdigest.asSmallBytes(byteBuf);
-          out.buffer = buffer.reallocIfNeeded(size);
-          out.start = 0;
-          out.end = size;
-          out.buffer.setBytes(0, byteBuf.array());
-          out.isSet = 1;
+          if (tdigest.size() > 0) {
+            int size = tdigest.smallByteSize();
+            java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
+            tdigest.asSmallBytes(byteBuf);
+            out.buffer = buffer.reallocIfNeeded(size);
+            out.start = 0;
+            out.end = size;
+            out.buffer.setBytes(0, byteBuf.array());
+            out.isSet = 1;
+          } else {
+            out.isSet = 0;
+          }
         } catch (Exception e) {
           throw new org.apache.drill.common.exceptions.DrillRuntimeException("Failed to get TDigest output", e);
         }
@@ -719,14 +767,18 @@ public class TDigestFunctions {
       if (work.obj != null) {
         com.clearspring.analytics.stream.quantile.TDigest tdigest = (com.clearspring.analytics.stream.quantile.TDigest) work.obj;
         try {
-          int size = tdigest.smallByteSize();
-          java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
-          tdigest.asSmallBytes(byteBuf);
-          out.buffer = buffer.reallocIfNeeded(size);
-          out.start = 0;
-          out.end = size;
-          out.buffer.setBytes(0, byteBuf.array());
-          out.isSet = 1;
+          if (tdigest.size() > 0) {
+            int size = tdigest.smallByteSize();
+            java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
+            tdigest.asSmallBytes(byteBuf);
+            out.buffer = buffer.reallocIfNeeded(size);
+            out.start = 0;
+            out.end = size;
+            out.buffer.setBytes(0, byteBuf.array());
+            out.isSet = 1;
+          } else {
+            out.isSet = 0;
+          }
         } catch (Exception e) {
           throw new org.apache.drill.common.exceptions.DrillRuntimeException("Failed to get TDigest output", e);
         }
@@ -774,14 +826,18 @@ public class TDigestFunctions {
       if (work.obj != null) {
         com.clearspring.analytics.stream.quantile.TDigest tdigest = (com.clearspring.analytics.stream.quantile.TDigest) work.obj;
         try {
-          int size = tdigest.smallByteSize();
-          java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
-          tdigest.asSmallBytes(byteBuf);
-          out.buffer = buffer.reallocIfNeeded(size);
-          out.start = 0;
-          out.end = size;
-          out.buffer.setBytes(0, byteBuf.array());
-          out.isSet = 1;
+          if (tdigest.size() > 0) {
+            int size = tdigest.smallByteSize();
+            java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
+            tdigest.asSmallBytes(byteBuf);
+            out.buffer = buffer.reallocIfNeeded(size);
+            out.start = 0;
+            out.end = size;
+            out.buffer.setBytes(0, byteBuf.array());
+            out.isSet = 1;
+          } else {
+            out.isSet = 0;
+          }
         } catch (Exception e) {
           throw new org.apache.drill.common.exceptions.DrillRuntimeException("Failed to get TDigest output", e);
         }
@@ -825,14 +881,18 @@ public class TDigestFunctions {
       if (work.obj != null) {
         com.clearspring.analytics.stream.quantile.TDigest tdigest = (com.clearspring.analytics.stream.quantile.TDigest) work.obj;
         try {
-          int size = tdigest.smallByteSize();
-          java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
-          tdigest.asSmallBytes(byteBuf);
-          out.buffer = buffer.reallocIfNeeded(size);
-          out.start = 0;
-          out.end = size;
-          out.buffer.setBytes(0, byteBuf.array());
-          out.isSet = 1;
+          if (tdigest.size() > 0) {
+            int size = tdigest.smallByteSize();
+            java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
+            tdigest.asSmallBytes(byteBuf);
+            out.buffer = buffer.reallocIfNeeded(size);
+            out.start = 0;
+            out.end = size;
+            out.buffer.setBytes(0, byteBuf.array());
+            out.isSet = 1;
+          } else {
+            out.isSet = 0;
+          }
         } catch (Exception e) {
           throw new org.apache.drill.common.exceptions.DrillRuntimeException("Failed to get TDigest output", e);
         }
@@ -880,14 +940,18 @@ public class TDigestFunctions {
       if (work.obj != null) {
         com.clearspring.analytics.stream.quantile.TDigest tdigest = (com.clearspring.analytics.stream.quantile.TDigest) work.obj;
         try {
-          int size = tdigest.smallByteSize();
-          java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
-          tdigest.asSmallBytes(byteBuf);
-          out.buffer = buffer.reallocIfNeeded(size);
-          out.start = 0;
-          out.end = size;
-          out.buffer.setBytes(0, byteBuf.array());
-          out.isSet = 1;
+          if (tdigest.size() > 0) {
+            int size = tdigest.smallByteSize();
+            java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
+            tdigest.asSmallBytes(byteBuf);
+            out.buffer = buffer.reallocIfNeeded(size);
+            out.start = 0;
+            out.end = size;
+            out.buffer.setBytes(0, byteBuf.array());
+            out.isSet = 1;
+          } else {
+            out.isSet = 0;
+          }
         } catch (Exception e) {
           throw new org.apache.drill.common.exceptions.DrillRuntimeException("Failed to get TDigest output", e);
         }
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestAnalyze.java b/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestAnalyze.java
index b32aa21..c44dfdd 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestAnalyze.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestAnalyze.java
@@ -429,6 +429,42 @@ public class TestAnalyze extends BaseTestQuery {
     }
   }
 
+  @Test
+  public void testHistogramWithColumnsWithAllNulls() throws Exception {
+    try {
+      test("ALTER SESSION SET `planner.slice_target` = 1");
+      test("ALTER SESSION SET `store.format` = 'parquet'");
+      test("CREATE TABLE dfs.tmp.all_nulls AS SELECT employee_id, cast(null as int) as null_int_col, "
+              + "cast(null as bigint) as null_bigint_col, cast(null as float) as null_float_col, "
+              + "cast(null as double) as null_double_col, cast(null as date) as null_date_col, "
+              + "cast(null as timestamp) as null_timestamp_col, cast(null as time) as null_time_col, "
+              + "cast(null as boolean) as null_boolean_col "
+              + "from cp.`employee.json` ");
+      test("ANALYZE TABLE dfs.tmp.all_nulls COMPUTE STATISTICS ");
+
+      testBuilder()
+              .sqlQuery("SELECT tbl.`columns`.`column` as `column`, "
+                      + " repeated_count(tbl.`columns`.`histogram`.`buckets`) as num_bucket_entries "
+                      + " from (select flatten(`directories`[0].`columns`) as `columns` "
+                      + "  from dfs.tmp.`all_nulls/.stats.drill`) as tbl")
+              .unOrdered()
+              .baselineColumns("column", "num_bucket_entries")
+              .baselineValues("`employee_id`", 11)
+              .baselineValues("`null_int_col`", 0)
+              .baselineValues("`null_bigint_col`", 0)
+              .baselineValues("`null_float_col`", 0)
+              .baselineValues("`null_double_col`", 0)
+              .baselineValues("`null_date_col`", 0)
+              .baselineValues("`null_timestamp_col`", 0)
+              .baselineValues("`null_time_col`", 0)
+              .baselineValues("`null_boolean_col`", 0)
+              .go();
+
+    } finally {
+      test("ALTER SESSION SET `planner.slice_target` = " + ExecConstants.SLICE_TARGET_DEFAULT);
+    }
+  }
+
   //Helper function to verify output of ANALYZE statement
   private void verifyAnalyzeOutput(String query, String message) throws Exception {
     List<QueryDataBatch>result = testRunAndReturn(QueryType.SQL, query);