You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by am...@apache.org on 2019/04/03 02:23:24 UTC
[drill] branch master updated: DRILL-7152: During histogram creation handle the case when all values of a column are NULLs.
This is an automated email from the ASF dual-hosted git repository.
amansinha pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git
The following commit(s) were added to refs/heads/master by this push:
new 54384a9 DRILL-7152: During histogram creation handle the case when all values of a column are NULLs.
54384a9 is described below
commit 54384a992a0742aeab23afa82c7b7f4adcd388d3
Author: Aman Sinha <as...@maprtech.com>
AuthorDate: Tue Apr 2 14:30:26 2019 -0700
DRILL-7152: During histogram creation handle the case when all values of a column are NULLs.
close apache/drill#1730
---
.../drill/exec/expr/fn/impl/TDigestFunctions.java | 320 ++++++++++++---------
.../org/apache/drill/exec/sql/TestAnalyze.java | 36 +++
2 files changed, 228 insertions(+), 128 deletions(-)
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/TDigestFunctions.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/TDigestFunctions.java
index 3be70c2..041543b 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/TDigestFunctions.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/TDigestFunctions.java
@@ -83,14 +83,18 @@ public class TDigestFunctions {
if (work.obj != null) {
com.clearspring.analytics.stream.quantile.TDigest tdigest = (com.clearspring.analytics.stream.quantile.TDigest) work.obj;
try {
- int size = tdigest.smallByteSize();
- java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
- tdigest.asSmallBytes(byteBuf);
- out.buffer = buffer.reallocIfNeeded(size);
- out.start = 0;
- out.end = size;
- out.buffer.setBytes(0, byteBuf.array());
- out.isSet = 1;
+ if (tdigest.size() > 0) {
+ int size = tdigest.smallByteSize();
+ java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
+ tdigest.asSmallBytes(byteBuf);
+ out.buffer = buffer.reallocIfNeeded(size);
+ out.start = 0;
+ out.end = size;
+ out.buffer.setBytes(0, byteBuf.array());
+ out.isSet = 1;
+ } else {
+ out.isSet = 0;
+ }
} catch (Exception e) {
throw new org.apache.drill.common.exceptions.DrillRuntimeException("Failed to get TDigest output", e);
}
@@ -138,14 +142,18 @@ public class TDigestFunctions {
if (work.obj != null) {
com.clearspring.analytics.stream.quantile.TDigest tdigest = (com.clearspring.analytics.stream.quantile.TDigest) work.obj;
try {
- int size = tdigest.smallByteSize();
- java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
- tdigest.asSmallBytes(byteBuf);
- out.buffer = buffer.reallocIfNeeded(size);
- out.start = 0;
- out.end = size;
- out.buffer.setBytes(0, byteBuf.array());
- out.isSet = 1;
+ if (tdigest.size() > 0) {
+ int size = tdigest.smallByteSize();
+ java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
+ tdigest.asSmallBytes(byteBuf);
+ out.buffer = buffer.reallocIfNeeded(size);
+ out.start = 0;
+ out.end = size;
+ out.buffer.setBytes(0, byteBuf.array());
+ out.isSet = 1;
+ } else {
+ out.isSet = 0;
+ }
} catch (Exception e) {
throw new org.apache.drill.common.exceptions.DrillRuntimeException("Failed to get TDigest output", e);
}
@@ -189,14 +197,18 @@ public class TDigestFunctions {
if (work.obj != null) {
com.clearspring.analytics.stream.quantile.TDigest tdigest = (com.clearspring.analytics.stream.quantile.TDigest) work.obj;
try {
- int size = tdigest.smallByteSize();
- java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
- tdigest.asSmallBytes(byteBuf);
- out.buffer = buffer.reallocIfNeeded(size);
- out.start = 0;
- out.end = size;
- out.buffer.setBytes(0, byteBuf.array());
- out.isSet = 1;
+ if (tdigest.size() > 0) {
+ int size = tdigest.smallByteSize();
+ java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
+ tdigest.asSmallBytes(byteBuf);
+ out.buffer = buffer.reallocIfNeeded(size);
+ out.start = 0;
+ out.end = size;
+ out.buffer.setBytes(0, byteBuf.array());
+ out.isSet = 1;
+ } else {
+ out.isSet = 0;
+ }
} catch (Exception e) {
throw new org.apache.drill.common.exceptions.DrillRuntimeException("Failed to get TDigest output", e);
}
@@ -244,14 +256,18 @@ public class TDigestFunctions {
if (work.obj != null) {
com.clearspring.analytics.stream.quantile.TDigest tdigest = (com.clearspring.analytics.stream.quantile.TDigest) work.obj;
try {
- int size = tdigest.smallByteSize();
- java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
- tdigest.asSmallBytes(byteBuf);
- out.buffer = buffer.reallocIfNeeded(size);
- out.start = 0;
- out.end = size;
- out.buffer.setBytes(0, byteBuf.array());
- out.isSet = 1;
+ if (tdigest.size() > 0) {
+ int size = tdigest.smallByteSize();
+ java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
+ tdigest.asSmallBytes(byteBuf);
+ out.buffer = buffer.reallocIfNeeded(size);
+ out.start = 0;
+ out.end = size;
+ out.buffer.setBytes(0, byteBuf.array());
+ out.isSet = 1;
+ } else {
+ out.isSet = 0;
+ }
} catch (Exception e) {
throw new org.apache.drill.common.exceptions.DrillRuntimeException("Failed to get TDigest output", e);
}
@@ -295,14 +311,18 @@ public class TDigestFunctions {
if (work.obj != null) {
com.clearspring.analytics.stream.quantile.TDigest tdigest = (com.clearspring.analytics.stream.quantile.TDigest) work.obj;
try {
- int size = tdigest.smallByteSize();
- java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
- tdigest.asSmallBytes(byteBuf);
- out.buffer = buffer.reallocIfNeeded(size);
- out.start = 0;
- out.end = size;
- out.buffer.setBytes(0, byteBuf.array());
- out.isSet = 1;
+ if (tdigest.size() > 0) {
+ int size = tdigest.smallByteSize();
+ java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
+ tdigest.asSmallBytes(byteBuf);
+ out.buffer = buffer.reallocIfNeeded(size);
+ out.start = 0;
+ out.end = size;
+ out.buffer.setBytes(0, byteBuf.array());
+ out.isSet = 1;
+ } else {
+ out.isSet = 0;
+ }
} catch (Exception e) {
throw new org.apache.drill.common.exceptions.DrillRuntimeException("Failed to get TDigest output", e);
}
@@ -350,14 +370,18 @@ public class TDigestFunctions {
if (work.obj != null) {
com.clearspring.analytics.stream.quantile.TDigest tdigest = (com.clearspring.analytics.stream.quantile.TDigest) work.obj;
try {
- int size = tdigest.smallByteSize();
- java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
- tdigest.asSmallBytes(byteBuf);
- out.buffer = buffer.reallocIfNeeded(size);
- out.start = 0;
- out.end = size;
- out.buffer.setBytes(0, byteBuf.array());
- out.isSet = 1;
+ if (tdigest.size() > 0) {
+ int size = tdigest.smallByteSize();
+ java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
+ tdigest.asSmallBytes(byteBuf);
+ out.buffer = buffer.reallocIfNeeded(size);
+ out.start = 0;
+ out.end = size;
+ out.buffer.setBytes(0, byteBuf.array());
+ out.isSet = 1;
+ } else {
+ out.isSet = 0;
+ }
} catch (Exception e) {
throw new org.apache.drill.common.exceptions.DrillRuntimeException("Failed to get TDigest output", e);
}
@@ -401,14 +425,18 @@ public class TDigestFunctions {
if (work.obj != null) {
com.clearspring.analytics.stream.quantile.TDigest tdigest = (com.clearspring.analytics.stream.quantile.TDigest) work.obj;
try {
- int size = tdigest.smallByteSize();
- java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
- tdigest.asSmallBytes(byteBuf);
- out.buffer = buffer.reallocIfNeeded(size);
- out.start = 0;
- out.end = size;
- out.buffer.setBytes(0, byteBuf.array());
- out.isSet = 1;
+ if (tdigest.size() > 0) {
+ int size = tdigest.smallByteSize();
+ java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
+ tdigest.asSmallBytes(byteBuf);
+ out.buffer = buffer.reallocIfNeeded(size);
+ out.start = 0;
+ out.end = size;
+ out.buffer.setBytes(0, byteBuf.array());
+ out.isSet = 1;
+ } else {
+ out.isSet = 0;
+ }
} catch (Exception e) {
throw new org.apache.drill.common.exceptions.DrillRuntimeException("Failed to get TDigest output", e);
}
@@ -456,14 +484,18 @@ public class TDigestFunctions {
if (work.obj != null) {
com.clearspring.analytics.stream.quantile.TDigest tdigest = (com.clearspring.analytics.stream.quantile.TDigest) work.obj;
try {
- int size = tdigest.smallByteSize();
- java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
- tdigest.asSmallBytes(byteBuf);
- out.buffer = buffer.reallocIfNeeded(size);
- out.start = 0;
- out.end = size;
- out.buffer.setBytes(0, byteBuf.array());
- out.isSet = 1;
+ if (tdigest.size() > 0) {
+ int size = tdigest.smallByteSize();
+ java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
+ tdigest.asSmallBytes(byteBuf);
+ out.buffer = buffer.reallocIfNeeded(size);
+ out.start = 0;
+ out.end = size;
+ out.buffer.setBytes(0, byteBuf.array());
+ out.isSet = 1;
+ } else {
+ out.isSet = 0;
+ }
} catch (Exception e) {
throw new org.apache.drill.common.exceptions.DrillRuntimeException("Failed to get TDigest output", e);
}
@@ -507,14 +539,18 @@ public class TDigestFunctions {
if (work.obj != null) {
com.clearspring.analytics.stream.quantile.TDigest tdigest = (com.clearspring.analytics.stream.quantile.TDigest) work.obj;
try {
- int size = tdigest.smallByteSize();
- java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
- tdigest.asSmallBytes(byteBuf);
- out.buffer = buffer.reallocIfNeeded(size);
- out.start = 0;
- out.end = size;
- out.buffer.setBytes(0, byteBuf.array());
- out.isSet = 1;
+ if (tdigest.size() > 0) {
+ int size = tdigest.smallByteSize();
+ java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
+ tdigest.asSmallBytes(byteBuf);
+ out.buffer = buffer.reallocIfNeeded(size);
+ out.start = 0;
+ out.end = size;
+ out.buffer.setBytes(0, byteBuf.array());
+ out.isSet = 1;
+ } else {
+ out.isSet = 0;
+ }
} catch (Exception e) {
throw new org.apache.drill.common.exceptions.DrillRuntimeException("Failed to get TDigest output", e);
}
@@ -562,14 +598,18 @@ public class TDigestFunctions {
if (work.obj != null) {
com.clearspring.analytics.stream.quantile.TDigest tdigest = (com.clearspring.analytics.stream.quantile.TDigest) work.obj;
try {
- int size = tdigest.smallByteSize();
- java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
- tdigest.asSmallBytes(byteBuf);
- out.buffer = buffer.reallocIfNeeded(size);
- out.start = 0;
- out.end = size;
- out.buffer.setBytes(0, byteBuf.array());
- out.isSet = 1;
+ if (tdigest.size() > 0) {
+ int size = tdigest.smallByteSize();
+ java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
+ tdigest.asSmallBytes(byteBuf);
+ out.buffer = buffer.reallocIfNeeded(size);
+ out.start = 0;
+ out.end = size;
+ out.buffer.setBytes(0, byteBuf.array());
+ out.isSet = 1;
+ } else {
+ out.isSet = 0;
+ }
} catch (Exception e) {
throw new org.apache.drill.common.exceptions.DrillRuntimeException("Failed to get TDigest output", e);
}
@@ -613,14 +653,18 @@ public class TDigestFunctions {
if (work.obj != null) {
com.clearspring.analytics.stream.quantile.TDigest tdigest = (com.clearspring.analytics.stream.quantile.TDigest) work.obj;
try {
- int size = tdigest.smallByteSize();
- java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
- tdigest.asSmallBytes(byteBuf);
- out.buffer = buffer.reallocIfNeeded(size);
- out.start = 0;
- out.end = size;
- out.buffer.setBytes(0, byteBuf.array());
- out.isSet = 1;
+ if (tdigest.size() > 0) {
+ int size = tdigest.smallByteSize();
+ java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
+ tdigest.asSmallBytes(byteBuf);
+ out.buffer = buffer.reallocIfNeeded(size);
+ out.start = 0;
+ out.end = size;
+ out.buffer.setBytes(0, byteBuf.array());
+ out.isSet = 1;
+ } else {
+ out.isSet = 0;
+ }
} catch (Exception e) {
throw new org.apache.drill.common.exceptions.DrillRuntimeException("Failed to get TDigest output", e);
}
@@ -668,14 +712,18 @@ public class TDigestFunctions {
if (work.obj != null) {
com.clearspring.analytics.stream.quantile.TDigest tdigest = (com.clearspring.analytics.stream.quantile.TDigest) work.obj;
try {
- int size = tdigest.smallByteSize();
- java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
- tdigest.asSmallBytes(byteBuf);
- out.buffer = buffer.reallocIfNeeded(size);
- out.start = 0;
- out.end = size;
- out.buffer.setBytes(0, byteBuf.array());
- out.isSet = 1;
+ if (tdigest.size() > 0) {
+ int size = tdigest.smallByteSize();
+ java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
+ tdigest.asSmallBytes(byteBuf);
+ out.buffer = buffer.reallocIfNeeded(size);
+ out.start = 0;
+ out.end = size;
+ out.buffer.setBytes(0, byteBuf.array());
+ out.isSet = 1;
+ } else {
+ out.isSet = 0;
+ }
} catch (Exception e) {
throw new org.apache.drill.common.exceptions.DrillRuntimeException("Failed to get TDigest output", e);
}
@@ -719,14 +767,18 @@ public class TDigestFunctions {
if (work.obj != null) {
com.clearspring.analytics.stream.quantile.TDigest tdigest = (com.clearspring.analytics.stream.quantile.TDigest) work.obj;
try {
- int size = tdigest.smallByteSize();
- java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
- tdigest.asSmallBytes(byteBuf);
- out.buffer = buffer.reallocIfNeeded(size);
- out.start = 0;
- out.end = size;
- out.buffer.setBytes(0, byteBuf.array());
- out.isSet = 1;
+ if (tdigest.size() > 0) {
+ int size = tdigest.smallByteSize();
+ java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
+ tdigest.asSmallBytes(byteBuf);
+ out.buffer = buffer.reallocIfNeeded(size);
+ out.start = 0;
+ out.end = size;
+ out.buffer.setBytes(0, byteBuf.array());
+ out.isSet = 1;
+ } else {
+ out.isSet = 0;
+ }
} catch (Exception e) {
throw new org.apache.drill.common.exceptions.DrillRuntimeException("Failed to get TDigest output", e);
}
@@ -774,14 +826,18 @@ public class TDigestFunctions {
if (work.obj != null) {
com.clearspring.analytics.stream.quantile.TDigest tdigest = (com.clearspring.analytics.stream.quantile.TDigest) work.obj;
try {
- int size = tdigest.smallByteSize();
- java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
- tdigest.asSmallBytes(byteBuf);
- out.buffer = buffer.reallocIfNeeded(size);
- out.start = 0;
- out.end = size;
- out.buffer.setBytes(0, byteBuf.array());
- out.isSet = 1;
+ if (tdigest.size() > 0) {
+ int size = tdigest.smallByteSize();
+ java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
+ tdigest.asSmallBytes(byteBuf);
+ out.buffer = buffer.reallocIfNeeded(size);
+ out.start = 0;
+ out.end = size;
+ out.buffer.setBytes(0, byteBuf.array());
+ out.isSet = 1;
+ } else {
+ out.isSet = 0;
+ }
} catch (Exception e) {
throw new org.apache.drill.common.exceptions.DrillRuntimeException("Failed to get TDigest output", e);
}
@@ -825,14 +881,18 @@ public class TDigestFunctions {
if (work.obj != null) {
com.clearspring.analytics.stream.quantile.TDigest tdigest = (com.clearspring.analytics.stream.quantile.TDigest) work.obj;
try {
- int size = tdigest.smallByteSize();
- java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
- tdigest.asSmallBytes(byteBuf);
- out.buffer = buffer.reallocIfNeeded(size);
- out.start = 0;
- out.end = size;
- out.buffer.setBytes(0, byteBuf.array());
- out.isSet = 1;
+ if (tdigest.size() > 0) {
+ int size = tdigest.smallByteSize();
+ java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
+ tdigest.asSmallBytes(byteBuf);
+ out.buffer = buffer.reallocIfNeeded(size);
+ out.start = 0;
+ out.end = size;
+ out.buffer.setBytes(0, byteBuf.array());
+ out.isSet = 1;
+ } else {
+ out.isSet = 0;
+ }
} catch (Exception e) {
throw new org.apache.drill.common.exceptions.DrillRuntimeException("Failed to get TDigest output", e);
}
@@ -880,14 +940,18 @@ public class TDigestFunctions {
if (work.obj != null) {
com.clearspring.analytics.stream.quantile.TDigest tdigest = (com.clearspring.analytics.stream.quantile.TDigest) work.obj;
try {
- int size = tdigest.smallByteSize();
- java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
- tdigest.asSmallBytes(byteBuf);
- out.buffer = buffer.reallocIfNeeded(size);
- out.start = 0;
- out.end = size;
- out.buffer.setBytes(0, byteBuf.array());
- out.isSet = 1;
+ if (tdigest.size() > 0) {
+ int size = tdigest.smallByteSize();
+ java.nio.ByteBuffer byteBuf = java.nio.ByteBuffer.allocate(size);
+ tdigest.asSmallBytes(byteBuf);
+ out.buffer = buffer.reallocIfNeeded(size);
+ out.start = 0;
+ out.end = size;
+ out.buffer.setBytes(0, byteBuf.array());
+ out.isSet = 1;
+ } else {
+ out.isSet = 0;
+ }
} catch (Exception e) {
throw new org.apache.drill.common.exceptions.DrillRuntimeException("Failed to get TDigest output", e);
}
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestAnalyze.java b/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestAnalyze.java
index b32aa21..c44dfdd 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestAnalyze.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestAnalyze.java
@@ -429,6 +429,42 @@ public class TestAnalyze extends BaseTestQuery {
}
}
+ @Test
+ public void testHistogramWithColumnsWithAllNulls() throws Exception {
+ try {
+ test("ALTER SESSION SET `planner.slice_target` = 1");
+ test("ALTER SESSION SET `store.format` = 'parquet'");
+ test("CREATE TABLE dfs.tmp.all_nulls AS SELECT employee_id, cast(null as int) as null_int_col, "
+ + "cast(null as bigint) as null_bigint_col, cast(null as float) as null_float_col, "
+ + "cast(null as double) as null_double_col, cast(null as date) as null_date_col, "
+ + "cast(null as timestamp) as null_timestamp_col, cast(null as time) as null_time_col, "
+ + "cast(null as boolean) as null_boolean_col "
+ + "from cp.`employee.json` ");
+ test("ANALYZE TABLE dfs.tmp.all_nulls COMPUTE STATISTICS ");
+
+ testBuilder()
+ .sqlQuery("SELECT tbl.`columns`.`column` as `column`, "
+ + " repeated_count(tbl.`columns`.`histogram`.`buckets`) as num_bucket_entries "
+ + " from (select flatten(`directories`[0].`columns`) as `columns` "
+ + " from dfs.tmp.`all_nulls/.stats.drill`) as tbl")
+ .unOrdered()
+ .baselineColumns("column", "num_bucket_entries")
+ .baselineValues("`employee_id`", 11)
+ .baselineValues("`null_int_col`", 0)
+ .baselineValues("`null_bigint_col`", 0)
+ .baselineValues("`null_float_col`", 0)
+ .baselineValues("`null_double_col`", 0)
+ .baselineValues("`null_date_col`", 0)
+ .baselineValues("`null_timestamp_col`", 0)
+ .baselineValues("`null_time_col`", 0)
+ .baselineValues("`null_boolean_col`", 0)
+ .go();
+
+ } finally {
+ test("ALTER SESSION SET `planner.slice_target` = " + ExecConstants.SLICE_TARGET_DEFAULT);
+ }
+ }
+
//Helper function to verify output of ANALYZE statement
private void verifyAnalyzeOutput(String query, String message) throws Exception {
List<QueryDataBatch>result = testRunAndReturn(QueryType.SQL, query);