You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jr...@apache.org on 2017/02/08 21:59:46 UTC

[2/5] incubator-impala git commit: IMPALA-4738: STDDEV_SAMP should return NULL for single record input

IMPALA-4738: STDDEV_SAMP should return NULL for single record input

In calculating the STDDEV_SAMP/VARIANCE of N rows a divion by
N-1 rows is involved. Hence STDDEV_SAMP/VARIANCE for a single
row involves a division by 0. This change returns a NULL instead
of a 0 when calculating STDDEV_SAMP/VARIANCE for a single row.
STDDEV_POP/VARIANCE_POP for single row will still return a 0 since
this does not involve  a division by 0. This matches the postgres
behavior.

Change-Id: Ide8af752cd8a2e554a2cd5a1ec948967a80de1fe
Reviewed-on: http://gerrit.cloudera.org:8080/5800
Reviewed-by: Matthew Jacobs <mj...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/c671ee1e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/c671ee1e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/c671ee1e

Branch: refs/heads/master
Commit: c671ee1ece5fde1a5db40621cb27480dff74bc49
Parents: 3671175
Author: aphadke <ap...@cloudera.com>
Authored: Wed Jan 25 14:47:30 2017 -0800
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Tue Feb 7 04:19:06 2017 +0000

----------------------------------------------------------------------
 be/src/exprs/aggregate-functions-ir.cc                   |  4 ++--
 .../functional-query/queries/QueryTest/aggregation.test  | 11 ++++++-----
 2 files changed, 8 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c671ee1e/be/src/exprs/aggregate-functions-ir.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/aggregate-functions-ir.cc b/be/src/exprs/aggregate-functions-ir.cc
index 63f06bf..8505d43 100644
--- a/be/src/exprs/aggregate-functions-ir.cc
+++ b/be/src/exprs/aggregate-functions-ir.cc
@@ -1331,7 +1331,7 @@ DoubleVal AggregateFunctions::KnuthVarFinalize(
     FunctionContext* ctx, const StringVal& state_sv) {
   DCHECK(!state_sv.is_null);
   KnuthVarianceState* state = reinterpret_cast<KnuthVarianceState*>(state_sv.ptr);
-  if (state->count == 0) return DoubleVal::null();
+  if (state->count == 0 || state->count == 1) return DoubleVal::null();
   double variance = ComputeKnuthVariance(*state, false);
   return DoubleVal(variance);
 }
@@ -1350,7 +1350,7 @@ DoubleVal AggregateFunctions::KnuthStddevFinalize(FunctionContext* ctx,
   DCHECK(!state_sv.is_null);
   DCHECK_EQ(state_sv.len, sizeof(KnuthVarianceState));
   KnuthVarianceState* state = reinterpret_cast<KnuthVarianceState*>(state_sv.ptr);
-  if (state->count == 0) return DoubleVal::null();
+  if (state->count == 0 || state->count == 1) return DoubleVal::null();
   return sqrt(ComputeKnuthVariance(*state, false));
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c671ee1e/testdata/workloads/functional-query/queries/QueryTest/aggregation.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/aggregation.test b/testdata/workloads/functional-query/queries/QueryTest/aggregation.test
index 1b4d464..675c75f 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/aggregation.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/aggregation.test
@@ -22,14 +22,15 @@ NULL,NULL,NULL,NULL
 double, double, double, double
 ====
 ---- QUERY
-# exactly 1 tuple processed (variance & stddev are 0)
-SELECT variance(tinyint_col), stddev(smallint_col), variance_pop(int_col),
-stddev_pop(bigint_col)
+# exactly 1 tuple processed (variance_pop & stddev_pop are 0, stddev and variance
+# are NULL)
+SELECT variance(tinyint_col), variance_samp(smallint_col), variance_pop(int_col),
+stddev(smallint_col), stddev_samp(smallint_col), stddev_pop(bigint_col)
 from alltypesagg WHERE id = 1006
 ---- RESULTS
-0,0,0,0
+NULL,NULL,0,NULL,NULL,0
 ---- TYPES
-double, double, double, double
+double, double, double, double, double, double
 ====
 ---- QUERY
 # Includes one row which is null, and test the aliases for variance() as well