You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by kg...@apache.org on 2018/04/17 09:07:08 UTC

hive git commit: HIVE-18946: Fix columnstats merge NPE (Laszlo Bodor via Zoltan Haindrich)

Repository: hive
Updated Branches:
  refs/heads/master fa9e743e7 -> 54046353e


HIVE-18946: Fix columnstats merge NPE (Laszlo Bodor via Zoltan Haindrich)

Signed-off-by: Zoltan Haindrich <ki...@rxd.hu>


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/54046353
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/54046353
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/54046353

Branch: refs/heads/master
Commit: 54046353e022d7aa5f3656de7d2a66f15608466d
Parents: fa9e743
Author: Laszlo Bodor <bo...@gmail.com>
Authored: Tue Apr 17 11:05:39 2018 +0200
Committer: Zoltan Haindrich <ki...@rxd.hu>
Committed: Tue Apr 17 11:06:33 2018 +0200

----------------------------------------------------------------------
 .../clientpositive/stats_analyze_empty.q        |  18 ++
 .../clientpositive/stats_analyze_empty.q.out    |  48 ++++
 .../merge/DecimalColumnStatsMerger.java         |  36 ++-
 .../merge/DecimalColumnStatsMergerTest.java     | 242 +++++++++++++++++++
 4 files changed, 338 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/54046353/ql/src/test/queries/clientpositive/stats_analyze_empty.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/stats_analyze_empty.q b/ql/src/test/queries/clientpositive/stats_analyze_empty.q
new file mode 100644
index 0000000..6ea6125
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/stats_analyze_empty.q
@@ -0,0 +1,18 @@
+set hive.stats.autogather=true;
+set hive.explain.user=true;
+
+drop table if exists testdeci2;
+
+create table testdeci2(
+id int,
+amount decimal(10,3),
+sales_tax decimal(10,3),
+item string)
+stored as orc location '/tmp/testdeci2'
+TBLPROPERTIES ("transactional"="false")
+;
+
+
+analyze table testdeci2 compute statistics for columns;
+
+insert into table testdeci2 values(1,12.123,12345.123,'desk1'),(2,123.123,1234.123,'desk2');

http://git-wip-us.apache.org/repos/asf/hive/blob/54046353/ql/src/test/results/clientpositive/stats_analyze_empty.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/stats_analyze_empty.q.out b/ql/src/test/results/clientpositive/stats_analyze_empty.q.out
new file mode 100644
index 0000000..6eb51e9
--- /dev/null
+++ b/ql/src/test/results/clientpositive/stats_analyze_empty.q.out
@@ -0,0 +1,48 @@
+PREHOOK: query: drop table if exists testdeci2
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists testdeci2
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table testdeci2(
+id int,
+amount decimal(10,3),
+sales_tax decimal(10,3),
+item string)
+#### A masked pattern was here ####
+TBLPROPERTIES ("transactional"="false")
+PREHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+PREHOOK: Output: database:default
+PREHOOK: Output: default@testdeci2
+POSTHOOK: query: create table testdeci2(
+id int,
+amount decimal(10,3),
+sales_tax decimal(10,3),
+item string)
+#### A masked pattern was here ####
+TBLPROPERTIES ("transactional"="false")
+POSTHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@testdeci2
+PREHOOK: query: analyze table testdeci2 compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@testdeci2
+PREHOOK: Output: default@testdeci2
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table testdeci2 compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@testdeci2
+POSTHOOK: Output: default@testdeci2
+#### A masked pattern was here ####
+PREHOOK: query: insert into table testdeci2 values(1,12.123,12345.123,'desk1'),(2,123.123,1234.123,'desk2')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@testdeci2
+POSTHOOK: query: insert into table testdeci2 values(1,12.123,12345.123,'desk1'),(2,123.123,1234.123,'desk2')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@testdeci2
+POSTHOOK: Lineage: testdeci2.amount SCRIPT []
+POSTHOOK: Lineage: testdeci2.id SCRIPT []
+POSTHOOK: Lineage: testdeci2.item SCRIPT []
+POSTHOOK: Lineage: testdeci2.sales_tax SCRIPT []

http://git-wip-us.apache.org/repos/asf/hive/blob/54046353/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java
index 01f3385..517ca72 100644
--- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java
@@ -31,15 +31,15 @@ public class DecimalColumnStatsMerger extends ColumnStatsMerger {
         (DecimalColumnStatsDataInspector) aggregateColStats.getStatsData().getDecimalStats();
     DecimalColumnStatsDataInspector newData =
         (DecimalColumnStatsDataInspector) newColStats.getStatsData().getDecimalStats();
-    Decimal lowValue = aggregateData.getLowValue() != null
-        && (aggregateData.getLowValue().compareTo(newData.getLowValue()) > 0) ? aggregateData
-        .getLowValue() : newData.getLowValue();
+
+    Decimal lowValue = getMin(aggregateData.getLowValue(), newData.getLowValue());
     aggregateData.setLowValue(lowValue);
-    Decimal highValue = aggregateData.getHighValue() != null
-        && (aggregateData.getHighValue().compareTo(newData.getHighValue()) > 0) ? aggregateData
-        .getHighValue() : newData.getHighValue();
+
+    Decimal highValue = getMax(aggregateData.getHighValue(), newData.getHighValue());
     aggregateData.setHighValue(highValue);
+
     aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());
+
     if (aggregateData.getNdvEstimator() == null || newData.getNdvEstimator() == null) {
       aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs()));
     } else {
@@ -58,4 +58,28 @@ public class DecimalColumnStatsMerger extends ColumnStatsMerger {
       aggregateData.setNumDVs(ndv);
     }
   }
+
+  Decimal getMax(Decimal firstValue, Decimal secondValue) {
+    if (firstValue == null && secondValue == null) {
+      return null;
+    }
+
+    if (firstValue != null && secondValue != null) {
+      return firstValue.compareTo(secondValue) > 0 ? firstValue : secondValue;
+    }
+
+    return firstValue == null ? secondValue : firstValue;
+  }
+
+  Decimal getMin(Decimal firstValue, Decimal secondValue) {
+    if (firstValue == null && secondValue == null) {
+      return null;
+    }
+
+    if (firstValue != null && secondValue != null) {
+      return firstValue.compareTo(secondValue) > 0 ? secondValue : firstValue;
+    }
+
+    return firstValue == null ? secondValue : firstValue;
+  }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/54046353/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMergerTest.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMergerTest.java b/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMergerTest.java
new file mode 100644
index 0000000..3b74d1e
--- /dev/null
+++ b/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMergerTest.java
@@ -0,0 +1,242 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.hive.metastore.columnstats.merge;
+
+import java.nio.ByteBuffer;
+
+import org.apache.hadoop.hive.metastore.annotation.MetastoreUnitTest;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
+import org.apache.hadoop.hive.metastore.api.Decimal;
+import org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector;
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+@Category(MetastoreUnitTest.class)
+public class DecimalColumnStatsMergerTest {
+
+  private static final Decimal DECIMAL_3 = getDecimal(3, 0);
+  private static final Decimal DECIMAL_5 = getDecimal(5, 0);
+  private static final Decimal DECIMAL_20 = getDecimal(2, 1);
+
+  private DecimalColumnStatsMerger merger = new DecimalColumnStatsMerger();
+
+  @Test
+  public void testMergeNullMinMaxValues() {
+    ColumnStatisticsObj objNulls = new ColumnStatisticsObj();
+    createData(objNulls, null, null);
+
+    merger.merge(objNulls, objNulls);
+
+    Assert.assertNull(objNulls.getStatsData().getDecimalStats().getLowValue());
+    Assert.assertNull(objNulls.getStatsData().getDecimalStats().getHighValue());
+  }
+
+  @Test
+  public void testMergeNonNullAndNullLowerValuesOldIsNull() {
+    ColumnStatisticsObj oldObj = new ColumnStatisticsObj();
+    createData(oldObj, null, null);
+
+    ColumnStatisticsObj newObj = new ColumnStatisticsObj();
+    createData(newObj, DECIMAL_3, null);
+
+    merger.merge(oldObj, newObj);
+
+    Assert.assertEquals(DECIMAL_3, oldObj.getStatsData().getDecimalStats().getLowValue());
+  }
+
+  @Test
+  public void testMergeNonNullAndNullLowerValuesNewIsNull() {
+    ColumnStatisticsObj oldObj = new ColumnStatisticsObj();
+    createData(oldObj, DECIMAL_3, null);
+
+    ColumnStatisticsObj newObj = new ColumnStatisticsObj();
+    createData(newObj, null, null);
+
+    merger.merge(oldObj, newObj);
+
+    Assert.assertEquals(DECIMAL_3, oldObj.getStatsData().getDecimalStats().getLowValue());
+  }
+
+  @Test
+  public void testMergeNonNullAndNullHigherValuesOldIsNull() {
+    ColumnStatisticsObj oldObj = new ColumnStatisticsObj();
+    createData(oldObj, null, null);
+
+    ColumnStatisticsObj newObj = new ColumnStatisticsObj();
+    createData(newObj, null, DECIMAL_3);
+
+    merger.merge(oldObj, newObj);
+
+    Assert.assertEquals(DECIMAL_3, oldObj.getStatsData().getDecimalStats().getHighValue());
+  }
+
+  @Test
+  public void testMergeNonNullAndNullHigherValuesNewIsNull() {
+    ColumnStatisticsObj oldObj = new ColumnStatisticsObj();
+    createData(oldObj, null, DECIMAL_3);
+
+    ColumnStatisticsObj newObj = new ColumnStatisticsObj();
+    createData(newObj, null, null);
+
+    merger.merge(oldObj, newObj);
+
+    Assert.assertEquals(DECIMAL_3, oldObj.getStatsData().getDecimalStats().getHighValue());
+  }
+
+  @Test
+  public void testMergeLowValuesFirstWins() {
+    ColumnStatisticsObj oldObj = new ColumnStatisticsObj();
+    createData(oldObj, DECIMAL_3, null);
+
+    ColumnStatisticsObj newObj = new ColumnStatisticsObj();
+    createData(newObj, DECIMAL_5, null);
+
+    merger.merge(oldObj, newObj);
+
+    Assert.assertEquals(DECIMAL_3, oldObj.getStatsData().getDecimalStats().getLowValue());
+  }
+
+  @Test
+  public void testMergeLowValuesSecondWins() {
+    ColumnStatisticsObj oldObj = new ColumnStatisticsObj();
+    createData(oldObj, DECIMAL_5, null);
+
+    ColumnStatisticsObj newObj = new ColumnStatisticsObj();
+    createData(newObj, DECIMAL_3, null);
+
+    merger.merge(oldObj, newObj);
+
+    Assert.assertEquals(DECIMAL_3, oldObj.getStatsData().getDecimalStats().getLowValue());
+  }
+
+  @Test
+  public void testMergeHighValuesFirstWins() {
+    ColumnStatisticsObj oldObj = new ColumnStatisticsObj();
+    createData(oldObj, null, DECIMAL_5);
+
+    ColumnStatisticsObj newObj = new ColumnStatisticsObj();
+    createData(newObj, null, DECIMAL_3);
+
+    merger.merge(oldObj, newObj);
+
+    Assert.assertEquals(DECIMAL_5, oldObj.getStatsData().getDecimalStats().getHighValue());
+  }
+
+  @Test
+  public void testMergeHighValuesSecondWins() {
+    ColumnStatisticsObj oldObj = new ColumnStatisticsObj();
+    createData(oldObj, null, DECIMAL_3);
+
+    ColumnStatisticsObj newObj = new ColumnStatisticsObj();
+    createData(newObj, null, DECIMAL_5);
+
+    merger.merge(oldObj, newObj);
+
+    Assert.assertEquals(DECIMAL_5, oldObj.getStatsData().getDecimalStats().getHighValue());
+  }
+
+  @Test
+  public void testDecimalCompareEqual() {
+    Assert.assertTrue(DECIMAL_3.equals(DECIMAL_3));
+  }
+
+  @Test
+  public void testDecimalCompareDoesntEqual() {
+    Assert.assertTrue(!DECIMAL_3.equals(DECIMAL_5));
+  }
+
+  @Test
+  public void testCompareSimple() {
+    Assert.assertEquals(DECIMAL_5, merger.getMax(DECIMAL_3, DECIMAL_5));
+  }
+
+  @Test
+  public void testCompareSimpleFlipped() {
+    Assert.assertEquals(DECIMAL_5, merger.getMax(DECIMAL_5, DECIMAL_3));
+  }
+
+  @Test
+  public void testCompareSimpleReversed() {
+    Assert.assertEquals(DECIMAL_3, merger.getMin(DECIMAL_3, DECIMAL_5));
+  }
+
+  @Test
+  public void testCompareSimpleFlippedReversed() {
+    Assert.assertEquals(DECIMAL_3, merger.getMin(DECIMAL_5, DECIMAL_3));
+  }
+
+  /*
+   * it should pass, but fails because of HIVE-19131, get back to this later!
+   *
+   * @Test public void testCompareUnscaledValue() { Assert.assertEquals(DECIMAL_20,
+   * merger.compareValues(DECIMAL_3, DECIMAL_20)); }
+   */
+
+  @Test
+  public void testCompareNullsMin() {
+    Assert.assertNull(merger.getMin(null, null));
+  }
+
+  @Test
+  public void testCompareNullsMax() {
+    Assert.assertNull(merger.getMax(null, null));
+  }
+
+  @Test
+  public void testCompareFirstNullMin() {
+    Assert.assertEquals(DECIMAL_3, merger.getMin(null, DECIMAL_3));
+  }
+
+  @Test
+  public void testCompareSecondNullMin() {
+    Assert.assertEquals(DECIMAL_3, merger.getMin(DECIMAL_3, null));
+  }
+
+  @Test
+  public void testCompareFirstNullMax() {
+    Assert.assertEquals(DECIMAL_3, merger.getMax(null, DECIMAL_3));
+  }
+
+  @Test
+  public void testCompareSecondNullMax() {
+    Assert.assertEquals(DECIMAL_3, merger.getMax(DECIMAL_3, null));
+  }
+
+  private static Decimal getDecimal(int number, int scale) {
+    ByteBuffer bb = ByteBuffer.allocate(4);
+    bb.asIntBuffer().put(number);
+    return new Decimal(bb, (short) scale);
+  }
+
+  private DecimalColumnStatsDataInspector createData(ColumnStatisticsObj objNulls, Decimal lowValue,
+      Decimal highValue) {
+    ColumnStatisticsData statisticsData = new ColumnStatisticsData();
+    DecimalColumnStatsDataInspector data = new DecimalColumnStatsDataInspector();
+
+    statisticsData.setDecimalStats(data);
+    objNulls.setStatsData(statisticsData);
+
+    data.setLowValue(lowValue);
+    data.setHighValue(highValue);
+    return data;
+  }
+}