You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by kg...@apache.org on 2018/04/17 09:07:08 UTC
hive git commit: HIVE-18946: Fix columnstats merge NPE (Laszlo Bodor
via Zoltan Haindrich)
Repository: hive
Updated Branches:
refs/heads/master fa9e743e7 -> 54046353e
HIVE-18946: Fix columnstats merge NPE (Laszlo Bodor via Zoltan Haindrich)
Signed-off-by: Zoltan Haindrich <ki...@rxd.hu>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/54046353
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/54046353
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/54046353
Branch: refs/heads/master
Commit: 54046353e022d7aa5f3656de7d2a66f15608466d
Parents: fa9e743
Author: Laszlo Bodor <bo...@gmail.com>
Authored: Tue Apr 17 11:05:39 2018 +0200
Committer: Zoltan Haindrich <ki...@rxd.hu>
Committed: Tue Apr 17 11:06:33 2018 +0200
----------------------------------------------------------------------
.../clientpositive/stats_analyze_empty.q | 18 ++
.../clientpositive/stats_analyze_empty.q.out | 48 ++++
.../merge/DecimalColumnStatsMerger.java | 36 ++-
.../merge/DecimalColumnStatsMergerTest.java | 242 +++++++++++++++++++
4 files changed, 338 insertions(+), 6 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/54046353/ql/src/test/queries/clientpositive/stats_analyze_empty.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/stats_analyze_empty.q b/ql/src/test/queries/clientpositive/stats_analyze_empty.q
new file mode 100644
index 0000000..6ea6125
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/stats_analyze_empty.q
@@ -0,0 +1,18 @@
+set hive.stats.autogather=true;
+set hive.explain.user=true;
+
+drop table if exists testdeci2;
+
+create table testdeci2(
+id int,
+amount decimal(10,3),
+sales_tax decimal(10,3),
+item string)
+stored as orc location '/tmp/testdeci2'
+TBLPROPERTIES ("transactional"="false")
+;
+
+
+analyze table testdeci2 compute statistics for columns;
+
+insert into table testdeci2 values(1,12.123,12345.123,'desk1'),(2,123.123,1234.123,'desk2');
http://git-wip-us.apache.org/repos/asf/hive/blob/54046353/ql/src/test/results/clientpositive/stats_analyze_empty.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/stats_analyze_empty.q.out b/ql/src/test/results/clientpositive/stats_analyze_empty.q.out
new file mode 100644
index 0000000..6eb51e9
--- /dev/null
+++ b/ql/src/test/results/clientpositive/stats_analyze_empty.q.out
@@ -0,0 +1,48 @@
+PREHOOK: query: drop table if exists testdeci2
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists testdeci2
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table testdeci2(
+id int,
+amount decimal(10,3),
+sales_tax decimal(10,3),
+item string)
+#### A masked pattern was here ####
+TBLPROPERTIES ("transactional"="false")
+PREHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+PREHOOK: Output: database:default
+PREHOOK: Output: default@testdeci2
+POSTHOOK: query: create table testdeci2(
+id int,
+amount decimal(10,3),
+sales_tax decimal(10,3),
+item string)
+#### A masked pattern was here ####
+TBLPROPERTIES ("transactional"="false")
+POSTHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@testdeci2
+PREHOOK: query: analyze table testdeci2 compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@testdeci2
+PREHOOK: Output: default@testdeci2
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table testdeci2 compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@testdeci2
+POSTHOOK: Output: default@testdeci2
+#### A masked pattern was here ####
+PREHOOK: query: insert into table testdeci2 values(1,12.123,12345.123,'desk1'),(2,123.123,1234.123,'desk2')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@testdeci2
+POSTHOOK: query: insert into table testdeci2 values(1,12.123,12345.123,'desk1'),(2,123.123,1234.123,'desk2')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@testdeci2
+POSTHOOK: Lineage: testdeci2.amount SCRIPT []
+POSTHOOK: Lineage: testdeci2.id SCRIPT []
+POSTHOOK: Lineage: testdeci2.item SCRIPT []
+POSTHOOK: Lineage: testdeci2.sales_tax SCRIPT []
http://git-wip-us.apache.org/repos/asf/hive/blob/54046353/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java
index 01f3385..517ca72 100644
--- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java
@@ -31,15 +31,15 @@ public class DecimalColumnStatsMerger extends ColumnStatsMerger {
(DecimalColumnStatsDataInspector) aggregateColStats.getStatsData().getDecimalStats();
DecimalColumnStatsDataInspector newData =
(DecimalColumnStatsDataInspector) newColStats.getStatsData().getDecimalStats();
- Decimal lowValue = aggregateData.getLowValue() != null
- && (aggregateData.getLowValue().compareTo(newData.getLowValue()) > 0) ? aggregateData
- .getLowValue() : newData.getLowValue();
+
+ Decimal lowValue = getMin(aggregateData.getLowValue(), newData.getLowValue());
aggregateData.setLowValue(lowValue);
- Decimal highValue = aggregateData.getHighValue() != null
- && (aggregateData.getHighValue().compareTo(newData.getHighValue()) > 0) ? aggregateData
- .getHighValue() : newData.getHighValue();
+
+ Decimal highValue = getMax(aggregateData.getHighValue(), newData.getHighValue());
aggregateData.setHighValue(highValue);
+
aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());
+
if (aggregateData.getNdvEstimator() == null || newData.getNdvEstimator() == null) {
aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs()));
} else {
@@ -58,4 +58,28 @@ public class DecimalColumnStatsMerger extends ColumnStatsMerger {
aggregateData.setNumDVs(ndv);
}
}
+
+ Decimal getMax(Decimal firstValue, Decimal secondValue) {
+ if (firstValue == null && secondValue == null) {
+ return null;
+ }
+
+ if (firstValue != null && secondValue != null) {
+ return firstValue.compareTo(secondValue) > 0 ? firstValue : secondValue;
+ }
+
+ return firstValue == null ? secondValue : firstValue;
+ }
+
+ Decimal getMin(Decimal firstValue, Decimal secondValue) {
+ if (firstValue == null && secondValue == null) {
+ return null;
+ }
+
+ if (firstValue != null && secondValue != null) {
+ return firstValue.compareTo(secondValue) > 0 ? secondValue : firstValue;
+ }
+
+ return firstValue == null ? secondValue : firstValue;
+ }
}
http://git-wip-us.apache.org/repos/asf/hive/blob/54046353/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMergerTest.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMergerTest.java b/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMergerTest.java
new file mode 100644
index 0000000..3b74d1e
--- /dev/null
+++ b/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMergerTest.java
@@ -0,0 +1,242 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.hive.metastore.columnstats.merge;
+
+import java.nio.ByteBuffer;
+
+import org.apache.hadoop.hive.metastore.annotation.MetastoreUnitTest;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
+import org.apache.hadoop.hive.metastore.api.Decimal;
+import org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector;
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+@Category(MetastoreUnitTest.class)
+public class DecimalColumnStatsMergerTest {
+
+ private static final Decimal DECIMAL_3 = getDecimal(3, 0);
+ private static final Decimal DECIMAL_5 = getDecimal(5, 0);
+ private static final Decimal DECIMAL_20 = getDecimal(2, 1);
+
+ private DecimalColumnStatsMerger merger = new DecimalColumnStatsMerger();
+
+ @Test
+ public void testMergeNullMinMaxValues() {
+ ColumnStatisticsObj objNulls = new ColumnStatisticsObj();
+ createData(objNulls, null, null);
+
+ merger.merge(objNulls, objNulls);
+
+ Assert.assertNull(objNulls.getStatsData().getDecimalStats().getLowValue());
+ Assert.assertNull(objNulls.getStatsData().getDecimalStats().getHighValue());
+ }
+
+ @Test
+ public void testMergeNonNullAndNullLowerValuesOldIsNull() {
+ ColumnStatisticsObj oldObj = new ColumnStatisticsObj();
+ createData(oldObj, null, null);
+
+ ColumnStatisticsObj newObj = new ColumnStatisticsObj();
+ createData(newObj, DECIMAL_3, null);
+
+ merger.merge(oldObj, newObj);
+
+ Assert.assertEquals(DECIMAL_3, oldObj.getStatsData().getDecimalStats().getLowValue());
+ }
+
+ @Test
+ public void testMergeNonNullAndNullLowerValuesNewIsNull() {
+ ColumnStatisticsObj oldObj = new ColumnStatisticsObj();
+ createData(oldObj, DECIMAL_3, null);
+
+ ColumnStatisticsObj newObj = new ColumnStatisticsObj();
+ createData(newObj, null, null);
+
+ merger.merge(oldObj, newObj);
+
+ Assert.assertEquals(DECIMAL_3, oldObj.getStatsData().getDecimalStats().getLowValue());
+ }
+
+ @Test
+ public void testMergeNonNullAndNullHigherValuesOldIsNull() {
+ ColumnStatisticsObj oldObj = new ColumnStatisticsObj();
+ createData(oldObj, null, null);
+
+ ColumnStatisticsObj newObj = new ColumnStatisticsObj();
+ createData(newObj, null, DECIMAL_3);
+
+ merger.merge(oldObj, newObj);
+
+ Assert.assertEquals(DECIMAL_3, oldObj.getStatsData().getDecimalStats().getHighValue());
+ }
+
+ @Test
+ public void testMergeNonNullAndNullHigherValuesNewIsNull() {
+ ColumnStatisticsObj oldObj = new ColumnStatisticsObj();
+ createData(oldObj, null, DECIMAL_3);
+
+ ColumnStatisticsObj newObj = new ColumnStatisticsObj();
+ createData(newObj, null, null);
+
+ merger.merge(oldObj, newObj);
+
+ Assert.assertEquals(DECIMAL_3, oldObj.getStatsData().getDecimalStats().getHighValue());
+ }
+
+ @Test
+ public void testMergeLowValuesFirstWins() {
+ ColumnStatisticsObj oldObj = new ColumnStatisticsObj();
+ createData(oldObj, DECIMAL_3, null);
+
+ ColumnStatisticsObj newObj = new ColumnStatisticsObj();
+ createData(newObj, DECIMAL_5, null);
+
+ merger.merge(oldObj, newObj);
+
+ Assert.assertEquals(DECIMAL_3, oldObj.getStatsData().getDecimalStats().getLowValue());
+ }
+
+ @Test
+ public void testMergeLowValuesSecondWins() {
+ ColumnStatisticsObj oldObj = new ColumnStatisticsObj();
+ createData(oldObj, DECIMAL_5, null);
+
+ ColumnStatisticsObj newObj = new ColumnStatisticsObj();
+ createData(newObj, DECIMAL_3, null);
+
+ merger.merge(oldObj, newObj);
+
+ Assert.assertEquals(DECIMAL_3, oldObj.getStatsData().getDecimalStats().getLowValue());
+ }
+
+ @Test
+ public void testMergeHighValuesFirstWins() {
+ ColumnStatisticsObj oldObj = new ColumnStatisticsObj();
+ createData(oldObj, null, DECIMAL_5);
+
+ ColumnStatisticsObj newObj = new ColumnStatisticsObj();
+ createData(newObj, null, DECIMAL_3);
+
+ merger.merge(oldObj, newObj);
+
+ Assert.assertEquals(DECIMAL_5, oldObj.getStatsData().getDecimalStats().getHighValue());
+ }
+
+ @Test
+ public void testMergeHighValuesSecondWins() {
+ ColumnStatisticsObj oldObj = new ColumnStatisticsObj();
+ createData(oldObj, null, DECIMAL_3);
+
+ ColumnStatisticsObj newObj = new ColumnStatisticsObj();
+ createData(newObj, null, DECIMAL_5);
+
+ merger.merge(oldObj, newObj);
+
+ Assert.assertEquals(DECIMAL_5, oldObj.getStatsData().getDecimalStats().getHighValue());
+ }
+
+ @Test
+ public void testDecimalCompareEqual() {
+ Assert.assertTrue(DECIMAL_3.equals(DECIMAL_3));
+ }
+
+ @Test
+ public void testDecimalCompareDoesntEqual() {
+ Assert.assertTrue(!DECIMAL_3.equals(DECIMAL_5));
+ }
+
+ @Test
+ public void testCompareSimple() {
+ Assert.assertEquals(DECIMAL_5, merger.getMax(DECIMAL_3, DECIMAL_5));
+ }
+
+ @Test
+ public void testCompareSimpleFlipped() {
+ Assert.assertEquals(DECIMAL_5, merger.getMax(DECIMAL_5, DECIMAL_3));
+ }
+
+ @Test
+ public void testCompareSimpleReversed() {
+ Assert.assertEquals(DECIMAL_3, merger.getMin(DECIMAL_3, DECIMAL_5));
+ }
+
+ @Test
+ public void testCompareSimpleFlippedReversed() {
+ Assert.assertEquals(DECIMAL_3, merger.getMin(DECIMAL_5, DECIMAL_3));
+ }
+
+ /*
+ * it should pass, but fails because of HIVE-19131, get back to this later!
+ *
+ * @Test public void testCompareUnscaledValue() { Assert.assertEquals(DECIMAL_20,
+ * merger.compareValues(DECIMAL_3, DECIMAL_20)); }
+ */
+
+ @Test
+ public void testCompareNullsMin() {
+ Assert.assertNull(merger.getMin(null, null));
+ }
+
+ @Test
+ public void testCompareNullsMax() {
+ Assert.assertNull(merger.getMax(null, null));
+ }
+
+ @Test
+ public void testCompareFirstNullMin() {
+ Assert.assertEquals(DECIMAL_3, merger.getMin(null, DECIMAL_3));
+ }
+
+ @Test
+ public void testCompareSecondNullMin() {
+ Assert.assertEquals(DECIMAL_3, merger.getMin(DECIMAL_3, null));
+ }
+
+ @Test
+ public void testCompareFirstNullMax() {
+ Assert.assertEquals(DECIMAL_3, merger.getMax(null, DECIMAL_3));
+ }
+
+ @Test
+ public void testCompareSecondNullMax() {
+ Assert.assertEquals(DECIMAL_3, merger.getMax(DECIMAL_3, null));
+ }
+
+ private static Decimal getDecimal(int number, int scale) {
+ ByteBuffer bb = ByteBuffer.allocate(4);
+ bb.asIntBuffer().put(number);
+ return new Decimal(bb, (short) scale);
+ }
+
+ private DecimalColumnStatsDataInspector createData(ColumnStatisticsObj objNulls, Decimal lowValue,
+ Decimal highValue) {
+ ColumnStatisticsData statisticsData = new ColumnStatisticsData();
+ DecimalColumnStatsDataInspector data = new DecimalColumnStatsDataInspector();
+
+ statisticsData.setDecimalStats(data);
+ objNulls.setStatsData(statisticsData);
+
+ data.setLowValue(lowValue);
+ data.setHighValue(highValue);
+ return data;
+ }
+}