You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by dm...@apache.org on 2020/05/12 20:18:28 UTC
[hive] branch master updated: HIVE-23053: Clean Up Stats Mergers
(David Mollitor, reviewed by Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository.
dmollitor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 2ff6370 HIVE-23053: Clean Up Stats Mergers (David Mollitor, reviewed by Ashutosh Chauhan)
2ff6370 is described below
commit 2ff637022977bc9ac7c9aef5681447101bad828e
Author: David Mollitor <dm...@apache.org>
AuthorDate: Tue May 12 16:16:32 2020 -0400
HIVE-23053: Clean Up Stats Mergers (David Mollitor, reviewed by Ashutosh Chauhan)
---
.../columnstats/merge/BinaryColumnStatsMerger.java | 2 +-
.../merge/BooleanColumnStatsMerger.java | 2 +-
.../columnstats/merge/ColumnStatsMerger.java | 10 +++--
.../columnstats/merge/DateColumnStatsMerger.java | 44 ++++++++++-----------
.../merge/DecimalColumnStatsMerger.java | 45 +++++++++++-----------
.../columnstats/merge/DoubleColumnStatsMerger.java | 33 +++++++++++-----
.../columnstats/merge/LongColumnStatsMerger.java | 36 +++++++++++------
.../columnstats/merge/StringColumnStatsMerger.java | 8 ++--
.../merge/TimestampColumnStatsMerger.java | 43 ++++++++++-----------
9 files changed, 126 insertions(+), 97 deletions(-)
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/BinaryColumnStatsMerger.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/BinaryColumnStatsMerger.java
index 1c2402f..52377b4 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/BinaryColumnStatsMerger.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/BinaryColumnStatsMerger.java
@@ -25,7 +25,7 @@ import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
public class BinaryColumnStatsMerger extends ColumnStatsMerger {
@Override
- public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
+ protected void doMerge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
BinaryColumnStatsData aggregateData = aggregateColStats.getStatsData().getBinaryStats();
BinaryColumnStatsData newData = newColStats.getStatsData().getBinaryStats();
aggregateData.setMaxColLen(Math.max(aggregateData.getMaxColLen(), newData.getMaxColLen()));
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/BooleanColumnStatsMerger.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/BooleanColumnStatsMerger.java
index fd6b87a..0685334 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/BooleanColumnStatsMerger.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/BooleanColumnStatsMerger.java
@@ -25,7 +25,7 @@ import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
public class BooleanColumnStatsMerger extends ColumnStatsMerger {
@Override
- public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
+ protected void doMerge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
BooleanColumnStatsData aggregateData = aggregateColStats.getStatsData().getBooleanStats();
BooleanColumnStatsData newData = newColStats.getStatsData().getBooleanStats();
aggregateData.setNumTrues(aggregateData.getNumTrues() + newData.getNumTrues());
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/ColumnStatsMerger.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/ColumnStatsMerger.java
index ce55756..8204770 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/ColumnStatsMerger.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/ColumnStatsMerger.java
@@ -24,8 +24,12 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public abstract class ColumnStatsMerger {
- protected final Logger LOG = LoggerFactory.getLogger(ColumnStatsMerger.class.getName());
+ protected final Logger log = LoggerFactory.getLogger(getClass());
- public abstract void merge(ColumnStatisticsObj aggregateColStats,
- ColumnStatisticsObj newColStats);
+ public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
+ log.debug("Merging statistics: [aggregateColStats:{}, newColStats: {}]", aggregateColStats, newColStats);
+ doMerge(aggregateColStats, newColStats);
+ }
+
+ protected abstract void doMerge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats);
}
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DateColumnStatsMerger.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DateColumnStatsMerger.java
index c46e0a9..84962aa 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DateColumnStatsMerger.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DateColumnStatsMerger.java
@@ -21,14 +21,18 @@ package org.apache.hadoop.hive.metastore.columnstats.merge;
import static org.apache.hadoop.hive.metastore.columnstats.ColumnsStatsUtils.dateInspectorFromStats;
+import org.apache.commons.lang3.ObjectUtils;
import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.api.Date;
import org.apache.hadoop.hive.metastore.columnstats.cache.DateColumnStatsDataInspector;
+import com.google.common.base.MoreObjects;
+
public class DateColumnStatsMerger extends ColumnStatsMerger {
+
@Override
- public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
+ protected void doMerge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
DateColumnStatsDataInspector aggregateData = dateInspectorFromStats(aggregateColStats);
DateColumnStatsDataInspector newData = dateInspectorFromStats(newColStats);
@@ -41,7 +45,7 @@ public class DateColumnStatsMerger extends ColumnStatsMerger {
} else {
NumDistinctValueEstimator oldEst = aggregateData.getNdvEstimator();
NumDistinctValueEstimator newEst = newData.getNdvEstimator();
- long ndv = -1;
+ final long ndv;
if (oldEst.canMerge(newEst)) {
oldEst.mergeEstimators(newEst);
ndv = oldEst.estimateNumDistinctValues();
@@ -49,8 +53,8 @@ public class DateColumnStatsMerger extends ColumnStatsMerger {
} else {
ndv = Math.max(aggregateData.getNumDVs(), newData.getNumDVs());
}
- LOG.debug("Use bitvector to merge column " + aggregateColStats.getColName() + "'s ndvs of "
- + aggregateData.getNumDVs() + " and " + newData.getNumDVs() + " to be " + ndv);
+ log.debug("Use bitvector to merge column {}'s ndvs of {} and {} to be {}", aggregateColStats.getColName(),
+ aggregateData.getNumDVs(), newData.getNumDVs(), ndv);
aggregateData.setNumDVs(ndv);
}
@@ -58,36 +62,32 @@ public class DateColumnStatsMerger extends ColumnStatsMerger {
}
public void setLowValue(DateColumnStatsDataInspector aggregateData, DateColumnStatsDataInspector newData) {
+ final Date aggregateLowValue = aggregateData.getLowValue();
+ final Date newLowValue = newData.getLowValue();
+
+ final Date mergedLowValue;
if (!aggregateData.isSetLowValue() && !newData.isSetLowValue()) {
return;
- }
-
- Date aggregateLowValue = aggregateData.getLowValue();
- Date newLowValue = newData.getLowValue();
-
- Date mergedLowValue = null;
- if (aggregateData.isSetLowValue() && newData.isSetLowValue()) {
- mergedLowValue = aggregateLowValue.compareTo(newLowValue) > 0 ? newLowValue : aggregateLowValue;
+ } else if (aggregateData.isSetLowValue() && newData.isSetLowValue()) {
+ mergedLowValue = ObjectUtils.min(aggregateLowValue, newLowValue);
} else {
- mergedLowValue = aggregateLowValue == null ? newLowValue : aggregateLowValue;
+ mergedLowValue = MoreObjects.firstNonNull(aggregateLowValue, newLowValue);
}
aggregateData.setLowValue(mergedLowValue);
}
public void setHighValue(DateColumnStatsDataInspector aggregateData, DateColumnStatsDataInspector newData) {
+ final Date aggregateHighValue = aggregateData.getHighValue();
+ final Date newHighValue = newData.getHighValue();
+
+ final Date mergedHighValue;
if (!aggregateData.isSetHighValue() && !newData.isSetHighValue()) {
return;
- }
-
- Date aggregateHighValue = aggregateData.getHighValue();
- Date newHighValue = newData.getHighValue();
-
- Date mergedHighValue = null;
- if (aggregateData.isSetHighValue() && newData.isSetHighValue()) {
- mergedHighValue = aggregateHighValue.compareTo(newHighValue) > 0 ? aggregateHighValue : newHighValue;
+ } else if (aggregateData.isSetHighValue() && newData.isSetHighValue()) {
+ mergedHighValue = ObjectUtils.max(newHighValue, aggregateHighValue);
} else {
- mergedHighValue = aggregateHighValue == null ? newHighValue : aggregateHighValue;
+ mergedHighValue = MoreObjects.firstNonNull(aggregateHighValue, newHighValue);
}
aggregateData.setHighValue(mergedHighValue);
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java
index be8175f..eaa67c6 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java
@@ -24,11 +24,16 @@ import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.api.Decimal;
import org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector;
+import com.google.common.base.MoreObjects;
+
import static org.apache.hadoop.hive.metastore.columnstats.ColumnsStatsUtils.decimalInspectorFromStats;
+import org.apache.commons.lang3.ObjectUtils;
+
public class DecimalColumnStatsMerger extends ColumnStatsMerger {
+
@Override
- public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
+ protected void doMerge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
DecimalColumnStatsDataInspector aggregateData = decimalInspectorFromStats(aggregateColStats);
DecimalColumnStatsDataInspector newData = decimalInspectorFromStats(newColStats);
@@ -42,7 +47,7 @@ public class DecimalColumnStatsMerger extends ColumnStatsMerger {
} else {
NumDistinctValueEstimator oldEst = aggregateData.getNdvEstimator();
NumDistinctValueEstimator newEst = newData.getNdvEstimator();
- long ndv = -1;
+ final long ndv;
if (oldEst.canMerge(newEst)) {
oldEst.mergeEstimators(newEst);
ndv = oldEst.estimateNumDistinctValues();
@@ -50,8 +55,8 @@ public class DecimalColumnStatsMerger extends ColumnStatsMerger {
} else {
ndv = Math.max(aggregateData.getNumDVs(), newData.getNumDVs());
}
- LOG.debug("Use bitvector to merge column " + aggregateColStats.getColName() + "'s ndvs of "
- + aggregateData.getNumDVs() + " and " + newData.getNumDVs() + " to be " + ndv);
+ log.debug("Use bitvector to merge column {}'s ndvs of {} and {} to be {}", aggregateColStats.getColName(),
+ aggregateData.getNumDVs(), newData.getNumDVs(), ndv);
aggregateData.setNumDVs(ndv);
}
@@ -59,36 +64,32 @@ public class DecimalColumnStatsMerger extends ColumnStatsMerger {
}
public void setLowValue(DecimalColumnStatsDataInspector aggregateData, DecimalColumnStatsDataInspector newData) {
+ final Decimal aggregateLowValue = aggregateData.getLowValue();
+ final Decimal newLowValue = newData.getLowValue();
+
+ final Decimal mergedLowValue;
if (!aggregateData.isSetLowValue() && !newData.isSetLowValue()) {
return;
- }
-
- Decimal aggregateLowValue = aggregateData.getLowValue();
- Decimal newLowValue = newData.getLowValue();
-
- Decimal mergedLowValue = null;
- if (aggregateData.isSetLowValue() && newData.isSetLowValue()) {
- mergedLowValue = aggregateLowValue.compareTo(newLowValue) > 0 ? newLowValue : aggregateLowValue;
+ } else if (aggregateData.isSetLowValue() && newData.isSetLowValue()) {
+ mergedLowValue = ObjectUtils.min(newLowValue, aggregateLowValue);
} else {
- mergedLowValue = aggregateLowValue == null ? newLowValue : aggregateLowValue;
+ mergedLowValue = MoreObjects.firstNonNull(aggregateLowValue, newLowValue);
}
aggregateData.setLowValue(mergedLowValue);
}
public void setHighValue(DecimalColumnStatsDataInspector aggregateData, DecimalColumnStatsDataInspector newData) {
+ final Decimal aggregateHighValue = aggregateData.getHighValue();
+ final Decimal newHighValue = newData.getHighValue();
+
+ final Decimal mergedHighValue;
if (!aggregateData.isSetHighValue() && !newData.isSetHighValue()) {
return;
- }
-
- Decimal aggregateHighValue = aggregateData.getHighValue();
- Decimal newHighValue = newData.getHighValue();
-
- Decimal mergedHighValue = null;
- if (aggregateData.isSetHighValue() && newData.isSetHighValue()) {
- mergedHighValue = aggregateHighValue.compareTo(newHighValue) > 0 ? aggregateHighValue : newHighValue;
+ } else if (aggregateData.isSetHighValue() && newData.isSetHighValue()) {
+ mergedHighValue = ObjectUtils.max(aggregateHighValue, newHighValue);
} else {
- mergedHighValue = aggregateHighValue == null ? newHighValue : aggregateHighValue;
+ mergedHighValue = MoreObjects.firstNonNull(aggregateHighValue, newHighValue);
}
aggregateData.setHighValue(mergedHighValue);
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DoubleColumnStatsMerger.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DoubleColumnStatsMerger.java
index 41f9095..6f41232 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DoubleColumnStatsMerger.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DoubleColumnStatsMerger.java
@@ -26,8 +26,9 @@ import org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataI
import static org.apache.hadoop.hive.metastore.columnstats.ColumnsStatsUtils.doubleInspectorFromStats;
public class DoubleColumnStatsMerger extends ColumnStatsMerger {
+
@Override
- public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
+ protected void doMerge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
DoubleColumnStatsDataInspector aggregateData = doubleInspectorFromStats(aggregateColStats);
DoubleColumnStatsDataInspector newData = doubleInspectorFromStats(newColStats);
setLowValue(aggregateData, newData);
@@ -46,7 +47,7 @@ public class DoubleColumnStatsMerger extends ColumnStatsMerger {
} else {
ndv = Math.max(aggregateData.getNumDVs(), newData.getNumDVs());
}
- LOG.debug("Use bitvector to merge column " + aggregateColStats.getColName() + "'s ndvs of "
+ log.debug("Use bitvector to merge column " + aggregateColStats.getColName() + "'s ndvs of "
+ aggregateData.getNumDVs() + " and " + newData.getNumDVs() + " to be " + ndv);
aggregateData.setNumDVs(ndv);
}
@@ -55,22 +56,34 @@ public class DoubleColumnStatsMerger extends ColumnStatsMerger {
}
public void setLowValue(DoubleColumnStatsDataInspector aggregateData, DoubleColumnStatsDataInspector newData) {
- if (!aggregateData.isSetLowValue() && !newData.isSetLowValue()) {
+ final double lowValue;
+
+ if (aggregateData.isSetLowValue() && newData.isSetLowValue()) {
+ lowValue = Math.min(aggregateData.getLowValue(), newData.getLowValue());
+ } else if (aggregateData.isSetLowValue()) {
+ lowValue = aggregateData.getLowValue();
+ } else if (newData.isSetLowValue()) {
+ lowValue = newData.getLowValue();
+ } else {
return;
}
- double lowValue = Math.min(
- aggregateData.isSetLowValue() ? aggregateData.getLowValue() : Double.MAX_VALUE,
- newData.isSetLowValue() ? newData.getLowValue() : Double.MAX_VALUE);
+
aggregateData.setLowValue(lowValue);
}
public void setHighValue(DoubleColumnStatsDataInspector aggregateData, DoubleColumnStatsDataInspector newData) {
- if (!aggregateData.isSetHighValue() && !newData.isSetHighValue()) {
+ final double highValue;
+
+ if (aggregateData.isSetHighValue() && newData.isSetHighValue()) {
+ highValue = Math.max(aggregateData.getHighValue(), newData.getHighValue());
+ } else if (aggregateData.isSetHighValue()) {
+ highValue = aggregateData.getHighValue();
+ } else if (newData.isSetHighValue()) {
+ highValue = newData.getHighValue();
+ } else {
return;
}
- double highValue = Math.max(
- aggregateData.isSetHighValue() ? aggregateData.getHighValue() : Double.MIN_VALUE,
- newData.isSetHighValue() ? newData.getHighValue() : Double.MIN_VALUE);
+
aggregateData.setHighValue(highValue);
}
}
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/LongColumnStatsMerger.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/LongColumnStatsMerger.java
index dfc8421..5a56c59 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/LongColumnStatsMerger.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/LongColumnStatsMerger.java
@@ -27,7 +27,7 @@ import static org.apache.hadoop.hive.metastore.columnstats.ColumnsStatsUtils.lon
public class LongColumnStatsMerger extends ColumnStatsMerger {
@Override
- public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
+ protected void doMerge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
LongColumnStatsDataInspector aggregateData = longInspectorFromStats(aggregateColStats);
LongColumnStatsDataInspector newData = longInspectorFromStats(newColStats);
setLowValue(aggregateData, newData);
@@ -38,7 +38,7 @@ public class LongColumnStatsMerger extends ColumnStatsMerger {
} else {
NumDistinctValueEstimator oldEst = aggregateData.getNdvEstimator();
NumDistinctValueEstimator newEst = newData.getNdvEstimator();
- long ndv = -1;
+ final long ndv;
if (oldEst.canMerge(newEst)) {
oldEst.mergeEstimators(newEst);
ndv = oldEst.estimateNumDistinctValues();
@@ -46,8 +46,8 @@ public class LongColumnStatsMerger extends ColumnStatsMerger {
} else {
ndv = Math.max(aggregateData.getNumDVs(), newData.getNumDVs());
}
- LOG.debug("Use bitvector to merge column " + aggregateColStats.getColName() + "'s ndvs of "
- + aggregateData.getNumDVs() + " and " + newData.getNumDVs() + " to be " + ndv);
+ log.debug("Use bitvector to merge column {}'s ndvs of {} and {} to be {}", aggregateColStats.getColName(),
+ aggregateData.getNumDVs(), newData.getNumDVs(), ndv);
aggregateData.setNumDVs(ndv);
}
@@ -55,22 +55,34 @@ public class LongColumnStatsMerger extends ColumnStatsMerger {
}
public void setLowValue(LongColumnStatsDataInspector aggregateData, LongColumnStatsDataInspector newData) {
- if (!aggregateData.isSetLowValue() && !newData.isSetLowValue()) {
+ final long lowValue;
+
+ if (aggregateData.isSetLowValue() && newData.isSetLowValue()) {
+ lowValue = Math.min(aggregateData.getLowValue(), newData.getLowValue());
+ } else if (aggregateData.isSetLowValue()) {
+ lowValue = aggregateData.getLowValue();
+ } else if (newData.isSetLowValue()) {
+ lowValue = newData.getLowValue();
+ } else {
return;
}
- long lowValue = Math.min(
- aggregateData.isSetLowValue() ? aggregateData.getLowValue() : Long.MAX_VALUE,
- newData.isSetLowValue() ? newData.getLowValue() : Long.MAX_VALUE);
+
aggregateData.setLowValue(lowValue);
}
public void setHighValue(LongColumnStatsDataInspector aggregateData, LongColumnStatsDataInspector newData) {
- if (!aggregateData.isSetHighValue() && !newData.isSetHighValue()) {
+ final long highValue;
+
+ if (aggregateData.isSetHighValue() && newData.isSetHighValue()) {
+ highValue = Math.max(aggregateData.getHighValue(), newData.getHighValue());
+ } else if (aggregateData.isSetHighValue()) {
+ highValue = aggregateData.getHighValue();
+ } else if (newData.isSetHighValue()) {
+ highValue = newData.getHighValue();
+ } else {
return;
}
- long highValue = Math.max(
- aggregateData.isSetHighValue() ? aggregateData.getHighValue() : Long.MIN_VALUE,
- newData.isSetHighValue() ? newData.getHighValue() : Long.MIN_VALUE);
+
aggregateData.setHighValue(highValue);
}
}
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/StringColumnStatsMerger.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/StringColumnStatsMerger.java
index dec4485..1ef9e1c 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/StringColumnStatsMerger.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/StringColumnStatsMerger.java
@@ -27,7 +27,7 @@ import static org.apache.hadoop.hive.metastore.columnstats.ColumnsStatsUtils.str
public class StringColumnStatsMerger extends ColumnStatsMerger {
@Override
- public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
+ protected void doMerge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
StringColumnStatsDataInspector aggregateData = stringInspectorFromStats(aggregateColStats);
StringColumnStatsDataInspector newData = stringInspectorFromStats(newColStats);
aggregateData.setMaxColLen(Math.max(aggregateData.getMaxColLen(), newData.getMaxColLen()));
@@ -38,7 +38,7 @@ public class StringColumnStatsMerger extends ColumnStatsMerger {
} else {
NumDistinctValueEstimator oldEst = aggregateData.getNdvEstimator();
NumDistinctValueEstimator newEst = newData.getNdvEstimator();
- long ndv = -1;
+ final long ndv;
if (oldEst.canMerge(newEst)) {
oldEst.mergeEstimators(newEst);
ndv = oldEst.estimateNumDistinctValues();
@@ -46,8 +46,8 @@ public class StringColumnStatsMerger extends ColumnStatsMerger {
} else {
ndv = Math.max(aggregateData.getNumDVs(), newData.getNumDVs());
}
- LOG.debug("Use bitvector to merge column " + aggregateColStats.getColName() + "'s ndvs of "
- + aggregateData.getNumDVs() + " and " + newData.getNumDVs() + " to be " + ndv);
+ log.debug("Use bitvector to merge column {}'s ndvs of {} and {} to be {}", aggregateColStats.getColName(),
+ aggregateData.getNumDVs(), newData.getNumDVs(), ndv);
aggregateData.setNumDVs(ndv);
}
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/TimestampColumnStatsMerger.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/TimestampColumnStatsMerger.java
index 77827d9..748f738 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/TimestampColumnStatsMerger.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/TimestampColumnStatsMerger.java
@@ -20,14 +20,17 @@ package org.apache.hadoop.hive.metastore.columnstats.merge;
import static org.apache.hadoop.hive.metastore.columnstats.ColumnsStatsUtils.timestampInspectorFromStats;
+import org.apache.commons.lang3.ObjectUtils;
import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.api.Timestamp;
import org.apache.hadoop.hive.metastore.columnstats.cache.TimestampColumnStatsDataInspector;
+import com.google.common.base.MoreObjects;
+
public class TimestampColumnStatsMerger extends ColumnStatsMerger {
@Override
- public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
+ protected void doMerge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
TimestampColumnStatsDataInspector aggregateData = timestampInspectorFromStats(aggregateColStats);
TimestampColumnStatsDataInspector newData = timestampInspectorFromStats(newColStats);
@@ -40,7 +43,7 @@ public class TimestampColumnStatsMerger extends ColumnStatsMerger {
} else {
NumDistinctValueEstimator oldEst = aggregateData.getNdvEstimator();
NumDistinctValueEstimator newEst = newData.getNdvEstimator();
- long ndv = -1;
+ final long ndv;
if (oldEst.canMerge(newEst)) {
oldEst.mergeEstimators(newEst);
ndv = oldEst.estimateNumDistinctValues();
@@ -48,8 +51,8 @@ public class TimestampColumnStatsMerger extends ColumnStatsMerger {
} else {
ndv = Math.max(aggregateData.getNumDVs(), newData.getNumDVs());
}
- LOG.debug("Use bitvector to merge column " + aggregateColStats.getColName() + "'s ndvs of "
- + aggregateData.getNumDVs() + " and " + newData.getNumDVs() + " to be " + ndv);
+ log.debug("Use bitvector to merge column {}'s ndvs of {} and {} to be {}", aggregateColStats.getColName(),
+ aggregateData.getNumDVs(), newData.getNumDVs(), ndv);
aggregateData.setNumDVs(ndv);
}
@@ -57,36 +60,32 @@ public class TimestampColumnStatsMerger extends ColumnStatsMerger {
}
public void setLowValue(TimestampColumnStatsDataInspector aggregateData, TimestampColumnStatsDataInspector newData) {
+ final Timestamp aggregateLowValue = aggregateData.getLowValue();
+ final Timestamp newLowValue = newData.getLowValue();
+
+ final Timestamp mergedLowValue;
if (!aggregateData.isSetLowValue() && !newData.isSetLowValue()) {
return;
- }
-
- Timestamp aggregateLowValue = aggregateData.getLowValue();
- Timestamp newLowValue = newData.getLowValue();
-
- Timestamp mergedLowValue = null;
- if (aggregateData.isSetLowValue() && newData.isSetLowValue()) {
- mergedLowValue = aggregateLowValue.compareTo(newLowValue) > 0 ? newLowValue : aggregateLowValue;
+ } else if (aggregateData.isSetLowValue() && newData.isSetLowValue()) {
+ mergedLowValue = ObjectUtils.min(newLowValue, aggregateLowValue);
} else {
- mergedLowValue = aggregateLowValue == null ? newLowValue : aggregateLowValue;
+ mergedLowValue = MoreObjects.firstNonNull(aggregateLowValue, newLowValue);
}
aggregateData.setLowValue(mergedLowValue);
}
public void setHighValue(TimestampColumnStatsDataInspector aggregateData, TimestampColumnStatsDataInspector newData) {
+ final Timestamp aggregateHighValue = aggregateData.getHighValue();
+ final Timestamp newHighValue = newData.getHighValue();
+
+ final Timestamp mergedHighValue;
if (!aggregateData.isSetHighValue() && !newData.isSetHighValue()) {
return;
- }
-
- Timestamp aggregateHighValue = aggregateData.getHighValue();
- Timestamp newHighValue = newData.getHighValue();
-
- Timestamp mergedHighValue = null;
- if (aggregateData.isSetHighValue() && newData.isSetHighValue()) {
- mergedHighValue = aggregateHighValue.compareTo(newHighValue) > 0 ? aggregateHighValue : newHighValue;
+ } else if (aggregateData.isSetHighValue() && newData.isSetHighValue()) {
+ mergedHighValue = ObjectUtils.max(aggregateHighValue, newHighValue);
} else {
- mergedHighValue = aggregateHighValue == null ? newHighValue : aggregateHighValue;
+ mergedHighValue = MoreObjects.firstNonNull(aggregateHighValue, newHighValue);
}
aggregateData.setHighValue(mergedHighValue);