You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mg...@apache.org on 2019/10/05 09:08:00 UTC
[hive] branch master updated: HIVE-22248 Fix statistics persisting
issues (Miklos Gergely reviewed by Jesus Camacho Rodriguez)
This is an automated email from the ASF dual-hosted git repository.
mgergely pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 9502d06 HIVE-22248 Fix statistics persisting issues (Miklos Gergely reviewed by Jesus Camacho Rodriguez)
9502d06 is described below
commit 9502d06d2c36b80e9fe4ecf9d37e7b5d94d3b04e
Author: miklosgergely <mg...@cloudera.com>
AuthorDate: Wed Oct 2 11:07:58 2019 +0200
HIVE-22248 Fix statistics persisting issues (Miklos Gergely reviewed by Jesus Camacho Rodriguez)
---
.../clientpositive/alter_table_update_status.q.out | 26 ++++-----
...ter_table_update_status_disable_bitvector.q.out | 26 ++++-----
.../clientpositive/llap/vector_coalesce_3.q.out | 6 +-
.../results/clientpositive/vector_coalesce_3.q.out | 6 +-
.../columnstats/merge/DateColumnStatsMerger.java | 55 ++++++++++-------
.../merge/DecimalColumnStatsMerger.java | 55 ++++++++++-------
.../columnstats/merge/DoubleColumnStatsMerger.java | 26 ++++++++-
.../columnstats/merge/LongColumnStatsMerger.java | 26 ++++++++-
.../columnstats/merge/StringColumnStatsMerger.java | 2 +
.../merge/DecimalColumnStatsMergerTest.java | 68 ++++++++++++++++++----
10 files changed, 208 insertions(+), 88 deletions(-)
diff --git a/ql/src/test/results/clientpositive/alter_table_update_status.q.out b/ql/src/test/results/clientpositive/alter_table_update_status.q.out
index 6453391..e643863 100644
--- a/ql/src/test/results/clientpositive/alter_table_update_status.q.out
+++ b/ql/src/test/results/clientpositive/alter_table_update_status.q.out
@@ -339,7 +339,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats_n0
col_name s
data_type smallint
-min 0
+min 3
max 3
num_nulls 1
distinct_count 1
@@ -358,7 +358,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats_n0
col_name i
data_type int
-min 0
+min 45
max 45
num_nulls 1
distinct_count 1
@@ -377,7 +377,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats_n0
col_name b
data_type bigint
-min 0
+min 456
max 456
num_nulls 1
distinct_count 1
@@ -396,7 +396,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats_n0
col_name f
data_type float
-min 0.0
+min 45454.3984375
max 45454.3984375
num_nulls 1
distinct_count 1
@@ -415,7 +415,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats_n0
col_name d
data_type double
-min 0.0
+min 454.6565
max 454.6565
num_nulls 1
distinct_count 1
@@ -453,7 +453,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats_n0
col_name ts
data_type timestamp
-min 0
+min 1325379723
max 1325379723
num_nulls 1
distinct_count 1
@@ -586,7 +586,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats_n0
col_name t
data_type tinyint
-min 0
+min 2
max 2
num_nulls 1
distinct_count 1
@@ -632,7 +632,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats_n0
col_name s
data_type smallint
-min 0
+min 3
max 3
num_nulls 1
distinct_count 1
@@ -678,7 +678,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats_n0
col_name i
data_type int
-min 0
+min 45
max 45
num_nulls 1
distinct_count 1
@@ -724,7 +724,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats_n0
col_name b
data_type bigint
-min 0
+min 456
max 456
num_nulls 1
distinct_count 1
@@ -770,7 +770,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats_n0
col_name f
data_type float
-min 0.0
+min 45454.3984375
max 45454.3984375
num_nulls 1
distinct_count 1
@@ -816,7 +816,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats_n0
col_name d
data_type double
-min 0.0
+min 454.6565
max 454.6565
num_nulls 1
distinct_count 1
@@ -908,7 +908,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats_n0
col_name ts
data_type timestamp
-min 0
+min 1325379723
max 1325379723
num_nulls 1
distinct_count 1
diff --git a/ql/src/test/results/clientpositive/alter_table_update_status_disable_bitvector.q.out b/ql/src/test/results/clientpositive/alter_table_update_status_disable_bitvector.q.out
index 068f302..904aa1f 100644
--- a/ql/src/test/results/clientpositive/alter_table_update_status_disable_bitvector.q.out
+++ b/ql/src/test/results/clientpositive/alter_table_update_status_disable_bitvector.q.out
@@ -339,7 +339,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats
col_name s
data_type smallint
-min 0
+min 3
max 3
num_nulls 1
distinct_count 1
@@ -358,7 +358,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats
col_name i
data_type int
-min 0
+min 45
max 45
num_nulls 1
distinct_count 1
@@ -377,7 +377,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats
col_name b
data_type bigint
-min 0
+min 456
max 456
num_nulls 1
distinct_count 1
@@ -396,7 +396,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats
col_name f
data_type float
-min 0.0
+min 45454.3984375
max 45454.3984375
num_nulls 1
distinct_count 1
@@ -415,7 +415,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats
col_name d
data_type double
-min 0.0
+min 454.6565
max 454.6565
num_nulls 1
distinct_count 1
@@ -453,7 +453,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats
col_name ts
data_type timestamp
-min 0
+min 1325379723
max 1325379723
num_nulls 1
distinct_count 1
@@ -586,7 +586,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats
col_name t
data_type tinyint
-min 0
+min 2
max 2
num_nulls 1
distinct_count 1
@@ -632,7 +632,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats
col_name s
data_type smallint
-min 0
+min 3
max 3
num_nulls 1
distinct_count 1
@@ -678,7 +678,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats
col_name i
data_type int
-min 0
+min 45
max 45
num_nulls 1
distinct_count 1
@@ -724,7 +724,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats
col_name b
data_type bigint
-min 0
+min 456
max 456
num_nulls 1
distinct_count 1
@@ -770,7 +770,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats
col_name f
data_type float
-min 0.0
+min 45454.3984375
max 45454.3984375
num_nulls 1
distinct_count 1
@@ -816,7 +816,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats
col_name d
data_type double
-min 0.0
+min 454.6565
max 454.6565
num_nulls 1
distinct_count 1
@@ -908,7 +908,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats
col_name ts
data_type timestamp
-min 0
+min 1325379723
max 1325379723
num_nulls 1
distinct_count 1
diff --git a/ql/src/test/results/clientpositive/llap/vector_coalesce_3.q.out b/ql/src/test/results/clientpositive/llap/vector_coalesce_3.q.out
index e3dabce..a6ccdd7 100644
--- a/ql/src/test/results/clientpositive/llap/vector_coalesce_3.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_coalesce_3.q.out
@@ -117,7 +117,7 @@ STAGE PLANS:
outputColumnNames: _col0, _col2
input vertices:
1 Map 2
- Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 7 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: bigint), _col2 (type: bigint)
outputColumnNames: _col0, _col1
@@ -125,13 +125,13 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 2]
- Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 7 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 7 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/vector_coalesce_3.q.out b/ql/src/test/results/clientpositive/vector_coalesce_3.q.out
index 0890943..1438ef8 100644
--- a/ql/src/test/results/clientpositive/vector_coalesce_3.q.out
+++ b/ql/src/test/results/clientpositive/vector_coalesce_3.q.out
@@ -131,7 +131,7 @@ STAGE PLANS:
nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
outputColumnNames: _col0, _col2
- Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 7 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: bigint), _col2 (type: bigint)
outputColumnNames: _col0, _col1
@@ -139,13 +139,13 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1]
- Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 7 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 7 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DateColumnStatsMerger.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DateColumnStatsMerger.java
index bcdb56d..a2232b0 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DateColumnStatsMerger.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DateColumnStatsMerger.java
@@ -32,10 +32,9 @@ public class DateColumnStatsMerger extends ColumnStatsMerger {
DateColumnStatsDataInspector aggregateData = dateInspectorFromStats(aggregateColStats);
DateColumnStatsDataInspector newData = dateInspectorFromStats(newColStats);
- Date lowValue = min(aggregateData.getLowValue(), newData.getLowValue());
- aggregateData.setLowValue(lowValue);
- Date highValue = max(aggregateData.getHighValue(), newData.getHighValue());
- aggregateData.setHighValue(highValue);
+ setLowValue(aggregateData, newData);
+ setHighValue(aggregateData, newData);
+
aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());
if (aggregateData.getNdvEstimator() == null || newData.getNdvEstimator() == null) {
aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs()));
@@ -54,27 +53,43 @@ public class DateColumnStatsMerger extends ColumnStatsMerger {
+ aggregateData.getNumDVs() + " and " + newData.getNumDVs() + " to be " + ndv);
aggregateData.setNumDVs(ndv);
}
+
+ aggregateColStats.getStatsData().setDateStats(aggregateData);
}
- private Date min(Date v1, Date v2) {
- if (v1 == null || v2 == null) {
- if (v1 != null) {
- return v1;
- } else {
- return v2;
- }
+ private void setLowValue(DateColumnStatsDataInspector aggregateData, DateColumnStatsDataInspector newData) {
+ if (!aggregateData.isSetLowValue() && !newData.isSetLowValue()) {
+ return;
+ }
+
+ Date aggregateLowValue = aggregateData.getLowValue();
+ Date newLowValue = newData.getLowValue();
+
+ Date mergedLowValue = null;
+ if (aggregateData.isSetLowValue() && newData.isSetLowValue()) {
+ mergedLowValue = aggregateLowValue.compareTo(newLowValue) > 0 ? newLowValue : aggregateLowValue;
+ } else {
+ mergedLowValue = aggregateLowValue == null ? newLowValue : aggregateLowValue;
}
- return v1.compareTo(v2) < 0 ? v1 : v2;
+
+ aggregateData.setLowValue(mergedLowValue);
}
- private Date max(Date v1, Date v2) {
- if (v1 == null || v2 == null) {
- if (v1 != null) {
- return v1;
- } else {
- return v2;
- }
+ private void setHighValue(DateColumnStatsDataInspector aggregateData, DateColumnStatsDataInspector newData) {
+ if (!aggregateData.isSetHighValue() && !newData.isSetHighValue()) {
+ return;
+ }
+
+ Date aggregateHighValue = aggregateData.getHighValue();
+ Date newHighValue = newData.getHighValue();
+
+ Date mergedHighValue = null;
+ if (aggregateData.isSetHighValue() && newData.isSetHighValue()) {
+ mergedHighValue = aggregateHighValue.compareTo(newHighValue) > 0 ? aggregateHighValue : newHighValue;
+ } else {
+ mergedHighValue = aggregateHighValue == null ? newHighValue : aggregateHighValue;
}
- return v1.compareTo(v2) > 0 ? v1 : v2;
+
+ aggregateData.setHighValue(mergedHighValue);
}
}
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java
index 5094358..a114188 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java
@@ -24,21 +24,18 @@ import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.api.Decimal;
import org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector;
+import com.google.common.annotations.VisibleForTesting;
+
import static org.apache.hadoop.hive.metastore.columnstats.ColumnsStatsUtils.decimalInspectorFromStats;
public class DecimalColumnStatsMerger extends ColumnStatsMerger {
@Override
public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
- DecimalColumnStatsDataInspector aggregateData =
- decimalInspectorFromStats(aggregateColStats);
- DecimalColumnStatsDataInspector newData =
- decimalInspectorFromStats(newColStats);
-
- Decimal lowValue = getMin(aggregateData.getLowValue(), newData.getLowValue());
- aggregateData.setLowValue(lowValue);
+ DecimalColumnStatsDataInspector aggregateData = decimalInspectorFromStats(aggregateColStats);
+ DecimalColumnStatsDataInspector newData = decimalInspectorFromStats(newColStats);
- Decimal highValue = getMax(aggregateData.getHighValue(), newData.getHighValue());
- aggregateData.setHighValue(highValue);
+ setLowValue(aggregateData, newData);
+ setHighValue(aggregateData, newData);
aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());
@@ -59,29 +56,45 @@ public class DecimalColumnStatsMerger extends ColumnStatsMerger {
+ aggregateData.getNumDVs() + " and " + newData.getNumDVs() + " to be " + ndv);
aggregateData.setNumDVs(ndv);
}
+
+ aggregateColStats.getStatsData().setDecimalStats(aggregateData);
}
- Decimal getMax(Decimal firstValue, Decimal secondValue) {
- if (firstValue == null && secondValue == null) {
- return null;
+ @VisibleForTesting
+ void setLowValue(DecimalColumnStatsDataInspector aggregateData, DecimalColumnStatsDataInspector newData) {
+ if (!aggregateData.isSetLowValue() && !newData.isSetLowValue()) {
+ return;
}
- if (firstValue != null && secondValue != null) {
- return firstValue.compareTo(secondValue) > 0 ? firstValue : secondValue;
+ Decimal aggregateLowValue = aggregateData.getLowValue();
+ Decimal newLowValue = newData.getLowValue();
+
+ Decimal mergedLowValue = null;
+ if (aggregateData.isSetLowValue() && newData.isSetLowValue()) {
+ mergedLowValue = aggregateLowValue.compareTo(newLowValue) > 0 ? newLowValue : aggregateLowValue;
+ } else {
+ mergedLowValue = aggregateLowValue == null ? newLowValue : aggregateLowValue;
}
- return firstValue == null ? secondValue : firstValue;
+ aggregateData.setLowValue(mergedLowValue);
}
- Decimal getMin(Decimal firstValue, Decimal secondValue) {
- if (firstValue == null && secondValue == null) {
- return null;
+ @VisibleForTesting
+ void setHighValue(DecimalColumnStatsDataInspector aggregateData, DecimalColumnStatsDataInspector newData) {
+ if (!aggregateData.isSetHighValue() && !newData.isSetHighValue()) {
+ return;
}
- if (firstValue != null && secondValue != null) {
- return firstValue.compareTo(secondValue) > 0 ? secondValue : firstValue;
+ Decimal aggregateHighValue = aggregateData.getHighValue();
+ Decimal newHighValue = newData.getHighValue();
+
+ Decimal mergedHighValue = null;
+ if (aggregateData.isSetHighValue() && newData.isSetHighValue()) {
+ mergedHighValue = aggregateHighValue.compareTo(newHighValue) > 0 ? aggregateHighValue : newHighValue;
+ } else {
+ mergedHighValue = aggregateHighValue == null ? newHighValue : aggregateHighValue;
}
- return firstValue == null ? secondValue : firstValue;
+ aggregateData.setHighValue(mergedHighValue);
}
}
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DoubleColumnStatsMerger.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DoubleColumnStatsMerger.java
index cbacacd..a02f25b 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DoubleColumnStatsMerger.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DoubleColumnStatsMerger.java
@@ -30,8 +30,8 @@ public class DoubleColumnStatsMerger extends ColumnStatsMerger {
public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
DoubleColumnStatsDataInspector aggregateData = doubleInspectorFromStats(aggregateColStats);
DoubleColumnStatsDataInspector newData = doubleInspectorFromStats(newColStats);
- aggregateData.setLowValue(Math.min(aggregateData.getLowValue(), newData.getLowValue()));
- aggregateData.setHighValue(Math.max(aggregateData.getHighValue(), newData.getHighValue()));
+ setLowValue(aggregateData, newData);
+ setHighValue(aggregateData, newData);
aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());
if (aggregateData.getNdvEstimator() == null || newData.getNdvEstimator() == null) {
aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs()));
@@ -50,5 +50,27 @@ public class DoubleColumnStatsMerger extends ColumnStatsMerger {
+ aggregateData.getNumDVs() + " and " + newData.getNumDVs() + " to be " + ndv);
aggregateData.setNumDVs(ndv);
}
+
+ aggregateColStats.getStatsData().setDoubleStats(aggregateData);
+ }
+
+ private void setLowValue(DoubleColumnStatsDataInspector aggregateData, DoubleColumnStatsDataInspector newData) {
+   // Nothing to merge unless at least one side carries a low value.
+   if (aggregateData.isSetLowValue() || newData.isSetLowValue()) {
+     // An unset side contributes Double.MAX_VALUE, so it cannot lower the merged minimum.
+     double aggregateLow = aggregateData.isSetLowValue() ? aggregateData.getLowValue() : Double.MAX_VALUE;
+     double newLow = newData.isSetLowValue() ? newData.getLowValue() : Double.MAX_VALUE;
+     aggregateData.setLowValue(Math.min(aggregateLow, newLow));
+   }
+ }
+
+ private void setHighValue(DoubleColumnStatsDataInspector aggregateData, DoubleColumnStatsDataInspector newData) {
+   if (!aggregateData.isSetHighValue() && !newData.isSetHighValue()) {
+     return; // neither side has a high value, so the aggregate's high value stays unset
+   }
+   double highValue = Math.max( // unset sides use NEGATIVE_INFINITY; Double.MIN_VALUE is the smallest POSITIVE double
+       aggregateData.isSetHighValue() ? aggregateData.getHighValue() : Double.NEGATIVE_INFINITY,
+       newData.isSetHighValue() ? newData.getHighValue() : Double.NEGATIVE_INFINITY);
+   aggregateData.setHighValue(highValue); // store the merged maximum as the HIGH value (previously overwrote the low value)
+ }
}
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/LongColumnStatsMerger.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/LongColumnStatsMerger.java
index 8e70371..67adbf1 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/LongColumnStatsMerger.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/LongColumnStatsMerger.java
@@ -30,8 +30,8 @@ public class LongColumnStatsMerger extends ColumnStatsMerger {
public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
LongColumnStatsDataInspector aggregateData = longInspectorFromStats(aggregateColStats);
LongColumnStatsDataInspector newData = longInspectorFromStats(newColStats);
- aggregateData.setLowValue(Math.min(aggregateData.getLowValue(), newData.getLowValue()));
- aggregateData.setHighValue(Math.max(aggregateData.getHighValue(), newData.getHighValue()));
+ setLowValue(aggregateData, newData);
+ setHighValue(aggregateData, newData);
aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());
if (aggregateData.getNdvEstimator() == null || newData.getNdvEstimator() == null) {
aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs()));
@@ -50,5 +50,27 @@ public class LongColumnStatsMerger extends ColumnStatsMerger {
+ aggregateData.getNumDVs() + " and " + newData.getNumDVs() + " to be " + ndv);
aggregateData.setNumDVs(ndv);
}
+
+ aggregateColStats.getStatsData().setLongStats(aggregateData);
+ }
+
+ private void setLowValue(LongColumnStatsDataInspector aggregateData, LongColumnStatsDataInspector newData) {
+   // Nothing to merge unless at least one side carries a low value.
+   if (aggregateData.isSetLowValue() || newData.isSetLowValue()) {
+     // An unset side contributes Long.MAX_VALUE, so it cannot lower the merged minimum.
+     long aggregateLow = aggregateData.isSetLowValue() ? aggregateData.getLowValue() : Long.MAX_VALUE;
+     long newLow = newData.isSetLowValue() ? newData.getLowValue() : Long.MAX_VALUE;
+     aggregateData.setLowValue(Math.min(aggregateLow, newLow));
+   }
+ }
+
+ private void setHighValue(LongColumnStatsDataInspector aggregateData, LongColumnStatsDataInspector newData) {
+   // Nothing to merge unless at least one side carries a high value.
+   if (aggregateData.isSetHighValue() || newData.isSetHighValue()) {
+     // An unset side contributes Long.MIN_VALUE, so it cannot raise the merged maximum.
+     long aggregateHigh = aggregateData.isSetHighValue() ? aggregateData.getHighValue() : Long.MIN_VALUE;
+     long newHigh = newData.isSetHighValue() ? newData.getHighValue() : Long.MIN_VALUE;
+     aggregateData.setHighValue(Math.max(aggregateHigh, newHigh));
+   }
+ }
}
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/StringColumnStatsMerger.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/StringColumnStatsMerger.java
index 762685d..dec4485 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/StringColumnStatsMerger.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/StringColumnStatsMerger.java
@@ -50,5 +50,7 @@ public class StringColumnStatsMerger extends ColumnStatsMerger {
+ aggregateData.getNumDVs() + " and " + newData.getNumDVs() + " to be " + ndv);
aggregateData.setNumDVs(ndv);
}
+
+ aggregateColStats.getStatsData().setStringStats(aggregateData);
}
}
diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMergerTest.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMergerTest.java
index ca0a6c0..a9d55ea 100644
--- a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMergerTest.java
+++ b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMergerTest.java
@@ -36,6 +36,19 @@ public class DecimalColumnStatsMergerTest {
private static final Decimal DECIMAL_5 = DecimalUtils.getDecimal(5, 0);
private static final Decimal DECIMAL_20 = DecimalUtils.getDecimal(2, 1);
+ private static final DecimalColumnStatsDataInspector DATA_3 = new DecimalColumnStatsDataInspector();
+ private static final DecimalColumnStatsDataInspector DATA_5 = new DecimalColumnStatsDataInspector();
+ private static final DecimalColumnStatsDataInspector DATA_20 = new DecimalColumnStatsDataInspector();
+
+ static {
+ DATA_3.setLowValue(DECIMAL_3);
+ DATA_3.setHighValue(DECIMAL_3);
+ DATA_5.setLowValue(DECIMAL_5);
+ DATA_5.setHighValue(DECIMAL_5);
+ DATA_20.setLowValue(DECIMAL_20);
+ DATA_20.setHighValue(DECIMAL_20);
+ }
+
private DecimalColumnStatsMerger merger = new DecimalColumnStatsMerger();
@Test
@@ -165,57 +178,90 @@ public class DecimalColumnStatsMergerTest {
@Test
public void testCompareSimple() {
- Assert.assertEquals(DECIMAL_5, merger.getMax(DECIMAL_3, DECIMAL_5));
+ DecimalColumnStatsDataInspector data1 = new DecimalColumnStatsDataInspector(DATA_3);
+ DecimalColumnStatsDataInspector data2 = new DecimalColumnStatsDataInspector(DATA_5);
+ merger.setHighValue(data1, data2);
+ Assert.assertEquals(DECIMAL_5, data1.getHighValue());
}
@Test
public void testCompareSimpleFlipped() {
- Assert.assertEquals(DECIMAL_5, merger.getMax(DECIMAL_5, DECIMAL_3));
+ DecimalColumnStatsDataInspector data1 = new DecimalColumnStatsDataInspector(DATA_5);
+ DecimalColumnStatsDataInspector data2 = new DecimalColumnStatsDataInspector(DATA_3);
+ merger.setHighValue(data1, data2);
+ Assert.assertEquals(DECIMAL_5, data1.getHighValue());
}
@Test
public void testCompareSimpleReversed() {
- Assert.assertEquals(DECIMAL_3, merger.getMin(DECIMAL_3, DECIMAL_5));
+ DecimalColumnStatsDataInspector data1 = new DecimalColumnStatsDataInspector(DATA_3);
+ DecimalColumnStatsDataInspector data2 = new DecimalColumnStatsDataInspector(DATA_5);
+ merger.setLowValue(data1, data2);
+ Assert.assertEquals(DECIMAL_3, data1.getLowValue());
}
@Test
public void testCompareSimpleFlippedReversed() {
- Assert.assertEquals(DECIMAL_3, merger.getMin(DECIMAL_5, DECIMAL_3));
+ DecimalColumnStatsDataInspector data1 = new DecimalColumnStatsDataInspector(DATA_5);
+ DecimalColumnStatsDataInspector data2 = new DecimalColumnStatsDataInspector(DATA_3);
+ merger.setLowValue(data1, data2);
+ Assert.assertEquals(DECIMAL_3, data1.getLowValue());
}
@Test
public void testCompareUnscaledValue() {
- Assert.assertEquals(DECIMAL_20, merger.getMax(DECIMAL_3, DECIMAL_20));
+ DecimalColumnStatsDataInspector data1 = new DecimalColumnStatsDataInspector(DATA_3);
+ DecimalColumnStatsDataInspector data2 = new DecimalColumnStatsDataInspector(DATA_20);
+ merger.setHighValue(data1, data2);
+ Assert.assertEquals(DECIMAL_20, data1.getHighValue());
}
@Test
public void testCompareNullsMin() {
- Assert.assertNull(merger.getMin(null, null));
+ DecimalColumnStatsDataInspector data1 = new DecimalColumnStatsDataInspector();
+ DecimalColumnStatsDataInspector data2 = new DecimalColumnStatsDataInspector();
+ merger.setLowValue(data1, data2);
+ Assert.assertNull(data1.getLowValue());
}
@Test
public void testCompareNullsMax() {
- Assert.assertNull(merger.getMax(null, null));
+ DecimalColumnStatsDataInspector data1 = new DecimalColumnStatsDataInspector();
+ DecimalColumnStatsDataInspector data2 = new DecimalColumnStatsDataInspector();
+ merger.setHighValue(data1, data2);
+ Assert.assertNull(data1.getHighValue());
}
@Test
public void testCompareFirstNullMin() {
- Assert.assertEquals(DECIMAL_3, merger.getMin(null, DECIMAL_3));
+ DecimalColumnStatsDataInspector data1 = new DecimalColumnStatsDataInspector();
+ DecimalColumnStatsDataInspector data2 = new DecimalColumnStatsDataInspector(DATA_3);
+ merger.setLowValue(data1, data2);
+ Assert.assertEquals(DECIMAL_3, data1.getLowValue());
}
@Test
public void testCompareSecondNullMin() {
- Assert.assertEquals(DECIMAL_3, merger.getMin(DECIMAL_3, null));
+ DecimalColumnStatsDataInspector data1 = new DecimalColumnStatsDataInspector(DATA_3);
+ DecimalColumnStatsDataInspector data2 = new DecimalColumnStatsDataInspector();
+ merger.setLowValue(data1, data2);
+ Assert.assertEquals(DECIMAL_3, data1.getLowValue());
}
@Test
public void testCompareFirstNullMax() {
- Assert.assertEquals(DECIMAL_3, merger.getMax(null, DECIMAL_3));
+ DecimalColumnStatsDataInspector data1 = new DecimalColumnStatsDataInspector(); // first (aggregate) side has no high value
+ DecimalColumnStatsDataInspector data2 = new DecimalColumnStatsDataInspector(DATA_3);
+ merger.setHighValue(data1, data2);
+ Assert.assertEquals(DECIMAL_3, data1.getHighValue());
}
@Test
public void testCompareSecondNullMax() {
- Assert.assertEquals(DECIMAL_3, merger.getMax(DECIMAL_3, null));
+ DecimalColumnStatsDataInspector data1 = new DecimalColumnStatsDataInspector(DATA_3);
+ DecimalColumnStatsDataInspector data2 = new DecimalColumnStatsDataInspector(); // second (new) side has no high value
+ merger.setHighValue(data1, data2);
+ Assert.assertEquals(DECIMAL_3, data1.getHighValue());
}
private DecimalColumnStatsDataInspector createData(ColumnStatisticsObj objNulls, Decimal lowValue,