You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2017/08/17 22:42:33 UTC
[09/10] hive git commit: HIVE-17286: Avoid expensive String
serialization/deserialization for bitvectors (Jesus Camacho Rodriguez,
reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/9a36aa90/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DoubleColumnStatsAggregator.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DoubleColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DoubleColumnStatsAggregator.java
index 1b44dd9..bb4a725 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DoubleColumnStatsAggregator.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DoubleColumnStatsAggregator.java
@@ -33,6 +33,7 @@ import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;
import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -64,14 +65,14 @@ public class DoubleColumnStatsAggregator extends ColumnStatsAggregator implement
statsObj = ColumnStatsAggregatorFactory.newColumnStaticsObj(colName, colType, cso
.getStatsData().getSetField());
}
- if (!cso.getStatsData().getDoubleStats().isSetBitVectors()
- || cso.getStatsData().getDoubleStats().getBitVectors().length() == 0) {
+ DoubleColumnStatsDataInspector doubleColumnStatsData =
+ (DoubleColumnStatsDataInspector) cso.getStatsData().getDoubleStats();
+ if (doubleColumnStatsData.getNdvEstimator() == null) {
ndvEstimator = null;
break;
} else {
// check if all of the bit vectors can merge
- NumDistinctValueEstimator estimator = NumDistinctValueEstimatorFactory
- .getNumDistinctValueEstimator(cso.getStatsData().getDoubleStats().getBitVectors());
+ NumDistinctValueEstimator estimator = doubleColumnStatsData.getNdvEstimator();
if (ndvEstimator == null) {
ndvEstimator = estimator;
} else {
@@ -91,19 +92,19 @@ public class DoubleColumnStatsAggregator extends ColumnStatsAggregator implement
LOG.debug("all of the bit vectors can merge for " + colName + " is " + (ndvEstimator != null));
ColumnStatisticsData columnStatisticsData = new ColumnStatisticsData();
if (doAllPartitionContainStats || css.size() < 2) {
- DoubleColumnStatsData aggregateData = null;
+ DoubleColumnStatsDataInspector aggregateData = null;
long lowerBound = 0;
long higherBound = 0;
double densityAvgSum = 0.0;
for (ColumnStatistics cs : css) {
ColumnStatisticsObj cso = cs.getStatsObjIterator().next();
- DoubleColumnStatsData newData = cso.getStatsData().getDoubleStats();
+ DoubleColumnStatsDataInspector newData =
+ (DoubleColumnStatsDataInspector) cso.getStatsData().getDoubleStats();
lowerBound = Math.max(lowerBound, newData.getNumDVs());
higherBound += newData.getNumDVs();
densityAvgSum += (newData.getHighValue() - newData.getLowValue()) / newData.getNumDVs();
if (ndvEstimator != null) {
- ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory
- .getNumDistinctValueEstimator(newData.getBitVectors()));
+ ndvEstimator.mergeEstimators(newData.getNdvEstimator());
}
if (aggregateData == null) {
aggregateData = newData.deepCopy();
@@ -174,7 +175,8 @@ public class DoubleColumnStatsAggregator extends ColumnStatsAggregator implement
for (ColumnStatistics cs : css) {
String partName = cs.getStatsDesc().getPartName();
ColumnStatisticsObj cso = cs.getStatsObjIterator().next();
- DoubleColumnStatsData newData = cso.getStatsData().getDoubleStats();
+ DoubleColumnStatsDataInspector newData =
+ (DoubleColumnStatsDataInspector) cso.getStatsData().getDoubleStats();
// newData.isSetBitVectors() should be true for sure because we
// already checked it before.
if (indexMap.get(partName) != curIndex) {
@@ -210,8 +212,7 @@ public class DoubleColumnStatsAggregator extends ColumnStatsAggregator implement
newData.getHighValue()));
aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());
}
- ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory
- .getNumDistinctValueEstimator(newData.getBitVectors()));
+ ndvEstimator.mergeEstimators(newData.getNdvEstimator());
}
if (length > 0) {
// we have to set ndv
@@ -239,7 +240,7 @@ public class DoubleColumnStatsAggregator extends ColumnStatsAggregator implement
int numPartsWithStats, Map<String, Double> adjustedIndexMap,
Map<String, ColumnStatisticsData> adjustedStatsMap, double densityAvg) {
int rightBorderInd = numParts;
- DoubleColumnStatsData extrapolateDoubleData = new DoubleColumnStatsData();
+ DoubleColumnStatsDataInspector extrapolateDoubleData = new DoubleColumnStatsDataInspector();
Map<String, DoubleColumnStatsData> extractedAdjustedStatsMap = new HashMap<>();
for (Map.Entry<String, ColumnStatisticsData> entry : adjustedStatsMap.entrySet()) {
extractedAdjustedStatsMap.put(entry.getKey(), entry.getValue().getDoubleStats());
http://git-wip-us.apache.org/repos/asf/hive/blob/9a36aa90/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/LongColumnStatsAggregator.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/LongColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/LongColumnStatsAggregator.java
index 802ad1a..5b1145e 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/LongColumnStatsAggregator.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/LongColumnStatsAggregator.java
@@ -33,6 +33,7 @@ import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -64,14 +65,14 @@ public class LongColumnStatsAggregator extends ColumnStatsAggregator implements
statsObj = ColumnStatsAggregatorFactory.newColumnStaticsObj(colName, colType, cso
.getStatsData().getSetField());
}
- if (!cso.getStatsData().getLongStats().isSetBitVectors()
- || cso.getStatsData().getLongStats().getBitVectors().length() == 0) {
+ LongColumnStatsDataInspector longColumnStatsData =
+ (LongColumnStatsDataInspector) cso.getStatsData().getLongStats();
+ if (longColumnStatsData.getNdvEstimator() == null) {
ndvEstimator = null;
break;
} else {
// check if all of the bit vectors can merge
- NumDistinctValueEstimator estimator = NumDistinctValueEstimatorFactory
- .getNumDistinctValueEstimator(cso.getStatsData().getLongStats().getBitVectors());
+ NumDistinctValueEstimator estimator = longColumnStatsData.getNdvEstimator();
if (ndvEstimator == null) {
ndvEstimator = estimator;
} else {
@@ -91,19 +92,19 @@ public class LongColumnStatsAggregator extends ColumnStatsAggregator implements
LOG.debug("all of the bit vectors can merge for " + colName + " is " + (ndvEstimator != null));
ColumnStatisticsData columnStatisticsData = new ColumnStatisticsData();
if (doAllPartitionContainStats || css.size() < 2) {
- LongColumnStatsData aggregateData = null;
+ LongColumnStatsDataInspector aggregateData = null;
long lowerBound = 0;
long higherBound = 0;
double densityAvgSum = 0.0;
for (ColumnStatistics cs : css) {
ColumnStatisticsObj cso = cs.getStatsObjIterator().next();
- LongColumnStatsData newData = cso.getStatsData().getLongStats();
+ LongColumnStatsDataInspector newData =
+ (LongColumnStatsDataInspector) cso.getStatsData().getLongStats();
lowerBound = Math.max(lowerBound, newData.getNumDVs());
higherBound += newData.getNumDVs();
densityAvgSum += (newData.getHighValue() - newData.getLowValue()) / newData.getNumDVs();
if (ndvEstimator != null) {
- ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory
- .getNumDistinctValueEstimator(newData.getBitVectors()));
+ ndvEstimator.mergeEstimators(newData.getNdvEstimator());
}
if (aggregateData == null) {
aggregateData = newData.deepCopy();
@@ -171,11 +172,12 @@ public class LongColumnStatsAggregator extends ColumnStatsAggregator implements
double pseudoIndexSum = 0;
int length = 0;
int curIndex = -1;
- LongColumnStatsData aggregateData = null;
+ LongColumnStatsDataInspector aggregateData = null;
for (ColumnStatistics cs : css) {
String partName = cs.getStatsDesc().getPartName();
ColumnStatisticsObj cso = cs.getStatsObjIterator().next();
- LongColumnStatsData newData = cso.getStatsData().getLongStats();
+ LongColumnStatsDataInspector newData =
+ (LongColumnStatsDataInspector) cso.getStatsData().getLongStats();
// newData.isSetBitVectors() should be true for sure because we
// already checked it before.
if (indexMap.get(partName) != curIndex) {
@@ -211,8 +213,7 @@ public class LongColumnStatsAggregator extends ColumnStatsAggregator implements
newData.getHighValue()));
aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());
}
- ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory
- .getNumDistinctValueEstimator(newData.getBitVectors()));
+ ndvEstimator.mergeEstimators(newData.getNdvEstimator());
}
if (length > 0) {
// we have to set ndv
@@ -240,7 +241,7 @@ public class LongColumnStatsAggregator extends ColumnStatsAggregator implements
int numPartsWithStats, Map<String, Double> adjustedIndexMap,
Map<String, ColumnStatisticsData> adjustedStatsMap, double densityAvg) {
int rightBorderInd = numParts;
- LongColumnStatsData extrapolateLongData = new LongColumnStatsData();
+ LongColumnStatsDataInspector extrapolateLongData = new LongColumnStatsDataInspector();
Map<String, LongColumnStatsData> extractedAdjustedStatsMap = new HashMap<>();
for (Map.Entry<String, ColumnStatisticsData> entry : adjustedStatsMap.entrySet()) {
extractedAdjustedStatsMap.put(entry.getKey(), entry.getValue().getLongStats());
http://git-wip-us.apache.org/repos/asf/hive/blob/9a36aa90/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/StringColumnStatsAggregator.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/StringColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/StringColumnStatsAggregator.java
index e1a781f..1b29f92 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/StringColumnStatsAggregator.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/StringColumnStatsAggregator.java
@@ -33,6 +33,7 @@ import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
+import org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -65,14 +66,14 @@ public class StringColumnStatsAggregator extends ColumnStatsAggregator implement
statsObj = ColumnStatsAggregatorFactory.newColumnStaticsObj(colName, colType, cso
.getStatsData().getSetField());
}
- if (!cso.getStatsData().getStringStats().isSetBitVectors()
- || cso.getStatsData().getStringStats().getBitVectors().length() == 0) {
+ StringColumnStatsDataInspector stringColumnStatsData =
+ (StringColumnStatsDataInspector) cso.getStatsData().getStringStats();
+ if (stringColumnStatsData.getNdvEstimator() == null) {
ndvEstimator = null;
break;
} else {
// check if all of the bit vectors can merge
- NumDistinctValueEstimator estimator = NumDistinctValueEstimatorFactory
- .getNumDistinctValueEstimator(cso.getStatsData().getStringStats().getBitVectors());
+ NumDistinctValueEstimator estimator = stringColumnStatsData.getNdvEstimator();
if (ndvEstimator == null) {
ndvEstimator = estimator;
} else {
@@ -92,13 +93,13 @@ public class StringColumnStatsAggregator extends ColumnStatsAggregator implement
LOG.debug("all of the bit vectors can merge for " + colName + " is " + (ndvEstimator != null));
ColumnStatisticsData columnStatisticsData = new ColumnStatisticsData();
if (doAllPartitionContainStats || css.size() < 2) {
- StringColumnStatsData aggregateData = null;
+ StringColumnStatsDataInspector aggregateData = null;
for (ColumnStatistics cs : css) {
ColumnStatisticsObj cso = cs.getStatsObjIterator().next();
- StringColumnStatsData newData = cso.getStatsData().getStringStats();
+ StringColumnStatsDataInspector newData =
+ (StringColumnStatsDataInspector) cso.getStatsData().getStringStats();
if (ndvEstimator != null) {
- ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory
- .getNumDistinctValueEstimator(newData.getBitVectors()));
+ ndvEstimator.mergeEstimators(newData.getNdvEstimator());
}
if (aggregateData == null) {
aggregateData = newData.deepCopy();
@@ -146,11 +147,12 @@ public class StringColumnStatsAggregator extends ColumnStatsAggregator implement
double pseudoIndexSum = 0;
int length = 0;
int curIndex = -1;
- StringColumnStatsData aggregateData = null;
+ StringColumnStatsDataInspector aggregateData = null;
for (ColumnStatistics cs : css) {
String partName = cs.getStatsDesc().getPartName();
ColumnStatisticsObj cso = cs.getStatsObjIterator().next();
- StringColumnStatsData newData = cso.getStatsData().getStringStats();
+ StringColumnStatsDataInspector newData =
+ (StringColumnStatsDataInspector) cso.getStatsData().getStringStats();
// newData.isSetBitVectors() should be true for sure because we
// already checked it before.
if (indexMap.get(partName) != curIndex) {
@@ -185,8 +187,7 @@ public class StringColumnStatsAggregator extends ColumnStatsAggregator implement
newData.getMaxColLen()));
aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());
}
- ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory
- .getNumDistinctValueEstimator(newData.getBitVectors()));
+ ndvEstimator.mergeEstimators(newData.getNdvEstimator());
}
if (length > 0) {
// we have to set ndv
@@ -211,7 +212,7 @@ public class StringColumnStatsAggregator extends ColumnStatsAggregator implement
int numPartsWithStats, Map<String, Double> adjustedIndexMap,
Map<String, ColumnStatisticsData> adjustedStatsMap, double densityAvg) {
int rightBorderInd = numParts;
- StringColumnStatsData extrapolateStringData = new StringColumnStatsData();
+ StringColumnStatsDataInspector extrapolateStringData = new StringColumnStatsDataInspector();
Map<String, StringColumnStatsData> extractedAdjustedStatsMap = new HashMap<>();
for (Map.Entry<String, ColumnStatisticsData> entry : adjustedStatsMap.entrySet()) {
extractedAdjustedStatsMap.put(entry.getKey(), entry.getValue().getStringStats());
@@ -295,7 +296,6 @@ public class StringColumnStatsAggregator extends ColumnStatsAggregator implement
ndv = (long) (min + (max - min) * minInd / (minInd - maxInd));
}
extrapolateStringData.setAvgColLen(avgColLen);
- ;
extrapolateStringData.setMaxColLen((long) maxColLen);
extrapolateStringData.setNumNulls(numNulls);
extrapolateStringData.setNumDVs(ndv);
http://git-wip-us.apache.org/repos/asf/hive/blob/9a36aa90/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/DateColumnStatsDataInspector.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/DateColumnStatsDataInspector.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/DateColumnStatsDataInspector.java
new file mode 100644
index 0000000..937ebf2
--- /dev/null
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/DateColumnStatsDataInspector.java
@@ -0,0 +1,124 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.metastore.columnstats.cache;
+
+import java.nio.ByteBuffer;
+
+import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;
+import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory;
+import org.apache.hadoop.hive.metastore.api.DateColumnStatsData;
+
+@SuppressWarnings("serial")
+public class DateColumnStatsDataInspector extends DateColumnStatsData {
+
+ private NumDistinctValueEstimator ndvEstimator;
+
+ public DateColumnStatsDataInspector() {
+ super();
+ }
+
+ public DateColumnStatsDataInspector(long numNulls, long numDVs) {
+ super(numNulls, numDVs);
+ }
+
+ public DateColumnStatsDataInspector(DateColumnStatsDataInspector other) {
+ super(other);
+ if (other.ndvEstimator != null) {
+ super.setBitVectors(ndvEstimator.serialize());
+ }
+ }
+
+ @Override
+ public DateColumnStatsDataInspector deepCopy() {
+ return new DateColumnStatsDataInspector(this);
+ }
+
+ @Override
+ public byte[] getBitVectors() {
+ if (ndvEstimator != null) {
+ updateBitVectors();
+ }
+ return super.getBitVectors();
+ }
+
+ @Override
+ public ByteBuffer bufferForBitVectors() {
+ if (ndvEstimator != null) {
+ updateBitVectors();
+ }
+ return super.bufferForBitVectors();
+ }
+
+ @Override
+ public void setBitVectors(byte[] bitVectors) {
+ super.setBitVectors(bitVectors);
+ this.ndvEstimator = null;
+ }
+
+ @Override
+ public void setBitVectors(ByteBuffer bitVectors) {
+ super.setBitVectors(bitVectors);
+ this.ndvEstimator = null;
+ }
+
+ @Override
+ public void unsetBitVectors() {
+ super.unsetBitVectors();
+ this.ndvEstimator = null;
+ }
+
+ @Override
+ public boolean isSetBitVectors() {
+ if (ndvEstimator != null) {
+ updateBitVectors();
+ }
+ return super.isSetBitVectors();
+ }
+
+ @Override
+ public void setBitVectorsIsSet(boolean value) {
+ if (ndvEstimator != null) {
+ updateBitVectors();
+ }
+ super.setBitVectorsIsSet(value);
+ }
+
+ public NumDistinctValueEstimator getNdvEstimator() {
+ if (isSetBitVectors() && getBitVectors().length != 0) {
+ updateNdvEstimator();
+ }
+ return ndvEstimator;
+ }
+
+ public void setNdvEstimator(NumDistinctValueEstimator ndvEstimator) {
+ super.unsetBitVectors();
+ this.ndvEstimator = ndvEstimator;
+ }
+
+ private void updateBitVectors() {
+ super.setBitVectors(ndvEstimator.serialize());
+ this.ndvEstimator = null;
+ }
+
+ private void updateNdvEstimator() {
+ this.ndvEstimator = NumDistinctValueEstimatorFactory
+ .getNumDistinctValueEstimator(super.getBitVectors());
+ super.unsetBitVectors();
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/9a36aa90/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/DecimalColumnStatsDataInspector.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/DecimalColumnStatsDataInspector.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/DecimalColumnStatsDataInspector.java
new file mode 100644
index 0000000..586b5d8
--- /dev/null
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/DecimalColumnStatsDataInspector.java
@@ -0,0 +1,124 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.metastore.columnstats.cache;
+
+import java.nio.ByteBuffer;
+
+import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;
+import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory;
+import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData;
+
+@SuppressWarnings("serial")
+public class DecimalColumnStatsDataInspector extends DecimalColumnStatsData {
+
+ private NumDistinctValueEstimator ndvEstimator;
+
+ public DecimalColumnStatsDataInspector() {
+ super();
+ }
+
+ public DecimalColumnStatsDataInspector(long numNulls, long numDVs) {
+ super(numNulls, numDVs);
+ }
+
+ public DecimalColumnStatsDataInspector(DecimalColumnStatsDataInspector other) {
+ super(other);
+ if (other.ndvEstimator != null) {
+ super.setBitVectors(ndvEstimator.serialize());
+ }
+ }
+
+ @Override
+ public DecimalColumnStatsDataInspector deepCopy() {
+ return new DecimalColumnStatsDataInspector(this);
+ }
+
+ @Override
+ public byte[] getBitVectors() {
+ if (ndvEstimator != null) {
+ updateBitVectors();
+ }
+ return super.getBitVectors();
+ }
+
+ @Override
+ public ByteBuffer bufferForBitVectors() {
+ if (ndvEstimator != null) {
+ updateBitVectors();
+ }
+ return super.bufferForBitVectors();
+ }
+
+ @Override
+ public void setBitVectors(byte[] bitVectors) {
+ super.setBitVectors(bitVectors);
+ this.ndvEstimator = null;
+ }
+
+ @Override
+ public void setBitVectors(ByteBuffer bitVectors) {
+ super.setBitVectors(bitVectors);
+ this.ndvEstimator = null;
+ }
+
+ @Override
+ public void unsetBitVectors() {
+ super.unsetBitVectors();
+ this.ndvEstimator = null;
+ }
+
+ @Override
+ public boolean isSetBitVectors() {
+ if (ndvEstimator != null) {
+ updateBitVectors();
+ }
+ return super.isSetBitVectors();
+ }
+
+ @Override
+ public void setBitVectorsIsSet(boolean value) {
+ if (ndvEstimator != null) {
+ updateBitVectors();
+ }
+ super.setBitVectorsIsSet(value);
+ }
+
+ public NumDistinctValueEstimator getNdvEstimator() {
+ if (isSetBitVectors() && getBitVectors().length != 0) {
+ updateNdvEstimator();
+ }
+ return ndvEstimator;
+ }
+
+ public void setNdvEstimator(NumDistinctValueEstimator ndvEstimator) {
+ super.unsetBitVectors();
+ this.ndvEstimator = ndvEstimator;
+ }
+
+ private void updateBitVectors() {
+ super.setBitVectors(ndvEstimator.serialize());
+ this.ndvEstimator = null;
+ }
+
+ private void updateNdvEstimator() {
+ this.ndvEstimator = NumDistinctValueEstimatorFactory
+ .getNumDistinctValueEstimator(super.getBitVectors());
+ super.unsetBitVectors();
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/9a36aa90/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/DoubleColumnStatsDataInspector.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/DoubleColumnStatsDataInspector.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/DoubleColumnStatsDataInspector.java
new file mode 100644
index 0000000..3609ddd
--- /dev/null
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/DoubleColumnStatsDataInspector.java
@@ -0,0 +1,124 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.metastore.columnstats.cache;
+
+import java.nio.ByteBuffer;
+
+import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;
+import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory;
+import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;
+
+@SuppressWarnings("serial")
+public class DoubleColumnStatsDataInspector extends DoubleColumnStatsData {
+
+ private NumDistinctValueEstimator ndvEstimator;
+
+ public DoubleColumnStatsDataInspector() {
+ super();
+ }
+
+ public DoubleColumnStatsDataInspector(long numNulls, long numDVs) {
+ super(numNulls, numDVs);
+ }
+
+ public DoubleColumnStatsDataInspector(DoubleColumnStatsDataInspector other) {
+ super(other);
+ if (other.ndvEstimator != null) {
+ super.setBitVectors(ndvEstimator.serialize());
+ }
+ }
+
+ @Override
+ public DoubleColumnStatsDataInspector deepCopy() {
+ return new DoubleColumnStatsDataInspector(this);
+ }
+
+ @Override
+ public byte[] getBitVectors() {
+ if (ndvEstimator != null) {
+ updateBitVectors();
+ }
+ return super.getBitVectors();
+ }
+
+ @Override
+ public ByteBuffer bufferForBitVectors() {
+ if (ndvEstimator != null) {
+ updateBitVectors();
+ }
+ return super.bufferForBitVectors();
+ }
+
+ @Override
+ public void setBitVectors(byte[] bitVectors) {
+ super.setBitVectors(bitVectors);
+ this.ndvEstimator = null;
+ }
+
+ @Override
+ public void setBitVectors(ByteBuffer bitVectors) {
+ super.setBitVectors(bitVectors);
+ this.ndvEstimator = null;
+ }
+
+ @Override
+ public void unsetBitVectors() {
+ super.unsetBitVectors();
+ this.ndvEstimator = null;
+ }
+
+ @Override
+ public boolean isSetBitVectors() {
+ if (ndvEstimator != null) {
+ updateBitVectors();
+ }
+ return super.isSetBitVectors();
+ }
+
+ @Override
+ public void setBitVectorsIsSet(boolean value) {
+ if (ndvEstimator != null) {
+ updateBitVectors();
+ }
+ super.setBitVectorsIsSet(value);
+ }
+
+ public NumDistinctValueEstimator getNdvEstimator() {
+ if (isSetBitVectors() && getBitVectors().length != 0) {
+ updateNdvEstimator();
+ }
+ return ndvEstimator;
+ }
+
+ public void setNdvEstimator(NumDistinctValueEstimator ndvEstimator) {
+ super.unsetBitVectors();
+ this.ndvEstimator = ndvEstimator;
+ }
+
+ private void updateBitVectors() {
+ super.setBitVectors(ndvEstimator.serialize());
+ this.ndvEstimator = null;
+ }
+
+ private void updateNdvEstimator() {
+ this.ndvEstimator = NumDistinctValueEstimatorFactory
+ .getNumDistinctValueEstimator(super.getBitVectors());
+ super.unsetBitVectors();
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/9a36aa90/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/LongColumnStatsDataInspector.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/LongColumnStatsDataInspector.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/LongColumnStatsDataInspector.java
new file mode 100644
index 0000000..5632d91
--- /dev/null
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/LongColumnStatsDataInspector.java
@@ -0,0 +1,124 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.metastore.columnstats.cache;
+
+import java.nio.ByteBuffer;
+
+import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;
+import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory;
+import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
+
+@SuppressWarnings("serial")
+public class LongColumnStatsDataInspector extends LongColumnStatsData {
+
+ private NumDistinctValueEstimator ndvEstimator;
+
+ public LongColumnStatsDataInspector() {
+ super();
+ }
+
+ public LongColumnStatsDataInspector(long numNulls, long numDVs) {
+ super(numNulls, numDVs);
+ }
+
+ public LongColumnStatsDataInspector(LongColumnStatsDataInspector other) {
+ super(other);
+ if (other.ndvEstimator != null) {
+ super.setBitVectors(ndvEstimator.serialize());
+ }
+ }
+
+ @Override
+ public LongColumnStatsDataInspector deepCopy() {
+ return new LongColumnStatsDataInspector(this);
+ }
+
+ @Override
+ public byte[] getBitVectors() {
+ if (ndvEstimator != null) {
+ updateBitVectors();
+ }
+ return super.getBitVectors();
+ }
+
+ @Override
+ public ByteBuffer bufferForBitVectors() {
+ if (ndvEstimator != null) {
+ updateBitVectors();
+ }
+ return super.bufferForBitVectors();
+ }
+
+ @Override
+ public void setBitVectors(byte[] bitVectors) {
+ super.setBitVectors(bitVectors);
+ this.ndvEstimator = null;
+ }
+
+ @Override
+ public void setBitVectors(ByteBuffer bitVectors) {
+ super.setBitVectors(bitVectors);
+ this.ndvEstimator = null;
+ }
+
+ @Override
+ public void unsetBitVectors() {
+ super.unsetBitVectors();
+ this.ndvEstimator = null;
+ }
+
+ @Override
+ public boolean isSetBitVectors() {
+ if (ndvEstimator != null) {
+ updateBitVectors();
+ }
+ return super.isSetBitVectors();
+ }
+
+ @Override
+ public void setBitVectorsIsSet(boolean value) {
+ if (ndvEstimator != null) {
+ updateBitVectors();
+ }
+ super.setBitVectorsIsSet(value);
+ }
+
+ public NumDistinctValueEstimator getNdvEstimator() {
+ if (isSetBitVectors() && getBitVectors().length != 0) {
+ updateNdvEstimator();
+ }
+ return ndvEstimator;
+ }
+
+ public void setNdvEstimator(NumDistinctValueEstimator ndvEstimator) {
+ super.unsetBitVectors();
+ this.ndvEstimator = ndvEstimator;
+ }
+
+ private void updateBitVectors() {
+ super.setBitVectors(ndvEstimator.serialize());
+ this.ndvEstimator = null;
+ }
+
+ private void updateNdvEstimator() {
+ this.ndvEstimator = NumDistinctValueEstimatorFactory
+ .getNumDistinctValueEstimator(super.getBitVectors());
+ super.unsetBitVectors();
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/9a36aa90/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/StringColumnStatsDataInspector.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/StringColumnStatsDataInspector.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/StringColumnStatsDataInspector.java
new file mode 100644
index 0000000..2db037b
--- /dev/null
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/StringColumnStatsDataInspector.java
@@ -0,0 +1,129 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.metastore.columnstats.cache;
+
+import java.nio.ByteBuffer;
+
+import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;
+import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory;
+import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
+
+@SuppressWarnings("serial")
+public class StringColumnStatsDataInspector extends StringColumnStatsData {
+
+  // Lazily cached NDV estimator. At most one of {ndvEstimator, bitVectors}
+  // carries the NDV state at any time, avoiding repeated (de)serialization.
+  private NumDistinctValueEstimator ndvEstimator;
+
+  public StringColumnStatsDataInspector() {
+    super();
+  }
+
+  public StringColumnStatsDataInspector(long maxColLen, double avgColLen,
+      long numNulls, long numDVs) {
+    super(maxColLen, avgColLen, numNulls, numDVs);
+  }
+
+  public StringColumnStatsDataInspector(StringColumnStatsDataInspector other) {
+    super(other);
+    // Copy the NDV state from 'other' (this object's own field is still null
+    // here) by serializing the source estimator into our bit vectors.
+    if (other.ndvEstimator != null) {
+      super.setBitVectors(other.ndvEstimator.serialize());
+    }
+  }
+
+  @Override
+  public StringColumnStatsDataInspector deepCopy() {
+    return new StringColumnStatsDataInspector(this);
+  }
+
+  @Override
+  public byte[] getBitVectors() {
+    if (ndvEstimator != null) {
+      updateBitVectors();
+    }
+    return super.getBitVectors();
+  }
+
+  @Override
+  public ByteBuffer bufferForBitVectors() {
+    if (ndvEstimator != null) {
+      updateBitVectors();
+    }
+    return super.bufferForBitVectors();
+  }
+
+  @Override
+  public void setBitVectors(byte[] bitVectors) {
+    super.setBitVectors(bitVectors);
+    this.ndvEstimator = null;
+  }
+
+  @Override
+  public void setBitVectors(ByteBuffer bitVectors) {
+    super.setBitVectors(bitVectors);
+    this.ndvEstimator = null;
+  }
+
+  @Override
+  public void unsetBitVectors() {
+    super.unsetBitVectors();
+    this.ndvEstimator = null;
+  }
+
+  @Override
+  public boolean isSetBitVectors() {
+    if (ndvEstimator != null) {
+      updateBitVectors();
+    }
+    return super.isSetBitVectors();
+  }
+
+  @Override
+  public void setBitVectorsIsSet(boolean value) {
+    if (ndvEstimator != null) {
+      updateBitVectors();
+    }
+    super.setBitVectorsIsSet(value);
+  }
+
+  public NumDistinctValueEstimator getNdvEstimator() {
+    if (isSetBitVectors() && getBitVectors().length != 0) {
+      updateNdvEstimator();
+    }
+    return ndvEstimator;
+  }
+
+  public void setNdvEstimator(NumDistinctValueEstimator ndvEstimator) {
+    super.unsetBitVectors();
+    this.ndvEstimator = ndvEstimator;
+  }
+
+  private void updateBitVectors() {
+    super.setBitVectors(ndvEstimator.serialize());
+    this.ndvEstimator = null;
+  }
+
+  private void updateNdvEstimator() {
+    this.ndvEstimator = NumDistinctValueEstimatorFactory
+        .getNumDistinctValueEstimator(super.getBitVectors());
+    super.unsetBitVectors();
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/9a36aa90/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/ColumnStatsMergerFactory.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/ColumnStatsMergerFactory.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/ColumnStatsMergerFactory.java
index 0ce1847..66be524 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/ColumnStatsMergerFactory.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/ColumnStatsMergerFactory.java
@@ -19,19 +19,16 @@
package org.apache.hadoop.hive.metastore.columnstats.merge;
-import org.apache.commons.lang.StringUtils;
-import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory;
-import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog;
import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData;
import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData._Fields;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
-import org.apache.hadoop.hive.metastore.api.DateColumnStatsData;
-import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData;
-import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;
-import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
-import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
+import org.apache.hadoop.hive.metastore.columnstats.cache.DateColumnStatsDataInspector;
+import org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector;
+import org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector;
+import org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector;
+import org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector;
public class ColumnStatsMergerFactory {
@@ -89,15 +86,15 @@ public class ColumnStatsMergerFactory {
break;
case LONG_STATS:
- csd.setLongStats(new LongColumnStatsData());
+ csd.setLongStats(new LongColumnStatsDataInspector());
break;
case DOUBLE_STATS:
- csd.setDoubleStats(new DoubleColumnStatsData());
+ csd.setDoubleStats(new DoubleColumnStatsDataInspector());
break;
case STRING_STATS:
- csd.setStringStats(new StringColumnStatsData());
+ csd.setStringStats(new StringColumnStatsDataInspector());
break;
case BINARY_STATS:
@@ -105,11 +102,11 @@ public class ColumnStatsMergerFactory {
break;
case DECIMAL_STATS:
- csd.setDecimalStats(new DecimalColumnStatsData());
+ csd.setDecimalStats(new DecimalColumnStatsDataInspector());
break;
case DATE_STATS:
- csd.setDateStats(new DateColumnStatsData());
+ csd.setDateStats(new DateColumnStatsDataInspector());
break;
default:
http://git-wip-us.apache.org/repos/asf/hive/blob/9a36aa90/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DateColumnStatsMerger.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DateColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DateColumnStatsMerger.java
index 2542a00..e783d3c 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DateColumnStatsMerger.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DateColumnStatsMerger.java
@@ -20,16 +20,17 @@
package org.apache.hadoop.hive.metastore.columnstats.merge;
import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;
-import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.api.Date;
-import org.apache.hadoop.hive.metastore.api.DateColumnStatsData;
+import org.apache.hadoop.hive.metastore.columnstats.cache.DateColumnStatsDataInspector;
public class DateColumnStatsMerger extends ColumnStatsMerger {
@Override
public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
- DateColumnStatsData aggregateData = aggregateColStats.getStatsData().getDateStats();
- DateColumnStatsData newData = newColStats.getStatsData().getDateStats();
+ DateColumnStatsDataInspector aggregateData =
+ (DateColumnStatsDataInspector) aggregateColStats.getStatsData().getDateStats();
+ DateColumnStatsDataInspector newData =
+ (DateColumnStatsDataInspector) newColStats.getStatsData().getDateStats();
Date lowValue = aggregateData.getLowValue().compareTo(newData.getLowValue()) < 0 ? aggregateData
.getLowValue() : newData.getLowValue();
aggregateData.setLowValue(lowValue);
@@ -37,19 +38,16 @@ public class DateColumnStatsMerger extends ColumnStatsMerger {
.getHighValue() : newData.getHighValue();
aggregateData.setHighValue(highValue);
aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());
- if (!aggregateData.isSetBitVectors() || aggregateData.getBitVectors().length() == 0
- || !newData.isSetBitVectors() || newData.getBitVectors().length() == 0) {
+ if (aggregateData.getNdvEstimator() == null || newData.getNdvEstimator() == null) {
aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs()));
} else {
- NumDistinctValueEstimator oldEst = NumDistinctValueEstimatorFactory
- .getNumDistinctValueEstimator(aggregateData.getBitVectors());
- NumDistinctValueEstimator newEst = NumDistinctValueEstimatorFactory
- .getNumDistinctValueEstimator(newData.getBitVectors());
+ NumDistinctValueEstimator oldEst = aggregateData.getNdvEstimator();
+ NumDistinctValueEstimator newEst = newData.getNdvEstimator();
long ndv = -1;
if (oldEst.canMerge(newEst)) {
oldEst.mergeEstimators(newEst);
ndv = oldEst.estimateNumDistinctValues();
- aggregateData.setBitVectors(oldEst.serialize());
+ aggregateData.setNdvEstimator(oldEst);
} else {
ndv = Math.max(aggregateData.getNumDVs(), newData.getNumDVs());
}
http://git-wip-us.apache.org/repos/asf/hive/blob/9a36aa90/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java
index 4e8e129..54099f6 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java
@@ -20,16 +20,17 @@
package org.apache.hadoop.hive.metastore.columnstats.merge;
import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;
-import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.api.Decimal;
-import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData;
+import org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector;
public class DecimalColumnStatsMerger extends ColumnStatsMerger {
@Override
public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
- DecimalColumnStatsData aggregateData = aggregateColStats.getStatsData().getDecimalStats();
- DecimalColumnStatsData newData = newColStats.getStatsData().getDecimalStats();
+ DecimalColumnStatsDataInspector aggregateData =
+ (DecimalColumnStatsDataInspector) aggregateColStats.getStatsData().getDecimalStats();
+ DecimalColumnStatsDataInspector newData =
+ (DecimalColumnStatsDataInspector) newColStats.getStatsData().getDecimalStats();
Decimal lowValue = aggregateData.getLowValue() != null
&& (aggregateData.getLowValue().compareTo(newData.getLowValue()) > 0) ? aggregateData
.getLowValue() : newData.getLowValue();
@@ -39,19 +40,16 @@ public class DecimalColumnStatsMerger extends ColumnStatsMerger {
.getHighValue() : newData.getHighValue();
aggregateData.setHighValue(highValue);
aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());
- if (!aggregateData.isSetBitVectors() || aggregateData.getBitVectors().length() == 0
- || !newData.isSetBitVectors() || newData.getBitVectors().length() == 0) {
+ if (aggregateData.getNdvEstimator() == null || newData.getNdvEstimator() == null) {
aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs()));
} else {
- NumDistinctValueEstimator oldEst = NumDistinctValueEstimatorFactory
- .getNumDistinctValueEstimator(aggregateData.getBitVectors());
- NumDistinctValueEstimator newEst = NumDistinctValueEstimatorFactory
- .getNumDistinctValueEstimator(newData.getBitVectors());
+ NumDistinctValueEstimator oldEst = aggregateData.getNdvEstimator();
+ NumDistinctValueEstimator newEst = newData.getNdvEstimator();
long ndv = -1;
if (oldEst.canMerge(newEst)) {
oldEst.mergeEstimators(newEst);
ndv = oldEst.estimateNumDistinctValues();
- aggregateData.setBitVectors(oldEst.serialize());
+ aggregateData.setNdvEstimator(oldEst);
} else {
ndv = Math.max(aggregateData.getNumDVs(), newData.getNumDVs());
}
http://git-wip-us.apache.org/repos/asf/hive/blob/9a36aa90/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DoubleColumnStatsMerger.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DoubleColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DoubleColumnStatsMerger.java
index 4ef5c39..817a55d 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DoubleColumnStatsMerger.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DoubleColumnStatsMerger.java
@@ -20,31 +20,29 @@
package org.apache.hadoop.hive.metastore.columnstats.merge;
import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;
-import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
-import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;
+import org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector;
public class DoubleColumnStatsMerger extends ColumnStatsMerger {
@Override
public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
- DoubleColumnStatsData aggregateData = aggregateColStats.getStatsData().getDoubleStats();
- DoubleColumnStatsData newData = newColStats.getStatsData().getDoubleStats();
+ DoubleColumnStatsDataInspector aggregateData =
+ (DoubleColumnStatsDataInspector) aggregateColStats.getStatsData().getDoubleStats();
+ DoubleColumnStatsDataInspector newData =
+ (DoubleColumnStatsDataInspector) newColStats.getStatsData().getDoubleStats();
aggregateData.setLowValue(Math.min(aggregateData.getLowValue(), newData.getLowValue()));
aggregateData.setHighValue(Math.max(aggregateData.getHighValue(), newData.getHighValue()));
aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());
- if (!aggregateData.isSetBitVectors() || aggregateData.getBitVectors().length() == 0
- || !newData.isSetBitVectors() || newData.getBitVectors().length() == 0) {
+ if (aggregateData.getNdvEstimator() == null || newData.getNdvEstimator() == null) {
aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs()));
} else {
- NumDistinctValueEstimator oldEst = NumDistinctValueEstimatorFactory
- .getNumDistinctValueEstimator(aggregateData.getBitVectors());
- NumDistinctValueEstimator newEst = NumDistinctValueEstimatorFactory
- .getNumDistinctValueEstimator(newData.getBitVectors());
+ NumDistinctValueEstimator oldEst = aggregateData.getNdvEstimator();
+ NumDistinctValueEstimator newEst = newData.getNdvEstimator();
long ndv = -1;
if (oldEst.canMerge(newEst)) {
oldEst.mergeEstimators(newEst);
ndv = oldEst.estimateNumDistinctValues();
- aggregateData.setBitVectors(oldEst.serialize());
+ aggregateData.setNdvEstimator(oldEst);
} else {
ndv = Math.max(aggregateData.getNumDVs(), newData.getNumDVs());
}
http://git-wip-us.apache.org/repos/asf/hive/blob/9a36aa90/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/LongColumnStatsMerger.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/LongColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/LongColumnStatsMerger.java
index acf7f03..dc048e0 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/LongColumnStatsMerger.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/LongColumnStatsMerger.java
@@ -20,31 +20,29 @@
package org.apache.hadoop.hive.metastore.columnstats.merge;
import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;
-import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
-import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
+import org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector;
public class LongColumnStatsMerger extends ColumnStatsMerger {
@Override
public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
- LongColumnStatsData aggregateData = aggregateColStats.getStatsData().getLongStats();
- LongColumnStatsData newData = newColStats.getStatsData().getLongStats();
+ LongColumnStatsDataInspector aggregateData =
+ (LongColumnStatsDataInspector) aggregateColStats.getStatsData().getLongStats();
+ LongColumnStatsDataInspector newData =
+ (LongColumnStatsDataInspector) newColStats.getStatsData().getLongStats();
aggregateData.setLowValue(Math.min(aggregateData.getLowValue(), newData.getLowValue()));
aggregateData.setHighValue(Math.max(aggregateData.getHighValue(), newData.getHighValue()));
aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());
- if (!aggregateData.isSetBitVectors() || aggregateData.getBitVectors().length() == 0
- || !newData.isSetBitVectors() || newData.getBitVectors().length() == 0) {
+ if (aggregateData.getNdvEstimator() == null || newData.getNdvEstimator() == null) {
aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs()));
} else {
- NumDistinctValueEstimator oldEst = NumDistinctValueEstimatorFactory
- .getNumDistinctValueEstimator(aggregateData.getBitVectors());
- NumDistinctValueEstimator newEst = NumDistinctValueEstimatorFactory
- .getNumDistinctValueEstimator(newData.getBitVectors());
+ NumDistinctValueEstimator oldEst = aggregateData.getNdvEstimator();
+ NumDistinctValueEstimator newEst = newData.getNdvEstimator();
long ndv = -1;
if (oldEst.canMerge(newEst)) {
oldEst.mergeEstimators(newEst);
ndv = oldEst.estimateNumDistinctValues();
- aggregateData.setBitVectors(oldEst.serialize());
+ aggregateData.setNdvEstimator(oldEst);
} else {
ndv = Math.max(aggregateData.getNumDVs(), newData.getNumDVs());
}
http://git-wip-us.apache.org/repos/asf/hive/blob/9a36aa90/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/StringColumnStatsMerger.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/StringColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/StringColumnStatsMerger.java
index b3cd33c..e353b8f 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/StringColumnStatsMerger.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/StringColumnStatsMerger.java
@@ -20,31 +20,29 @@
package org.apache.hadoop.hive.metastore.columnstats.merge;
import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;
-import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
-import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
+import org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector;
public class StringColumnStatsMerger extends ColumnStatsMerger {
@Override
public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
- StringColumnStatsData aggregateData = aggregateColStats.getStatsData().getStringStats();
- StringColumnStatsData newData = newColStats.getStatsData().getStringStats();
+ StringColumnStatsDataInspector aggregateData =
+ (StringColumnStatsDataInspector) aggregateColStats.getStatsData().getStringStats();
+ StringColumnStatsDataInspector newData =
+ (StringColumnStatsDataInspector) newColStats.getStatsData().getStringStats();
aggregateData.setMaxColLen(Math.max(aggregateData.getMaxColLen(), newData.getMaxColLen()));
aggregateData.setAvgColLen(Math.max(aggregateData.getAvgColLen(), newData.getAvgColLen()));
aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());
- if (!aggregateData.isSetBitVectors() || aggregateData.getBitVectors().length() == 0
- || !newData.isSetBitVectors() || newData.getBitVectors().length() == 0) {
+ if (aggregateData.getNdvEstimator() == null || newData.getNdvEstimator() == null) {
aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs()));
} else {
- NumDistinctValueEstimator oldEst = NumDistinctValueEstimatorFactory
- .getNumDistinctValueEstimator(aggregateData.getBitVectors());
- NumDistinctValueEstimator newEst = NumDistinctValueEstimatorFactory
- .getNumDistinctValueEstimator(newData.getBitVectors());
+ NumDistinctValueEstimator oldEst = aggregateData.getNdvEstimator();
+ NumDistinctValueEstimator newEst = newData.getNdvEstimator();
long ndv = -1;
if (oldEst.canMerge(newEst)) {
oldEst.mergeEstimators(newEst);
ndv = oldEst.estimateNumDistinctValues();
- aggregateData.setBitVectors(oldEst.serialize());
+ aggregateData.setNdvEstimator(oldEst);
} else {
ndv = Math.max(aggregateData.getNumDVs(), newData.getNumDVs());
}
http://git-wip-us.apache.org/repos/asf/hive/blob/9a36aa90/metastore/src/test/org/apache/hadoop/hive/metastore/TestOldSchema.java
----------------------------------------------------------------------
diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/TestOldSchema.java b/metastore/src/test/org/apache/hadoop/hive/metastore/TestOldSchema.java
index 54828f2..8312b34 100644
--- a/metastore/src/test/org/apache/hadoop/hive/metastore/TestOldSchema.java
+++ b/metastore/src/test/org/apache/hadoop/hive/metastore/TestOldSchema.java
@@ -20,10 +20,7 @@ package org.apache.hadoop.hive.metastore;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
-import java.util.HashMap;
-import java.util.HashSet;
import java.util.List;
-import java.util.Set;
import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog;
import org.apache.hadoop.hive.conf.HiveConf;
@@ -35,27 +32,21 @@ import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.FileMetadataExprType;
-import org.apache.hadoop.hive.metastore.api.Function;
-import org.apache.hadoop.hive.metastore.api.Index;
import org.apache.hadoop.hive.metastore.api.InvalidInputException;
import org.apache.hadoop.hive.metastore.api.InvalidObjectException;
import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
import org.apache.hadoop.hive.metastore.api.Partition;
-import org.apache.hadoop.hive.metastore.api.SQLForeignKey;
-import org.apache.hadoop.hive.metastore.api.SQLPrimaryKey;
import org.apache.hadoop.hive.metastore.api.SerDeInfo;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
-import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
-import org.mockito.Mockito;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -93,7 +84,7 @@ public class TestOldSchema {
}
}
- String bitVectors[] = new String[2];
+ byte bitVectors[][] = new byte[2][];
@Before
public void setUp() throws Exception {
http://git-wip-us.apache.org/repos/asf/hive/blob/9a36aa90/metastore/src/test/org/apache/hadoop/hive/metastore/cache/TestCachedStore.java
----------------------------------------------------------------------
diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/cache/TestCachedStore.java b/metastore/src/test/org/apache/hadoop/hive/metastore/cache/TestCachedStore.java
index e31dad3..6709c9d 100644
--- a/metastore/src/test/org/apache/hadoop/hive/metastore/cache/TestCachedStore.java
+++ b/metastore/src/test/org/apache/hadoop/hive/metastore/cache/TestCachedStore.java
@@ -24,7 +24,6 @@ import java.util.List;
import java.util.Map;
import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog;
-import org.apache.hadoop.hive.common.ndv.hll.HyperLogLogUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.ObjectStore;
import org.apache.hadoop.hive.metastore.TableType;
@@ -37,14 +36,14 @@ import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
-import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.PrincipalType;
import org.apache.hadoop.hive.metastore.api.SerDeInfo;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
-import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector;
+import org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
@@ -435,7 +434,7 @@ public class TestCachedStore {
// Col1
ColumnStatisticsData data1 = new ColumnStatisticsData();
ColumnStatisticsObj col1Stats = new ColumnStatisticsObj(col1.getName(), col1.getType(), data1);
- LongColumnStatsData longStats = new LongColumnStatsData();
+ LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector();
longStats.setLowValue(col1LowVal);
longStats.setHighValue(col1HighVal);
longStats.setNumNulls(col1Nulls);
@@ -446,7 +445,7 @@ public class TestCachedStore {
// Col2
ColumnStatisticsData data2 = new ColumnStatisticsData();
ColumnStatisticsObj col2Stats = new ColumnStatisticsObj(col2.getName(), col2.getType(), data2);
- StringColumnStatsData stringStats = new StringColumnStatsData();
+ StringColumnStatsDataInspector stringStats = new StringColumnStatsDataInspector();
stringStats.setMaxColLen(col2MaxColLen);
stringStats.setAvgColLen(col2AvgColLen);
stringStats.setNumNulls(col2Nulls);
@@ -718,7 +717,7 @@ public class TestCachedStore {
ColumnStatisticsData data = new ColumnStatisticsData();
ColumnStatisticsObj colStats = new ColumnStatisticsObj(colName, "int", data);
- LongColumnStatsData longStats = new LongColumnStatsData();
+ LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector();
longStats.setLowValue(0);
longStats.setHighValue(100);
longStats.setNumNulls(50);
@@ -784,7 +783,7 @@ public class TestCachedStore {
ColumnStatisticsData data = new ColumnStatisticsData();
ColumnStatisticsObj colStats = new ColumnStatisticsObj(colName, "int", data);
- LongColumnStatsData longStats = new LongColumnStatsData();
+ LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector();
longStats.setLowValue(0);
longStats.setHighValue(100);
longStats.setNumNulls(50);
@@ -854,7 +853,7 @@ public class TestCachedStore {
ColumnStatisticsData data = new ColumnStatisticsData();
ColumnStatisticsObj colStats = new ColumnStatisticsObj(colName, "int", data);
- LongColumnStatsData longStats = new LongColumnStatsData();
+ LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector();
longStats.setLowValue(0);
longStats.setHighValue(100);
longStats.setNumNulls(50);
http://git-wip-us.apache.org/repos/asf/hive/blob/9a36aa90/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
index d96f432..2b2c004 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
@@ -24,10 +24,7 @@ import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData;
@@ -37,15 +34,15 @@ import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.api.Date;
-import org.apache.hadoop.hive.metastore.api.DateColumnStatsData;
import org.apache.hadoop.hive.metastore.api.Decimal;
-import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData;
-import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
-import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.SetPartitionsStatsRequest;
-import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
+import org.apache.hadoop.hive.metastore.columnstats.cache.DateColumnStatsDataInspector;
+import org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector;
+import org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector;
+import org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector;
+import org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector;
import org.apache.hadoop.hive.ql.CompilationOpContext;
import org.apache.hadoop.hive.ql.DriverContext;
import org.apache.hadoop.hive.ql.QueryPlan;
@@ -63,6 +60,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector;
@@ -70,6 +68,8 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspect
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.StringUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
* ColumnStatsTask implementation.
@@ -136,8 +136,8 @@ public class ColumnStatsTask extends Task<ColumnStatsWork> implements Serializab
statsObj.getStatsData().getDoubleStats().setLowValue(d);
} else if (fName.equals("ndvbitvector")) {
PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
- String v = ((StringObjectInspector) poi).getPrimitiveJavaObject(o);
- statsObj.getStatsData().getDoubleStats().setBitVectors(v);;
+ byte[] buf = ((BinaryObjectInspector) poi).getPrimitiveJavaObject(o);
+ statsObj.getStatsData().getDoubleStats().setBitVectors(buf);
}
}
@@ -157,8 +157,8 @@ public class ColumnStatsTask extends Task<ColumnStatsWork> implements Serializab
statsObj.getStatsData().getDecimalStats().setLowValue(convertToThriftDecimal(d));
} else if (fName.equals("ndvbitvector")) {
PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
- String v = ((StringObjectInspector) poi).getPrimitiveJavaObject(o);
- statsObj.getStatsData().getDecimalStats().setBitVectors(v);;
+ byte[] buf = ((BinaryObjectInspector) poi).getPrimitiveJavaObject(o);
+ statsObj.getStatsData().getDecimalStats().setBitVectors(buf);
}
}
@@ -182,8 +182,8 @@ public class ColumnStatsTask extends Task<ColumnStatsWork> implements Serializab
statsObj.getStatsData().getLongStats().setLowValue(v);
} else if (fName.equals("ndvbitvector")) {
PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
- String v = ((StringObjectInspector) poi).getPrimitiveJavaObject(o);
- statsObj.getStatsData().getLongStats().setBitVectors(v);;
+ byte[] buf = ((BinaryObjectInspector) poi).getPrimitiveJavaObject(o);
+ statsObj.getStatsData().getLongStats().setBitVectors(buf);
}
}
@@ -203,8 +203,8 @@ public class ColumnStatsTask extends Task<ColumnStatsWork> implements Serializab
statsObj.getStatsData().getStringStats().setMaxColLen(v);
} else if (fName.equals("ndvbitvector")) {
PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
- String v = ((StringObjectInspector) poi).getPrimitiveJavaObject(o);
- statsObj.getStatsData().getStringStats().setBitVectors(v);;
+ byte[] buf = ((BinaryObjectInspector) poi).getPrimitiveJavaObject(o);
+ statsObj.getStatsData().getStringStats().setBitVectors(buf);
}
}
@@ -238,8 +238,8 @@ public class ColumnStatsTask extends Task<ColumnStatsWork> implements Serializab
statsObj.getStatsData().getDateStats().setLowValue(new Date(v.getDays()));
} else if (fName.equals("ndvbitvector")) {
PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
- String v = ((StringObjectInspector) poi).getPrimitiveJavaObject(o);
- statsObj.getStatsData().getDateStats().setBitVectors(v);;
+ byte[] buf = ((BinaryObjectInspector) poi).getPrimitiveJavaObject(o);
+ statsObj.getStatsData().getDateStats().setBitVectors(buf);
}
}
@@ -255,15 +255,15 @@ public class ColumnStatsTask extends Task<ColumnStatsWork> implements Serializab
ColumnStatisticsData statsData = new ColumnStatisticsData();
if (s.equalsIgnoreCase("long")) {
- LongColumnStatsData longStats = new LongColumnStatsData();
+ LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector();
statsData.setLongStats(longStats);
statsObj.setStatsData(statsData);
} else if (s.equalsIgnoreCase("double")) {
- DoubleColumnStatsData doubleStats = new DoubleColumnStatsData();
+ DoubleColumnStatsDataInspector doubleStats = new DoubleColumnStatsDataInspector();
statsData.setDoubleStats(doubleStats);
statsObj.setStatsData(statsData);
} else if (s.equalsIgnoreCase("string")) {
- StringColumnStatsData stringStats = new StringColumnStatsData();
+ StringColumnStatsDataInspector stringStats = new StringColumnStatsDataInspector();
statsData.setStringStats(stringStats);
statsObj.setStatsData(statsData);
} else if (s.equalsIgnoreCase("boolean")) {
@@ -275,11 +275,11 @@ public class ColumnStatsTask extends Task<ColumnStatsWork> implements Serializab
statsData.setBinaryStats(binaryStats);
statsObj.setStatsData(statsData);
} else if (s.equalsIgnoreCase("decimal")) {
- DecimalColumnStatsData decimalStats = new DecimalColumnStatsData();
+ DecimalColumnStatsDataInspector decimalStats = new DecimalColumnStatsDataInspector();
statsData.setDecimalStats(decimalStats);
statsObj.setStatsData(statsData);
} else if (s.equalsIgnoreCase("date")) {
- DateColumnStatsData dateStats = new DateColumnStatsData();
+ DateColumnStatsDataInspector dateStats = new DateColumnStatsDataInspector();
statsData.setDateStats(dateStats);
statsObj.setStatsData(statsData);
}
http://git-wip-us.apache.org/repos/asf/hive/blob/9a36aa90/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsUpdateTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsUpdateTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsUpdateTask.java
index 2acc777..82fbf28 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsUpdateTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsUpdateTask.java
@@ -26,8 +26,6 @@ import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData;
import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData;
import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
@@ -35,14 +33,14 @@ import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.api.Date;
-import org.apache.hadoop.hive.metastore.api.DateColumnStatsData;
import org.apache.hadoop.hive.metastore.api.Decimal;
-import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData;
-import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;
-import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.SetPartitionsStatsRequest;
-import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
+import org.apache.hadoop.hive.metastore.columnstats.cache.DateColumnStatsDataInspector;
+import org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector;
+import org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector;
+import org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector;
+import org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector;
import org.apache.hadoop.hive.ql.CompilationOpContext;
import org.apache.hadoop.hive.ql.DriverContext;
import org.apache.hadoop.hive.ql.QueryPlan;
@@ -55,6 +53,8 @@ import org.apache.hadoop.hive.ql.plan.ColumnStatsUpdateWork;
import org.apache.hadoop.hive.ql.plan.api.StageType;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
* ColumnStatsUpdateTask implementation. For example, ALTER TABLE src_stat
@@ -101,7 +101,7 @@ public class ColumnStatsUpdateTask extends Task<ColumnStatsUpdateWork> {
if (columnType.equalsIgnoreCase("long") || columnType.equalsIgnoreCase("tinyint")
|| columnType.equalsIgnoreCase("smallint") || columnType.equalsIgnoreCase("int")
|| columnType.equalsIgnoreCase("bigint") || columnType.equalsIgnoreCase("timestamp")) {
- LongColumnStatsData longStats = new LongColumnStatsData();
+ LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector();
longStats.setNumNullsIsSet(false);
longStats.setNumDVsIsSet(false);
longStats.setLowValueIsSet(false);
@@ -125,7 +125,7 @@ public class ColumnStatsUpdateTask extends Task<ColumnStatsUpdateWork> {
statsData.setLongStats(longStats);
statsObj.setStatsData(statsData);
} else if (columnType.equalsIgnoreCase("double") || columnType.equalsIgnoreCase("float")) {
- DoubleColumnStatsData doubleStats = new DoubleColumnStatsData();
+ DoubleColumnStatsDataInspector doubleStats = new DoubleColumnStatsDataInspector();
doubleStats.setNumNullsIsSet(false);
doubleStats.setNumDVsIsSet(false);
doubleStats.setLowValueIsSet(false);
@@ -150,7 +150,7 @@ public class ColumnStatsUpdateTask extends Task<ColumnStatsUpdateWork> {
statsObj.setStatsData(statsData);
} else if (columnType.equalsIgnoreCase("string") || columnType.toLowerCase().startsWith("char")
|| columnType.toLowerCase().startsWith("varchar")) { //char(x),varchar(x) types
- StringColumnStatsData stringStats = new StringColumnStatsData();
+ StringColumnStatsDataInspector stringStats = new StringColumnStatsDataInspector();
stringStats.setMaxColLenIsSet(false);
stringStats.setAvgColLenIsSet(false);
stringStats.setNumNullsIsSet(false);
@@ -216,7 +216,7 @@ public class ColumnStatsUpdateTask extends Task<ColumnStatsUpdateWork> {
statsData.setBinaryStats(binaryStats);
statsObj.setStatsData(statsData);
} else if (columnType.toLowerCase().startsWith("decimal")) { //decimal(a,b) type
- DecimalColumnStatsData decimalStats = new DecimalColumnStatsData();
+ DecimalColumnStatsDataInspector decimalStats = new DecimalColumnStatsDataInspector();
decimalStats.setNumNullsIsSet(false);
decimalStats.setNumDVsIsSet(false);
decimalStats.setLowValueIsSet(false);
@@ -244,7 +244,7 @@ public class ColumnStatsUpdateTask extends Task<ColumnStatsUpdateWork> {
statsData.setDecimalStats(decimalStats);
statsObj.setStatsData(statsData);
} else if (columnType.equalsIgnoreCase("date")) {
- DateColumnStatsData dateStats = new DateColumnStatsData();
+ DateColumnStatsDataInspector dateStats = new DateColumnStatsDataInspector();
Map<String, String> mapProp = work.getMapProp();
for (Entry<String, String> entry : mapProp.entrySet()) {
String fName = entry.getKey();
http://git-wip-us.apache.org/repos/asf/hive/blob/9a36aa90/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java
index 2380073..a7f2967 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java
@@ -18,6 +18,7 @@
package org.apache.hadoop.hive.ql.metadata.formatting;
+import org.apache.commons.codec.binary.Base64;
import org.apache.commons.lang.StringEscapeUtils;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.conf.HiveConf;
@@ -54,6 +55,7 @@ import org.apache.hive.common.util.HiveStringUtils;
import java.math.BigInteger;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
@@ -176,6 +178,16 @@ public final class MetaDataFormatUtils {
return writableValue.toString();
}
+ private static String convertToString(byte[] buf) {
+ if (buf == null || buf.length == 0) {
+ return "";
+ }
+ byte[] sub = new byte[2];
+ sub[0] = (byte) buf[0];
+ sub[1] = (byte) buf[1];
+ return new String(sub);
+ }
+
private static ColumnStatisticsObj getColumnStatisticsObject(String colName,
String colType, List<ColumnStatisticsObj> colStats) {
if (colStats != null && !colStats.isEmpty()) {
@@ -700,7 +712,7 @@ public final class MetaDataFormatUtils {
} else if (csd.isSetStringStats()) {
StringColumnStatsData scsd = csd.getStringStats();
appendColumnStats(tableInfo, "", "", scsd.getNumNulls(), scsd.getNumDVs(),
- scsd.getBitVectors() == null ? "" : scsd.getBitVectors(), scsd.getAvgColLen(),
+ convertToString(scsd.getBitVectors()), scsd.getAvgColLen(),
scsd.getMaxColLen(), "", "");
} else if (csd.isSetBooleanStats()) {
BooleanColumnStatsData bcsd = csd.getBooleanStats();
@@ -710,22 +722,26 @@ public final class MetaDataFormatUtils {
DecimalColumnStatsData dcsd = csd.getDecimalStats();
appendColumnStats(tableInfo, convertToString(dcsd.getLowValue()),
convertToString(dcsd.getHighValue()), dcsd.getNumNulls(), dcsd.getNumDVs(),
- dcsd.getBitVectors() == null ? "" : dcsd.getBitVectors(),
+ convertToString(dcsd.getBitVectors()),
"", "", "", "");
} else if (csd.isSetDoubleStats()) {
DoubleColumnStatsData dcsd = csd.getDoubleStats();
appendColumnStats(tableInfo, dcsd.getLowValue(), dcsd.getHighValue(), dcsd.getNumNulls(),
- dcsd.getNumDVs(), dcsd.getBitVectors() == null ? "" : dcsd.getBitVectors(), "", "", "", "");
+ dcsd.getNumDVs(), convertToString(dcsd.getBitVectors()),
+ "", "", "", "");
} else if (csd.isSetLongStats()) {
LongColumnStatsData lcsd = csd.getLongStats();
appendColumnStats(tableInfo, lcsd.getLowValue(), lcsd.getHighValue(), lcsd.getNumNulls(),
- lcsd.getNumDVs(), lcsd.getBitVectors() == null ? "" : lcsd.getBitVectors(), "", "", "", "");
+ lcsd.getNumDVs(), convertToString(lcsd.getBitVectors()),
+ "", "", "", "");
} else if (csd.isSetDateStats()) {
DateColumnStatsData dcsd = csd.getDateStats();
appendColumnStats(tableInfo,
convertToString(dcsd.getLowValue()),
convertToString(dcsd.getHighValue()),
- dcsd.getNumNulls(), dcsd.getNumDVs(), dcsd.getBitVectors() == null ? "" : dcsd.getBitVectors(), "", "", "", "");
+ dcsd.getNumNulls(), dcsd.getNumDVs(),
+ convertToString(dcsd.getBitVectors()),
+ "", "", "", "");
}
} else {
appendColumnStats(tableInfo, "", "", "", "", "", "", "", "", "");