Posted to commits@hive.apache.org by jc...@apache.org on 2017/08/17 22:42:33 UTC

[09/10] hive git commit: HIVE-17286: Avoid expensive String serialization/deserialization for bitvectors (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
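
Before the per-file diffs, here is a minimal standalone sketch (illustrative only, not part of the commit; `CachedEstimatorSketch` and its inner classes are stand-ins, with `Estimator` playing the role of Hive's NumDistinctValueEstimator) of the caching pattern the patch introduces: instead of re-deserializing the serialized bit vectors through NumDistinctValueEstimatorFactory on every merge, the new *ColumnStatsDataInspector subclasses keep a live estimator and materialize at most one of the two representations at a time.

  import java.util.Arrays;

  public class CachedEstimatorSketch {

    // Stand-in for NumDistinctValueEstimator: pretend the byte[] is its wire form.
    static class Estimator {
      private final byte[] bits;
      Estimator(byte[] bits) { this.bits = bits; }
      byte[] serialize() { return bits; }
      static Estimator deserialize(byte[] bits) { return new Estimator(bits); }
    }

    // Stand-in for a *ColumnStatsDataInspector: holds either the serialized
    // bit vectors or a live estimator, never both at once.
    static class StatsData {
      private byte[] bitVectors;
      private Estimator ndvEstimator;

      void setBitVectors(byte[] bv) {
        bitVectors = bv;
        ndvEstimator = null;          // the bytes are now the source of truth
      }

      Estimator getNdvEstimator() {
        if (bitVectors != null && bitVectors.length != 0) {
          ndvEstimator = Estimator.deserialize(bitVectors);  // deserialize once
          bitVectors = null;          // the live object is now the source of truth
        }
        return ndvEstimator;
      }
    }

    public static void main(String[] args) {
      StatsData data = new StatsData();
      data.setBitVectors(new byte[] { 1, 2, 3 });
      Estimator first = data.getNdvEstimator();   // deserializes once
      Estimator second = data.getNdvEstimator();  // cached, no second deserialization
      System.out.println(first == second);        // true
      System.out.println(Arrays.toString(first.serialize()));
    }
  }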

http://git-wip-us.apache.org/repos/asf/hive/blob/9a36aa90/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DoubleColumnStatsAggregator.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DoubleColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DoubleColumnStatsAggregator.java
index 1b44dd9..bb4a725 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DoubleColumnStatsAggregator.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DoubleColumnStatsAggregator.java
@@ -33,6 +33,7 @@ import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
 import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;
 import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -64,14 +65,14 @@ public class DoubleColumnStatsAggregator extends ColumnStatsAggregator implement
         statsObj = ColumnStatsAggregatorFactory.newColumnStaticsObj(colName, colType, cso
             .getStatsData().getSetField());
       }
-      if (!cso.getStatsData().getDoubleStats().isSetBitVectors()
-          || cso.getStatsData().getDoubleStats().getBitVectors().length() == 0) {
+      DoubleColumnStatsDataInspector doubleColumnStatsData =
+          (DoubleColumnStatsDataInspector) cso.getStatsData().getDoubleStats();
+      if (doubleColumnStatsData.getNdvEstimator() == null) {
         ndvEstimator = null;
         break;
       } else {
         // check if all of the bit vectors can merge
-        NumDistinctValueEstimator estimator = NumDistinctValueEstimatorFactory
-            .getNumDistinctValueEstimator(cso.getStatsData().getDoubleStats().getBitVectors());
+        NumDistinctValueEstimator estimator = doubleColumnStatsData.getNdvEstimator();
         if (ndvEstimator == null) {
           ndvEstimator = estimator;
         } else {
@@ -91,19 +92,19 @@ public class DoubleColumnStatsAggregator extends ColumnStatsAggregator implement
     LOG.debug("all of the bit vectors can merge for " + colName + " is " + (ndvEstimator != null));
     ColumnStatisticsData columnStatisticsData = new ColumnStatisticsData();
     if (doAllPartitionContainStats || css.size() < 2) {
-      DoubleColumnStatsData aggregateData = null;
+      DoubleColumnStatsDataInspector aggregateData = null;
       long lowerBound = 0;
       long higherBound = 0;
       double densityAvgSum = 0.0;
       for (ColumnStatistics cs : css) {
         ColumnStatisticsObj cso = cs.getStatsObjIterator().next();
-        DoubleColumnStatsData newData = cso.getStatsData().getDoubleStats();
+        DoubleColumnStatsDataInspector newData =
+            (DoubleColumnStatsDataInspector) cso.getStatsData().getDoubleStats();
         lowerBound = Math.max(lowerBound, newData.getNumDVs());
         higherBound += newData.getNumDVs();
         densityAvgSum += (newData.getHighValue() - newData.getLowValue()) / newData.getNumDVs();
         if (ndvEstimator != null) {
-          ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory
-              .getNumDistinctValueEstimator(newData.getBitVectors()));
+          ndvEstimator.mergeEstimators(newData.getNdvEstimator());
         }
         if (aggregateData == null) {
           aggregateData = newData.deepCopy();
@@ -174,7 +175,8 @@ public class DoubleColumnStatsAggregator extends ColumnStatsAggregator implement
         for (ColumnStatistics cs : css) {
           String partName = cs.getStatsDesc().getPartName();
           ColumnStatisticsObj cso = cs.getStatsObjIterator().next();
-          DoubleColumnStatsData newData = cso.getStatsData().getDoubleStats();
+          DoubleColumnStatsDataInspector newData =
+              (DoubleColumnStatsDataInspector) cso.getStatsData().getDoubleStats();
           // newData.isSetBitVectors() should be true for sure because we
           // already checked it before.
           if (indexMap.get(partName) != curIndex) {
@@ -210,8 +212,7 @@ public class DoubleColumnStatsAggregator extends ColumnStatsAggregator implement
                 newData.getHighValue()));
             aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());
           }
-          ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory
-              .getNumDistinctValueEstimator(newData.getBitVectors()));
+          ndvEstimator.mergeEstimators(newData.getNdvEstimator());
         }
         if (length > 0) {
           // we have to set ndv
@@ -239,7 +240,7 @@ public class DoubleColumnStatsAggregator extends ColumnStatsAggregator implement
       int numPartsWithStats, Map<String, Double> adjustedIndexMap,
       Map<String, ColumnStatisticsData> adjustedStatsMap, double densityAvg) {
     int rightBorderInd = numParts;
-    DoubleColumnStatsData extrapolateDoubleData = new DoubleColumnStatsData();
+    DoubleColumnStatsDataInspector extrapolateDoubleData = new DoubleColumnStatsDataInspector();
     Map<String, DoubleColumnStatsData> extractedAdjustedStatsMap = new HashMap<>();
     for (Map.Entry<String, ColumnStatisticsData> entry : adjustedStatsMap.entrySet()) {
       extractedAdjustedStatsMap.put(entry.getKey(), entry.getValue().getDoubleStats());

http://git-wip-us.apache.org/repos/asf/hive/blob/9a36aa90/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/LongColumnStatsAggregator.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/LongColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/LongColumnStatsAggregator.java
index 802ad1a..5b1145e 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/LongColumnStatsAggregator.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/LongColumnStatsAggregator.java
@@ -33,6 +33,7 @@ import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
 import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
 import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -64,14 +65,14 @@ public class LongColumnStatsAggregator extends ColumnStatsAggregator implements
         statsObj = ColumnStatsAggregatorFactory.newColumnStaticsObj(colName, colType, cso
             .getStatsData().getSetField());
       }
-      if (!cso.getStatsData().getLongStats().isSetBitVectors()
-          || cso.getStatsData().getLongStats().getBitVectors().length() == 0) {
+      LongColumnStatsDataInspector longColumnStatsData =
+          (LongColumnStatsDataInspector) cso.getStatsData().getLongStats();
+      if (longColumnStatsData.getNdvEstimator() == null) {
         ndvEstimator = null;
         break;
       } else {
         // check if all of the bit vectors can merge
-        NumDistinctValueEstimator estimator = NumDistinctValueEstimatorFactory
-            .getNumDistinctValueEstimator(cso.getStatsData().getLongStats().getBitVectors());
+        NumDistinctValueEstimator estimator = longColumnStatsData.getNdvEstimator();
         if (ndvEstimator == null) {
           ndvEstimator = estimator;
         } else {
@@ -91,19 +92,19 @@ public class LongColumnStatsAggregator extends ColumnStatsAggregator implements
     LOG.debug("all of the bit vectors can merge for " + colName + " is " + (ndvEstimator != null));
     ColumnStatisticsData columnStatisticsData = new ColumnStatisticsData();
     if (doAllPartitionContainStats || css.size() < 2) {
-      LongColumnStatsData aggregateData = null;
+      LongColumnStatsDataInspector aggregateData = null;
       long lowerBound = 0;
       long higherBound = 0;
       double densityAvgSum = 0.0;
       for (ColumnStatistics cs : css) {
         ColumnStatisticsObj cso = cs.getStatsObjIterator().next();
-        LongColumnStatsData newData = cso.getStatsData().getLongStats();
+        LongColumnStatsDataInspector newData =
+            (LongColumnStatsDataInspector) cso.getStatsData().getLongStats();
         lowerBound = Math.max(lowerBound, newData.getNumDVs());
         higherBound += newData.getNumDVs();
         densityAvgSum += (newData.getHighValue() - newData.getLowValue()) / newData.getNumDVs();
         if (ndvEstimator != null) {
-          ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory
-              .getNumDistinctValueEstimator(newData.getBitVectors()));
+          ndvEstimator.mergeEstimators(newData.getNdvEstimator());
         }
         if (aggregateData == null) {
           aggregateData = newData.deepCopy();
@@ -171,11 +172,12 @@ public class LongColumnStatsAggregator extends ColumnStatsAggregator implements
         double pseudoIndexSum = 0;
         int length = 0;
         int curIndex = -1;
-        LongColumnStatsData aggregateData = null;
+        LongColumnStatsDataInspector aggregateData = null;
         for (ColumnStatistics cs : css) {
           String partName = cs.getStatsDesc().getPartName();
           ColumnStatisticsObj cso = cs.getStatsObjIterator().next();
-          LongColumnStatsData newData = cso.getStatsData().getLongStats();
+          LongColumnStatsDataInspector newData =
+              (LongColumnStatsDataInspector) cso.getStatsData().getLongStats();
           // newData.isSetBitVectors() should be true for sure because we
           // already checked it before.
           if (indexMap.get(partName) != curIndex) {
@@ -211,8 +213,7 @@ public class LongColumnStatsAggregator extends ColumnStatsAggregator implements
                 newData.getHighValue()));
             aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());
           }
-          ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory
-              .getNumDistinctValueEstimator(newData.getBitVectors()));
+          ndvEstimator.mergeEstimators(newData.getNdvEstimator());
         }
         if (length > 0) {
           // we have to set ndv
@@ -240,7 +241,7 @@ public class LongColumnStatsAggregator extends ColumnStatsAggregator implements
       int numPartsWithStats, Map<String, Double> adjustedIndexMap,
       Map<String, ColumnStatisticsData> adjustedStatsMap, double densityAvg) {
     int rightBorderInd = numParts;
-    LongColumnStatsData extrapolateLongData = new LongColumnStatsData();
+    LongColumnStatsDataInspector extrapolateLongData = new LongColumnStatsDataInspector();
     Map<String, LongColumnStatsData> extractedAdjustedStatsMap = new HashMap<>();
     for (Map.Entry<String, ColumnStatisticsData> entry : adjustedStatsMap.entrySet()) {
       extractedAdjustedStatsMap.put(entry.getKey(), entry.getValue().getLongStats());

http://git-wip-us.apache.org/repos/asf/hive/blob/9a36aa90/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/StringColumnStatsAggregator.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/StringColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/StringColumnStatsAggregator.java
index e1a781f..1b29f92 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/StringColumnStatsAggregator.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/StringColumnStatsAggregator.java
@@ -33,6 +33,7 @@ import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
 import org.apache.hadoop.hive.metastore.api.MetaException;
 import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
+import org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -65,14 +66,14 @@ public class StringColumnStatsAggregator extends ColumnStatsAggregator implement
         statsObj = ColumnStatsAggregatorFactory.newColumnStaticsObj(colName, colType, cso
             .getStatsData().getSetField());
       }
-      if (!cso.getStatsData().getStringStats().isSetBitVectors()
-          || cso.getStatsData().getStringStats().getBitVectors().length() == 0) {
+      StringColumnStatsDataInspector stringColumnStatsData =
+          (StringColumnStatsDataInspector) cso.getStatsData().getStringStats();
+      if (stringColumnStatsData.getNdvEstimator() == null) {
         ndvEstimator = null;
         break;
       } else {
         // check if all of the bit vectors can merge
-        NumDistinctValueEstimator estimator = NumDistinctValueEstimatorFactory
-            .getNumDistinctValueEstimator(cso.getStatsData().getStringStats().getBitVectors());
+        NumDistinctValueEstimator estimator = stringColumnStatsData.getNdvEstimator();
         if (ndvEstimator == null) {
           ndvEstimator = estimator;
         } else {
@@ -92,13 +93,13 @@ public class StringColumnStatsAggregator extends ColumnStatsAggregator implement
     LOG.debug("all of the bit vectors can merge for " + colName + " is " + (ndvEstimator != null));
     ColumnStatisticsData columnStatisticsData = new ColumnStatisticsData();
     if (doAllPartitionContainStats || css.size() < 2) {
-      StringColumnStatsData aggregateData = null;
+      StringColumnStatsDataInspector aggregateData = null;
       for (ColumnStatistics cs : css) {
         ColumnStatisticsObj cso = cs.getStatsObjIterator().next();
-        StringColumnStatsData newData = cso.getStatsData().getStringStats();
+        StringColumnStatsDataInspector newData =
+            (StringColumnStatsDataInspector) cso.getStatsData().getStringStats();
         if (ndvEstimator != null) {
-          ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory
-              .getNumDistinctValueEstimator(newData.getBitVectors()));
+          ndvEstimator.mergeEstimators(newData.getNdvEstimator());
         }
         if (aggregateData == null) {
           aggregateData = newData.deepCopy();
@@ -146,11 +147,12 @@ public class StringColumnStatsAggregator extends ColumnStatsAggregator implement
         double pseudoIndexSum = 0;
         int length = 0;
         int curIndex = -1;
-        StringColumnStatsData aggregateData = null;
+        StringColumnStatsDataInspector aggregateData = null;
         for (ColumnStatistics cs : css) {
           String partName = cs.getStatsDesc().getPartName();
           ColumnStatisticsObj cso = cs.getStatsObjIterator().next();
-          StringColumnStatsData newData = cso.getStatsData().getStringStats();
+          StringColumnStatsDataInspector newData =
+              (StringColumnStatsDataInspector) cso.getStatsData().getStringStats();
           // newData.isSetBitVectors() should be true for sure because we
           // already checked it before.
           if (indexMap.get(partName) != curIndex) {
@@ -185,8 +187,7 @@ public class StringColumnStatsAggregator extends ColumnStatsAggregator implement
                 newData.getMaxColLen()));
             aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());
           }
-          ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory
-              .getNumDistinctValueEstimator(newData.getBitVectors()));
+          ndvEstimator.mergeEstimators(newData.getNdvEstimator());
         }
         if (length > 0) {
           // we have to set ndv
@@ -211,7 +212,7 @@ public class StringColumnStatsAggregator extends ColumnStatsAggregator implement
       int numPartsWithStats, Map<String, Double> adjustedIndexMap,
       Map<String, ColumnStatisticsData> adjustedStatsMap, double densityAvg) {
     int rightBorderInd = numParts;
-    StringColumnStatsData extrapolateStringData = new StringColumnStatsData();
+    StringColumnStatsDataInspector extrapolateStringData = new StringColumnStatsDataInspector();
     Map<String, StringColumnStatsData> extractedAdjustedStatsMap = new HashMap<>();
     for (Map.Entry<String, ColumnStatisticsData> entry : adjustedStatsMap.entrySet()) {
       extractedAdjustedStatsMap.put(entry.getKey(), entry.getValue().getStringStats());
@@ -295,7 +296,6 @@ public class StringColumnStatsAggregator extends ColumnStatsAggregator implement
       ndv = (long) (min + (max - min) * minInd / (minInd - maxInd));
     }
     extrapolateStringData.setAvgColLen(avgColLen);
-    ;
     extrapolateStringData.setMaxColLen((long) maxColLen);
     extrapolateStringData.setNumNulls(numNulls);
     extrapolateStringData.setNumDVs(ndv);

http://git-wip-us.apache.org/repos/asf/hive/blob/9a36aa90/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/DateColumnStatsDataInspector.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/DateColumnStatsDataInspector.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/DateColumnStatsDataInspector.java
new file mode 100644
index 0000000..937ebf2
--- /dev/null
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/DateColumnStatsDataInspector.java
@@ -0,0 +1,124 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.metastore.columnstats.cache;
+
+import java.nio.ByteBuffer;
+
+import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;
+import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory;
+import org.apache.hadoop.hive.metastore.api.DateColumnStatsData;
+
+@SuppressWarnings("serial")
+public class DateColumnStatsDataInspector extends DateColumnStatsData {
+
+  private NumDistinctValueEstimator ndvEstimator;
+
+  public DateColumnStatsDataInspector() {
+    super();
+  }
+
+  public DateColumnStatsDataInspector(long numNulls, long numDVs) {
+    super(numNulls, numDVs);
+  }
+
+  public DateColumnStatsDataInspector(DateColumnStatsDataInspector other) {
+    super(other);
+    if (other.ndvEstimator != null) {
+      super.setBitVectors(other.ndvEstimator.serialize());
+    }
+  }
+
+  @Override
+  public DateColumnStatsDataInspector deepCopy() {
+    return new DateColumnStatsDataInspector(this);
+  }
+
+  @Override
+  public byte[] getBitVectors() {
+    if (ndvEstimator != null) {
+      updateBitVectors();
+    }
+    return super.getBitVectors();
+  }
+
+  @Override
+  public ByteBuffer bufferForBitVectors() {
+    if (ndvEstimator != null) {
+      updateBitVectors();
+    }
+    return super.bufferForBitVectors();
+  }
+
+  @Override
+  public void setBitVectors(byte[] bitVectors) {
+    super.setBitVectors(bitVectors);
+    this.ndvEstimator = null;
+  }
+
+  @Override
+  public void setBitVectors(ByteBuffer bitVectors) {
+    super.setBitVectors(bitVectors);
+    this.ndvEstimator = null;
+  }
+
+  @Override
+  public void unsetBitVectors() {
+    super.unsetBitVectors();
+    this.ndvEstimator = null;
+  }
+
+  @Override
+  public boolean isSetBitVectors() {
+    if (ndvEstimator != null) {
+      updateBitVectors();
+    }
+    return super.isSetBitVectors();
+  }
+
+  @Override
+  public void setBitVectorsIsSet(boolean value) {
+    if (ndvEstimator != null) {
+      updateBitVectors();
+    }
+    super.setBitVectorsIsSet(value);
+  }
+
+  public NumDistinctValueEstimator getNdvEstimator() {
+    if (isSetBitVectors() && getBitVectors().length != 0) {
+      updateNdvEstimator();
+    }
+    return ndvEstimator;
+  }
+
+  public void setNdvEstimator(NumDistinctValueEstimator ndvEstimator) {
+    super.unsetBitVectors();
+    this.ndvEstimator = ndvEstimator;
+  }
+
+  private void updateBitVectors() {
+    super.setBitVectors(ndvEstimator.serialize());
+    this.ndvEstimator = null;
+  }
+
+  private void updateNdvEstimator() {
+    this.ndvEstimator = NumDistinctValueEstimatorFactory
+        .getNumDistinctValueEstimator(super.getBitVectors());
+    super.unsetBitVectors();
+  }
+
+}
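
The inspector above maintains a simple invariant: at most one of the two representations (serialized bit vectors, live estimator) is set at a time, and each accessor lazily converts to the form it needs. A usage sketch follows (illustrative, not from the commit; `NdvInspectorUsageSketch` and `mergedNdv` are hypothetical names, and the classes in this patch are assumed to be on the classpath) mirroring what the merger classes later in this commit do:

  import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;
  import org.apache.hadoop.hive.metastore.columnstats.cache.DateColumnStatsDataInspector;

  public class NdvInspectorUsageSketch {
    // 'a' and 'b' stand for bit vectors previously persisted by the metastore.
    static long mergedNdv(byte[] a, byte[] b) {
      DateColumnStatsDataInspector d1 = new DateColumnStatsDataInspector();
      d1.setBitVectors(a);              // caches the bytes, clears any live estimator
      DateColumnStatsDataInspector d2 = new DateColumnStatsDataInspector();
      d2.setBitVectors(b);
      NumDistinctValueEstimator merged = d1.getNdvEstimator();  // deserialized once
      NumDistinctValueEstimator other = d2.getNdvEstimator();
      if (merged == null || other == null) {
        return 0;                       // no bit vectors; callers fall back to max(numDVs)
      }
      if (merged.canMerge(other)) {
        merged.mergeEstimators(other);  // no byte[] round-trip between merges
      }
      return merged.estimateNumDistinctValues();
    }
  }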

http://git-wip-us.apache.org/repos/asf/hive/blob/9a36aa90/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/DecimalColumnStatsDataInspector.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/DecimalColumnStatsDataInspector.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/DecimalColumnStatsDataInspector.java
new file mode 100644
index 0000000..586b5d8
--- /dev/null
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/DecimalColumnStatsDataInspector.java
@@ -0,0 +1,124 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.metastore.columnstats.cache;
+
+import java.nio.ByteBuffer;
+
+import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;
+import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory;
+import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData;
+
+@SuppressWarnings("serial")
+public class DecimalColumnStatsDataInspector extends DecimalColumnStatsData {
+
+  private NumDistinctValueEstimator ndvEstimator;
+
+  public DecimalColumnStatsDataInspector() {
+    super();
+  }
+
+  public DecimalColumnStatsDataInspector(long numNulls, long numDVs) {
+    super(numNulls, numDVs);
+  }
+
+  public DecimalColumnStatsDataInspector(DecimalColumnStatsDataInspector other) {
+    super(other);
+    if (other.ndvEstimator != null) {
+      super.setBitVectors(other.ndvEstimator.serialize());
+    }
+  }
+
+  @Override
+  public DecimalColumnStatsDataInspector deepCopy() {
+    return new DecimalColumnStatsDataInspector(this);
+  }
+
+  @Override
+  public byte[] getBitVectors() {
+    if (ndvEstimator != null) {
+      updateBitVectors();
+    }
+    return super.getBitVectors();
+  }
+
+  @Override
+  public ByteBuffer bufferForBitVectors() {
+    if (ndvEstimator != null) {
+      updateBitVectors();
+    }
+    return super.bufferForBitVectors();
+  }
+
+  @Override
+  public void setBitVectors(byte[] bitVectors) {
+    super.setBitVectors(bitVectors);
+    this.ndvEstimator = null;
+  }
+
+  @Override
+  public void setBitVectors(ByteBuffer bitVectors) {
+    super.setBitVectors(bitVectors);
+    this.ndvEstimator = null;
+  }
+
+  @Override
+  public void unsetBitVectors() {
+    super.unsetBitVectors();
+    this.ndvEstimator = null;
+  }
+
+  @Override
+  public boolean isSetBitVectors() {
+    if (ndvEstimator != null) {
+      updateBitVectors();
+    }
+    return super.isSetBitVectors();
+  }
+
+  @Override
+  public void setBitVectorsIsSet(boolean value) {
+    if (ndvEstimator != null) {
+      updateBitVectors();
+    }
+    super.setBitVectorsIsSet(value);
+  }
+
+  public NumDistinctValueEstimator getNdvEstimator() {
+    if (isSetBitVectors() && getBitVectors().length != 0) {
+      updateNdvEstimator();
+    }
+    return ndvEstimator;
+  }
+
+  public void setNdvEstimator(NumDistinctValueEstimator ndvEstimator) {
+    super.unsetBitVectors();
+    this.ndvEstimator = ndvEstimator;
+  }
+
+  private void updateBitVectors() {
+    super.setBitVectors(ndvEstimator.serialize());
+    this.ndvEstimator = null;
+  }
+
+  private void updateNdvEstimator() {
+    this.ndvEstimator = NumDistinctValueEstimatorFactory
+        .getNumDistinctValueEstimator(super.getBitVectors());
+    super.unsetBitVectors();
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/9a36aa90/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/DoubleColumnStatsDataInspector.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/DoubleColumnStatsDataInspector.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/DoubleColumnStatsDataInspector.java
new file mode 100644
index 0000000..3609ddd
--- /dev/null
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/DoubleColumnStatsDataInspector.java
@@ -0,0 +1,124 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.metastore.columnstats.cache;
+
+import java.nio.ByteBuffer;
+
+import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;
+import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory;
+import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;
+
+@SuppressWarnings("serial")
+public class DoubleColumnStatsDataInspector extends DoubleColumnStatsData {
+
+  private NumDistinctValueEstimator ndvEstimator;
+
+  public DoubleColumnStatsDataInspector() {
+    super();
+  }
+
+  public DoubleColumnStatsDataInspector(long numNulls, long numDVs) {
+    super(numNulls, numDVs);
+  }
+
+  public DoubleColumnStatsDataInspector(DoubleColumnStatsDataInspector other) {
+    super(other);
+    if (other.ndvEstimator != null) {
+      super.setBitVectors(other.ndvEstimator.serialize());
+    }
+  }
+
+  @Override
+  public DoubleColumnStatsDataInspector deepCopy() {
+    return new DoubleColumnStatsDataInspector(this);
+  }
+
+  @Override
+  public byte[] getBitVectors() {
+    if (ndvEstimator != null) {
+      updateBitVectors();
+    }
+    return super.getBitVectors();
+  }
+
+  @Override
+  public ByteBuffer bufferForBitVectors() {
+    if (ndvEstimator != null) {
+      updateBitVectors();
+    }
+    return super.bufferForBitVectors();
+  }
+
+  @Override
+  public void setBitVectors(byte[] bitVectors) {
+    super.setBitVectors(bitVectors);
+    this.ndvEstimator = null;
+  }
+
+  @Override
+  public void setBitVectors(ByteBuffer bitVectors) {
+    super.setBitVectors(bitVectors);
+    this.ndvEstimator = null;
+  }
+
+  @Override
+  public void unsetBitVectors() {
+    super.unsetBitVectors();
+    this.ndvEstimator = null;
+  }
+
+  @Override
+  public boolean isSetBitVectors() {
+    if (ndvEstimator != null) {
+      updateBitVectors();
+    }
+    return super.isSetBitVectors();
+  }
+
+  @Override
+  public void setBitVectorsIsSet(boolean value) {
+    if (ndvEstimator != null) {
+      updateBitVectors();
+    }
+    super.setBitVectorsIsSet(value);
+  }
+
+  public NumDistinctValueEstimator getNdvEstimator() {
+    if (isSetBitVectors() && getBitVectors().length != 0) {
+      updateNdvEstimator();
+    }
+    return ndvEstimator;
+  }
+
+  public void setNdvEstimator(NumDistinctValueEstimator ndvEstimator) {
+    super.unsetBitVectors();
+    this.ndvEstimator = ndvEstimator;
+  }
+
+  private void updateBitVectors() {
+    super.setBitVectors(ndvEstimator.serialize());
+    this.ndvEstimator = null;
+  }
+
+  private void updateNdvEstimator() {
+    this.ndvEstimator = NumDistinctValueEstimatorFactory
+        .getNumDistinctValueEstimator(super.getBitVectors());
+    super.unsetBitVectors();
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/9a36aa90/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/LongColumnStatsDataInspector.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/LongColumnStatsDataInspector.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/LongColumnStatsDataInspector.java
new file mode 100644
index 0000000..5632d91
--- /dev/null
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/LongColumnStatsDataInspector.java
@@ -0,0 +1,124 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.metastore.columnstats.cache;
+
+import java.nio.ByteBuffer;
+
+import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;
+import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory;
+import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
+
+@SuppressWarnings("serial")
+public class LongColumnStatsDataInspector extends LongColumnStatsData {
+
+  private NumDistinctValueEstimator ndvEstimator;
+
+  public LongColumnStatsDataInspector() {
+    super();
+  }
+
+  public LongColumnStatsDataInspector(long numNulls, long numDVs) {
+    super(numNulls, numDVs);
+  }
+
+  public LongColumnStatsDataInspector(LongColumnStatsDataInspector other) {
+    super(other);
+    if (other.ndvEstimator != null) {
+      super.setBitVectors(other.ndvEstimator.serialize());
+    }
+  }
+
+  @Override
+  public LongColumnStatsDataInspector deepCopy() {
+    return new LongColumnStatsDataInspector(this);
+  }
+
+  @Override
+  public byte[] getBitVectors() {
+    if (ndvEstimator != null) {
+      updateBitVectors();
+    }
+    return super.getBitVectors();
+  }
+
+  @Override
+  public ByteBuffer bufferForBitVectors() {
+    if (ndvEstimator != null) {
+      updateBitVectors();
+    }
+    return super.bufferForBitVectors();
+  }
+
+  @Override
+  public void setBitVectors(byte[] bitVectors) {
+    super.setBitVectors(bitVectors);
+    this.ndvEstimator = null;
+  }
+
+  @Override
+  public void setBitVectors(ByteBuffer bitVectors) {
+    super.setBitVectors(bitVectors);
+    this.ndvEstimator = null;
+  }
+
+  @Override
+  public void unsetBitVectors() {
+    super.unsetBitVectors();
+    this.ndvEstimator = null;
+  }
+
+  @Override
+  public boolean isSetBitVectors() {
+    if (ndvEstimator != null) {
+      updateBitVectors();
+    }
+    return super.isSetBitVectors();
+  }
+
+  @Override
+  public void setBitVectorsIsSet(boolean value) {
+    if (ndvEstimator != null) {
+      updateBitVectors();
+    }
+    super.setBitVectorsIsSet(value);
+  }
+
+  public NumDistinctValueEstimator getNdvEstimator() {
+    if (isSetBitVectors() && getBitVectors().length != 0) {
+      updateNdvEstimator();
+    }
+    return ndvEstimator;
+  }
+
+  public void setNdvEstimator(NumDistinctValueEstimator ndvEstimator) {
+    super.unsetBitVectors();
+    this.ndvEstimator = ndvEstimator;
+  }
+
+  private void updateBitVectors() {
+    super.setBitVectors(ndvEstimator.serialize());
+    this.ndvEstimator = null;
+  }
+
+  private void updateNdvEstimator() {
+    this.ndvEstimator = NumDistinctValueEstimatorFactory
+        .getNumDistinctValueEstimator(super.getBitVectors());
+    super.unsetBitVectors();
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/9a36aa90/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/StringColumnStatsDataInspector.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/StringColumnStatsDataInspector.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/StringColumnStatsDataInspector.java
new file mode 100644
index 0000000..2db037b
--- /dev/null
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/StringColumnStatsDataInspector.java
@@ -0,0 +1,125 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.metastore.columnstats.cache;
+
+import java.nio.ByteBuffer;
+
+import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;
+import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory;
+import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
+
+@SuppressWarnings("serial")
+public class StringColumnStatsDataInspector extends StringColumnStatsData {
+
+  private NumDistinctValueEstimator ndvEstimator;
+
+  public StringColumnStatsDataInspector() {
+    super();
+  }
+
+  public StringColumnStatsDataInspector(long maxColLen, double avgColLen,
+      long numNulls, long numDVs) {
+    super(maxColLen, avgColLen, numNulls, numDVs);
+  }
+
+  public StringColumnStatsDataInspector(StringColumnStatsDataInspector other) {
+    super(other);
+    if (other.ndvEstimator != null) {
+      super.setBitVectors(other.ndvEstimator.serialize());
+    }
+  }
+
+  @Override
+  public StringColumnStatsDataInspector deepCopy() {
+    return new StringColumnStatsDataInspector(this);
+  }
+
+  @Override
+  public byte[] getBitVectors() {
+    if (ndvEstimator != null) {
+      updateBitVectors();
+    }
+    return super.getBitVectors();
+  }
+
+  @Override
+  public ByteBuffer bufferForBitVectors() {
+    if (ndvEstimator != null) {
+      updateBitVectors();
+    }
+    return super.bufferForBitVectors();
+  }
+
+  @Override
+  public void setBitVectors(byte[] bitVectors) {
+    super.setBitVectors(bitVectors);
+    this.ndvEstimator = null;
+  }
+
+  @Override
+  public void setBitVectors(ByteBuffer bitVectors) {
+    super.setBitVectors(bitVectors);
+    this.ndvEstimator = null;
+  }
+
+  @Override
+  public void unsetBitVectors() {
+    super.unsetBitVectors();
+    this.ndvEstimator = null;
+  }
+
+  @Override
+  public boolean isSetBitVectors() {
+    if (ndvEstimator != null) {
+      updateBitVectors();
+    }
+    return super.isSetBitVectors();
+  }
+
+  @Override
+  public void setBitVectorsIsSet(boolean value) {
+    if (ndvEstimator != null) {
+      updateBitVectors();
+    }
+    super.setBitVectorsIsSet(value);
+  }
+
+  public NumDistinctValueEstimator getNdvEstimator() {
+    if (isSetBitVectors() && getBitVectors().length != 0) {
+      updateNdvEstimator();
+    }
+    return ndvEstimator;
+  }
+
+  public void setNdvEstimator(NumDistinctValueEstimator ndvEstimator) {
+    super.unsetBitVectors();
+    this.ndvEstimator = ndvEstimator;
+  }
+
+  private void updateBitVectors() {
+    super.setBitVectors(ndvEstimator.serialize());
+    this.ndvEstimator = null;
+  }
+
+  private void updateNdvEstimator() {
+    this.ndvEstimator = NumDistinctValueEstimatorFactory
+        .getNumDistinctValueEstimator(super.getBitVectors());
+    super.unsetBitVectors();
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/9a36aa90/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/ColumnStatsMergerFactory.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/ColumnStatsMergerFactory.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/ColumnStatsMergerFactory.java
index 0ce1847..66be524 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/ColumnStatsMergerFactory.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/ColumnStatsMergerFactory.java
@@ -19,19 +19,16 @@
 
 package org.apache.hadoop.hive.metastore.columnstats.merge;
 
-import org.apache.commons.lang.StringUtils;
-import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory;
-import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog;
 import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData;
 import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData._Fields;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
-import org.apache.hadoop.hive.metastore.api.DateColumnStatsData;
-import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData;
-import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;
-import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
-import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
+import org.apache.hadoop.hive.metastore.columnstats.cache.DateColumnStatsDataInspector;
+import org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector;
+import org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector;
+import org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector;
+import org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector;
 
 public class ColumnStatsMergerFactory {
 
@@ -89,15 +86,15 @@ public class ColumnStatsMergerFactory {
       break;
 
     case LONG_STATS:
-      csd.setLongStats(new LongColumnStatsData());
+      csd.setLongStats(new LongColumnStatsDataInspector());
       break;
 
     case DOUBLE_STATS:
-      csd.setDoubleStats(new DoubleColumnStatsData());
+      csd.setDoubleStats(new DoubleColumnStatsDataInspector());
       break;
 
     case STRING_STATS:
-      csd.setStringStats(new StringColumnStatsData());
+      csd.setStringStats(new StringColumnStatsDataInspector());
       break;
 
     case BINARY_STATS:
@@ -105,11 +102,11 @@ public class ColumnStatsMergerFactory {
       break;
 
     case DECIMAL_STATS:
-      csd.setDecimalStats(new DecimalColumnStatsData());
+      csd.setDecimalStats(new DecimalColumnStatsDataInspector());
       break;
 
     case DATE_STATS:
-      csd.setDateStats(new DateColumnStatsData());
+      csd.setDateStats(new DateColumnStatsDataInspector());
       break;
 
     default:

http://git-wip-us.apache.org/repos/asf/hive/blob/9a36aa90/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DateColumnStatsMerger.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DateColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DateColumnStatsMerger.java
index 2542a00..e783d3c 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DateColumnStatsMerger.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DateColumnStatsMerger.java
@@ -20,16 +20,17 @@
 package org.apache.hadoop.hive.metastore.columnstats.merge;
 
 import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;
-import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
 import org.apache.hadoop.hive.metastore.api.Date;
-import org.apache.hadoop.hive.metastore.api.DateColumnStatsData;
+import org.apache.hadoop.hive.metastore.columnstats.cache.DateColumnStatsDataInspector;
 
 public class DateColumnStatsMerger extends ColumnStatsMerger {
   @Override
   public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
-    DateColumnStatsData aggregateData = aggregateColStats.getStatsData().getDateStats();
-    DateColumnStatsData newData = newColStats.getStatsData().getDateStats();
+    DateColumnStatsDataInspector aggregateData =
+        (DateColumnStatsDataInspector) aggregateColStats.getStatsData().getDateStats();
+    DateColumnStatsDataInspector newData =
+        (DateColumnStatsDataInspector) newColStats.getStatsData().getDateStats();
     Date lowValue = aggregateData.getLowValue().compareTo(newData.getLowValue()) < 0 ? aggregateData
         .getLowValue() : newData.getLowValue();
     aggregateData.setLowValue(lowValue);
@@ -37,19 +38,16 @@ public class DateColumnStatsMerger extends ColumnStatsMerger {
         .getHighValue() : newData.getHighValue();
     aggregateData.setHighValue(highValue);
     aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());
-    if (!aggregateData.isSetBitVectors() || aggregateData.getBitVectors().length() == 0
-        || !newData.isSetBitVectors() || newData.getBitVectors().length() == 0) {
+    if (aggregateData.getNdvEstimator() == null || newData.getNdvEstimator() == null) {
       aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs()));
     } else {
-      NumDistinctValueEstimator oldEst = NumDistinctValueEstimatorFactory
-          .getNumDistinctValueEstimator(aggregateData.getBitVectors());
-      NumDistinctValueEstimator newEst = NumDistinctValueEstimatorFactory
-          .getNumDistinctValueEstimator(newData.getBitVectors());
+      NumDistinctValueEstimator oldEst = aggregateData.getNdvEstimator();
+      NumDistinctValueEstimator newEst = newData.getNdvEstimator();
       long ndv = -1;
       if (oldEst.canMerge(newEst)) {
         oldEst.mergeEstimators(newEst);
         ndv = oldEst.estimateNumDistinctValues();
-        aggregateData.setBitVectors(oldEst.serialize());
+        aggregateData.setNdvEstimator(oldEst);
       } else {
         ndv = Math.max(aggregateData.getNumDVs(), newData.getNumDVs());
       }

http://git-wip-us.apache.org/repos/asf/hive/blob/9a36aa90/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java
index 4e8e129..54099f6 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java
@@ -20,16 +20,17 @@
 package org.apache.hadoop.hive.metastore.columnstats.merge;
 
 import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;
-import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
 import org.apache.hadoop.hive.metastore.api.Decimal;
-import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData;
+import org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector;
 
 public class DecimalColumnStatsMerger extends ColumnStatsMerger {
   @Override
   public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
-    DecimalColumnStatsData aggregateData = aggregateColStats.getStatsData().getDecimalStats();
-    DecimalColumnStatsData newData = newColStats.getStatsData().getDecimalStats();
+    DecimalColumnStatsDataInspector aggregateData =
+        (DecimalColumnStatsDataInspector) aggregateColStats.getStatsData().getDecimalStats();
+    DecimalColumnStatsDataInspector newData =
+        (DecimalColumnStatsDataInspector) newColStats.getStatsData().getDecimalStats();
     Decimal lowValue = aggregateData.getLowValue() != null
         && (aggregateData.getLowValue().compareTo(newData.getLowValue()) > 0) ? aggregateData
         .getLowValue() : newData.getLowValue();
@@ -39,19 +40,16 @@ public class DecimalColumnStatsMerger extends ColumnStatsMerger {
         .getHighValue() : newData.getHighValue();
     aggregateData.setHighValue(highValue);
     aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());
-    if (!aggregateData.isSetBitVectors() || aggregateData.getBitVectors().length() == 0
-        || !newData.isSetBitVectors() || newData.getBitVectors().length() == 0) {
+    if (aggregateData.getNdvEstimator() == null || newData.getNdvEstimator() == null) {
       aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs()));
     } else {
-      NumDistinctValueEstimator oldEst = NumDistinctValueEstimatorFactory
-          .getNumDistinctValueEstimator(aggregateData.getBitVectors());
-      NumDistinctValueEstimator newEst = NumDistinctValueEstimatorFactory
-          .getNumDistinctValueEstimator(newData.getBitVectors());
+      NumDistinctValueEstimator oldEst = aggregateData.getNdvEstimator();
+      NumDistinctValueEstimator newEst = newData.getNdvEstimator();
       long ndv = -1;
       if (oldEst.canMerge(newEst)) {
         oldEst.mergeEstimators(newEst);
         ndv = oldEst.estimateNumDistinctValues();
-        aggregateData.setBitVectors(oldEst.serialize());
+        aggregateData.setNdvEstimator(oldEst);
       } else {
         ndv = Math.max(aggregateData.getNumDVs(), newData.getNumDVs());
       }

http://git-wip-us.apache.org/repos/asf/hive/blob/9a36aa90/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DoubleColumnStatsMerger.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DoubleColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DoubleColumnStatsMerger.java
index 4ef5c39..817a55d 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DoubleColumnStatsMerger.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DoubleColumnStatsMerger.java
@@ -20,31 +20,29 @@
 package org.apache.hadoop.hive.metastore.columnstats.merge;
 
 import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;
-import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
-import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;
+import org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector;
 
 public class DoubleColumnStatsMerger extends ColumnStatsMerger {
   @Override
   public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
-    DoubleColumnStatsData aggregateData = aggregateColStats.getStatsData().getDoubleStats();
-    DoubleColumnStatsData newData = newColStats.getStatsData().getDoubleStats();
+    DoubleColumnStatsDataInspector aggregateData =
+        (DoubleColumnStatsDataInspector) aggregateColStats.getStatsData().getDoubleStats();
+    DoubleColumnStatsDataInspector newData =
+        (DoubleColumnStatsDataInspector) newColStats.getStatsData().getDoubleStats();
     aggregateData.setLowValue(Math.min(aggregateData.getLowValue(), newData.getLowValue()));
     aggregateData.setHighValue(Math.max(aggregateData.getHighValue(), newData.getHighValue()));
     aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());
-    if (!aggregateData.isSetBitVectors() || aggregateData.getBitVectors().length() == 0
-        || !newData.isSetBitVectors() || newData.getBitVectors().length() == 0) {
+    if (aggregateData.getNdvEstimator() == null || newData.getNdvEstimator() == null) {
       aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs()));
     } else {
-      NumDistinctValueEstimator oldEst = NumDistinctValueEstimatorFactory
-          .getNumDistinctValueEstimator(aggregateData.getBitVectors());
-      NumDistinctValueEstimator newEst = NumDistinctValueEstimatorFactory
-          .getNumDistinctValueEstimator(newData.getBitVectors());
+      NumDistinctValueEstimator oldEst = aggregateData.getNdvEstimator();
+      NumDistinctValueEstimator newEst = newData.getNdvEstimator();
       long ndv = -1;
       if (oldEst.canMerge(newEst)) {
         oldEst.mergeEstimators(newEst);
         ndv = oldEst.estimateNumDistinctValues();
-        aggregateData.setBitVectors(oldEst.serialize());
+        aggregateData.setNdvEstimator(oldEst);
       } else {
         ndv = Math.max(aggregateData.getNumDVs(), newData.getNumDVs());
       }

http://git-wip-us.apache.org/repos/asf/hive/blob/9a36aa90/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/LongColumnStatsMerger.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/LongColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/LongColumnStatsMerger.java
index acf7f03..dc048e0 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/LongColumnStatsMerger.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/LongColumnStatsMerger.java
@@ -20,31 +20,29 @@
 package org.apache.hadoop.hive.metastore.columnstats.merge;
 
 import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;
-import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
-import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
+import org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector;
 
 public class LongColumnStatsMerger extends ColumnStatsMerger {
   @Override
   public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
-    LongColumnStatsData aggregateData = aggregateColStats.getStatsData().getLongStats();
-    LongColumnStatsData newData = newColStats.getStatsData().getLongStats();
+    LongColumnStatsDataInspector aggregateData =
+        (LongColumnStatsDataInspector) aggregateColStats.getStatsData().getLongStats();
+    LongColumnStatsDataInspector newData =
+        (LongColumnStatsDataInspector) newColStats.getStatsData().getLongStats();
     aggregateData.setLowValue(Math.min(aggregateData.getLowValue(), newData.getLowValue()));
     aggregateData.setHighValue(Math.max(aggregateData.getHighValue(), newData.getHighValue()));
     aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());
-    if (!aggregateData.isSetBitVectors() || aggregateData.getBitVectors().length() == 0
-        || !newData.isSetBitVectors() || newData.getBitVectors().length() == 0) {
+    if (aggregateData.getNdvEstimator() == null || newData.getNdvEstimator() == null) {
       aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs()));
     } else {
-      NumDistinctValueEstimator oldEst = NumDistinctValueEstimatorFactory
-          .getNumDistinctValueEstimator(aggregateData.getBitVectors());
-      NumDistinctValueEstimator newEst = NumDistinctValueEstimatorFactory
-          .getNumDistinctValueEstimator(newData.getBitVectors());
+      NumDistinctValueEstimator oldEst = aggregateData.getNdvEstimator();
+      NumDistinctValueEstimator newEst = newData.getNdvEstimator();
       long ndv = -1;
       if (oldEst.canMerge(newEst)) {
         oldEst.mergeEstimators(newEst);
         ndv = oldEst.estimateNumDistinctValues();
-        aggregateData.setBitVectors(oldEst.serialize());
+        aggregateData.setNdvEstimator(oldEst);
       } else {
         ndv = Math.max(aggregateData.getNumDVs(), newData.getNumDVs());
       }

http://git-wip-us.apache.org/repos/asf/hive/blob/9a36aa90/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/StringColumnStatsMerger.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/StringColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/StringColumnStatsMerger.java
index b3cd33c..e353b8f 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/StringColumnStatsMerger.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/StringColumnStatsMerger.java
@@ -20,31 +20,29 @@
 package org.apache.hadoop.hive.metastore.columnstats.merge;
 
 import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;
-import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
-import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
+import org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector;
 
 public class StringColumnStatsMerger extends ColumnStatsMerger {
   @Override
   public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
-    StringColumnStatsData aggregateData = aggregateColStats.getStatsData().getStringStats();
-    StringColumnStatsData newData = newColStats.getStatsData().getStringStats();
+    StringColumnStatsDataInspector aggregateData =
+        (StringColumnStatsDataInspector) aggregateColStats.getStatsData().getStringStats();
+    StringColumnStatsDataInspector newData =
+        (StringColumnStatsDataInspector) newColStats.getStatsData().getStringStats();
     aggregateData.setMaxColLen(Math.max(aggregateData.getMaxColLen(), newData.getMaxColLen()));
     aggregateData.setAvgColLen(Math.max(aggregateData.getAvgColLen(), newData.getAvgColLen()));
     aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());
-    if (!aggregateData.isSetBitVectors() || aggregateData.getBitVectors().length() == 0
-        || !newData.isSetBitVectors() || newData.getBitVectors().length() == 0) {
+    if (aggregateData.getNdvEstimator() == null || newData.getNdvEstimator() == null) {
       aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs()));
     } else {
-      NumDistinctValueEstimator oldEst = NumDistinctValueEstimatorFactory
-          .getNumDistinctValueEstimator(aggregateData.getBitVectors());
-      NumDistinctValueEstimator newEst = NumDistinctValueEstimatorFactory
-          .getNumDistinctValueEstimator(newData.getBitVectors());
+      NumDistinctValueEstimator oldEst = aggregateData.getNdvEstimator();
+      NumDistinctValueEstimator newEst = newData.getNdvEstimator();
       long ndv = -1;
       if (oldEst.canMerge(newEst)) {
         oldEst.mergeEstimators(newEst);
         ndv = oldEst.estimateNumDistinctValues();
-        aggregateData.setBitVectors(oldEst.serialize());
+        aggregateData.setNdvEstimator(oldEst);
       } else {
         ndv = Math.max(aggregateData.getNumDVs(), newData.getNumDVs());
       }

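[The merger now asks the *Inspector wrapper for an already-deserialized estimator instead of rebuilding one from a serialized String on every merge. The wrapper classes themselves are not in these hunks; a plausible sketch of their shape, assuming they extend the Thrift-generated structs and that the estimator factory gained a byte[] overload (both assumptions, not confirmed by this patch):

    import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;
    import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory;
    import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;

    public class StringColumnStatsDataInspector extends StringColumnStatsData {
      // Memoized estimator: deserialized from the binary bitvectors at most
      // once, then handed out directly to mergers and aggregators.
      private NumDistinctValueEstimator ndvEstimator;

      public NumDistinctValueEstimator getNdvEstimator() {
        if (ndvEstimator == null && isSetBitVectors() && getBitVectors().length > 0) {
          ndvEstimator = NumDistinctValueEstimatorFactory
              .getNumDistinctValueEstimator(getBitVectors());
        }
        return ndvEstimator;
      }

      public void setNdvEstimator(NumDistinctValueEstimator ndvEstimator) {
        this.ndvEstimator = ndvEstimator;
      }
    }
]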
http://git-wip-us.apache.org/repos/asf/hive/blob/9a36aa90/metastore/src/test/org/apache/hadoop/hive/metastore/TestOldSchema.java
----------------------------------------------------------------------
diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/TestOldSchema.java b/metastore/src/test/org/apache/hadoop/hive/metastore/TestOldSchema.java
index 54828f2..8312b34 100644
--- a/metastore/src/test/org/apache/hadoop/hive/metastore/TestOldSchema.java
+++ b/metastore/src/test/org/apache/hadoop/hive/metastore/TestOldSchema.java
@@ -20,10 +20,7 @@ package org.apache.hadoop.hive.metastore;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
-import java.util.HashMap;
-import java.util.HashSet;
 import java.util.List;
-import java.util.Set;
 
 import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog;
 import org.apache.hadoop.hive.conf.HiveConf;
@@ -35,27 +32,21 @@ import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
 import org.apache.hadoop.hive.metastore.api.Database;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.FileMetadataExprType;
-import org.apache.hadoop.hive.metastore.api.Function;
-import org.apache.hadoop.hive.metastore.api.Index;
 import org.apache.hadoop.hive.metastore.api.InvalidInputException;
 import org.apache.hadoop.hive.metastore.api.InvalidObjectException;
 import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
 import org.apache.hadoop.hive.metastore.api.MetaException;
 import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
 import org.apache.hadoop.hive.metastore.api.Partition;
-import org.apache.hadoop.hive.metastore.api.SQLForeignKey;
-import org.apache.hadoop.hive.metastore.api.SQLPrimaryKey;
 import org.apache.hadoop.hive.metastore.api.SerDeInfo;
 import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
 import org.apache.hadoop.hive.metastore.api.Table;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
-import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 import org.junit.After;
 import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Test;
-import org.mockito.Mockito;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -93,7 +84,7 @@ public class TestOldSchema {
     }
   }
 
-  String bitVectors[] = new String[2];
+  byte bitVectors[][] = new byte[2][];
 
   @Before
   public void setUp() throws Exception {

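[The test fixture switches from String to raw byte[][] bitvectors; only the field declaration is visible in this hunk. One way the setUp() below might populate it, assuming HyperLogLog implements NumDistinctValueEstimator and that serialize() now returns byte[] after this change (the crux of HIVE-17286); the values are illustrative:

    import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog;

    byte[][] bitVectors = new byte[2][];
    for (int v = 0; v < bitVectors.length; v++) {
      HyperLogLog hll = HyperLogLog.builder().build();
      for (long i = 0; i < 100; i++) {
        hll.addLong(v * 1000L + i);      // distinct values per sketch
      }
      bitVectors[v] = hll.serialize();   // binary form, no String round-trip
    }
]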
http://git-wip-us.apache.org/repos/asf/hive/blob/9a36aa90/metastore/src/test/org/apache/hadoop/hive/metastore/cache/TestCachedStore.java
----------------------------------------------------------------------
diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/cache/TestCachedStore.java b/metastore/src/test/org/apache/hadoop/hive/metastore/cache/TestCachedStore.java
index e31dad3..6709c9d 100644
--- a/metastore/src/test/org/apache/hadoop/hive/metastore/cache/TestCachedStore.java
+++ b/metastore/src/test/org/apache/hadoop/hive/metastore/cache/TestCachedStore.java
@@ -24,7 +24,6 @@ import java.util.List;
 import java.util.Map;
 
 import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog;
-import org.apache.hadoop.hive.common.ndv.hll.HyperLogLogUtils;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.ObjectStore;
 import org.apache.hadoop.hive.metastore.TableType;
@@ -37,14 +36,14 @@ import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
 import org.apache.hadoop.hive.metastore.api.Database;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
-import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
 import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
 import org.apache.hadoop.hive.metastore.api.Partition;
 import org.apache.hadoop.hive.metastore.api.PrincipalType;
 import org.apache.hadoop.hive.metastore.api.SerDeInfo;
 import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
-import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
 import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector;
+import org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector;
 import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Test;
@@ -435,7 +434,7 @@ public class TestCachedStore {
     // Col1
     ColumnStatisticsData data1 = new ColumnStatisticsData();
     ColumnStatisticsObj col1Stats = new ColumnStatisticsObj(col1.getName(), col1.getType(), data1);
-    LongColumnStatsData longStats = new LongColumnStatsData();
+    LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector();
     longStats.setLowValue(col1LowVal);
     longStats.setHighValue(col1HighVal);
     longStats.setNumNulls(col1Nulls);
@@ -446,7 +445,7 @@ public class TestCachedStore {
     // Col2
     ColumnStatisticsData data2 = new ColumnStatisticsData();
     ColumnStatisticsObj col2Stats = new ColumnStatisticsObj(col2.getName(), col2.getType(), data2);
-    StringColumnStatsData stringStats = new StringColumnStatsData();
+    StringColumnStatsDataInspector stringStats = new StringColumnStatsDataInspector();
     stringStats.setMaxColLen(col2MaxColLen);
     stringStats.setAvgColLen(col2AvgColLen);
     stringStats.setNumNulls(col2Nulls);
@@ -718,7 +717,7 @@ public class TestCachedStore {
 
     ColumnStatisticsData data = new ColumnStatisticsData();
     ColumnStatisticsObj colStats = new ColumnStatisticsObj(colName, "int", data);
-    LongColumnStatsData longStats = new LongColumnStatsData();
+    LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector();
     longStats.setLowValue(0);
     longStats.setHighValue(100);
     longStats.setNumNulls(50);
@@ -784,7 +783,7 @@ public class TestCachedStore {
     
     ColumnStatisticsData data = new ColumnStatisticsData();
     ColumnStatisticsObj colStats = new ColumnStatisticsObj(colName, "int", data);
-    LongColumnStatsData longStats = new LongColumnStatsData();
+    LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector();
     longStats.setLowValue(0);
     longStats.setHighValue(100);
     longStats.setNumNulls(50);
@@ -854,7 +853,7 @@ public class TestCachedStore {
     
     ColumnStatisticsData data = new ColumnStatisticsData();
     ColumnStatisticsObj colStats = new ColumnStatisticsObj(colName, "int", data);
-    LongColumnStatsData longStats = new LongColumnStatsData();
+    LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector();
     longStats.setLowValue(0);
     longStats.setHighValue(100);
     longStats.setNumNulls(50);

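[With the cached store holding Inspector instances, the read path can reuse a materialized estimator instead of reparsing serialized bitvectors on every access. An illustrative (not verbatim) read-side snippet, reusing the names from the test above:

    // Illustrative: the first getNdvEstimator() call deserializes the
    // binary bitvectors; later calls on the cached object are free.
    LongColumnStatsDataInspector cached =
        (LongColumnStatsDataInspector) colStats.getStatsData().getLongStats();
    NumDistinctValueEstimator est = cached.getNdvEstimator();
    long ndv = (est != null) ? est.estimateNumDistinctValues() : cached.getNumDVs();
]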
http://git-wip-us.apache.org/repos/asf/hive/blob/9a36aa90/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
index d96f432..2b2c004 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
@@ -24,10 +24,7 @@ import java.nio.ByteBuffer;
 import java.util.ArrayList;
 import java.util.List;
 
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
 import org.apache.hadoop.hive.metastore.Warehouse;
 import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData;
@@ -37,15 +34,15 @@ import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
 import org.apache.hadoop.hive.metastore.api.Date;
-import org.apache.hadoop.hive.metastore.api.DateColumnStatsData;
 import org.apache.hadoop.hive.metastore.api.Decimal;
-import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData;
-import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
-import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
 import org.apache.hadoop.hive.metastore.api.MetaException;
 import org.apache.hadoop.hive.metastore.api.SetPartitionsStatsRequest;
-import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
+import org.apache.hadoop.hive.metastore.columnstats.cache.DateColumnStatsDataInspector;
+import org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector;
+import org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector;
+import org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector;
+import org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector;
 import org.apache.hadoop.hive.ql.CompilationOpContext;
 import org.apache.hadoop.hive.ql.DriverContext;
 import org.apache.hadoop.hive.ql.QueryPlan;
@@ -63,6 +60,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector;
@@ -70,6 +68,8 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspect
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.util.StringUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * ColumnStatsTask implementation.
@@ -136,8 +136,8 @@ public class ColumnStatsTask extends Task<ColumnStatsWork> implements Serializab
       statsObj.getStatsData().getDoubleStats().setLowValue(d);
     } else if (fName.equals("ndvbitvector")) {
       PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
-      String v = ((StringObjectInspector) poi).getPrimitiveJavaObject(o);
-      statsObj.getStatsData().getDoubleStats().setBitVectors(v);;
+      byte[] buf = ((BinaryObjectInspector) poi).getPrimitiveJavaObject(o);
+      statsObj.getStatsData().getDoubleStats().setBitVectors(buf);
     }
   }
 
@@ -157,8 +157,8 @@ public class ColumnStatsTask extends Task<ColumnStatsWork> implements Serializab
       statsObj.getStatsData().getDecimalStats().setLowValue(convertToThriftDecimal(d));
     } else if (fName.equals("ndvbitvector")) {
       PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
-      String v = ((StringObjectInspector) poi).getPrimitiveJavaObject(o);
-      statsObj.getStatsData().getDecimalStats().setBitVectors(v);;
+      byte[] buf = ((BinaryObjectInspector) poi).getPrimitiveJavaObject(o);
+      statsObj.getStatsData().getDecimalStats().setBitVectors(buf);
     }
   }
 
@@ -182,8 +182,8 @@ public class ColumnStatsTask extends Task<ColumnStatsWork> implements Serializab
       statsObj.getStatsData().getLongStats().setLowValue(v);
     } else if (fName.equals("ndvbitvector")) {
       PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
-      String v = ((StringObjectInspector) poi).getPrimitiveJavaObject(o);
-      statsObj.getStatsData().getLongStats().setBitVectors(v);;
+      byte[] buf = ((BinaryObjectInspector) poi).getPrimitiveJavaObject(o);
+      statsObj.getStatsData().getLongStats().setBitVectors(buf);
     }
   }
 
@@ -203,8 +203,8 @@ public class ColumnStatsTask extends Task<ColumnStatsWork> implements Serializab
       statsObj.getStatsData().getStringStats().setMaxColLen(v);
     } else if (fName.equals("ndvbitvector")) {
       PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
-      String v = ((StringObjectInspector) poi).getPrimitiveJavaObject(o);
-      statsObj.getStatsData().getStringStats().setBitVectors(v);;
+      byte[] buf = ((BinaryObjectInspector) poi).getPrimitiveJavaObject(o);
+      statsObj.getStatsData().getStringStats().setBitVectors(buf);
     }
   }
 
@@ -238,8 +238,8 @@ public class ColumnStatsTask extends Task<ColumnStatsWork> implements Serializab
       statsObj.getStatsData().getDateStats().setLowValue(new Date(v.getDays()));
     } else if (fName.equals("ndvbitvector")) {
       PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
-      String v = ((StringObjectInspector) poi).getPrimitiveJavaObject(o);
-      statsObj.getStatsData().getDateStats().setBitVectors(v);;
+      byte[] buf = ((BinaryObjectInspector) poi).getPrimitiveJavaObject(o);
+      statsObj.getStatsData().getDateStats().setBitVectors(buf);
     }
   }
 
@@ -255,15 +255,15 @@ public class ColumnStatsTask extends Task<ColumnStatsWork> implements Serializab
       ColumnStatisticsData statsData = new ColumnStatisticsData();
 
       if (s.equalsIgnoreCase("long")) {
-        LongColumnStatsData longStats = new LongColumnStatsData();
+        LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector();
         statsData.setLongStats(longStats);
         statsObj.setStatsData(statsData);
       } else if (s.equalsIgnoreCase("double")) {
-        DoubleColumnStatsData doubleStats = new DoubleColumnStatsData();
+        DoubleColumnStatsDataInspector doubleStats = new DoubleColumnStatsDataInspector();
         statsData.setDoubleStats(doubleStats);
         statsObj.setStatsData(statsData);
       } else if (s.equalsIgnoreCase("string")) {
-        StringColumnStatsData stringStats = new StringColumnStatsData();
+        StringColumnStatsDataInspector stringStats = new StringColumnStatsDataInspector();
         statsData.setStringStats(stringStats);
         statsObj.setStatsData(statsData);
       } else if (s.equalsIgnoreCase("boolean")) {
@@ -275,11 +275,11 @@ public class ColumnStatsTask extends Task<ColumnStatsWork> implements Serializab
         statsData.setBinaryStats(binaryStats);
         statsObj.setStatsData(statsData);
       } else if (s.equalsIgnoreCase("decimal")) {
-        DecimalColumnStatsData decimalStats = new DecimalColumnStatsData();
+        DecimalColumnStatsDataInspector decimalStats = new DecimalColumnStatsDataInspector();
         statsData.setDecimalStats(decimalStats);
         statsObj.setStatsData(statsData);
       } else if (s.equalsIgnoreCase("date")) {
-        DateColumnStatsData dateStats = new DateColumnStatsData();
+        DateColumnStatsDataInspector dateStats = new DateColumnStatsDataInspector();
         statsData.setDateStats(dateStats);
         statsObj.setStatsData(statsData);
       }

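[Each "ndvbitvector" branch above now repeats the same two lines for a different stats type. A hypothetical helper that could fold them together (not part of the patch); it relies only on BinaryObjectInspector.getPrimitiveJavaObject returning byte[], exactly as the hunks already do:

    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
    import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;

    // Shared extraction of the serialized NDV sketch for every stats type.
    private static byte[] unpackNdvBitVector(ObjectInspector oi, Object o) {
      PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
      return ((BinaryObjectInspector) poi).getPrimitiveJavaObject(o);
    }

Usage would then read, e.g.:

    statsObj.getStatsData().getLongStats().setBitVectors(unpackNdvBitVector(oi, o));
]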
http://git-wip-us.apache.org/repos/asf/hive/blob/9a36aa90/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsUpdateTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsUpdateTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsUpdateTask.java
index 2acc777..82fbf28 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsUpdateTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsUpdateTask.java
@@ -26,8 +26,6 @@ import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
 
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData;
 import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData;
 import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
@@ -35,14 +33,14 @@ import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
 import org.apache.hadoop.hive.metastore.api.Date;
-import org.apache.hadoop.hive.metastore.api.DateColumnStatsData;
 import org.apache.hadoop.hive.metastore.api.Decimal;
-import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData;
-import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;
-import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
 import org.apache.hadoop.hive.metastore.api.MetaException;
 import org.apache.hadoop.hive.metastore.api.SetPartitionsStatsRequest;
-import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
+import org.apache.hadoop.hive.metastore.columnstats.cache.DateColumnStatsDataInspector;
+import org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector;
+import org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector;
+import org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector;
+import org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector;
 import org.apache.hadoop.hive.ql.CompilationOpContext;
 import org.apache.hadoop.hive.ql.DriverContext;
 import org.apache.hadoop.hive.ql.QueryPlan;
@@ -55,6 +53,8 @@ import org.apache.hadoop.hive.ql.plan.ColumnStatsUpdateWork;
 import org.apache.hadoop.hive.ql.plan.api.StageType;
 import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * ColumnStatsUpdateTask implementation. For example, ALTER TABLE src_stat
@@ -101,7 +101,7 @@ public class ColumnStatsUpdateTask extends Task<ColumnStatsUpdateWork> {
     if (columnType.equalsIgnoreCase("long") || columnType.equalsIgnoreCase("tinyint")
         || columnType.equalsIgnoreCase("smallint") || columnType.equalsIgnoreCase("int")
         || columnType.equalsIgnoreCase("bigint") || columnType.equalsIgnoreCase("timestamp")) {
-      LongColumnStatsData longStats = new LongColumnStatsData();
+      LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector();
       longStats.setNumNullsIsSet(false);
       longStats.setNumDVsIsSet(false);
       longStats.setLowValueIsSet(false);
@@ -125,7 +125,7 @@ public class ColumnStatsUpdateTask extends Task<ColumnStatsUpdateWork> {
       statsData.setLongStats(longStats);
       statsObj.setStatsData(statsData);
     } else if (columnType.equalsIgnoreCase("double") || columnType.equalsIgnoreCase("float")) {
-      DoubleColumnStatsData doubleStats = new DoubleColumnStatsData();
+      DoubleColumnStatsDataInspector doubleStats = new DoubleColumnStatsDataInspector();
       doubleStats.setNumNullsIsSet(false);
       doubleStats.setNumDVsIsSet(false);
       doubleStats.setLowValueIsSet(false);
@@ -150,7 +150,7 @@ public class ColumnStatsUpdateTask extends Task<ColumnStatsUpdateWork> {
       statsObj.setStatsData(statsData);
     } else if (columnType.equalsIgnoreCase("string") || columnType.toLowerCase().startsWith("char")
               || columnType.toLowerCase().startsWith("varchar")) { //char(x),varchar(x) types
-      StringColumnStatsData stringStats = new StringColumnStatsData();
+      StringColumnStatsDataInspector stringStats = new StringColumnStatsDataInspector();
       stringStats.setMaxColLenIsSet(false);
       stringStats.setAvgColLenIsSet(false);
       stringStats.setNumNullsIsSet(false);
@@ -216,7 +216,7 @@ public class ColumnStatsUpdateTask extends Task<ColumnStatsUpdateWork> {
       statsData.setBinaryStats(binaryStats);
       statsObj.setStatsData(statsData);
     } else if (columnType.toLowerCase().startsWith("decimal")) { //decimal(a,b) type
-      DecimalColumnStatsData decimalStats = new DecimalColumnStatsData();
+      DecimalColumnStatsDataInspector decimalStats = new DecimalColumnStatsDataInspector();
       decimalStats.setNumNullsIsSet(false);
       decimalStats.setNumDVsIsSet(false);
       decimalStats.setLowValueIsSet(false);
@@ -244,7 +244,7 @@ public class ColumnStatsUpdateTask extends Task<ColumnStatsUpdateWork> {
       statsData.setDecimalStats(decimalStats);
       statsObj.setStatsData(statsData);
     } else if (columnType.equalsIgnoreCase("date")) {
-      DateColumnStatsData dateStats = new DateColumnStatsData();
+      DateColumnStatsDataInspector dateStats = new DateColumnStatsDataInspector();
       Map<String, String> mapProp = work.getMapProp();
       for (Entry<String, String> entry : mapProp.entrySet()) {
         String fName = entry.getKey();

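[Because the Inspector types are assumed to subclass the Thrift-generated structs (see the sketch after the StringColumnStatsMerger hunk), they drop into the existing ColumnStatisticsData setters and the SetPartitionsStatsRequest RPC without any signature change. A brief illustration:

    ColumnStatisticsData statsData = new ColumnStatisticsData();
    DoubleColumnStatsDataInspector doubleStats = new DoubleColumnStatsDataInspector();
    doubleStats.setNumNulls(0);
    doubleStats.setNumDVs(42);
    // The setter is declared against DoubleColumnStatsData; the subtype is
    // accepted transparently, so downstream Thrift serialization is unchanged.
    statsData.setDoubleStats(doubleStats);
]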
http://git-wip-us.apache.org/repos/asf/hive/blob/9a36aa90/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java
index 2380073..a7f2967 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.hive.ql.metadata.formatting;
 
+import org.apache.commons.codec.binary.Base64;
 import org.apache.commons.lang.StringEscapeUtils;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.conf.HiveConf;
@@ -54,6 +55,7 @@ import org.apache.hive.common.util.HiveStringUtils;
 
 import java.math.BigInteger;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.Date;
@@ -176,6 +178,16 @@ public final class MetaDataFormatUtils {
     return writableValue.toString();
   }
 
+  private static String convertToString(byte[] buf) {
+    if (buf == null || buf.length < 2) {
+      return "";
+    }
+    // Render only the first two bytes, which suffice to identify the
+    // serialized sketch; the rest of the bitvector is opaque binary that
+    // would garble the formatted output.
+    return new String(buf, 0, 2);
+  }
+
   private static ColumnStatisticsObj getColumnStatisticsObject(String colName,
       String colType, List<ColumnStatisticsObj> colStats) {
     if (colStats != null && !colStats.isEmpty()) {
@@ -700,7 +712,7 @@ public final class MetaDataFormatUtils {
         } else if (csd.isSetStringStats()) {
           StringColumnStatsData scsd = csd.getStringStats();
           appendColumnStats(tableInfo, "", "", scsd.getNumNulls(), scsd.getNumDVs(),
-              scsd.getBitVectors() == null ? "" : scsd.getBitVectors(), scsd.getAvgColLen(),
+              convertToString(scsd.getBitVectors()), scsd.getAvgColLen(),
               scsd.getMaxColLen(), "", "");
         } else if (csd.isSetBooleanStats()) {
           BooleanColumnStatsData bcsd = csd.getBooleanStats();
@@ -710,22 +722,26 @@ public final class MetaDataFormatUtils {
           DecimalColumnStatsData dcsd = csd.getDecimalStats();
           appendColumnStats(tableInfo, convertToString(dcsd.getLowValue()),
               convertToString(dcsd.getHighValue()), dcsd.getNumNulls(), dcsd.getNumDVs(),
-              dcsd.getBitVectors() == null ? "" : dcsd.getBitVectors(),
+              convertToString(dcsd.getBitVectors()),
               "", "", "", "");
         } else if (csd.isSetDoubleStats()) {
           DoubleColumnStatsData dcsd = csd.getDoubleStats();
           appendColumnStats(tableInfo, dcsd.getLowValue(), dcsd.getHighValue(), dcsd.getNumNulls(),
-              dcsd.getNumDVs(), dcsd.getBitVectors() == null ? "" : dcsd.getBitVectors(), "", "", "", "");
+              dcsd.getNumDVs(), convertToString(dcsd.getBitVectors()),
+              "", "", "", "");
         } else if (csd.isSetLongStats()) {
           LongColumnStatsData lcsd = csd.getLongStats();
           appendColumnStats(tableInfo, lcsd.getLowValue(), lcsd.getHighValue(), lcsd.getNumNulls(),
-              lcsd.getNumDVs(), lcsd.getBitVectors() == null ? "" : lcsd.getBitVectors(), "", "", "", "");
+              lcsd.getNumDVs(), convertToString(lcsd.getBitVectors()),
+              "", "", "", "");
         } else if (csd.isSetDateStats()) {
           DateColumnStatsData dcsd = csd.getDateStats();
           appendColumnStats(tableInfo,
               convertToString(dcsd.getLowValue()),
               convertToString(dcsd.getHighValue()),
-              dcsd.getNumNulls(), dcsd.getNumDVs(), dcsd.getBitVectors() == null ? "" : dcsd.getBitVectors(), "", "", "", "");
+              dcsd.getNumNulls(), dcsd.getNumDVs(),
+              convertToString(dcsd.getBitVectors()),
+              "", "", "", "");
         }
       } else {
         appendColumnStats(tableInfo, "", "", "", "", "", "", "", "", "");