You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by px...@apache.org on 2016/01/29 06:25:54 UTC

[1/4] hive git commit: HIVE-12763: Use bit vector to track NDV (Pengcheng Xiong, reviewed by Laljo John Pullokkaran and Alan Gates)

Repository: hive
Updated Branches:
  refs/heads/master 0c7f2d66b -> 7b2f6703f


http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseStoreBitVector.java
----------------------------------------------------------------------
diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseStoreBitVector.java b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseStoreBitVector.java
new file mode 100644
index 0000000..b0d7662
--- /dev/null
+++ b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseStoreBitVector.java
@@ -0,0 +1,634 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hadoop.hive.metastore.hbase;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.client.HTableInterface;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
+import org.apache.hadoop.hive.metastore.api.Database;
+import org.apache.hadoop.hive.metastore.api.Decimal;
+import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.Function;
+import org.apache.hadoop.hive.metastore.api.FunctionType;
+import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
+import org.apache.hadoop.hive.metastore.api.Order;
+import org.apache.hadoop.hive.metastore.api.Partition;
+import org.apache.hadoop.hive.metastore.api.PrincipalType;
+import org.apache.hadoop.hive.metastore.api.ResourceType;
+import org.apache.hadoop.hive.metastore.api.ResourceUri;
+import org.apache.hadoop.hive.metastore.api.Role;
+import org.apache.hadoop.hive.metastore.api.SerDeInfo;
+import org.apache.hadoop.hive.metastore.api.SkewedInfo;
+import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
+import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.ExpectedException;
+import org.mockito.Mock;
+import org.mockito.MockitoAnnotations;
+
+import java.io.IOException;
+import java.security.MessageDigest;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+/**
+ *
+ */
+public class TestHBaseStoreBitVector {
+  private static final Logger LOG = LoggerFactory.getLogger(TestHBaseStoreBitVector.class.getName());
+  static Map<String, String> emptyParameters = new HashMap<String, String>();
+  // Table with NUM_PART_KEYS partitioning keys and NUM_PARTITIONS values per key
+  static final int NUM_PART_KEYS = 1;
+  static final int NUM_PARTITIONS = 5;
+  static final String DB = "db";
+  static final String TBL = "tbl";
+  static final String COL = "col";
+  static final String PART_KEY_PREFIX = "part";
+  static final String PART_VAL_PREFIX = "val";
+  static final String PART_KV_SEPARATOR = "=";
+  static final List<String> PART_KEYS = new ArrayList<String>();
+  static final List<String> PART_VALS = new ArrayList<String>();
+  // Initialize mock partitions
+  static {
+    for (int i = 1; i <= NUM_PART_KEYS; i++) {
+      PART_KEYS.add(PART_KEY_PREFIX + i);
+    }
+    for (int i = 1; i <= NUM_PARTITIONS; i++) {
+      PART_VALS.add(PART_VAL_PREFIX + i);
+    }
+  }
+  static final long DEFAULT_TIME = System.currentTimeMillis();
+  static final String PART_KEY = "part";
+  static final String LONG_COL = "longCol";
+  static final String LONG_TYPE = "long";
+  static final String INT_TYPE = "int";
+  static final String INT_VAL = "1234";
+  static final String DOUBLE_COL = "doubleCol";
+  static final String DOUBLE_TYPE = "double";
+  static final String DOUBLE_VAL = "3.1415";
+  static final String STRING_COL = "stringCol";
+  static final String STRING_TYPE = "string";
+  static final String STRING_VAL = "stringval";
+  static final String DECIMAL_COL = "decimalCol";
+  static final String DECIMAL_TYPE = "decimal(5,3)";
+  static final String DECIMAL_VAL = "12.123";
+  static List<ColumnStatisticsObj> longColStatsObjs = new ArrayList<ColumnStatisticsObj>(
+      NUM_PARTITIONS);
+  static List<ColumnStatisticsObj> doubleColStatsObjs = new ArrayList<ColumnStatisticsObj>(
+      NUM_PARTITIONS);
+  static List<ColumnStatisticsObj> stringColStatsObjs = new ArrayList<ColumnStatisticsObj>(
+      NUM_PARTITIONS);
+  static List<ColumnStatisticsObj> decimalColStatsObjs = new ArrayList<ColumnStatisticsObj>(
+      NUM_PARTITIONS);
+
+  @Rule public ExpectedException thrown = ExpectedException.none();
+  @Mock HTableInterface htable;
+  SortedMap<String, Cell> rows = new TreeMap<>();
+  HBaseStore store;
+
+
+  @BeforeClass
+  public static void beforeTest() {
+    // All data intitializations
+    populateMockStats();
+  }
+
+  private static void populateMockStats() {
+    ColumnStatisticsObj statsObj;
+    // Add NUM_PARTITIONS ColumnStatisticsObj of each type
+    // For aggregate stats test, we'll treat each ColumnStatisticsObj as stats for 1 partition
+    // For the rest, we'll just pick the 1st ColumnStatisticsObj from this list and use it
+    for (int i = 0; i < NUM_PARTITIONS; i++) {
+      statsObj = mockLongStats(i);
+      longColStatsObjs.add(statsObj);
+      statsObj = mockDoubleStats(i);
+      doubleColStatsObjs.add(statsObj);
+      statsObj = mockStringStats(i);
+      stringColStatsObjs.add(statsObj);
+      statsObj = mockDecimalStats(i);
+      decimalColStatsObjs.add(statsObj);
+    }
+  }
+
+  private static ColumnStatisticsObj mockLongStats(int i) {
+    long high = 120938479124L + 100*i;
+    long low = -12341243213412124L - 50*i;
+    long nulls = 23 + i;
+    long dVs = 213L + 10*i;
+    String bitVectors = "{0, 1, 2, 3, 4, 5, 6, 7, 8}{1, 2, 3, 4, 5, 6, 7, 8}";
+    ColumnStatisticsObj colStatsObj = new ColumnStatisticsObj();
+    colStatsObj.setColName(LONG_COL);
+    colStatsObj.setColType(LONG_TYPE);
+    ColumnStatisticsData data = new ColumnStatisticsData();
+    LongColumnStatsData longData = new LongColumnStatsData();
+    longData.setHighValue(high);
+    longData.setLowValue(low);
+    longData.setNumNulls(nulls);
+    longData.setNumDVs(dVs);
+    longData.setBitVectors(bitVectors);
+    data.setLongStats(longData);
+    colStatsObj.setStatsData(data);
+    return colStatsObj;
+  }
+
+  private static ColumnStatisticsObj mockDoubleStats(int i) {
+    double high = 123423.23423 + 100*i;
+    double low = 0.00001234233 - 50*i;
+    long nulls = 92 + i;
+    long dVs = 1234123421L + 10*i;
+    String bitVectors = "{0, 1, 2, 3, 4, 5, 6, 7, 8}{0, 2, 3, 4, 5, 6, 7, 8}";
+    ColumnStatisticsObj colStatsObj = new ColumnStatisticsObj();
+    colStatsObj.setColName(DOUBLE_COL);
+    colStatsObj.setColType(DOUBLE_TYPE);
+    ColumnStatisticsData data = new ColumnStatisticsData();
+    DoubleColumnStatsData doubleData = new DoubleColumnStatsData();
+    doubleData.setHighValue(high);
+    doubleData.setLowValue(low);
+    doubleData.setNumNulls(nulls);
+    doubleData.setNumDVs(dVs);
+    doubleData.setBitVectors(bitVectors);
+    data.setDoubleStats(doubleData);
+    colStatsObj.setStatsData(data);
+    return colStatsObj;
+  }
+
+  private static ColumnStatisticsObj mockStringStats(int i) {
+    long maxLen = 1234 + 10*i;
+    double avgLen = 32.3 + i;
+    long nulls = 987 + 10*i;
+    long dVs = 906 + i;
+    String bitVectors = "{0, 1, 2, 3, 4, 5, 6, 7, 8}{0, 1, 3, 4, 5, 6, 7, 8}";
+    ColumnStatisticsObj colStatsObj = new ColumnStatisticsObj();
+    colStatsObj.setColName(STRING_COL);
+    colStatsObj.setColType(STRING_TYPE);
+    ColumnStatisticsData data = new ColumnStatisticsData();
+    StringColumnStatsData stringData = new StringColumnStatsData();
+    stringData.setMaxColLen(maxLen);
+    stringData.setAvgColLen(avgLen);
+    stringData.setNumNulls(nulls);
+    stringData.setNumDVs(dVs);
+    stringData.setBitVectors(bitVectors);
+    data.setStringStats(stringData);
+    colStatsObj.setStatsData(data);
+    return colStatsObj;
+  }
+
+  private static ColumnStatisticsObj mockDecimalStats(int i) {
+    Decimal high = new Decimal();
+    high.setScale((short)3);
+    String strHigh = String.valueOf(3876 + 100*i);
+    high.setUnscaled(strHigh.getBytes());
+    Decimal low = new Decimal();
+    low.setScale((short)3);
+    String strLow = String.valueOf(38 + i);
+    low.setUnscaled(strLow.getBytes());
+    long nulls = 13 + i;
+    long dVs = 923947293L + 100*i;
+    String bitVectors = "{0, 1, 2, 3, 4, 5, 6, 7, 8}{0, 1, 2, 4, 5, 6, 7, 8}";
+    ColumnStatisticsObj colStatsObj = new ColumnStatisticsObj();
+    colStatsObj.setColName(DECIMAL_COL);
+    colStatsObj.setColType(DECIMAL_TYPE);
+    ColumnStatisticsData data = new ColumnStatisticsData();
+    DecimalColumnStatsData decimalData = new DecimalColumnStatsData();
+    decimalData.setHighValue(high);
+    decimalData.setLowValue(low);
+    decimalData.setNumNulls(nulls);
+    decimalData.setNumDVs(dVs);
+    decimalData.setBitVectors(bitVectors);
+    data.setDecimalStats(decimalData);
+    colStatsObj.setStatsData(data);
+    return colStatsObj;
+  }
+
+  @AfterClass
+  public static void afterTest() {
+  }
+
+
+  @Before
+  public void init() throws IOException {
+    MockitoAnnotations.initMocks(this);
+    HiveConf conf = new HiveConf();
+    conf.setBoolean(HBaseReadWrite.NO_CACHE_CONF, true);
+    store = MockUtils.init(conf, htable, rows);
+  }
+
+  @Test
+  public void longTableStatistics() throws Exception {
+    createMockTable(LONG_COL, LONG_TYPE);
+    ColumnStatistics stats = new ColumnStatistics();
+    // Get a default ColumnStatisticsDesc for table level stats
+    ColumnStatisticsDesc desc = getMockTblColStatsDesc();
+    stats.setStatsDesc(desc);
+    // Get one of the pre-created ColumnStatisticsObj
+    ColumnStatisticsObj obj = longColStatsObjs.get(0);
+    LongColumnStatsData longData = obj.getStatsData().getLongStats();
+    // Add to DB
+    stats.addToStatsObj(obj);
+    store.updateTableColumnStatistics(stats);
+    // Get from DB
+    ColumnStatistics statsFromDB = store.getTableColumnStatistics(DB, TBL, Arrays.asList(LONG_COL));
+    // Compare ColumnStatisticsDesc
+    Assert.assertEquals(desc.getLastAnalyzed(), statsFromDB.getStatsDesc().getLastAnalyzed());
+    Assert.assertEquals(DB, statsFromDB.getStatsDesc().getDbName());
+    Assert.assertEquals(TBL, statsFromDB.getStatsDesc().getTableName());
+    Assert.assertTrue(statsFromDB.getStatsDesc().isIsTblLevel());
+    // Compare ColumnStatisticsObj
+    Assert.assertEquals(1, statsFromDB.getStatsObjSize());
+    ColumnStatisticsObj objFromDB = statsFromDB.getStatsObj().get(0);
+    ColumnStatisticsData dataFromDB = objFromDB.getStatsData();
+    // Compare ColumnStatisticsData
+    Assert.assertEquals(ColumnStatisticsData._Fields.LONG_STATS, dataFromDB.getSetField());
+    // Compare LongColumnStatsData
+    LongColumnStatsData longDataFromDB = dataFromDB.getLongStats();
+    Assert.assertEquals(longData.getHighValue(), longDataFromDB.getHighValue());
+    Assert.assertEquals(longData.getLowValue(), longDataFromDB.getLowValue());
+    Assert.assertEquals(longData.getNumNulls(), longDataFromDB.getNumNulls());
+    Assert.assertEquals(longData.getNumDVs(), longDataFromDB.getNumDVs());
+    Assert.assertEquals(longData.getBitVectors(), longDataFromDB.getBitVectors());
+  }
+
+  @Test
+  public void doubleTableStatistics() throws Exception {
+    createMockTable(DOUBLE_COL, DOUBLE_TYPE);
+    ColumnStatistics stats = new ColumnStatistics();
+    // Get a default ColumnStatisticsDesc for table level stats
+    ColumnStatisticsDesc desc = getMockTblColStatsDesc();
+    stats.setStatsDesc(desc);
+    // Get one of the pre-created ColumnStatisticsObj
+    ColumnStatisticsObj obj = doubleColStatsObjs.get(0);
+    DoubleColumnStatsData doubleData = obj.getStatsData().getDoubleStats();
+    // Add to DB
+    stats.addToStatsObj(obj);
+    store.updateTableColumnStatistics(stats);
+    // Get from DB
+    ColumnStatistics statsFromDB = store.getTableColumnStatistics(DB, TBL, Arrays.asList(DOUBLE_COL));
+    // Compare ColumnStatisticsDesc
+    Assert.assertEquals(desc.getLastAnalyzed(), statsFromDB.getStatsDesc().getLastAnalyzed());
+    Assert.assertEquals(DB, statsFromDB.getStatsDesc().getDbName());
+    Assert.assertEquals(TBL, statsFromDB.getStatsDesc().getTableName());
+    Assert.assertTrue(statsFromDB.getStatsDesc().isIsTblLevel());
+    // Compare ColumnStatisticsObj
+    Assert.assertEquals(1, statsFromDB.getStatsObjSize());
+    ColumnStatisticsObj objFromDB = statsFromDB.getStatsObj().get(0);
+    ColumnStatisticsData dataFromDB = objFromDB.getStatsData();
+    // Compare ColumnStatisticsData
+    Assert.assertEquals(ColumnStatisticsData._Fields.DOUBLE_STATS, dataFromDB.getSetField());
+    // Compare DoubleColumnStatsData
+    DoubleColumnStatsData doubleDataFromDB = dataFromDB.getDoubleStats();
+    Assert.assertEquals(doubleData.getHighValue(), doubleDataFromDB.getHighValue(), 0.01);
+    Assert.assertEquals(doubleData.getLowValue(), doubleDataFromDB.getLowValue(), 0.01);
+    Assert.assertEquals(doubleData.getNumNulls(), doubleDataFromDB.getNumNulls());
+    Assert.assertEquals(doubleData.getNumDVs(), doubleDataFromDB.getNumDVs());
+    Assert.assertEquals(doubleData.getBitVectors(), doubleDataFromDB.getBitVectors());
+  }
+
+  @Test
+  public void stringTableStatistics() throws Exception {
+    createMockTable(STRING_COL, STRING_TYPE);
+    ColumnStatistics stats = new ColumnStatistics();
+    // Get a default ColumnStatisticsDesc for table level stats
+    ColumnStatisticsDesc desc = getMockTblColStatsDesc();
+    stats.setStatsDesc(desc);
+    // Get one of the pre-created ColumnStatisticsObj
+    ColumnStatisticsObj obj = stringColStatsObjs.get(0);
+    StringColumnStatsData stringData = obj.getStatsData().getStringStats();
+    // Add to DB
+    stats.addToStatsObj(obj);
+    store.updateTableColumnStatistics(stats);
+    // Get from DB
+    ColumnStatistics statsFromDB = store.getTableColumnStatistics(DB, TBL, Arrays.asList(STRING_COL));
+    // Compare ColumnStatisticsDesc
+    Assert.assertEquals(desc.getLastAnalyzed(), statsFromDB.getStatsDesc().getLastAnalyzed());
+    Assert.assertEquals(DB, statsFromDB.getStatsDesc().getDbName());
+    Assert.assertEquals(TBL, statsFromDB.getStatsDesc().getTableName());
+    Assert.assertTrue(statsFromDB.getStatsDesc().isIsTblLevel());
+    // Compare ColumnStatisticsObj
+    Assert.assertEquals(1, statsFromDB.getStatsObjSize());
+    ColumnStatisticsObj objFromDB = statsFromDB.getStatsObj().get(0);
+    ColumnStatisticsData dataFromDB = objFromDB.getStatsData();
+    // Compare ColumnStatisticsData
+    Assert.assertEquals(ColumnStatisticsData._Fields.STRING_STATS, dataFromDB.getSetField());
+    // Compare StringColumnStatsData
+    StringColumnStatsData stringDataFromDB = dataFromDB.getStringStats();
+    Assert.assertEquals(stringData.getMaxColLen(), stringDataFromDB.getMaxColLen());
+    Assert.assertEquals(stringData.getAvgColLen(), stringDataFromDB.getAvgColLen(), 0.01);
+    Assert.assertEquals(stringData.getNumNulls(), stringDataFromDB.getNumNulls());
+    Assert.assertEquals(stringData.getNumDVs(), stringDataFromDB.getNumDVs());
+    Assert.assertEquals(stringData.getBitVectors(), stringDataFromDB.getBitVectors());
+  }
+
+  @Test
+  public void decimalTableStatistics() throws Exception {
+    createMockTable(DECIMAL_COL, DECIMAL_TYPE);
+    ColumnStatistics stats = new ColumnStatistics();
+    // Get a default ColumnStatisticsDesc for table level stats
+    ColumnStatisticsDesc desc = getMockTblColStatsDesc();
+    stats.setStatsDesc(desc);
+    // Get one of the pre-created ColumnStatisticsObj
+    ColumnStatisticsObj obj = decimalColStatsObjs.get(0);
+    DecimalColumnStatsData decimalData = obj.getStatsData().getDecimalStats();
+    // Add to DB
+    stats.addToStatsObj(obj);
+    store.updateTableColumnStatistics(stats);
+    // Get from DB
+    ColumnStatistics statsFromDB = store.getTableColumnStatistics(DB, TBL, Arrays.asList(DECIMAL_COL));
+    // Compare ColumnStatisticsDesc
+    Assert.assertEquals(desc.getLastAnalyzed(), statsFromDB.getStatsDesc().getLastAnalyzed());
+    Assert.assertEquals(DB, statsFromDB.getStatsDesc().getDbName());
+    Assert.assertEquals(TBL, statsFromDB.getStatsDesc().getTableName());
+    Assert.assertTrue(statsFromDB.getStatsDesc().isIsTblLevel());
+    // Compare ColumnStatisticsObj
+    Assert.assertEquals(1, statsFromDB.getStatsObjSize());
+    ColumnStatisticsObj objFromDB = statsFromDB.getStatsObj().get(0);
+    ColumnStatisticsData dataFromDB = objFromDB.getStatsData();
+    // Compare ColumnStatisticsData
+    Assert.assertEquals(ColumnStatisticsData._Fields.DECIMAL_STATS, dataFromDB.getSetField());
+    // Compare DecimalColumnStatsData
+    DecimalColumnStatsData decimalDataFromDB = dataFromDB.getDecimalStats();
+    Assert.assertEquals(decimalData.getHighValue(), decimalDataFromDB.getHighValue());
+    Assert.assertEquals(decimalData.getLowValue(), decimalDataFromDB.getLowValue());
+    Assert.assertEquals(decimalData.getNumNulls(), decimalDataFromDB.getNumNulls());
+    Assert.assertEquals(decimalData.getNumDVs(), decimalDataFromDB.getNumDVs());
+    Assert.assertEquals(decimalData.getBitVectors(), decimalDataFromDB.getBitVectors());
+  }
+
+  @Test
+  public void longPartitionStatistics() throws Exception {
+    createMockTableAndPartition(INT_TYPE, INT_VAL);
+    // Add partition stats for: LONG_COL and partition: {PART_KEY, INT_VAL} to DB
+    // Because of the way our mock implementation works we actually need to not create the table
+    // before we set statistics on it.
+    ColumnStatistics stats = new ColumnStatistics();
+    // Get a default ColumnStatisticsDesc for partition level stats
+    ColumnStatisticsDesc desc = getMockPartColStatsDesc(PART_KEY, INT_VAL);
+    stats.setStatsDesc(desc);
+    // Get one of the pre-created ColumnStatisticsObj
+    ColumnStatisticsObj obj = longColStatsObjs.get(0);
+    LongColumnStatsData longData = obj.getStatsData().getLongStats();
+    // Add to DB
+    stats.addToStatsObj(obj);
+    List<String> parVals = new ArrayList<String>();
+    parVals.add(INT_VAL);
+    store.updatePartitionColumnStatistics(stats, parVals);
+    // Get from DB
+    List<String> partNames = new ArrayList<String>();
+    partNames.add(desc.getPartName());
+    List<String> colNames = new ArrayList<String>();
+    colNames.add(obj.getColName());
+    List<ColumnStatistics> statsFromDB = store.getPartitionColumnStatistics(DB, TBL, partNames, colNames);
+    // Compare ColumnStatisticsDesc
+    Assert.assertEquals(1, statsFromDB.size());
+    Assert.assertEquals(desc.getLastAnalyzed(), statsFromDB.get(0).getStatsDesc().getLastAnalyzed());
+    Assert.assertEquals(DB, statsFromDB.get(0).getStatsDesc().getDbName());
+    Assert.assertEquals(TBL, statsFromDB.get(0).getStatsDesc().getTableName());
+    Assert.assertFalse(statsFromDB.get(0).getStatsDesc().isIsTblLevel());
+    // Compare ColumnStatisticsObj
+    Assert.assertEquals(1, statsFromDB.get(0).getStatsObjSize());
+    ColumnStatisticsObj objFromDB = statsFromDB.get(0).getStatsObj().get(0);
+    ColumnStatisticsData dataFromDB = objFromDB.getStatsData();
+    // Compare ColumnStatisticsData
+    Assert.assertEquals(ColumnStatisticsData._Fields.LONG_STATS, dataFromDB.getSetField());
+    // Compare LongColumnStatsData
+    LongColumnStatsData longDataFromDB = dataFromDB.getLongStats();
+    Assert.assertEquals(longData.getHighValue(), longDataFromDB.getHighValue());
+    Assert.assertEquals(longData.getLowValue(), longDataFromDB.getLowValue());
+    Assert.assertEquals(longData.getNumNulls(), longDataFromDB.getNumNulls());
+    Assert.assertEquals(longData.getNumDVs(), longDataFromDB.getNumDVs());
+    Assert.assertEquals(longData.getBitVectors(), longDataFromDB.getBitVectors());
+  }
+
+  @Test
+  public void doublePartitionStatistics() throws Exception {
+    createMockTableAndPartition(DOUBLE_TYPE, DOUBLE_VAL);
+    // Add partition stats for: DOUBLE_COL and partition: {PART_KEY, DOUBLE_VAL} to DB
+    // Because of the way our mock implementation works we actually need to not create the table
+    // before we set statistics on it.
+    ColumnStatistics stats = new ColumnStatistics();
+    // Get a default ColumnStatisticsDesc for partition level stats
+    ColumnStatisticsDesc desc = getMockPartColStatsDesc(PART_KEY, DOUBLE_VAL);
+    stats.setStatsDesc(desc);
+    // Get one of the pre-created ColumnStatisticsObj
+    ColumnStatisticsObj obj = doubleColStatsObjs.get(0);
+    DoubleColumnStatsData doubleData = obj.getStatsData().getDoubleStats();
+    // Add to DB
+    stats.addToStatsObj(obj);
+    List<String> parVals = new ArrayList<String>();
+    parVals.add(DOUBLE_VAL);
+    store.updatePartitionColumnStatistics(stats, parVals);
+    // Get from DB
+    List<String> partNames = new ArrayList<String>();
+    partNames.add(desc.getPartName());
+    List<String> colNames = new ArrayList<String>();
+    colNames.add(obj.getColName());
+    List<ColumnStatistics> statsFromDB = store.getPartitionColumnStatistics(DB, TBL, partNames, colNames);
+    // Compare ColumnStatisticsDesc
+    Assert.assertEquals(1, statsFromDB.size());
+    Assert.assertEquals(desc.getLastAnalyzed(), statsFromDB.get(0).getStatsDesc().getLastAnalyzed());
+    Assert.assertEquals(DB, statsFromDB.get(0).getStatsDesc().getDbName());
+    Assert.assertEquals(TBL, statsFromDB.get(0).getStatsDesc().getTableName());
+    Assert.assertFalse(statsFromDB.get(0).getStatsDesc().isIsTblLevel());
+    // Compare ColumnStatisticsObj
+    Assert.assertEquals(1, statsFromDB.get(0).getStatsObjSize());
+    ColumnStatisticsObj objFromDB = statsFromDB.get(0).getStatsObj().get(0);
+    ColumnStatisticsData dataFromDB = objFromDB.getStatsData();
+    // Compare ColumnStatisticsData
+    Assert.assertEquals(ColumnStatisticsData._Fields.DOUBLE_STATS, dataFromDB.getSetField());
+    // Compare DoubleColumnStatsData
+    DoubleColumnStatsData doubleDataFromDB = dataFromDB.getDoubleStats();
+    Assert.assertEquals(doubleData.getHighValue(), doubleDataFromDB.getHighValue(), 0.01);
+    Assert.assertEquals(doubleData.getLowValue(), doubleDataFromDB.getLowValue(), 0.01);
+    Assert.assertEquals(doubleData.getNumNulls(), doubleDataFromDB.getNumNulls());
+    Assert.assertEquals(doubleData.getNumDVs(), doubleDataFromDB.getNumDVs());
+    Assert.assertEquals(doubleData.getBitVectors(), doubleDataFromDB.getBitVectors());
+  }
+
+  @Test
+  public void stringPartitionStatistics() throws Exception {
+    createMockTableAndPartition(STRING_TYPE, STRING_VAL);
+    // Add partition stats for: STRING_COL and partition: {PART_KEY, STRING_VAL} to DB
+    // Because of the way our mock implementation works we actually need to not create the table
+    // before we set statistics on it.
+    ColumnStatistics stats = new ColumnStatistics();
+    // Get a default ColumnStatisticsDesc for partition level stats
+    ColumnStatisticsDesc desc = getMockPartColStatsDesc(PART_KEY, STRING_VAL);
+    stats.setStatsDesc(desc);
+    // Get one of the pre-created ColumnStatisticsObj
+    ColumnStatisticsObj obj = stringColStatsObjs.get(0);
+    StringColumnStatsData stringData = obj.getStatsData().getStringStats();
+    // Add to DB
+    stats.addToStatsObj(obj);
+    List<String> parVals = new ArrayList<String>();
+    parVals.add(STRING_VAL);
+    store.updatePartitionColumnStatistics(stats, parVals);
+    // Get from DB
+    List<String> partNames = new ArrayList<String>();
+    partNames.add(desc.getPartName());
+    List<String> colNames = new ArrayList<String>();
+    colNames.add(obj.getColName());
+    List<ColumnStatistics> statsFromDB = store.getPartitionColumnStatistics(DB, TBL, partNames, colNames);
+    // Compare ColumnStatisticsDesc
+    Assert.assertEquals(1, statsFromDB.size());
+    Assert.assertEquals(desc.getLastAnalyzed(), statsFromDB.get(0).getStatsDesc().getLastAnalyzed());
+    Assert.assertEquals(DB, statsFromDB.get(0).getStatsDesc().getDbName());
+    Assert.assertEquals(TBL, statsFromDB.get(0).getStatsDesc().getTableName());
+    Assert.assertFalse(statsFromDB.get(0).getStatsDesc().isIsTblLevel());
+    // Compare ColumnStatisticsObj
+    Assert.assertEquals(1, statsFromDB.get(0).getStatsObjSize());
+    ColumnStatisticsObj objFromDB = statsFromDB.get(0).getStatsObj().get(0);
+    ColumnStatisticsData dataFromDB = objFromDB.getStatsData();
+    // Compare ColumnStatisticsData
+    Assert.assertEquals(ColumnStatisticsData._Fields.STRING_STATS, dataFromDB.getSetField());
+    // Compare StringColumnStatsData
+    StringColumnStatsData stringDataFromDB = dataFromDB.getStringStats();
+    Assert.assertEquals(stringData.getMaxColLen(), stringDataFromDB.getMaxColLen());
+    Assert.assertEquals(stringData.getAvgColLen(), stringDataFromDB.getAvgColLen(), 0.01);
+    Assert.assertEquals(stringData.getNumNulls(), stringDataFromDB.getNumNulls());
+    Assert.assertEquals(stringData.getNumDVs(), stringDataFromDB.getNumDVs());
+    Assert.assertEquals(stringData.getBitVectors(), stringDataFromDB.getBitVectors());
+  }
+
+  @Test
+  public void decimalPartitionStatistics() throws Exception {
+    createMockTableAndPartition(DECIMAL_TYPE, DECIMAL_VAL);
+    // Add partition stats for: DECIMAL_COL and partition: {PART_KEY, DECIMAL_VAL} to DB
+    // Because of the way our mock implementation works we actually need to not create the table
+    // before we set statistics on it.
+    ColumnStatistics stats = new ColumnStatistics();
+    // Get a default ColumnStatisticsDesc for partition level stats
+    ColumnStatisticsDesc desc = getMockPartColStatsDesc(PART_KEY, DECIMAL_VAL);
+    stats.setStatsDesc(desc);
+    // Get one of the pre-created ColumnStatisticsObj
+    ColumnStatisticsObj obj = decimalColStatsObjs.get(0);
+    DecimalColumnStatsData decimalData = obj.getStatsData().getDecimalStats();
+    // Add to DB
+    stats.addToStatsObj(obj);
+    List<String> parVals = new ArrayList<String>();
+    parVals.add(DECIMAL_VAL);
+    store.updatePartitionColumnStatistics(stats, parVals);
+    // Get from DB
+    List<String> partNames = new ArrayList<String>();
+    partNames.add(desc.getPartName());
+    List<String> colNames = new ArrayList<String>();
+    colNames.add(obj.getColName());
+    List<ColumnStatistics> statsFromDB = store.getPartitionColumnStatistics(DB, TBL, partNames, colNames);
+    // Compare ColumnStatisticsDesc
+    Assert.assertEquals(1, statsFromDB.size());
+    Assert.assertEquals(desc.getLastAnalyzed(), statsFromDB.get(0).getStatsDesc().getLastAnalyzed());
+    Assert.assertEquals(DB, statsFromDB.get(0).getStatsDesc().getDbName());
+    Assert.assertEquals(TBL, statsFromDB.get(0).getStatsDesc().getTableName());
+    Assert.assertFalse(statsFromDB.get(0).getStatsDesc().isIsTblLevel());
+    // Compare ColumnStatisticsObj
+    Assert.assertEquals(1, statsFromDB.get(0).getStatsObjSize());
+    ColumnStatisticsObj objFromDB = statsFromDB.get(0).getStatsObj().get(0);
+    ColumnStatisticsData dataFromDB = objFromDB.getStatsData();
+    // Compare ColumnStatisticsData
+    Assert.assertEquals(ColumnStatisticsData._Fields.DECIMAL_STATS, dataFromDB.getSetField());
+    // Compare DecimalColumnStatsData
+    DecimalColumnStatsData decimalDataFromDB = dataFromDB.getDecimalStats();
+    Assert.assertEquals(decimalData.getHighValue(), decimalDataFromDB.getHighValue());
+    Assert.assertEquals(decimalData.getLowValue(), decimalDataFromDB.getLowValue());
+    Assert.assertEquals(decimalData.getNumNulls(), decimalDataFromDB.getNumNulls());
+    Assert.assertEquals(decimalData.getNumDVs(), decimalDataFromDB.getNumDVs());
+    Assert.assertEquals(decimalData.getBitVectors(), decimalDataFromDB.getBitVectors());
+  }
+
+  private Table createMockTable(String name, String type) throws Exception {
+    List<FieldSchema> cols = new ArrayList<FieldSchema>();
+    cols.add(new FieldSchema(name, type, ""));
+    SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
+    Map<String, String> params = new HashMap<String, String>();
+    params.put("key", "value");
+    StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 17,
+        serde, new ArrayList<String>(), new ArrayList<Order>(), params);
+    int currentTime = (int)(System.currentTimeMillis() / 1000);
+    Table table = new Table(TBL, DB, "me", currentTime, currentTime, 0, sd, cols,
+        emptyParameters, null, null, null);
+    store.createTable(table);
+    return table;
+  }
+
+  private Table createMockTableAndPartition(String partType, String partVal) throws Exception {
+    List<FieldSchema> cols = new ArrayList<FieldSchema>();
+    cols.add(new FieldSchema("col1", partType, ""));
+    List<String> vals = new ArrayList<String>();
+    vals.add(partVal);
+    SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
+    Map<String, String> params = new HashMap<String, String>();
+    params.put("key", "value");
+    StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 17,
+        serde, Arrays.asList("bucketcol"), Arrays.asList(new Order("sortcol", 1)), params);
+    int currentTime = (int)(System.currentTimeMillis() / 1000);
+    Table table = new Table(TBL, DB, "me", currentTime, currentTime, 0, sd, cols,
+        emptyParameters, null, null, null);
+    store.createTable(table);
+    Partition part = new Partition(vals, DB, TBL, currentTime, currentTime, sd,
+        emptyParameters);
+    store.addPartition(part);
+    return table;
+  }
+  /**
+   * Returns a dummy table level ColumnStatisticsDesc with default values
+   */
+  private ColumnStatisticsDesc getMockTblColStatsDesc() {
+    ColumnStatisticsDesc desc = new ColumnStatisticsDesc();
+    desc.setLastAnalyzed(DEFAULT_TIME);
+    desc.setDbName(DB);
+    desc.setTableName(TBL);
+    desc.setIsTblLevel(true);
+    return desc;
+  }
+
+  /**
+   * Returns a dummy partition level ColumnStatisticsDesc
+   */
+  private ColumnStatisticsDesc getMockPartColStatsDesc(String partKey, String partVal) {
+    ColumnStatisticsDesc desc = new ColumnStatisticsDesc();
+    desc.setLastAnalyzed(DEFAULT_TIME);
+    desc.setDbName(DB);
+    desc.setTableName(TBL);
+    // part1=val1
+    desc.setPartName(partKey + PART_KV_SEPARATOR + partVal);
+    desc.setIsTblLevel(false);
+    return desc;
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/ql/pom.xml
----------------------------------------------------------------------
diff --git a/ql/pom.xml b/ql/pom.xml
index 358cd2a..f19a225 100644
--- a/ql/pom.xml
+++ b/ql/pom.xml
@@ -397,11 +397,6 @@
       <version>${guava.version}</version>
     </dependency>
     <dependency>
-      <groupId>com.google.protobuf</groupId>
-      <artifactId>protobuf-java</artifactId>
-      <version>${protobuf.version}</version>
-    </dependency>
-    <dependency>
       <groupId>com.googlecode.javaewah</groupId>
       <artifactId>JavaEWAH</artifactId>
       <version>${javaewah.version}</version>

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
index 7914471..f9a9fd2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
@@ -121,6 +121,10 @@ public class ColumnStatsTask extends Task<ColumnStatsWork> implements Serializab
     } else if (fName.equals("min")) {
       double d = ((DoubleObjectInspector) oi).get(o);
       statsObj.getStatsData().getDoubleStats().setLowValue(d);
+    } else if (fName.equals("ndvbitvector")) {
+      PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
+      String v = ((StringObjectInspector) poi).getPrimitiveJavaObject(o);
+      statsObj.getStatsData().getDoubleStats().setBitVectors(v);
     }
   }
 
@@ -138,6 +142,10 @@ public class ColumnStatsTask extends Task<ColumnStatsWork> implements Serializab
     } else if (fName.equals("min")) {
       HiveDecimal d = ((HiveDecimalObjectInspector) oi).getPrimitiveJavaObject(o);
       statsObj.getStatsData().getDecimalStats().setLowValue(convertToThriftDecimal(d));
+    } else if (fName.equals("ndvbitvector")) {
+      PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
+      String v = ((StringObjectInspector) poi).getPrimitiveJavaObject(o);
+      statsObj.getStatsData().getDecimalStats().setBitVectors(v);
     }
   }
 
@@ -159,6 +167,10 @@ public class ColumnStatsTask extends Task<ColumnStatsWork> implements Serializab
     } else if (fName.equals("min")) {
       long  v = ((LongObjectInspector) oi).get(o);
       statsObj.getStatsData().getLongStats().setLowValue(v);
+    } else if (fName.equals("ndvbitvector")) {
+      PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
+      String v = ((StringObjectInspector) poi).getPrimitiveJavaObject(o);
+      statsObj.getStatsData().getLongStats().setBitVectors(v);
     }
   }
 
@@ -176,6 +188,10 @@ public class ColumnStatsTask extends Task<ColumnStatsWork> implements Serializab
     } else if (fName.equals("maxlength")) {
       long v = ((LongObjectInspector) oi).get(o);
       statsObj.getStatsData().getStringStats().setMaxColLen(v);
+    } else if (fName.equals("ndvbitvector")) {
+      PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
+      String v = ((StringObjectInspector) poi).getPrimitiveJavaObject(o);
+      statsObj.getStatsData().getStringStats().setBitVectors(v);
     }
   }
 
@@ -207,6 +223,10 @@ public class ColumnStatsTask extends Task<ColumnStatsWork> implements Serializab
     } else if (fName.equals("min")) {
       DateWritable v = ((DateObjectInspector) oi).getPrimitiveWritableObject(o);
       statsObj.getStatsData().getDateStats().setLowValue(new Date(v.getDays()));
+    } else if (fName.equals("ndvbitvector")) {
+      PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
+      String v = ((StringObjectInspector) poi).getPrimitiveJavaObject(o);
+      statsObj.getStatsData().getDateStats().setBitVectors(v);
     }
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
index 1f30cbd..bb1bbad 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
@@ -25,6 +25,8 @@ import java.util.Map;
 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.common.HiveStatsUtils;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
 import org.apache.hadoop.hive.conf.HiveVariableSource;
@@ -201,60 +203,6 @@ public class ColumnStatsSemanticAnalyzer extends SemanticAnalyzer {
     throw new SemanticException ("Unknown partition key : " + partKey);
   }
 
-  private int getNumBitVectorsForNDVEstimation(HiveConf conf) throws SemanticException {
-    int numBitVectors;
-    float percentageError = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVE_STATS_NDV_ERROR);
-
-    if (percentageError < 0.0) {
-      throw new SemanticException("hive.stats.ndv.error can't be negative");
-    } else if (percentageError <= 2.4) {
-      numBitVectors = 1024;
-      LOG.info("Lowest error achievable is 2.4% but error requested is " + percentageError + "%");
-      LOG.info("Choosing 1024 bit vectors..");
-    } else if (percentageError <= 3.4 ) {
-      numBitVectors = 1024;
-      LOG.info("Error requested is " + percentageError + "%");
-      LOG.info("Choosing 1024 bit vectors..");
-    } else if (percentageError <= 4.8) {
-      numBitVectors = 512;
-      LOG.info("Error requested is " + percentageError + "%");
-      LOG.info("Choosing 512 bit vectors..");
-     } else if (percentageError <= 6.8) {
-      numBitVectors = 256;
-      LOG.info("Error requested is " + percentageError + "%");
-      LOG.info("Choosing 256 bit vectors..");
-    } else if (percentageError <= 9.7) {
-      numBitVectors = 128;
-      LOG.info("Error requested is " + percentageError + "%");
-      LOG.info("Choosing 128 bit vectors..");
-    } else if (percentageError <= 13.8) {
-      numBitVectors = 64;
-      LOG.info("Error requested is " + percentageError + "%");
-      LOG.info("Choosing 64 bit vectors..");
-    } else if (percentageError <= 19.6) {
-      numBitVectors = 32;
-      LOG.info("Error requested is " + percentageError + "%");
-      LOG.info("Choosing 32 bit vectors..");
-    } else if (percentageError <= 28.2) {
-      numBitVectors = 16;
-      LOG.info("Error requested is " + percentageError + "%");
-      LOG.info("Choosing 16 bit vectors..");
-    } else if (percentageError <= 40.9) {
-      numBitVectors = 8;
-      LOG.info("Error requested is " + percentageError + "%");
-      LOG.info("Choosing 8 bit vectors..");
-    } else if (percentageError <= 61.0) {
-      numBitVectors = 4;
-      LOG.info("Error requested is " + percentageError + "%");
-      LOG.info("Choosing 4 bit vectors..");
-    } else {
-      numBitVectors = 2;
-      LOG.info("Error requested is " + percentageError + "%");
-      LOG.info("Choosing 2 bit vectors..");
-    }
-    return numBitVectors;
-  }
-
   private List<String> getColumnTypes(List<String> colNames)
       throws SemanticException{
     List<String> colTypes = new LinkedList<String>();
@@ -396,7 +344,12 @@ public class ColumnStatsSemanticAnalyzer extends SemanticAnalyzer {
         isTableLevel = true;
       }
       colType = getColumnTypes(colNames);
-      int numBitVectors = getNumBitVectorsForNDVEstimation(conf);
+      int numBitVectors;
+      try {
+        numBitVectors = HiveStatsUtils.getNumBitVectorsForNDVEstimation(conf);
+      } catch (Exception e) {
+        throw new SemanticException(e.getMessage());
+      }
       rewrittenQuery = genRewrittenQuery(colNames, numBitVectors, partSpec, isPartitionStats);
       rewrittenTree = genRewrittenTree(rewrittenQuery);
     } else {

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
index b4cf58f..ea506fc 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
@@ -46,6 +46,7 @@ import org.apache.hadoop.hive.ql.metadata.Partition;
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.optimizer.stats.annotation.StatsRulesProcFactory;
 import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.ColStatistics;
 import org.apache.hadoop.hive.ql.plan.ColStatistics.Range;
 import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
@@ -1558,4 +1559,58 @@ public class StatsUtils {
       return Long.MAX_VALUE;
     }
   }
+
+  public static int getNumBitVectorsForNDVEstimation(HiveConf conf) throws SemanticException {
+    int numBitVectors;
+    float percentageError = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVE_STATS_NDV_ERROR);
+
+    if (percentageError < 0.0) {
+      throw new SemanticException("hive.stats.ndv.error can't be negative");
+    } else if (percentageError <= 2.4) {
+      numBitVectors = 1024;
+      LOG.info("Lowest error achievable is 2.4% but error requested is " + percentageError + "%");
+      LOG.info("Choosing 1024 bit vectors..");
+    } else if (percentageError <= 3.4 ) {
+      numBitVectors = 1024;
+      LOG.info("Error requested is " + percentageError + "%");
+      LOG.info("Choosing 1024 bit vectors..");
+    } else if (percentageError <= 4.8) {
+      numBitVectors = 512;
+      LOG.info("Error requested is " + percentageError + "%");
+      LOG.info("Choosing 512 bit vectors..");
+    } else if (percentageError <= 6.8) {
+      numBitVectors = 256;
+      LOG.info("Error requested is " + percentageError + "%");
+      LOG.info("Choosing 256 bit vectors..");
+    } else if (percentageError <= 9.7) {
+      numBitVectors = 128;
+      LOG.info("Error requested is " + percentageError + "%");
+      LOG.info("Choosing 128 bit vectors..");
+    } else if (percentageError <= 13.8) {
+      numBitVectors = 64;
+      LOG.info("Error requested is " + percentageError + "%");
+      LOG.info("Choosing 64 bit vectors..");
+    } else if (percentageError <= 19.6) {
+      numBitVectors = 32;
+      LOG.info("Error requested is " + percentageError + "%");
+      LOG.info("Choosing 32 bit vectors..");
+    } else if (percentageError <= 28.2) {
+      numBitVectors = 16;
+      LOG.info("Error requested is " + percentageError + "%");
+      LOG.info("Choosing 16 bit vectors..");
+    } else if (percentageError <= 40.9) {
+      numBitVectors = 8;
+      LOG.info("Error requested is " + percentageError + "%");
+      LOG.info("Choosing 8 bit vectors..");
+    } else if (percentageError <= 61.0) {
+      numBitVectors = 4;
+      LOG.info("Error requested is " + percentageError + "%");
+      LOG.info("Choosing 4 bit vectors..");
+    } else {
+      numBitVectors = 2;
+      LOG.info("Error requested is " + percentageError + "%");
+      LOG.info("Choosing 2 bit vectors..");
+    }
+    return numBitVectors;
+  }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java
index 0e96f89..d6ca73f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java
@@ -43,8 +43,6 @@ import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.util.StringUtils;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 /**
  * GenericUDAFComputeStats
@@ -401,6 +399,7 @@ public class GenericUDAFComputeStats extends AbstractGenericUDAFResolver {
         foi.add(getValueObjectInspector());
         foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
         foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
+        foi.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector);
 
         List<String> fname = new ArrayList<String>();
         fname.add("columnType");
@@ -408,11 +407,13 @@ public class GenericUDAFComputeStats extends AbstractGenericUDAFResolver {
         fname.add("max");
         fname.add("countnulls");
         fname.add("numdistinctvalues");
+        fname.add("ndvbitvector");
 
-        result = new Object[5];
+        result = new Object[6];
         result[0] = new Text();
         result[3] = new LongWritable(0);
         result[4] = new LongWritable(0);
+        result[5] = new Text();
 
         return ObjectInspectorFactory.getStandardStructObjectInspector(fname,
             foi);
@@ -448,6 +449,9 @@ public class GenericUDAFComputeStats extends AbstractGenericUDAFResolver {
         serializeCommon(result);
         long dv = numDV != null ? numDV.estimateNumDistinctValues() : 0;
         ((LongWritable) result[4]).set(dv);
+        if (numDV != null) {
+          ((Text) result[5]).set(numDV.serialize());
+        }
 
         return result;
       }
@@ -795,6 +799,7 @@ public class GenericUDAFComputeStats extends AbstractGenericUDAFResolver {
         foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
         foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
         foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
+        foi.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector);
 
         List<String> fname = new ArrayList<String>();
         fname.add("columntype");
@@ -802,13 +807,15 @@ public class GenericUDAFComputeStats extends AbstractGenericUDAFResolver {
         fname.add("avglength");
         fname.add("countnulls");
         fname.add("numdistinctvalues");
+        fname.add("ndvbitvector");
 
-        result = new Object[5];
+        result = new Object[6];
         result[0] = new Text();
         result[1] = new LongWritable(0);
         result[2] = new DoubleWritable(0);
         result[3] = new LongWritable(0);
         result[4] = new LongWritable(0);
+        result[5] = new Text();
 
         return ObjectInspectorFactory.getStandardStructObjectInspector(fname,
             foi);
@@ -1003,7 +1010,9 @@ public class GenericUDAFComputeStats extends AbstractGenericUDAFResolver {
       ((DoubleWritable) result[2]).set(avgLength);
       ((LongWritable) result[3]).set(myagg.countNulls);
       ((LongWritable) result[4]).set(numDV);
-
+      if (myagg.numBitVectors != 0) {
+        ((Text) result[5]).set(myagg.numDV.serialize());
+      }
       return result;
     }
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/ql/src/test/results/clientpositive/char_udf1.q.java1.7.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/char_udf1.q.java1.7.out b/ql/src/test/results/clientpositive/char_udf1.q.java1.7.out
index bfed116..ee1c2ae 100644
--- a/ql/src/test/results/clientpositive/char_udf1.q.java1.7.out
+++ b/ql/src/test/results/clientpositive/char_udf1.q.java1.7.out
@@ -422,7 +422,7 @@ from char_udf_1
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@char_udf_1
 #### A masked pattern was here ####
-{"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1}	{"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1}
+{"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{0}{3}{2}{3}{1}{0}{2}{0}{1}{0}{0}{1}{3}{2}{0}{3}"}	{"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{0}{3}{2}{3}{1}{0}{2}{0}{1}{0}{0}{1}{3}{2}{0}{3}"}
 PREHOOK: query: select
   min(c2),
   min(c4)

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/ql/src/test/results/clientpositive/columnstats_partlvl.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/columnstats_partlvl.q.out b/ql/src/test/results/clientpositive/columnstats_partlvl.q.out
index b7c9075..2545c03 100644
--- a/ql/src/test/results/clientpositive/columnstats_partlvl.q.out
+++ b/ql/src/test/results/clientpositive/columnstats_partlvl.q.out
@@ -66,7 +66,7 @@ STAGE PLANS:
           mode: mergepartial
           outputColumnNames: _col0, _col1
           Select Operator
-            expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint>), _col0 (type: double)
+            expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col0 (type: double)
             outputColumnNames: _col0, _col1
             File Output Operator
               compressed: false
@@ -186,7 +186,7 @@ STAGE PLANS:
           mode: mergepartial
           outputColumnNames: _col0, _col1
           Select Operator
-            expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint>), _col0 (type: double)
+            expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col0 (type: double)
             outputColumnNames: _col0, _col1
             File Output Operator
               compressed: false
@@ -199,7 +199,7 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   properties:
                     columns _col0,_col1
-                    columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint>:double
+                    columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>:double
                     escape.delim \
                     hive.serialization.extend.additional.nesting.levels true
                     serialization.escape.crlf true
@@ -264,7 +264,7 @@ STAGE PLANS:
           mode: mergepartial
           outputColumnNames: _col0, _col1
           Select Operator
-            expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint>), _col0 (type: double)
+            expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col0 (type: double)
             outputColumnNames: _col0, _col1
             File Output Operator
               compressed: false
@@ -384,7 +384,7 @@ STAGE PLANS:
           mode: mergepartial
           outputColumnNames: _col0, _col1
           Select Operator
-            expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint>), _col0 (type: double)
+            expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col0 (type: double)
             outputColumnNames: _col0, _col1
             File Output Operator
               compressed: false
@@ -397,7 +397,7 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   properties:
                     columns _col0,_col1
-                    columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint>:double
+                    columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>:double
                     escape.delim \
                     hive.serialization.extend.additional.nesting.levels true
                     serialization.escape.crlf true
@@ -462,7 +462,7 @@ STAGE PLANS:
           mode: mergepartial
           outputColumnNames: _col0, _col1, _col2
           Select Operator
-            expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint>), _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint>), _col0 (type: double)
+            expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col0 (type: double)
             outputColumnNames: _col0, _col1, _col2
             File Output Operator
               compressed: false
@@ -542,7 +542,7 @@ STAGE PLANS:
           mode: mergepartial
           outputColumnNames: _col0, _col1, _col2
           Select Operator
-            expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint>), _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint>), _col0 (type: double)
+            expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col0 (type: double)
             outputColumnNames: _col0, _col1, _col2
             File Output Operator
               compressed: false

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out b/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out
index 9685202..39f45ae 100644
--- a/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out
+++ b/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out
@@ -104,7 +104,7 @@ STAGE PLANS:
           mode: mergepartial
           outputColumnNames: _col0, _col1, _col2, _col3
           Select Operator
-            expressions: _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint>), _col3 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint>), _col0 (type: double), _col1 (type: string)
+            expressions: _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col3 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col0 (type: double), _col1 (type: string)
             outputColumnNames: _col0, _col1, _col2, _col3
             File Output Operator
               compressed: false
@@ -177,7 +177,7 @@ STAGE PLANS:
           mode: mergepartial
           outputColumnNames: _col0, _col1, _col2
           Select Operator
-            expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint>), _col0 (type: double), _col1 (type: string)
+            expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col0 (type: double), _col1 (type: string)
             outputColumnNames: _col0, _col1, _col2
             File Output Operator
               compressed: false
@@ -261,7 +261,7 @@ STAGE PLANS:
           mode: mergepartial
           outputColumnNames: _col0, _col1, _col2
           Select Operator
-            expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint>), _col0 (type: double), _col1 (type: string)
+            expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col0 (type: double), _col1 (type: string)
             outputColumnNames: _col0, _col1, _col2
             File Output Operator
               compressed: false
@@ -342,7 +342,7 @@ STAGE PLANS:
           mode: mergepartial
           outputColumnNames: _col0, _col1, _col2, _col3
           Select Operator
-            expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint>), _col3 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint>), _col0 (type: double), _col1 (type: string)
+            expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col3 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col0 (type: double), _col1 (type: string)
             outputColumnNames: _col0, _col1, _col2, _col3
             File Output Operator
               compressed: false

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out b/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out
index 0aadae3..4cd12c4 100644
--- a/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out
+++ b/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out
@@ -187,7 +187,7 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 properties:
                   columns _col0,_col1,_col2
-                  columns.types struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint>:struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint>:struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint>
+                  columns.types struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>:struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>:struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>
                   escape.delim \
                   hive.serialization.extend.additional.nesting.levels true
                   serialization.escape.crlf true
@@ -588,7 +588,7 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 properties:
                   columns _col0,_col1,_col2
-                  columns.types struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint>:struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint>:struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint>
+                  columns.types struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>:struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>:struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>
                   escape.delim \
                   hive.serialization.extend.additional.nesting.levels true
                   serialization.escape.crlf true

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/ql/src/test/results/clientpositive/compute_stats_date.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/compute_stats_date.q.out b/ql/src/test/results/clientpositive/compute_stats_date.q.out
index b57a862..d9c47d0 100644
--- a/ql/src/test/results/clientpositive/compute_stats_date.q.out
+++ b/ql/src/test/results/clientpositive/compute_stats_date.q.out
@@ -47,7 +47,7 @@ select compute_stats(fl_date, 16) from tab_date
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@tab_date
 #### A masked pattern was here ####
-{"columntype":"Date","min":"2000-11-20","max":"2010-10-29","countnulls":0,"numdistinctvalues":18}
+{"columntype":"Date","min":"2000-11-20","max":"2010-10-29","countnulls":0,"numdistinctvalues":18,"ndvbitvector":"{0, 1, 2, 3, 4, 5}{0, 1, 2, 3}{0}{0, 1, 2, 6}{0, 1, 2, 3}{0, 1, 2, 3}{0, 1, 2}{0, 1, 2, 3}{0, 1, 2, 3}{0, 2}{0, 1, 2, 3, 4}{0, 1, 2, 4, 5}{0, 1, 2, 3}{0, 1, 2, 3, 5}{0, 1, 2, 3, 4, 5}{0, 1, 2, 3, 4}"}
 PREHOOK: query: explain
 analyze table tab_date compute statistics for columns fl_date
 PREHOOK: type: QUERY

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/ql/src/test/results/clientpositive/compute_stats_decimal.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/compute_stats_decimal.q.out b/ql/src/test/results/clientpositive/compute_stats_decimal.q.out
index 35abb37..c204ab6 100644
--- a/ql/src/test/results/clientpositive/compute_stats_decimal.q.out
+++ b/ql/src/test/results/clientpositive/compute_stats_decimal.q.out
@@ -35,4 +35,4 @@ select compute_stats(a, 18) from tab_decimal
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@tab_decimal
 #### A masked pattern was here ####
-{"columntype":"Decimal","min":-87.2,"max":435.331,"countnulls":2,"numdistinctvalues":13}
+{"columntype":"Decimal","min":-87.2,"max":435.331,"countnulls":2,"numdistinctvalues":13,"ndvbitvector":"{0, 1, 2, 3, 4}{0, 1, 2, 3, 5}{0, 1, 2, 3}{0, 1, 2}{0, 1, 2}{0, 1, 2, 3, 5}{0, 1, 3}{0, 1, 2, 4}{0, 1, 2, 3, 5}{0, 1, 2, 3}{0, 1, 2}{0, 1}{0, 1, 2}{0, 1, 2, 3}{0, 1, 2, 6, 8}{0, 1, 2, 3}{0, 1, 2}{0, 1, 4, 5}"}

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/ql/src/test/results/clientpositive/compute_stats_double.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/compute_stats_double.q.out b/ql/src/test/results/clientpositive/compute_stats_double.q.out
index f6b4052..0a67ecd 100644
--- a/ql/src/test/results/clientpositive/compute_stats_double.q.out
+++ b/ql/src/test/results/clientpositive/compute_stats_double.q.out
@@ -35,4 +35,4 @@ select compute_stats(a, 16) from tab_double
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@tab_double
 #### A masked pattern was here ####
-{"columntype":"Double","min":-87.2,"max":435.33,"countnulls":2,"numdistinctvalues":11}
+{"columntype":"Double","min":-87.2,"max":435.33,"countnulls":2,"numdistinctvalues":11,"ndvbitvector":"{0, 1, 2, 3, 4}{0, 1, 2}{0, 1}{0, 1, 3, 4}{0, 1, 3}{0, 1, 2, 3, 8}{0, 1, 3}{0, 1, 2}{0, 1, 4}{0, 1, 2}{0, 1, 2, 3}{0, 1, 2, 3}{0, 1, 2, 3, 4}{0, 1, 2}{0, 1, 2, 3, 4}{0, 1, 3}"}

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/ql/src/test/results/clientpositive/compute_stats_empty_table.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/compute_stats_empty_table.q.out b/ql/src/test/results/clientpositive/compute_stats_empty_table.q.out
index f76c760..a6cb9af 100644
--- a/ql/src/test/results/clientpositive/compute_stats_empty_table.q.out
+++ b/ql/src/test/results/clientpositive/compute_stats_empty_table.q.out
@@ -34,7 +34,7 @@ POSTHOOK: query: select compute_stats(b, 16) from tab_empty
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@tab_empty
 #### A masked pattern was here ####
-{"columntype":"Long","min":null,"max":null,"countnulls":0,"numdistinctvalues":0}
+{"columntype":"Long","min":null,"max":null,"countnulls":0,"numdistinctvalues":0,"ndvbitvector":""}
 PREHOOK: query: select compute_stats(c, 16) from tab_empty
 PREHOOK: type: QUERY
 PREHOOK: Input: default@tab_empty
@@ -43,7 +43,7 @@ POSTHOOK: query: select compute_stats(c, 16) from tab_empty
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@tab_empty
 #### A masked pattern was here ####
-{"columntype":"Double","min":null,"max":null,"countnulls":0,"numdistinctvalues":0}
+{"columntype":"Double","min":null,"max":null,"countnulls":0,"numdistinctvalues":0,"ndvbitvector":""}
 PREHOOK: query: select compute_stats(d, 16) from tab_empty
 PREHOOK: type: QUERY
 PREHOOK: Input: default@tab_empty
@@ -52,7 +52,7 @@ POSTHOOK: query: select compute_stats(d, 16) from tab_empty
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@tab_empty
 #### A masked pattern was here ####
-{"columntype":"String","maxlength":0,"avglength":0.0,"countnulls":0,"numdistinctvalues":0}
+{"columntype":"String","maxlength":0,"avglength":0.0,"countnulls":0,"numdistinctvalues":0,"ndvbitvector":""}
 PREHOOK: query: select compute_stats(e, 16) from tab_empty
 PREHOOK: type: QUERY
 PREHOOK: Input: default@tab_empty

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/ql/src/test/results/clientpositive/compute_stats_long.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/compute_stats_long.q.out b/ql/src/test/results/clientpositive/compute_stats_long.q.out
index 2c6171d..b6f2b10 100644
--- a/ql/src/test/results/clientpositive/compute_stats_long.q.out
+++ b/ql/src/test/results/clientpositive/compute_stats_long.q.out
@@ -35,4 +35,4 @@ select compute_stats(a, 16) from tab_int
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@tab_int
 #### A masked pattern was here ####
-{"columntype":"Long","min":4,"max":344,"countnulls":1,"numdistinctvalues":11}
+{"columntype":"Long","min":4,"max":344,"countnulls":1,"numdistinctvalues":11,"ndvbitvector":"{0, 1, 2, 3}{0, 2, 5}{0, 1, 2, 3, 4}{0, 1, 2, 4, 6, 7}{0, 1, 2, 4}{0, 1, 2, 4, 5}{0, 1, 2, 5}{0, 1, 2}{0, 1, 2, 3}{0, 1, 3, 4}{0, 1, 2, 5, 6}{0, 1, 2, 3}{0, 1, 3}{0, 1, 2, 3}{0, 1, 2, 3, 10}{0, 1, 2, 4}"}

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/ql/src/test/results/clientpositive/compute_stats_string.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/compute_stats_string.q.out b/ql/src/test/results/clientpositive/compute_stats_string.q.out
index bdf9d85..fbd0e6d 100644
--- a/ql/src/test/results/clientpositive/compute_stats_string.q.out
+++ b/ql/src/test/results/clientpositive/compute_stats_string.q.out
@@ -35,4 +35,4 @@ select compute_stats(a, 16) from tab_string
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@tab_string
 #### A masked pattern was here ####
-{"columntype":"String","maxlength":11,"avglength":3.9,"countnulls":0,"numdistinctvalues":7}
+{"columntype":"String","maxlength":11,"avglength":3.9,"countnulls":0,"numdistinctvalues":7,"ndvbitvector":"{0, 1, 2, 3}{0, 1}{0, 1, 3}{0, 2}{0, 1, 2, 3}{0, 1, 3}{0, 1, 2, 3}{0, 1, 3}{0, 1}{0, 1}{0, 1, 2, 4}{0, 1, 4}{0, 2, 4}{0, 1, 2, 3}{0, 1, 2}{0, 1, 2}"}

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out b/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out
index 7fa3089..8f50a43 100644
--- a/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out
+++ b/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out
@@ -203,7 +203,7 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 properties:
                   columns _col0,_col1,_col2
-                  columns.types struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint>:struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint>:struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint>
+                  columns.types struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>:struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>:struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>
                   escape.delim \
                   hive.serialization.extend.additional.nesting.levels true
                   serialization.escape.crlf true

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out b/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out
index ae39d18..b46f509 100644
--- a/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out
+++ b/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out
@@ -211,7 +211,7 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 properties:
                   columns _col0,_col1,_col2
-                  columns.types struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint>:struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint>:struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint>
+                  columns.types struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>:struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>:struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>
                   escape.delim \
                   hive.serialization.extend.additional.nesting.levels true
                   serialization.escape.crlf true

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/ql/src/test/results/clientpositive/varchar_udf1.q.java1.7.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/varchar_udf1.q.java1.7.out b/ql/src/test/results/clientpositive/varchar_udf1.q.java1.7.out
index 853bc4a..459d93b 100644
--- a/ql/src/test/results/clientpositive/varchar_udf1.q.java1.7.out
+++ b/ql/src/test/results/clientpositive/varchar_udf1.q.java1.7.out
@@ -416,7 +416,7 @@ from varchar_udf_1
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@varchar_udf_1
 #### A masked pattern was here ####
-{"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1}	{"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1}
+{"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{0}{3}{2}{3}{1}{0}{2}{0}{1}{0}{0}{1}{3}{2}{0}{3}"}	{"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{0}{3}{2}{3}{1}{0}{2}{0}{1}{0}{0}{1}{3}{2}{0}{3}"}
 PREHOOK: query: select
   min(c2),
   min(c4)


[4/4] hive git commit: HIVE-12763: Use bit vector to track NDV (Pengcheng Xiong, reviewed by Laljo John Pullokkaran and Alan Gates)

Posted by px...@apache.org.
HIVE-12763: Use bit vector to track NDV (Pengcheng Xiong, reviewed by Laljo John Pullokkaran and Alan Gates)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/7b2f6703
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/7b2f6703
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/7b2f6703

Branch: refs/heads/master
Commit: 7b2f6703f172a71d595159c4f395f942583d66b9
Parents: 0c7f2d6
Author: Pengcheng Xiong <px...@apache.org>
Authored: Thu Jan 28 21:25:33 2016 -0800
Committer: Pengcheng Xiong <px...@apache.org>
Committed: Thu Jan 28 21:25:33 2016 -0800

----------------------------------------------------------------------
 .../hadoop/hive/common/HiveStatsUtils.java      |  59 ++
 .../metastore/hbase/TestHBaseSchemaTool.java    |  12 +-
 .../test/resources/testconfiguration.properties |   1 +
 metastore/if/hive_metastore.thrift              |  21 +-
 metastore/pom.xml                               |   5 +
 .../metastore/hbase/HbaseMetastoreProto.java    | 411 ++++++++----
 .../gen/thrift/gen-cpp/hive_metastore_types.cpp | 163 +++++
 .../gen/thrift/gen-cpp/hive_metastore_types.h   |  93 ++-
 .../metastore/api/BinaryColumnStatsData.java    | 112 +++-
 .../metastore/api/BooleanColumnStatsData.java   | 112 +++-
 .../hive/metastore/api/DateColumnStatsData.java | 114 +++-
 .../metastore/api/DecimalColumnStatsData.java   | 114 +++-
 .../metastore/api/DoubleColumnStatsData.java    | 114 +++-
 .../hive/metastore/api/LongColumnStatsData.java | 114 +++-
 .../metastore/api/StringColumnStatsData.java    | 112 +++-
 .../src/gen/thrift/gen-php/metastore/Types.php  | 161 +++++
 .../gen/thrift/gen-py/hive_metastore/ttypes.py  | 105 ++-
 .../gen/thrift/gen-rb/hive_metastore_types.rb   |  28 +-
 .../metastore/NumDistinctValueEstimator.java    | 367 +++++++++++
 .../hadoop/hive/metastore/hbase/HBaseUtils.java | 152 ++---
 .../hadoop/hive/metastore/hbase/StatsCache.java |   7 +-
 .../stats/BinaryColumnStatsAggregator.java      |   2 +-
 .../stats/BooleanColumnStatsAggregator.java     |   2 +-
 .../hbase/stats/ColumnStatsAggregator.java      |   6 +-
 .../stats/ColumnStatsAggregatorFactory.java     |  26 +-
 .../stats/DecimalColumnStatsAggregator.java     |  24 +-
 .../stats/DoubleColumnStatsAggregator.java      |  12 +-
 .../hbase/stats/LongColumnStatsAggregator.java  |  12 +-
 .../stats/StringColumnStatsAggregator.java      |  12 +-
 .../metastore/hbase/hbase_metastore_proto.proto |   1 +
 ...stHBaseAggregateStatsCacheWithBitVector.java | 187 ++++++
 .../hbase/TestHBaseStoreBitVector.java          | 634 +++++++++++++++++++
 ql/pom.xml                                      |   5 -
 .../hadoop/hive/ql/exec/ColumnStatsTask.java    |  20 +
 .../ql/parse/ColumnStatsSemanticAnalyzer.java   |  63 +-
 .../apache/hadoop/hive/ql/stats/StatsUtils.java |  55 ++
 .../ql/udf/generic/GenericUDAFComputeStats.java |  19 +-
 .../clientpositive/char_udf1.q.java1.7.out      |   2 +-
 .../clientpositive/columnstats_partlvl.q.out    |  16 +-
 .../clientpositive/columnstats_partlvl_dp.q.out |   8 +-
 .../clientpositive/columnstats_tbllvl.q.out     |   4 +-
 .../clientpositive/compute_stats_date.q.out     |   2 +-
 .../clientpositive/compute_stats_decimal.q.out  |   2 +-
 .../clientpositive/compute_stats_double.q.out   |   2 +-
 .../compute_stats_empty_table.q.out             |   6 +-
 .../clientpositive/compute_stats_long.q.out     |   2 +-
 .../clientpositive/compute_stats_string.q.out   |   2 +-
 .../display_colstats_tbllvl.q.out               |   2 +-
 .../temp_table_display_colstats_tbllvl.q.out    |   2 +-
 .../clientpositive/varchar_udf1.q.java1.7.out   |   2 +-
 50 files changed, 3131 insertions(+), 378 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/common/src/java/org/apache/hadoop/hive/common/HiveStatsUtils.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/common/HiveStatsUtils.java b/common/src/java/org/apache/hadoop/hive/common/HiveStatsUtils.java
index 9193f80..7c9d72f 100644
--- a/common/src/java/org/apache/hadoop/hive/common/HiveStatsUtils.java
+++ b/common/src/java/org/apache/hadoop/hive/common/HiveStatsUtils.java
@@ -21,9 +21,13 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
 
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * HiveStatsUtils.
@@ -32,6 +36,7 @@ import org.apache.hadoop.fs.Path;
  */
 
 public class HiveStatsUtils {
+  private static final Logger LOG = LoggerFactory.getLogger(HiveStatsUtils.class);
 
   /**
    * Get all file status from a root path and recursively go deep into certain levels.
@@ -73,4 +78,58 @@ public class HiveStatsUtils {
     return fs.globStatus(pathPattern, FileUtils.HIDDEN_FILES_PATH_FILTER);
   }
 
+  public static int getNumBitVectorsForNDVEstimation(Configuration conf) throws Exception {
+    int numBitVectors;
+    float percentageError = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVE_STATS_NDV_ERROR);
+
+    if (percentageError < 0.0) {
+      throw new Exception("hive.stats.ndv.error can't be negative");
+    } else if (percentageError <= 2.4) {
+      numBitVectors = 1024;
+      LOG.info("Lowest error achievable is 2.4% but error requested is " + percentageError + "%");
+      LOG.info("Choosing 1024 bit vectors..");
+    } else if (percentageError <= 3.4 ) {
+      numBitVectors = 1024;
+      LOG.info("Error requested is " + percentageError + "%");
+      LOG.info("Choosing 1024 bit vectors..");
+    } else if (percentageError <= 4.8) {
+      numBitVectors = 512;
+      LOG.info("Error requested is " + percentageError + "%");
+      LOG.info("Choosing 512 bit vectors..");
+     } else if (percentageError <= 6.8) {
+      numBitVectors = 256;
+      LOG.info("Error requested is " + percentageError + "%");
+      LOG.info("Choosing 256 bit vectors..");
+    } else if (percentageError <= 9.7) {
+      numBitVectors = 128;
+      LOG.info("Error requested is " + percentageError + "%");
+      LOG.info("Choosing 128 bit vectors..");
+    } else if (percentageError <= 13.8) {
+      numBitVectors = 64;
+      LOG.info("Error requested is " + percentageError + "%");
+      LOG.info("Choosing 64 bit vectors..");
+    } else if (percentageError <= 19.6) {
+      numBitVectors = 32;
+      LOG.info("Error requested is " + percentageError + "%");
+      LOG.info("Choosing 32 bit vectors..");
+    } else if (percentageError <= 28.2) {
+      numBitVectors = 16;
+      LOG.info("Error requested is " + percentageError + "%");
+      LOG.info("Choosing 16 bit vectors..");
+    } else if (percentageError <= 40.9) {
+      numBitVectors = 8;
+      LOG.info("Error requested is " + percentageError + "%");
+      LOG.info("Choosing 8 bit vectors..");
+    } else if (percentageError <= 61.0) {
+      numBitVectors = 4;
+      LOG.info("Error requested is " + percentageError + "%");
+      LOG.info("Choosing 4 bit vectors..");
+    } else {
+      numBitVectors = 2;
+      LOG.info("Error requested is " + percentageError + "%");
+      LOG.info("Choosing 2 bit vectors..");
+    }
+    return numBitVectors;
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/hbase/TestHBaseSchemaTool.java
----------------------------------------------------------------------
diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/hbase/TestHBaseSchemaTool.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/hbase/TestHBaseSchemaTool.java
index 9fbbf90..79c9e08 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/hbase/TestHBaseSchemaTool.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/hbase/TestHBaseSchemaTool.java
@@ -468,9 +468,9 @@ public class TestHBaseSchemaTool extends HBaseIntegrationTests {
             "\"tableType\":\"\"} sdHash: qQTgZAi5VzgpozzFGmIVTQ stats: column " +
             "col1: {\"colName\":\"col1\",\"colType\":\"int\"," +
             "\"statsData\":{\"longStats\":{\"lowValue\":-95,\"highValue\":95,\"numNulls\":1," +
-            "\"numDVs\":2}}} column col2: {\"colName\":\"col2\",\"colType\":\"varchar(32)\"," +
+            "\"numDVs\":2,\"bitVectors\":\"\"}}} column col2: {\"colName\":\"col2\",\"colType\":\"varchar(32)\"," +
             "\"statsData\":{\"stringStats\":{\"maxColLen\":97,\"avgColLen\":18.78," +
-        "\"numNulls\":29,\"numDVs\":397}}}" + lsep +
+        "\"numNulls\":29,\"numDVs\":397,\"bitVectors\":\"\"}}}" + lsep +
         "{\"tableName\":\"tab1\",\"dbName\":\"db0\",\"owner\":\"me\",\"createTime\":0," +
         "\"lastAccessTime\":0,\"retention\":0,\"partitionKeys\":[{\"name\":\"pcol1\"," +
             "\"type\":\"string\",\"comment\":\"\"},{\"name\":\"pcol2\",\"type\":\"string\"," +
@@ -519,9 +519,9 @@ public class TestHBaseSchemaTool extends HBaseIntegrationTests {
         "\"createTime\":0,\"lastAccessTime\":0,\"parameters\":{\"COLUMN_STATS_ACCURATE\":\"{\\\"COLUMN_STATS\\\":{\\\"col1\\\":\\\"true\\\",\\\"col2\\\":\\\"true\\\"}}\"}} sdHash: qQTgZAi5VzgpozzFGmIVTQ " +
         "stats: column col1: {\"colName\":\"col1\",\"colType\":\"int\"," +
         "\"statsData\":{\"longStats\":{\"lowValue\":-95,\"highValue\":95,\"numNulls\":1," +
-        "\"numDVs\":2}}} column col2: {\"colName\":\"col2\",\"colType\":\"varchar(32)\"," +
+        "\"numDVs\":2,\"bitVectors\":\"\"}}} column col2: {\"colName\":\"col2\",\"colType\":\"varchar(32)\"," +
         "\"statsData\":{\"stringStats\":{\"maxColLen\":97,\"avgColLen\":18.78,\"numNulls\":29," +
-        "\"numDVs\":397}}}" + lsep,  outStr.toString());
+        "\"numDVs\":397,\"bitVectors\":\"\"}}}" + lsep,  outStr.toString());
 
     outStr = new ByteArrayOutputStream();
     out = new PrintStream(outStr);
@@ -533,9 +533,9 @@ public class TestHBaseSchemaTool extends HBaseIntegrationTests {
         "\"lastAccessTime\":0,\"parameters\":{\"COLUMN_STATS_ACCURATE\":\"{\\\"COLUMN_STATS\\\":{\\\"col1\\\":\\\"true\\\",\\\"col2\\\":\\\"true\\\"}}\"}} sdHash: qQTgZAi5VzgpozzFGmIVTQ stats: column " +
         "col1: {\"colName\":\"col1\",\"colType\":\"int\"," +
         "\"statsData\":{\"longStats\":{\"lowValue\":-95,\"highValue\":95,\"numNulls\":1," +
-        "\"numDVs\":2}}} column col2: {\"colName\":\"col2\",\"colType\":\"varchar(32)\"," +
+        "\"numDVs\":2,\"bitVectors\":\"\"}}} column col2: {\"colName\":\"col2\",\"colType\":\"varchar(32)\"," +
         "\"statsData\":{\"stringStats\":{\"maxColLen\":97,\"avgColLen\":18.78,\"numNulls\":29," +
-        "\"numDVs\":397}}}" + lsep, outStr.toString());
+        "\"numDVs\":397,\"bitVectors\":\"\"}}}" + lsep, outStr.toString());
 
     outStr = new ByteArrayOutputStream();
     out = new PrintStream(outStr);

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index ec6a2c7..f8aa146 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -393,6 +393,7 @@ minitez.query.files=bucket_map_join_tez1.q,\
   orc_ppd_basic.q,\
   orc_merge_diff_fs.q,\
   stats_filemetadata.q,\
+  tez_aggr_part_stats.q,\
   tez_bmj_schema_evolution.q,\
   tez_dml.q,\
   tez_fsstat.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/metastore/if/hive_metastore.thrift
----------------------------------------------------------------------
diff --git a/metastore/if/hive_metastore.thrift b/metastore/if/hive_metastore.thrift
index 81837e6..9d8c092 100755
--- a/metastore/if/hive_metastore.thrift
+++ b/metastore/if/hive_metastore.thrift
@@ -325,34 +325,39 @@ struct Index {
 struct BooleanColumnStatsData {
 1: required i64 numTrues,
 2: required i64 numFalses,
-3: required i64 numNulls
+3: required i64 numNulls,
+4: optional string bitVectors
 }
 
 struct DoubleColumnStatsData {
 1: optional double lowValue,
 2: optional double highValue,
 3: required i64 numNulls,
-4: required i64 numDVs
+4: required i64 numDVs,
+5: optional string bitVectors
 }
 
 struct LongColumnStatsData {
 1: optional i64 lowValue,
 2: optional i64 highValue,
 3: required i64 numNulls,
-4: required i64 numDVs
+4: required i64 numDVs,
+5: optional string bitVectors
 }
 
 struct StringColumnStatsData {
 1: required i64 maxColLen,
 2: required double avgColLen,
 3: required i64 numNulls,
-4: required i64 numDVs
+4: required i64 numDVs,
+5: optional string bitVectors
 }
 
 struct BinaryColumnStatsData {
 1: required i64 maxColLen,
 2: required double avgColLen,
-3: required i64 numNulls
+3: required i64 numNulls,
+4: optional string bitVectors
 }
 
 
@@ -365,7 +370,8 @@ struct DecimalColumnStatsData {
 1: optional Decimal lowValue,
 2: optional Decimal highValue,
 3: required i64 numNulls,
-4: required i64 numDVs
+4: required i64 numDVs,
+5: optional string bitVectors
 }
 
 struct Date {
@@ -376,7 +382,8 @@ struct DateColumnStatsData {
 1: optional Date lowValue,
 2: optional Date highValue,
 3: required i64 numNulls,
-4: required i64 numDVs
+4: required i64 numDVs,
+5: optional string bitVectors
 }
 
 union ColumnStatisticsData {

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/metastore/pom.xml
----------------------------------------------------------------------
diff --git a/metastore/pom.xml b/metastore/pom.xml
index a8e84a1..18c1f9c 100644
--- a/metastore/pom.xml
+++ b/metastore/pom.xml
@@ -44,6 +44,11 @@
       <artifactId>hive-shims</artifactId>
       <version>${project.version}</version>
     </dependency>
+	<dependency>
+		<groupId>javolution</groupId>
+		<artifactId>javolution</artifactId>
+		<version>${javolution.version}</version>
+	</dependency>
     <!-- inter-project -->
     <dependency>
       <groupId>com.google.guava</groupId>

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/metastore/src/gen/protobuf/gen-java/org/apache/hadoop/hive/metastore/hbase/HbaseMetastoreProto.java
----------------------------------------------------------------------
diff --git a/metastore/src/gen/protobuf/gen-java/org/apache/hadoop/hive/metastore/hbase/HbaseMetastoreProto.java b/metastore/src/gen/protobuf/gen-java/org/apache/hadoop/hive/metastore/hbase/HbaseMetastoreProto.java
index 39a7278..3b2d7b5 100644
--- a/metastore/src/gen/protobuf/gen-java/org/apache/hadoop/hive/metastore/hbase/HbaseMetastoreProto.java
+++ b/metastore/src/gen/protobuf/gen-java/org/apache/hadoop/hive/metastore/hbase/HbaseMetastoreProto.java
@@ -3918,6 +3918,21 @@ public final class HbaseMetastoreProto {
      */
     com.google.protobuf.ByteString
         getColumnNameBytes();
+
+    // optional string bit_vectors = 12;
+    /**
+     * <code>optional string bit_vectors = 12;</code>
+     */
+    boolean hasBitVectors();
+    /**
+     * <code>optional string bit_vectors = 12;</code>
+     */
+    java.lang.String getBitVectors();
+    /**
+     * <code>optional string bit_vectors = 12;</code>
+     */
+    com.google.protobuf.ByteString
+        getBitVectorsBytes();
   }
   /**
    * Protobuf type {@code org.apache.hadoop.hive.metastore.hbase.ColumnStats}
@@ -4073,6 +4088,11 @@ public final class HbaseMetastoreProto {
               columnName_ = input.readBytes();
               break;
             }
+            case 98: {
+              bitField0_ |= 0x00000800;
+              bitVectors_ = input.readBytes();
+              break;
+            }
           }
         }
       } catch (com.google.protobuf.InvalidProtocolBufferException e) {
@@ -7506,6 +7526,49 @@ public final class HbaseMetastoreProto {
       }
     }
 
+    // optional string bit_vectors = 12;
+    public static final int BIT_VECTORS_FIELD_NUMBER = 12;
+    private java.lang.Object bitVectors_;
+    /**
+     * <code>optional string bit_vectors = 12;</code>
+     */
+    public boolean hasBitVectors() {
+      return ((bitField0_ & 0x00000800) == 0x00000800);
+    }
+    /**
+     * <code>optional string bit_vectors = 12;</code>
+     */
+    public java.lang.String getBitVectors() {
+      java.lang.Object ref = bitVectors_;
+      if (ref instanceof java.lang.String) {
+        return (java.lang.String) ref;
+      } else {
+        com.google.protobuf.ByteString bs = 
+            (com.google.protobuf.ByteString) ref;
+        java.lang.String s = bs.toStringUtf8();
+        if (bs.isValidUtf8()) {
+          bitVectors_ = s;
+        }
+        return s;
+      }
+    }
+    /**
+     * <code>optional string bit_vectors = 12;</code>
+     */
+    public com.google.protobuf.ByteString
+        getBitVectorsBytes() {
+      java.lang.Object ref = bitVectors_;
+      if (ref instanceof java.lang.String) {
+        com.google.protobuf.ByteString b = 
+            com.google.protobuf.ByteString.copyFromUtf8(
+                (java.lang.String) ref);
+        bitVectors_ = b;
+        return b;
+      } else {
+        return (com.google.protobuf.ByteString) ref;
+      }
+    }
+
     private void initFields() {
       lastAnalyzed_ = 0L;
       columnType_ = "";
@@ -7518,6 +7581,7 @@ public final class HbaseMetastoreProto {
       binaryStats_ = org.apache.hadoop.hive.metastore.hbase.HbaseMetastoreProto.ColumnStats.StringStats.getDefaultInstance();
       decimalStats_ = org.apache.hadoop.hive.metastore.hbase.HbaseMetastoreProto.ColumnStats.DecimalStats.getDefaultInstance();
       columnName_ = "";
+      bitVectors_ = "";
     }
     private byte memoizedIsInitialized = -1;
     public final boolean isInitialized() {
@@ -7574,6 +7638,9 @@ public final class HbaseMetastoreProto {
       if (((bitField0_ & 0x00000400) == 0x00000400)) {
         output.writeBytes(11, getColumnNameBytes());
       }
+      if (((bitField0_ & 0x00000800) == 0x00000800)) {
+        output.writeBytes(12, getBitVectorsBytes());
+      }
       getUnknownFields().writeTo(output);
     }
 
@@ -7627,6 +7694,10 @@ public final class HbaseMetastoreProto {
         size += com.google.protobuf.CodedOutputStream
           .computeBytesSize(11, getColumnNameBytes());
       }
+      if (((bitField0_ & 0x00000800) == 0x00000800)) {
+        size += com.google.protobuf.CodedOutputStream
+          .computeBytesSize(12, getBitVectorsBytes());
+      }
       size += getUnknownFields().getSerializedSize();
       memoizedSerializedSize = size;
       return size;
@@ -7795,6 +7866,8 @@ public final class HbaseMetastoreProto {
         bitField0_ = (bitField0_ & ~0x00000200);
         columnName_ = "";
         bitField0_ = (bitField0_ & ~0x00000400);
+        bitVectors_ = "";
+        bitField0_ = (bitField0_ & ~0x00000800);
         return this;
       }
 
@@ -7891,6 +7964,10 @@ public final class HbaseMetastoreProto {
           to_bitField0_ |= 0x00000400;
         }
         result.columnName_ = columnName_;
+        if (((from_bitField0_ & 0x00000800) == 0x00000800)) {
+          to_bitField0_ |= 0x00000800;
+        }
+        result.bitVectors_ = bitVectors_;
         result.bitField0_ = to_bitField0_;
         onBuilt();
         return result;
@@ -7944,6 +8021,11 @@ public final class HbaseMetastoreProto {
           columnName_ = other.columnName_;
           onChanged();
         }
+        if (other.hasBitVectors()) {
+          bitField0_ |= 0x00000800;
+          bitVectors_ = other.bitVectors_;
+          onChanged();
+        }
         this.mergeUnknownFields(other.getUnknownFields());
         return this;
       }
@@ -8930,6 +9012,80 @@ public final class HbaseMetastoreProto {
         return this;
       }
 
+      // optional string bit_vectors = 12;
+      private java.lang.Object bitVectors_ = "";
+      /**
+       * <code>optional string bit_vectors = 12;</code>
+       */
+      public boolean hasBitVectors() {
+        return ((bitField0_ & 0x00000800) == 0x00000800);
+      }
+      /**
+       * <code>optional string bit_vectors = 12;</code>
+       */
+      public java.lang.String getBitVectors() {
+        java.lang.Object ref = bitVectors_;
+        if (!(ref instanceof java.lang.String)) {
+          java.lang.String s = ((com.google.protobuf.ByteString) ref)
+              .toStringUtf8();
+          bitVectors_ = s;
+          return s;
+        } else {
+          return (java.lang.String) ref;
+        }
+      }
+      /**
+       * <code>optional string bit_vectors = 12;</code>
+       */
+      public com.google.protobuf.ByteString
+          getBitVectorsBytes() {
+        java.lang.Object ref = bitVectors_;
+        if (ref instanceof String) {
+          com.google.protobuf.ByteString b = 
+              com.google.protobuf.ByteString.copyFromUtf8(
+                  (java.lang.String) ref);
+          bitVectors_ = b;
+          return b;
+        } else {
+          return (com.google.protobuf.ByteString) ref;
+        }
+      }
+      /**
+       * <code>optional string bit_vectors = 12;</code>
+       */
+      public Builder setBitVectors(
+          java.lang.String value) {
+        if (value == null) {
+    throw new NullPointerException();
+  }
+  bitField0_ |= 0x00000800;
+        bitVectors_ = value;
+        onChanged();
+        return this;
+      }
+      /**
+       * <code>optional string bit_vectors = 12;</code>
+       */
+      public Builder clearBitVectors() {
+        bitField0_ = (bitField0_ & ~0x00000800);
+        bitVectors_ = getDefaultInstance().getBitVectors();
+        onChanged();
+        return this;
+      }
+      /**
+       * <code>optional string bit_vectors = 12;</code>
+       */
+      public Builder setBitVectorsBytes(
+          com.google.protobuf.ByteString value) {
+        if (value == null) {
+    throw new NullPointerException();
+  }
+  bitField0_ |= 0x00000800;
+        bitVectors_ = value;
+        onChanged();
+        return this;
+      }
+
       // @@protoc_insertion_point(builder_scope:org.apache.hadoop.hive.metastore.hbase.ColumnStats)
     }
 
@@ -34506,7 +34662,7 @@ public final class HbaseMetastoreProto {
       "grStatsInvalidatorFilter.Entry\022\021\n\trun_ev" +
       "ery\030\002 \002(\003\022\034\n\024max_cache_entry_life\030\003 \002(\003\032" +
       "?\n\005Entry\022\017\n\007db_name\030\001 \002(\014\022\022\n\ntable_name\030" +
-      "\002 \002(\014\022\021\n\tpart_name\030\003 \002(\014\"\335\010\n\013ColumnStats" +
+      "\002 \002(\014\022\021\n\tpart_name\030\003 \002(\014\"\362\010\n\013ColumnStats" +
       "\022\025\n\rlast_analyzed\030\001 \001(\003\022\023\n\013column_type\030\002" +
       " \002(\t\022\021\n\tnum_nulls\030\003 \001(\003\022\033\n\023num_distinct_" +
       "values\030\004 \001(\003\022T\n\nbool_stats\030\005 \001(\0132@.org.a",
@@ -34522,132 +34678,133 @@ public final class HbaseMetastoreProto {
       "ve.metastore.hbase.ColumnStats.StringSta",
       "ts\022W\n\rdecimal_stats\030\n \001(\0132@.org.apache.h" +
       "adoop.hive.metastore.hbase.ColumnStats.D" +
-      "ecimalStats\022\023\n\013column_name\030\013 \001(\t\0325\n\014Bool" +
-      "eanStats\022\021\n\tnum_trues\030\001 \001(\003\022\022\n\nnum_false" +
-      "s\030\002 \001(\003\0322\n\tLongStats\022\021\n\tlow_value\030\001 \001(\022\022" +
-      "\022\n\nhigh_value\030\002 \001(\022\0324\n\013DoubleStats\022\021\n\tlo" +
-      "w_value\030\001 \001(\001\022\022\n\nhigh_value\030\002 \001(\001\032=\n\013Str" +
-      "ingStats\022\026\n\016max_col_length\030\001 \001(\003\022\026\n\016avg_" +
-      "col_length\030\002 \001(\001\032\365\001\n\014DecimalStats\022[\n\tlow" +
-      "_value\030\001 \001(\0132H.org.apache.hadoop.hive.me",
-      "tastore.hbase.ColumnStats.DecimalStats.D" +
-      "ecimal\022\\\n\nhigh_value\030\002 \001(\0132H.org.apache." +
-      "hadoop.hive.metastore.hbase.ColumnStats." +
-      "DecimalStats.Decimal\032*\n\007Decimal\022\020\n\010unsca" +
-      "led\030\001 \002(\014\022\r\n\005scale\030\002 \002(\005\"\246\002\n\010Database\022\023\n" +
-      "\013description\030\001 \001(\t\022\013\n\003uri\030\002 \001(\t\022F\n\nparam" +
-      "eters\030\003 \001(\01322.org.apache.hadoop.hive.met" +
-      "astore.hbase.Parameters\022Q\n\nprivileges\030\004 " +
-      "\001(\0132=.org.apache.hadoop.hive.metastore.h" +
-      "base.PrincipalPrivilegeSet\022\022\n\nowner_name",
-      "\030\005 \001(\t\022I\n\nowner_type\030\006 \001(\01625.org.apache." +
-      "hadoop.hive.metastore.hbase.PrincipalTyp" +
-      "e\"$\n\017DelegationToken\022\021\n\ttoken_str\030\001 \002(\t\"" +
-      ":\n\013FieldSchema\022\014\n\004name\030\001 \002(\t\022\014\n\004type\030\002 \002" +
-      "(\t\022\017\n\007comment\030\003 \001(\t\"\206\004\n\010Function\022\022\n\nclas" +
-      "s_name\030\001 \001(\t\022\022\n\nowner_name\030\002 \001(\t\022I\n\nowne" +
-      "r_type\030\003 \001(\01625.org.apache.hadoop.hive.me" +
-      "tastore.hbase.PrincipalType\022\023\n\013create_ti" +
-      "me\030\004 \001(\022\022T\n\rfunction_type\030\005 \001(\0162=.org.ap" +
-      "ache.hadoop.hive.metastore.hbase.Functio",
-      "n.FunctionType\022S\n\rresource_uris\030\006 \003(\0132<." +
-      "org.apache.hadoop.hive.metastore.hbase.F" +
-      "unction.ResourceUri\032\254\001\n\013ResourceUri\022`\n\rr" +
-      "esource_type\030\001 \002(\0162I.org.apache.hadoop.h" +
+      "ecimalStats\022\023\n\013column_name\030\013 \001(\t\022\023\n\013bit_" +
+      "vectors\030\014 \001(\t\0325\n\014BooleanStats\022\021\n\tnum_tru" +
+      "es\030\001 \001(\003\022\022\n\nnum_falses\030\002 \001(\003\0322\n\tLongStat" +
+      "s\022\021\n\tlow_value\030\001 \001(\022\022\022\n\nhigh_value\030\002 \001(\022" +
+      "\0324\n\013DoubleStats\022\021\n\tlow_value\030\001 \001(\001\022\022\n\nhi" +
+      "gh_value\030\002 \001(\001\032=\n\013StringStats\022\026\n\016max_col" +
+      "_length\030\001 \001(\003\022\026\n\016avg_col_length\030\002 \001(\001\032\365\001" +
+      "\n\014DecimalStats\022[\n\tlow_value\030\001 \001(\0132H.org.",
+      "apache.hadoop.hive.metastore.hbase.Colum" +
+      "nStats.DecimalStats.Decimal\022\\\n\nhigh_valu" +
+      "e\030\002 \001(\0132H.org.apache.hadoop.hive.metasto" +
+      "re.hbase.ColumnStats.DecimalStats.Decima" +
+      "l\032*\n\007Decimal\022\020\n\010unscaled\030\001 \002(\014\022\r\n\005scale\030" +
+      "\002 \002(\005\"\246\002\n\010Database\022\023\n\013description\030\001 \001(\t\022" +
+      "\013\n\003uri\030\002 \001(\t\022F\n\nparameters\030\003 \001(\01322.org.a" +
+      "pache.hadoop.hive.metastore.hbase.Parame" +
+      "ters\022Q\n\nprivileges\030\004 \001(\0132=.org.apache.ha" +
+      "doop.hive.metastore.hbase.PrincipalPrivi",
+      "legeSet\022\022\n\nowner_name\030\005 \001(\t\022I\n\nowner_typ" +
+      "e\030\006 \001(\01625.org.apache.hadoop.hive.metasto" +
+      "re.hbase.PrincipalType\"$\n\017DelegationToke" +
+      "n\022\021\n\ttoken_str\030\001 \002(\t\":\n\013FieldSchema\022\014\n\004n" +
+      "ame\030\001 \002(\t\022\014\n\004type\030\002 \002(\t\022\017\n\007comment\030\003 \001(\t" +
+      "\"\206\004\n\010Function\022\022\n\nclass_name\030\001 \001(\t\022\022\n\nown" +
+      "er_name\030\002 \001(\t\022I\n\nowner_type\030\003 \001(\01625.org." +
+      "apache.hadoop.hive.metastore.hbase.Princ" +
+      "ipalType\022\023\n\013create_time\030\004 \001(\022\022T\n\rfunctio" +
+      "n_type\030\005 \001(\0162=.org.apache.hadoop.hive.me",
+      "tastore.hbase.Function.FunctionType\022S\n\rr" +
+      "esource_uris\030\006 \003(\0132<.org.apache.hadoop.h" +
       "ive.metastore.hbase.Function.ResourceUri" +
-      ".ResourceType\022\013\n\003uri\030\002 \002(\t\".\n\014ResourceTy" +
-      "pe\022\007\n\003JAR\020\001\022\010\n\004FILE\020\002\022\013\n\007ARCHIVE\020\003\"\030\n\014Fu" +
-      "nctionType\022\010\n\004JAVA\020\001\"\037\n\tMasterKey\022\022\n\nmas" +
-      "ter_key\030\001 \002(\t\",\n\016ParameterEntry\022\013\n\003key\030\001" +
-      " \002(\t\022\r\n\005value\030\002 \002(\t\"W\n\nParameters\022I\n\tpar",
-      "ameter\030\001 \003(\01326.org.apache.hadoop.hive.me" +
-      "tastore.hbase.ParameterEntry\"\360\001\n\tPartiti" +
-      "on\022\023\n\013create_time\030\001 \001(\003\022\030\n\020last_access_t" +
-      "ime\030\002 \001(\003\022\020\n\010location\030\003 \001(\t\022I\n\rsd_parame" +
-      "ters\030\004 \001(\01322.org.apache.hadoop.hive.meta" +
-      "store.hbase.Parameters\022\017\n\007sd_hash\030\005 \002(\014\022" +
-      "F\n\nparameters\030\006 \001(\01322.org.apache.hadoop." +
-      "hive.metastore.hbase.Parameters\"\204\001\n\032Prin" +
-      "cipalPrivilegeSetEntry\022\026\n\016principal_name" +
-      "\030\001 \002(\t\022N\n\nprivileges\030\002 \003(\0132:.org.apache.",
-      "hadoop.hive.metastore.hbase.PrivilegeGra" +
-      "ntInfo\"\275\001\n\025PrincipalPrivilegeSet\022Q\n\005user" +
-      "s\030\001 \003(\0132B.org.apache.hadoop.hive.metasto" +
-      "re.hbase.PrincipalPrivilegeSetEntry\022Q\n\005r" +
-      "oles\030\002 \003(\0132B.org.apache.hadoop.hive.meta" +
-      "store.hbase.PrincipalPrivilegeSetEntry\"\260" +
-      "\001\n\022PrivilegeGrantInfo\022\021\n\tprivilege\030\001 \001(\t" +
-      "\022\023\n\013create_time\030\002 \001(\003\022\017\n\007grantor\030\003 \001(\t\022K" +
-      "\n\014grantor_type\030\004 \001(\01625.org.apache.hadoop" +
-      ".hive.metastore.hbase.PrincipalType\022\024\n\014g",
-      "rant_option\030\005 \001(\010\"\374\001\n\rRoleGrantInfo\022\026\n\016p" +
-      "rincipal_name\030\001 \002(\t\022M\n\016principal_type\030\002 " +
-      "\002(\01625.org.apache.hadoop.hive.metastore.h" +
-      "base.PrincipalType\022\020\n\010add_time\030\003 \001(\003\022\017\n\007" +
-      "grantor\030\004 \001(\t\022K\n\014grantor_type\030\005 \001(\01625.or" +
-      "g.apache.hadoop.hive.metastore.hbase.Pri" +
-      "ncipalType\022\024\n\014grant_option\030\006 \001(\010\"^\n\021Role" +
-      "GrantInfoList\022I\n\ngrant_info\030\001 \003(\01325.org." +
-      "apache.hadoop.hive.metastore.hbase.RoleG" +
-      "rantInfo\"\030\n\010RoleList\022\014\n\004role\030\001 \003(\t\"/\n\004Ro",
-      "le\022\023\n\013create_time\030\001 \001(\003\022\022\n\nowner_name\030\002 " +
-      "\001(\t\"\254\010\n\021StorageDescriptor\022A\n\004cols\030\001 \003(\0132" +
-      "3.org.apache.hadoop.hive.metastore.hbase" +
-      ".FieldSchema\022\024\n\014input_format\030\002 \001(\t\022\025\n\rou" +
-      "tput_format\030\003 \001(\t\022\025\n\ris_compressed\030\004 \001(\010" +
-      "\022\023\n\013num_buckets\030\005 \001(\021\022W\n\nserde_info\030\006 \001(" +
-      "\0132C.org.apache.hadoop.hive.metastore.hba" +
-      "se.StorageDescriptor.SerDeInfo\022\023\n\013bucket" +
-      "_cols\030\007 \003(\t\022R\n\tsort_cols\030\010 \003(\0132?.org.apa" +
-      "che.hadoop.hive.metastore.hbase.StorageD",
-      "escriptor.Order\022Y\n\013skewed_info\030\t \001(\0132D.o" +
-      "rg.apache.hadoop.hive.metastore.hbase.St" +
-      "orageDescriptor.SkewedInfo\022!\n\031stored_as_" +
-      "sub_directories\030\n \001(\010\032.\n\005Order\022\023\n\013column" +
-      "_name\030\001 \002(\t\022\020\n\005order\030\002 \001(\021:\0011\032|\n\tSerDeIn" +
-      "fo\022\014\n\004name\030\001 \001(\t\022\031\n\021serialization_lib\030\002 " +
-      "\001(\t\022F\n\nparameters\030\003 \001(\01322.org.apache.had" +
-      "oop.hive.metastore.hbase.Parameters\032\214\003\n\n" +
-      "SkewedInfo\022\030\n\020skewed_col_names\030\001 \003(\t\022r\n\021" +
-      "skewed_col_values\030\002 \003(\0132W.org.apache.had",
-      "oop.hive.metastore.hbase.StorageDescript" +
-      "or.SkewedInfo.SkewedColValueList\022\206\001\n\036ske" +
-      "wed_col_value_location_maps\030\003 \003(\0132^.org." +
-      "apache.hadoop.hive.metastore.hbase.Stora" +
-      "geDescriptor.SkewedInfo.SkewedColValueLo" +
-      "cationMap\032.\n\022SkewedColValueList\022\030\n\020skewe" +
-      "d_col_value\030\001 \003(\t\0327\n\031SkewedColValueLocat" +
-      "ionMap\022\013\n\003key\030\001 \003(\t\022\r\n\005value\030\002 \002(\t\"\220\004\n\005T" +
-      "able\022\r\n\005owner\030\001 \001(\t\022\023\n\013create_time\030\002 \001(\003" +
-      "\022\030\n\020last_access_time\030\003 \001(\003\022\021\n\tretention\030",
-      "\004 \001(\003\022\020\n\010location\030\005 \001(\t\022I\n\rsd_parameters" +
-      "\030\006 \001(\01322.org.apache.hadoop.hive.metastor" +
-      "e.hbase.Parameters\022\017\n\007sd_hash\030\007 \002(\014\022K\n\016p" +
-      "artition_keys\030\010 \003(\01323.org.apache.hadoop." +
-      "hive.metastore.hbase.FieldSchema\022F\n\npara" +
-      "meters\030\t \001(\01322.org.apache.hadoop.hive.me" +
-      "tastore.hbase.Parameters\022\032\n\022view_origina" +
-      "l_text\030\n \001(\t\022\032\n\022view_expanded_text\030\013 \001(\t" +
-      "\022\022\n\ntable_type\030\014 \001(\t\022Q\n\nprivileges\030\r \001(\013" +
-      "2=.org.apache.hadoop.hive.metastore.hbas",
-      "e.PrincipalPrivilegeSet\022\024\n\014is_temporary\030" +
-      "\016 \001(\010\"\353\004\n\026PartitionKeyComparator\022\r\n\005name" +
-      "s\030\001 \002(\t\022\r\n\005types\030\002 \002(\t\022S\n\002op\030\003 \003(\0132G.org" +
-      ".apache.hadoop.hive.metastore.hbase.Part" +
-      "itionKeyComparator.Operator\022S\n\005range\030\004 \003" +
-      "(\0132D.org.apache.hadoop.hive.metastore.hb" +
-      "ase.PartitionKeyComparator.Range\032(\n\004Mark" +
-      "\022\r\n\005value\030\001 \002(\t\022\021\n\tinclusive\030\002 \002(\010\032\272\001\n\005R" +
-      "ange\022\013\n\003key\030\001 \002(\t\022R\n\005start\030\002 \001(\0132C.org.a" +
-      "pache.hadoop.hive.metastore.hbase.Partit",
-      "ionKeyComparator.Mark\022P\n\003end\030\003 \001(\0132C.org" +
-      ".apache.hadoop.hive.metastore.hbase.Part" +
-      "itionKeyComparator.Mark\032\241\001\n\010Operator\022Z\n\004" +
-      "type\030\001 \002(\0162L.org.apache.hadoop.hive.meta" +
-      "store.hbase.PartitionKeyComparator.Opera" +
-      "tor.Type\022\013\n\003key\030\002 \002(\t\022\013\n\003val\030\003 \002(\t\"\037\n\004Ty" +
-      "pe\022\010\n\004LIKE\020\000\022\r\n\tNOTEQUALS\020\001*#\n\rPrincipal" +
-      "Type\022\010\n\004USER\020\000\022\010\n\004ROLE\020\001"
+      "\032\254\001\n\013ResourceUri\022`\n\rresource_type\030\001 \002(\0162" +
+      "I.org.apache.hadoop.hive.metastore.hbase" +
+      ".Function.ResourceUri.ResourceType\022\013\n\003ur" +
+      "i\030\002 \002(\t\".\n\014ResourceType\022\007\n\003JAR\020\001\022\010\n\004FILE" +
+      "\020\002\022\013\n\007ARCHIVE\020\003\"\030\n\014FunctionType\022\010\n\004JAVA\020" +
+      "\001\"\037\n\tMasterKey\022\022\n\nmaster_key\030\001 \002(\t\",\n\016Pa" +
+      "rameterEntry\022\013\n\003key\030\001 \002(\t\022\r\n\005value\030\002 \002(\t",
+      "\"W\n\nParameters\022I\n\tparameter\030\001 \003(\01326.org." +
+      "apache.hadoop.hive.metastore.hbase.Param" +
+      "eterEntry\"\360\001\n\tPartition\022\023\n\013create_time\030\001" +
+      " \001(\003\022\030\n\020last_access_time\030\002 \001(\003\022\020\n\010locati" +
+      "on\030\003 \001(\t\022I\n\rsd_parameters\030\004 \001(\01322.org.ap" +
+      "ache.hadoop.hive.metastore.hbase.Paramet" +
+      "ers\022\017\n\007sd_hash\030\005 \002(\014\022F\n\nparameters\030\006 \001(\013" +
+      "22.org.apache.hadoop.hive.metastore.hbas" +
+      "e.Parameters\"\204\001\n\032PrincipalPrivilegeSetEn" +
+      "try\022\026\n\016principal_name\030\001 \002(\t\022N\n\nprivilege",
+      "s\030\002 \003(\0132:.org.apache.hadoop.hive.metasto" +
+      "re.hbase.PrivilegeGrantInfo\"\275\001\n\025Principa" +
+      "lPrivilegeSet\022Q\n\005users\030\001 \003(\0132B.org.apach" +
+      "e.hadoop.hive.metastore.hbase.PrincipalP" +
+      "rivilegeSetEntry\022Q\n\005roles\030\002 \003(\0132B.org.ap" +
+      "ache.hadoop.hive.metastore.hbase.Princip" +
+      "alPrivilegeSetEntry\"\260\001\n\022PrivilegeGrantIn" +
+      "fo\022\021\n\tprivilege\030\001 \001(\t\022\023\n\013create_time\030\002 \001" +
+      "(\003\022\017\n\007grantor\030\003 \001(\t\022K\n\014grantor_type\030\004 \001(" +
+      "\01625.org.apache.hadoop.hive.metastore.hba",
+      "se.PrincipalType\022\024\n\014grant_option\030\005 \001(\010\"\374" +
+      "\001\n\rRoleGrantInfo\022\026\n\016principal_name\030\001 \002(\t" +
+      "\022M\n\016principal_type\030\002 \002(\01625.org.apache.ha" +
+      "doop.hive.metastore.hbase.PrincipalType\022" +
+      "\020\n\010add_time\030\003 \001(\003\022\017\n\007grantor\030\004 \001(\t\022K\n\014gr" +
+      "antor_type\030\005 \001(\01625.org.apache.hadoop.hiv" +
+      "e.metastore.hbase.PrincipalType\022\024\n\014grant" +
+      "_option\030\006 \001(\010\"^\n\021RoleGrantInfoList\022I\n\ngr" +
+      "ant_info\030\001 \003(\01325.org.apache.hadoop.hive." +
+      "metastore.hbase.RoleGrantInfo\"\030\n\010RoleLis",
+      "t\022\014\n\004role\030\001 \003(\t\"/\n\004Role\022\023\n\013create_time\030\001" +
+      " \001(\003\022\022\n\nowner_name\030\002 \001(\t\"\254\010\n\021StorageDesc" +
+      "riptor\022A\n\004cols\030\001 \003(\01323.org.apache.hadoop" +
+      ".hive.metastore.hbase.FieldSchema\022\024\n\014inp" +
+      "ut_format\030\002 \001(\t\022\025\n\routput_format\030\003 \001(\t\022\025" +
+      "\n\ris_compressed\030\004 \001(\010\022\023\n\013num_buckets\030\005 \001" +
+      "(\021\022W\n\nserde_info\030\006 \001(\0132C.org.apache.hado" +
+      "op.hive.metastore.hbase.StorageDescripto" +
+      "r.SerDeInfo\022\023\n\013bucket_cols\030\007 \003(\t\022R\n\tsort" +
+      "_cols\030\010 \003(\0132?.org.apache.hadoop.hive.met",
+      "astore.hbase.StorageDescriptor.Order\022Y\n\013" +
+      "skewed_info\030\t \001(\0132D.org.apache.hadoop.hi" +
+      "ve.metastore.hbase.StorageDescriptor.Ske" +
+      "wedInfo\022!\n\031stored_as_sub_directories\030\n \001" +
+      "(\010\032.\n\005Order\022\023\n\013column_name\030\001 \002(\t\022\020\n\005orde" +
+      "r\030\002 \001(\021:\0011\032|\n\tSerDeInfo\022\014\n\004name\030\001 \001(\t\022\031\n" +
+      "\021serialization_lib\030\002 \001(\t\022F\n\nparameters\030\003" +
+      " \001(\01322.org.apache.hadoop.hive.metastore." +
+      "hbase.Parameters\032\214\003\n\nSkewedInfo\022\030\n\020skewe" +
+      "d_col_names\030\001 \003(\t\022r\n\021skewed_col_values\030\002",
+      " \003(\0132W.org.apache.hadoop.hive.metastore." +
+      "hbase.StorageDescriptor.SkewedInfo.Skewe" +
+      "dColValueList\022\206\001\n\036skewed_col_value_locat" +
+      "ion_maps\030\003 \003(\0132^.org.apache.hadoop.hive." +
+      "metastore.hbase.StorageDescriptor.Skewed" +
+      "Info.SkewedColValueLocationMap\032.\n\022Skewed" +
+      "ColValueList\022\030\n\020skewed_col_value\030\001 \003(\t\0327" +
+      "\n\031SkewedColValueLocationMap\022\013\n\003key\030\001 \003(\t" +
+      "\022\r\n\005value\030\002 \002(\t\"\220\004\n\005Table\022\r\n\005owner\030\001 \001(\t" +
+      "\022\023\n\013create_time\030\002 \001(\003\022\030\n\020last_access_tim",
+      "e\030\003 \001(\003\022\021\n\tretention\030\004 \001(\003\022\020\n\010location\030\005" +
+      " \001(\t\022I\n\rsd_parameters\030\006 \001(\01322.org.apache" +
+      ".hadoop.hive.metastore.hbase.Parameters\022" +
+      "\017\n\007sd_hash\030\007 \002(\014\022K\n\016partition_keys\030\010 \003(\013" +
+      "23.org.apache.hadoop.hive.metastore.hbas" +
+      "e.FieldSchema\022F\n\nparameters\030\t \001(\01322.org." +
+      "apache.hadoop.hive.metastore.hbase.Param" +
+      "eters\022\032\n\022view_original_text\030\n \001(\t\022\032\n\022vie" +
+      "w_expanded_text\030\013 \001(\t\022\022\n\ntable_type\030\014 \001(" +
+      "\t\022Q\n\nprivileges\030\r \001(\0132=.org.apache.hadoo",
+      "p.hive.metastore.hbase.PrincipalPrivileg" +
+      "eSet\022\024\n\014is_temporary\030\016 \001(\010\"\353\004\n\026Partition" +
+      "KeyComparator\022\r\n\005names\030\001 \002(\t\022\r\n\005types\030\002 " +
+      "\002(\t\022S\n\002op\030\003 \003(\0132G.org.apache.hadoop.hive" +
+      ".metastore.hbase.PartitionKeyComparator." +
+      "Operator\022S\n\005range\030\004 \003(\0132D.org.apache.had" +
+      "oop.hive.metastore.hbase.PartitionKeyCom" +
+      "parator.Range\032(\n\004Mark\022\r\n\005value\030\001 \002(\t\022\021\n\t" +
+      "inclusive\030\002 \002(\010\032\272\001\n\005Range\022\013\n\003key\030\001 \002(\t\022R" +
+      "\n\005start\030\002 \001(\0132C.org.apache.hadoop.hive.m",
+      "etastore.hbase.PartitionKeyComparator.Ma" +
+      "rk\022P\n\003end\030\003 \001(\0132C.org.apache.hadoop.hive" +
+      ".metastore.hbase.PartitionKeyComparator." +
+      "Mark\032\241\001\n\010Operator\022Z\n\004type\030\001 \002(\0162L.org.ap" +
+      "ache.hadoop.hive.metastore.hbase.Partiti" +
+      "onKeyComparator.Operator.Type\022\013\n\003key\030\002 \002" +
+      "(\t\022\013\n\003val\030\003 \002(\t\"\037\n\004Type\022\010\n\004LIKE\020\000\022\r\n\tNOT" +
+      "EQUALS\020\001*#\n\rPrincipalType\022\010\n\004USER\020\000\022\010\n\004R" +
+      "OLE\020\001"
     };
     com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner =
       new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() {
@@ -34689,7 +34846,7 @@ public final class HbaseMetastoreProto {
           internal_static_org_apache_hadoop_hive_metastore_hbase_ColumnStats_fieldAccessorTable = new
             com.google.protobuf.GeneratedMessage.FieldAccessorTable(
               internal_static_org_apache_hadoop_hive_metastore_hbase_ColumnStats_descriptor,
-              new java.lang.String[] { "LastAnalyzed", "ColumnType", "NumNulls", "NumDistinctValues", "BoolStats", "LongStats", "DoubleStats", "StringStats", "BinaryStats", "DecimalStats", "ColumnName", });
+              new java.lang.String[] { "LastAnalyzed", "ColumnType", "NumNulls", "NumDistinctValues", "BoolStats", "LongStats", "DoubleStats", "StringStats", "BinaryStats", "DecimalStats", "ColumnName", "BitVectors", });
           internal_static_org_apache_hadoop_hive_metastore_hbase_ColumnStats_BooleanStats_descriptor =
             internal_static_org_apache_hadoop_hive_metastore_hbase_ColumnStats_descriptor.getNestedTypes().get(0);
           internal_static_org_apache_hadoop_hive_metastore_hbase_ColumnStats_BooleanStats_fieldAccessorTable = new

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp
----------------------------------------------------------------------
diff --git a/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp b/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp
index 0203b06..81577b6 100644
--- a/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp
+++ b/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp
@@ -5425,6 +5425,11 @@ void BooleanColumnStatsData::__set_numNulls(const int64_t val) {
   this->numNulls = val;
 }
 
+void BooleanColumnStatsData::__set_bitVectors(const std::string& val) {
+  this->bitVectors = val;
+__isset.bitVectors = true;
+}
+
 uint32_t BooleanColumnStatsData::read(::apache::thrift::protocol::TProtocol* iprot) {
 
   apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
@@ -5473,6 +5478,14 @@ uint32_t BooleanColumnStatsData::read(::apache::thrift::protocol::TProtocol* ipr
           xfer += iprot->skip(ftype);
         }
         break;
+      case 4:
+        if (ftype == ::apache::thrift::protocol::T_STRING) {
+          xfer += iprot->readString(this->bitVectors);
+          this->__isset.bitVectors = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
       default:
         xfer += iprot->skip(ftype);
         break;
@@ -5508,6 +5521,11 @@ uint32_t BooleanColumnStatsData::write(::apache::thrift::protocol::TProtocol* op
   xfer += oprot->writeI64(this->numNulls);
   xfer += oprot->writeFieldEnd();
 
+  if (this->__isset.bitVectors) {
+    xfer += oprot->writeFieldBegin("bitVectors", ::apache::thrift::protocol::T_STRING, 4);
+    xfer += oprot->writeString(this->bitVectors);
+    xfer += oprot->writeFieldEnd();
+  }
   xfer += oprot->writeFieldStop();
   xfer += oprot->writeStructEnd();
   return xfer;
@@ -5518,17 +5536,23 @@ void swap(BooleanColumnStatsData &a, BooleanColumnStatsData &b) {
   swap(a.numTrues, b.numTrues);
   swap(a.numFalses, b.numFalses);
   swap(a.numNulls, b.numNulls);
+  swap(a.bitVectors, b.bitVectors);
+  swap(a.__isset, b.__isset);
 }
 
 BooleanColumnStatsData::BooleanColumnStatsData(const BooleanColumnStatsData& other279) {
   numTrues = other279.numTrues;
   numFalses = other279.numFalses;
   numNulls = other279.numNulls;
+  bitVectors = other279.bitVectors;
+  __isset = other279.__isset;
 }
 BooleanColumnStatsData& BooleanColumnStatsData::operator=(const BooleanColumnStatsData& other280) {
   numTrues = other280.numTrues;
   numFalses = other280.numFalses;
   numNulls = other280.numNulls;
+  bitVectors = other280.bitVectors;
+  __isset = other280.__isset;
   return *this;
 }
 void BooleanColumnStatsData::printTo(std::ostream& out) const {
@@ -5537,6 +5561,7 @@ void BooleanColumnStatsData::printTo(std::ostream& out) const {
   out << "numTrues=" << to_string(numTrues);
   out << ", " << "numFalses=" << to_string(numFalses);
   out << ", " << "numNulls=" << to_string(numNulls);
+  out << ", " << "bitVectors="; (__isset.bitVectors ? (out << to_string(bitVectors)) : (out << "<null>"));
   out << ")";
 }
 
@@ -5563,6 +5588,11 @@ void DoubleColumnStatsData::__set_numDVs(const int64_t val) {
   this->numDVs = val;
 }
 
+void DoubleColumnStatsData::__set_bitVectors(const std::string& val) {
+  this->bitVectors = val;
+__isset.bitVectors = true;
+}
+
 uint32_t DoubleColumnStatsData::read(::apache::thrift::protocol::TProtocol* iprot) {
 
   apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
@@ -5618,6 +5648,14 @@ uint32_t DoubleColumnStatsData::read(::apache::thrift::protocol::TProtocol* ipro
           xfer += iprot->skip(ftype);
         }
         break;
+      case 5:
+        if (ftype == ::apache::thrift::protocol::T_STRING) {
+          xfer += iprot->readString(this->bitVectors);
+          this->__isset.bitVectors = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
       default:
         xfer += iprot->skip(ftype);
         break;
@@ -5657,6 +5695,11 @@ uint32_t DoubleColumnStatsData::write(::apache::thrift::protocol::TProtocol* opr
   xfer += oprot->writeI64(this->numDVs);
   xfer += oprot->writeFieldEnd();
 
+  if (this->__isset.bitVectors) {
+    xfer += oprot->writeFieldBegin("bitVectors", ::apache::thrift::protocol::T_STRING, 5);
+    xfer += oprot->writeString(this->bitVectors);
+    xfer += oprot->writeFieldEnd();
+  }
   xfer += oprot->writeFieldStop();
   xfer += oprot->writeStructEnd();
   return xfer;
@@ -5668,6 +5711,7 @@ void swap(DoubleColumnStatsData &a, DoubleColumnStatsData &b) {
   swap(a.highValue, b.highValue);
   swap(a.numNulls, b.numNulls);
   swap(a.numDVs, b.numDVs);
+  swap(a.bitVectors, b.bitVectors);
   swap(a.__isset, b.__isset);
 }
 
@@ -5676,6 +5720,7 @@ DoubleColumnStatsData::DoubleColumnStatsData(const DoubleColumnStatsData& other2
   highValue = other281.highValue;
   numNulls = other281.numNulls;
   numDVs = other281.numDVs;
+  bitVectors = other281.bitVectors;
   __isset = other281.__isset;
 }
 DoubleColumnStatsData& DoubleColumnStatsData::operator=(const DoubleColumnStatsData& other282) {
@@ -5683,6 +5728,7 @@ DoubleColumnStatsData& DoubleColumnStatsData::operator=(const DoubleColumnStatsD
   highValue = other282.highValue;
   numNulls = other282.numNulls;
   numDVs = other282.numDVs;
+  bitVectors = other282.bitVectors;
   __isset = other282.__isset;
   return *this;
 }
@@ -5693,6 +5739,7 @@ void DoubleColumnStatsData::printTo(std::ostream& out) const {
   out << ", " << "highValue="; (__isset.highValue ? (out << to_string(highValue)) : (out << "<null>"));
   out << ", " << "numNulls=" << to_string(numNulls);
   out << ", " << "numDVs=" << to_string(numDVs);
+  out << ", " << "bitVectors="; (__isset.bitVectors ? (out << to_string(bitVectors)) : (out << "<null>"));
   out << ")";
 }
 
@@ -5719,6 +5766,11 @@ void LongColumnStatsData::__set_numDVs(const int64_t val) {
   this->numDVs = val;
 }
 
+void LongColumnStatsData::__set_bitVectors(const std::string& val) {
+  this->bitVectors = val;
+__isset.bitVectors = true;
+}
+
 uint32_t LongColumnStatsData::read(::apache::thrift::protocol::TProtocol* iprot) {
 
   apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
@@ -5774,6 +5826,14 @@ uint32_t LongColumnStatsData::read(::apache::thrift::protocol::TProtocol* iprot)
           xfer += iprot->skip(ftype);
         }
         break;
+      case 5:
+        if (ftype == ::apache::thrift::protocol::T_STRING) {
+          xfer += iprot->readString(this->bitVectors);
+          this->__isset.bitVectors = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
       default:
         xfer += iprot->skip(ftype);
         break;
@@ -5813,6 +5873,11 @@ uint32_t LongColumnStatsData::write(::apache::thrift::protocol::TProtocol* oprot
   xfer += oprot->writeI64(this->numDVs);
   xfer += oprot->writeFieldEnd();
 
+  if (this->__isset.bitVectors) {
+    xfer += oprot->writeFieldBegin("bitVectors", ::apache::thrift::protocol::T_STRING, 5);
+    xfer += oprot->writeString(this->bitVectors);
+    xfer += oprot->writeFieldEnd();
+  }
   xfer += oprot->writeFieldStop();
   xfer += oprot->writeStructEnd();
   return xfer;
@@ -5824,6 +5889,7 @@ void swap(LongColumnStatsData &a, LongColumnStatsData &b) {
   swap(a.highValue, b.highValue);
   swap(a.numNulls, b.numNulls);
   swap(a.numDVs, b.numDVs);
+  swap(a.bitVectors, b.bitVectors);
   swap(a.__isset, b.__isset);
 }
 
@@ -5832,6 +5898,7 @@ LongColumnStatsData::LongColumnStatsData(const LongColumnStatsData& other283) {
   highValue = other283.highValue;
   numNulls = other283.numNulls;
   numDVs = other283.numDVs;
+  bitVectors = other283.bitVectors;
   __isset = other283.__isset;
 }
 LongColumnStatsData& LongColumnStatsData::operator=(const LongColumnStatsData& other284) {
@@ -5839,6 +5906,7 @@ LongColumnStatsData& LongColumnStatsData::operator=(const LongColumnStatsData& o
   highValue = other284.highValue;
   numNulls = other284.numNulls;
   numDVs = other284.numDVs;
+  bitVectors = other284.bitVectors;
   __isset = other284.__isset;
   return *this;
 }
@@ -5849,6 +5917,7 @@ void LongColumnStatsData::printTo(std::ostream& out) const {
   out << ", " << "highValue="; (__isset.highValue ? (out << to_string(highValue)) : (out << "<null>"));
   out << ", " << "numNulls=" << to_string(numNulls);
   out << ", " << "numDVs=" << to_string(numDVs);
+  out << ", " << "bitVectors="; (__isset.bitVectors ? (out << to_string(bitVectors)) : (out << "<null>"));
   out << ")";
 }
 
@@ -5873,6 +5942,11 @@ void StringColumnStatsData::__set_numDVs(const int64_t val) {
   this->numDVs = val;
 }
 
+void StringColumnStatsData::__set_bitVectors(const std::string& val) {
+  this->bitVectors = val;
+__isset.bitVectors = true;
+}
+
 uint32_t StringColumnStatsData::read(::apache::thrift::protocol::TProtocol* iprot) {
 
   apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
@@ -5930,6 +6004,14 @@ uint32_t StringColumnStatsData::read(::apache::thrift::protocol::TProtocol* ipro
           xfer += iprot->skip(ftype);
         }
         break;
+      case 5:
+        if (ftype == ::apache::thrift::protocol::T_STRING) {
+          xfer += iprot->readString(this->bitVectors);
+          this->__isset.bitVectors = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
       default:
         xfer += iprot->skip(ftype);
         break;
@@ -5971,6 +6053,11 @@ uint32_t StringColumnStatsData::write(::apache::thrift::protocol::TProtocol* opr
   xfer += oprot->writeI64(this->numDVs);
   xfer += oprot->writeFieldEnd();
 
+  if (this->__isset.bitVectors) {
+    xfer += oprot->writeFieldBegin("bitVectors", ::apache::thrift::protocol::T_STRING, 5);
+    xfer += oprot->writeString(this->bitVectors);
+    xfer += oprot->writeFieldEnd();
+  }
   xfer += oprot->writeFieldStop();
   xfer += oprot->writeStructEnd();
   return xfer;
@@ -5982,6 +6069,8 @@ void swap(StringColumnStatsData &a, StringColumnStatsData &b) {
   swap(a.avgColLen, b.avgColLen);
   swap(a.numNulls, b.numNulls);
   swap(a.numDVs, b.numDVs);
+  swap(a.bitVectors, b.bitVectors);
+  swap(a.__isset, b.__isset);
 }
 
 StringColumnStatsData::StringColumnStatsData(const StringColumnStatsData& other285) {
@@ -5989,12 +6078,16 @@ StringColumnStatsData::StringColumnStatsData(const StringColumnStatsData& other2
   avgColLen = other285.avgColLen;
   numNulls = other285.numNulls;
   numDVs = other285.numDVs;
+  bitVectors = other285.bitVectors;
+  __isset = other285.__isset;
 }
 StringColumnStatsData& StringColumnStatsData::operator=(const StringColumnStatsData& other286) {
   maxColLen = other286.maxColLen;
   avgColLen = other286.avgColLen;
   numNulls = other286.numNulls;
   numDVs = other286.numDVs;
+  bitVectors = other286.bitVectors;
+  __isset = other286.__isset;
   return *this;
 }
 void StringColumnStatsData::printTo(std::ostream& out) const {
@@ -6004,6 +6097,7 @@ void StringColumnStatsData::printTo(std::ostream& out) const {
   out << ", " << "avgColLen=" << to_string(avgColLen);
   out << ", " << "numNulls=" << to_string(numNulls);
   out << ", " << "numDVs=" << to_string(numDVs);
+  out << ", " << "bitVectors="; (__isset.bitVectors ? (out << to_string(bitVectors)) : (out << "<null>"));
   out << ")";
 }
 
@@ -6024,6 +6118,11 @@ void BinaryColumnStatsData::__set_numNulls(const int64_t val) {
   this->numNulls = val;
 }
 
+void BinaryColumnStatsData::__set_bitVectors(const std::string& val) {
+  this->bitVectors = val;
+__isset.bitVectors = true;
+}
+
 uint32_t BinaryColumnStatsData::read(::apache::thrift::protocol::TProtocol* iprot) {
 
   apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
@@ -6072,6 +6171,14 @@ uint32_t BinaryColumnStatsData::read(::apache::thrift::protocol::TProtocol* ipro
           xfer += iprot->skip(ftype);
         }
         break;
+      case 4:
+        if (ftype == ::apache::thrift::protocol::T_STRING) {
+          xfer += iprot->readString(this->bitVectors);
+          this->__isset.bitVectors = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
       default:
         xfer += iprot->skip(ftype);
         break;
@@ -6107,6 +6214,11 @@ uint32_t BinaryColumnStatsData::write(::apache::thrift::protocol::TProtocol* opr
   xfer += oprot->writeI64(this->numNulls);
   xfer += oprot->writeFieldEnd();
 
+  if (this->__isset.bitVectors) {
+    xfer += oprot->writeFieldBegin("bitVectors", ::apache::thrift::protocol::T_STRING, 4);
+    xfer += oprot->writeString(this->bitVectors);
+    xfer += oprot->writeFieldEnd();
+  }
   xfer += oprot->writeFieldStop();
   xfer += oprot->writeStructEnd();
   return xfer;
@@ -6117,17 +6229,23 @@ void swap(BinaryColumnStatsData &a, BinaryColumnStatsData &b) {
   swap(a.maxColLen, b.maxColLen);
   swap(a.avgColLen, b.avgColLen);
   swap(a.numNulls, b.numNulls);
+  swap(a.bitVectors, b.bitVectors);
+  swap(a.__isset, b.__isset);
 }
 
 BinaryColumnStatsData::BinaryColumnStatsData(const BinaryColumnStatsData& other287) {
   maxColLen = other287.maxColLen;
   avgColLen = other287.avgColLen;
   numNulls = other287.numNulls;
+  bitVectors = other287.bitVectors;
+  __isset = other287.__isset;
 }
 BinaryColumnStatsData& BinaryColumnStatsData::operator=(const BinaryColumnStatsData& other288) {
   maxColLen = other288.maxColLen;
   avgColLen = other288.avgColLen;
   numNulls = other288.numNulls;
+  bitVectors = other288.bitVectors;
+  __isset = other288.__isset;
   return *this;
 }
 void BinaryColumnStatsData::printTo(std::ostream& out) const {
@@ -6136,6 +6254,7 @@ void BinaryColumnStatsData::printTo(std::ostream& out) const {
   out << "maxColLen=" << to_string(maxColLen);
   out << ", " << "avgColLen=" << to_string(avgColLen);
   out << ", " << "numNulls=" << to_string(numNulls);
+  out << ", " << "bitVectors="; (__isset.bitVectors ? (out << to_string(bitVectors)) : (out << "<null>"));
   out << ")";
 }
 
@@ -6271,6 +6390,11 @@ void DecimalColumnStatsData::__set_numDVs(const int64_t val) {
   this->numDVs = val;
 }
 
+void DecimalColumnStatsData::__set_bitVectors(const std::string& val) {
+  this->bitVectors = val;
+__isset.bitVectors = true;
+}
+
 uint32_t DecimalColumnStatsData::read(::apache::thrift::protocol::TProtocol* iprot) {
 
   apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
@@ -6326,6 +6450,14 @@ uint32_t DecimalColumnStatsData::read(::apache::thrift::protocol::TProtocol* ipr
           xfer += iprot->skip(ftype);
         }
         break;
+      case 5:
+        if (ftype == ::apache::thrift::protocol::T_STRING) {
+          xfer += iprot->readString(this->bitVectors);
+          this->__isset.bitVectors = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
       default:
         xfer += iprot->skip(ftype);
         break;
@@ -6365,6 +6497,11 @@ uint32_t DecimalColumnStatsData::write(::apache::thrift::protocol::TProtocol* op
   xfer += oprot->writeI64(this->numDVs);
   xfer += oprot->writeFieldEnd();
 
+  if (this->__isset.bitVectors) {
+    xfer += oprot->writeFieldBegin("bitVectors", ::apache::thrift::protocol::T_STRING, 5);
+    xfer += oprot->writeString(this->bitVectors);
+    xfer += oprot->writeFieldEnd();
+  }
   xfer += oprot->writeFieldStop();
   xfer += oprot->writeStructEnd();
   return xfer;
@@ -6376,6 +6513,7 @@ void swap(DecimalColumnStatsData &a, DecimalColumnStatsData &b) {
   swap(a.highValue, b.highValue);
   swap(a.numNulls, b.numNulls);
   swap(a.numDVs, b.numDVs);
+  swap(a.bitVectors, b.bitVectors);
   swap(a.__isset, b.__isset);
 }
 
@@ -6384,6 +6522,7 @@ DecimalColumnStatsData::DecimalColumnStatsData(const DecimalColumnStatsData& oth
   highValue = other291.highValue;
   numNulls = other291.numNulls;
   numDVs = other291.numDVs;
+  bitVectors = other291.bitVectors;
   __isset = other291.__isset;
 }
 DecimalColumnStatsData& DecimalColumnStatsData::operator=(const DecimalColumnStatsData& other292) {
@@ -6391,6 +6530,7 @@ DecimalColumnStatsData& DecimalColumnStatsData::operator=(const DecimalColumnSta
   highValue = other292.highValue;
   numNulls = other292.numNulls;
   numDVs = other292.numDVs;
+  bitVectors = other292.bitVectors;
   __isset = other292.__isset;
   return *this;
 }
@@ -6401,6 +6541,7 @@ void DecimalColumnStatsData::printTo(std::ostream& out) const {
   out << ", " << "highValue="; (__isset.highValue ? (out << to_string(highValue)) : (out << "<null>"));
   out << ", " << "numNulls=" << to_string(numNulls);
   out << ", " << "numDVs=" << to_string(numDVs);
+  out << ", " << "bitVectors="; (__isset.bitVectors ? (out << to_string(bitVectors)) : (out << "<null>"));
   out << ")";
 }
 
@@ -6513,6 +6654,11 @@ void DateColumnStatsData::__set_numDVs(const int64_t val) {
   this->numDVs = val;
 }
 
+void DateColumnStatsData::__set_bitVectors(const std::string& val) {
+  this->bitVectors = val;
+__isset.bitVectors = true;
+}
+
 uint32_t DateColumnStatsData::read(::apache::thrift::protocol::TProtocol* iprot) {
 
   apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
@@ -6568,6 +6714,14 @@ uint32_t DateColumnStatsData::read(::apache::thrift::protocol::TProtocol* iprot)
           xfer += iprot->skip(ftype);
         }
         break;
+      case 5:
+        if (ftype == ::apache::thrift::protocol::T_STRING) {
+          xfer += iprot->readString(this->bitVectors);
+          this->__isset.bitVectors = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
       default:
         xfer += iprot->skip(ftype);
         break;
@@ -6607,6 +6761,11 @@ uint32_t DateColumnStatsData::write(::apache::thrift::protocol::TProtocol* oprot
   xfer += oprot->writeI64(this->numDVs);
   xfer += oprot->writeFieldEnd();
 
+  if (this->__isset.bitVectors) {
+    xfer += oprot->writeFieldBegin("bitVectors", ::apache::thrift::protocol::T_STRING, 5);
+    xfer += oprot->writeString(this->bitVectors);
+    xfer += oprot->writeFieldEnd();
+  }
   xfer += oprot->writeFieldStop();
   xfer += oprot->writeStructEnd();
   return xfer;
@@ -6618,6 +6777,7 @@ void swap(DateColumnStatsData &a, DateColumnStatsData &b) {
   swap(a.highValue, b.highValue);
   swap(a.numNulls, b.numNulls);
   swap(a.numDVs, b.numDVs);
+  swap(a.bitVectors, b.bitVectors);
   swap(a.__isset, b.__isset);
 }
 
@@ -6626,6 +6786,7 @@ DateColumnStatsData::DateColumnStatsData(const DateColumnStatsData& other295) {
   highValue = other295.highValue;
   numNulls = other295.numNulls;
   numDVs = other295.numDVs;
+  bitVectors = other295.bitVectors;
   __isset = other295.__isset;
 }
 DateColumnStatsData& DateColumnStatsData::operator=(const DateColumnStatsData& other296) {
@@ -6633,6 +6794,7 @@ DateColumnStatsData& DateColumnStatsData::operator=(const DateColumnStatsData& o
   highValue = other296.highValue;
   numNulls = other296.numNulls;
   numDVs = other296.numDVs;
+  bitVectors = other296.bitVectors;
   __isset = other296.__isset;
   return *this;
 }
@@ -6643,6 +6805,7 @@ void DateColumnStatsData::printTo(std::ostream& out) const {
   out << ", " << "highValue="; (__isset.highValue ? (out << to_string(highValue)) : (out << "<null>"));
   out << ", " << "numNulls=" << to_string(numNulls);
   out << ", " << "numDVs=" << to_string(numDVs);
+  out << ", " << "bitVectors="; (__isset.bitVectors ? (out << to_string(bitVectors)) : (out << "<null>"));
   out << ")";
 }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h
----------------------------------------------------------------------
diff --git a/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h b/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h
index ce1d7da..c501ac0 100644
--- a/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h
+++ b/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h
@@ -2364,19 +2364,26 @@ inline std::ostream& operator<<(std::ostream& out, const Index& obj)
   return out;
 }
 
+typedef struct _BooleanColumnStatsData__isset {
+  _BooleanColumnStatsData__isset() : bitVectors(false) {}
+  bool bitVectors :1;
+} _BooleanColumnStatsData__isset;
 
 class BooleanColumnStatsData {
  public:
 
   BooleanColumnStatsData(const BooleanColumnStatsData&);
   BooleanColumnStatsData& operator=(const BooleanColumnStatsData&);
-  BooleanColumnStatsData() : numTrues(0), numFalses(0), numNulls(0) {
+  BooleanColumnStatsData() : numTrues(0), numFalses(0), numNulls(0), bitVectors() {
   }
 
   virtual ~BooleanColumnStatsData() throw();
   int64_t numTrues;
   int64_t numFalses;
   int64_t numNulls;
+  std::string bitVectors;
+
+  _BooleanColumnStatsData__isset __isset;
 
   void __set_numTrues(const int64_t val);
 
@@ -2384,6 +2391,8 @@ class BooleanColumnStatsData {
 
   void __set_numNulls(const int64_t val);
 
+  void __set_bitVectors(const std::string& val);
+
   bool operator == (const BooleanColumnStatsData & rhs) const
   {
     if (!(numTrues == rhs.numTrues))
@@ -2392,6 +2401,10 @@ class BooleanColumnStatsData {
       return false;
     if (!(numNulls == rhs.numNulls))
       return false;
+    if (__isset.bitVectors != rhs.__isset.bitVectors)
+      return false;
+    else if (__isset.bitVectors && !(bitVectors == rhs.bitVectors))
+      return false;
     return true;
   }
   bool operator != (const BooleanColumnStatsData &rhs) const {
@@ -2415,9 +2428,10 @@ inline std::ostream& operator<<(std::ostream& out, const BooleanColumnStatsData&
 }
 
 typedef struct _DoubleColumnStatsData__isset {
-  _DoubleColumnStatsData__isset() : lowValue(false), highValue(false) {}
+  _DoubleColumnStatsData__isset() : lowValue(false), highValue(false), bitVectors(false) {}
   bool lowValue :1;
   bool highValue :1;
+  bool bitVectors :1;
 } _DoubleColumnStatsData__isset;
 
 class DoubleColumnStatsData {
@@ -2425,7 +2439,7 @@ class DoubleColumnStatsData {
 
   DoubleColumnStatsData(const DoubleColumnStatsData&);
   DoubleColumnStatsData& operator=(const DoubleColumnStatsData&);
-  DoubleColumnStatsData() : lowValue(0), highValue(0), numNulls(0), numDVs(0) {
+  DoubleColumnStatsData() : lowValue(0), highValue(0), numNulls(0), numDVs(0), bitVectors() {
   }
 
   virtual ~DoubleColumnStatsData() throw();
@@ -2433,6 +2447,7 @@ class DoubleColumnStatsData {
   double highValue;
   int64_t numNulls;
   int64_t numDVs;
+  std::string bitVectors;
 
   _DoubleColumnStatsData__isset __isset;
 
@@ -2444,6 +2459,8 @@ class DoubleColumnStatsData {
 
   void __set_numDVs(const int64_t val);
 
+  void __set_bitVectors(const std::string& val);
+
   bool operator == (const DoubleColumnStatsData & rhs) const
   {
     if (__isset.lowValue != rhs.__isset.lowValue)
@@ -2458,6 +2475,10 @@ class DoubleColumnStatsData {
       return false;
     if (!(numDVs == rhs.numDVs))
       return false;
+    if (__isset.bitVectors != rhs.__isset.bitVectors)
+      return false;
+    else if (__isset.bitVectors && !(bitVectors == rhs.bitVectors))
+      return false;
     return true;
   }
   bool operator != (const DoubleColumnStatsData &rhs) const {
@@ -2481,9 +2502,10 @@ inline std::ostream& operator<<(std::ostream& out, const DoubleColumnStatsData&
 }
 
 typedef struct _LongColumnStatsData__isset {
-  _LongColumnStatsData__isset() : lowValue(false), highValue(false) {}
+  _LongColumnStatsData__isset() : lowValue(false), highValue(false), bitVectors(false) {}
   bool lowValue :1;
   bool highValue :1;
+  bool bitVectors :1;
 } _LongColumnStatsData__isset;
 
 class LongColumnStatsData {
@@ -2491,7 +2513,7 @@ class LongColumnStatsData {
 
   LongColumnStatsData(const LongColumnStatsData&);
   LongColumnStatsData& operator=(const LongColumnStatsData&);
-  LongColumnStatsData() : lowValue(0), highValue(0), numNulls(0), numDVs(0) {
+  LongColumnStatsData() : lowValue(0), highValue(0), numNulls(0), numDVs(0), bitVectors() {
   }
 
   virtual ~LongColumnStatsData() throw();
@@ -2499,6 +2521,7 @@ class LongColumnStatsData {
   int64_t highValue;
   int64_t numNulls;
   int64_t numDVs;
+  std::string bitVectors;
 
   _LongColumnStatsData__isset __isset;
 
@@ -2510,6 +2533,8 @@ class LongColumnStatsData {
 
   void __set_numDVs(const int64_t val);
 
+  void __set_bitVectors(const std::string& val);
+
   bool operator == (const LongColumnStatsData & rhs) const
   {
     if (__isset.lowValue != rhs.__isset.lowValue)
@@ -2524,6 +2549,10 @@ class LongColumnStatsData {
       return false;
     if (!(numDVs == rhs.numDVs))
       return false;
+    if (__isset.bitVectors != rhs.__isset.bitVectors)
+      return false;
+    else if (__isset.bitVectors && !(bitVectors == rhs.bitVectors))
+      return false;
     return true;
   }
   bool operator != (const LongColumnStatsData &rhs) const {
@@ -2546,13 +2575,17 @@ inline std::ostream& operator<<(std::ostream& out, const LongColumnStatsData& ob
   return out;
 }
 
+typedef struct _StringColumnStatsData__isset {
+  _StringColumnStatsData__isset() : bitVectors(false) {}
+  bool bitVectors :1;
+} _StringColumnStatsData__isset;
 
 class StringColumnStatsData {
  public:
 
   StringColumnStatsData(const StringColumnStatsData&);
   StringColumnStatsData& operator=(const StringColumnStatsData&);
-  StringColumnStatsData() : maxColLen(0), avgColLen(0), numNulls(0), numDVs(0) {
+  StringColumnStatsData() : maxColLen(0), avgColLen(0), numNulls(0), numDVs(0), bitVectors() {
   }
 
   virtual ~StringColumnStatsData() throw();
@@ -2560,6 +2593,9 @@ class StringColumnStatsData {
   double avgColLen;
   int64_t numNulls;
   int64_t numDVs;
+  std::string bitVectors;
+
+  _StringColumnStatsData__isset __isset;
 
   void __set_maxColLen(const int64_t val);
 
@@ -2569,6 +2605,8 @@ class StringColumnStatsData {
 
   void __set_numDVs(const int64_t val);
 
+  void __set_bitVectors(const std::string& val);
+
   bool operator == (const StringColumnStatsData & rhs) const
   {
     if (!(maxColLen == rhs.maxColLen))
@@ -2579,6 +2617,10 @@ class StringColumnStatsData {
       return false;
     if (!(numDVs == rhs.numDVs))
       return false;
+    if (__isset.bitVectors != rhs.__isset.bitVectors)
+      return false;
+    else if (__isset.bitVectors && !(bitVectors == rhs.bitVectors))
+      return false;
     return true;
   }
   bool operator != (const StringColumnStatsData &rhs) const {
@@ -2601,19 +2643,26 @@ inline std::ostream& operator<<(std::ostream& out, const StringColumnStatsData&
   return out;
 }
 
+typedef struct _BinaryColumnStatsData__isset {
+  _BinaryColumnStatsData__isset() : bitVectors(false) {}
+  bool bitVectors :1;
+} _BinaryColumnStatsData__isset;
 
 class BinaryColumnStatsData {
  public:
 
   BinaryColumnStatsData(const BinaryColumnStatsData&);
   BinaryColumnStatsData& operator=(const BinaryColumnStatsData&);
-  BinaryColumnStatsData() : maxColLen(0), avgColLen(0), numNulls(0) {
+  BinaryColumnStatsData() : maxColLen(0), avgColLen(0), numNulls(0), bitVectors() {
   }
 
   virtual ~BinaryColumnStatsData() throw();
   int64_t maxColLen;
   double avgColLen;
   int64_t numNulls;
+  std::string bitVectors;
+
+  _BinaryColumnStatsData__isset __isset;
 
   void __set_maxColLen(const int64_t val);
 
@@ -2621,6 +2670,8 @@ class BinaryColumnStatsData {
 
   void __set_numNulls(const int64_t val);
 
+  void __set_bitVectors(const std::string& val);
+
   bool operator == (const BinaryColumnStatsData & rhs) const
   {
     if (!(maxColLen == rhs.maxColLen))
@@ -2629,6 +2680,10 @@ class BinaryColumnStatsData {
       return false;
     if (!(numNulls == rhs.numNulls))
       return false;
+    if (__isset.bitVectors != rhs.__isset.bitVectors)
+      return false;
+    else if (__isset.bitVectors && !(bitVectors == rhs.bitVectors))
+      return false;
     return true;
   }
   bool operator != (const BinaryColumnStatsData &rhs) const {
@@ -2697,9 +2752,10 @@ inline std::ostream& operator<<(std::ostream& out, const Decimal& obj)
 }
 
 typedef struct _DecimalColumnStatsData__isset {
-  _DecimalColumnStatsData__isset() : lowValue(false), highValue(false) {}
+  _DecimalColumnStatsData__isset() : lowValue(false), highValue(false), bitVectors(false) {}
   bool lowValue :1;
   bool highValue :1;
+  bool bitVectors :1;
 } _DecimalColumnStatsData__isset;
 
 class DecimalColumnStatsData {
@@ -2707,7 +2763,7 @@ class DecimalColumnStatsData {
 
   DecimalColumnStatsData(const DecimalColumnStatsData&);
   DecimalColumnStatsData& operator=(const DecimalColumnStatsData&);
-  DecimalColumnStatsData() : numNulls(0), numDVs(0) {
+  DecimalColumnStatsData() : numNulls(0), numDVs(0), bitVectors() {
   }
 
   virtual ~DecimalColumnStatsData() throw();
@@ -2715,6 +2771,7 @@ class DecimalColumnStatsData {
   Decimal highValue;
   int64_t numNulls;
   int64_t numDVs;
+  std::string bitVectors;
 
   _DecimalColumnStatsData__isset __isset;
 
@@ -2726,6 +2783,8 @@ class DecimalColumnStatsData {
 
   void __set_numDVs(const int64_t val);
 
+  void __set_bitVectors(const std::string& val);
+
   bool operator == (const DecimalColumnStatsData & rhs) const
   {
     if (__isset.lowValue != rhs.__isset.lowValue)
@@ -2740,6 +2799,10 @@ class DecimalColumnStatsData {
       return false;
     if (!(numDVs == rhs.numDVs))
       return false;
+    if (__isset.bitVectors != rhs.__isset.bitVectors)
+      return false;
+    else if (__isset.bitVectors && !(bitVectors == rhs.bitVectors))
+      return false;
     return true;
   }
   bool operator != (const DecimalColumnStatsData &rhs) const {
@@ -2803,9 +2866,10 @@ inline std::ostream& operator<<(std::ostream& out, const Date& obj)
 }
 
 typedef struct _DateColumnStatsData__isset {
-  _DateColumnStatsData__isset() : lowValue(false), highValue(false) {}
+  _DateColumnStatsData__isset() : lowValue(false), highValue(false), bitVectors(false) {}
   bool lowValue :1;
   bool highValue :1;
+  bool bitVectors :1;
 } _DateColumnStatsData__isset;
 
 class DateColumnStatsData {
@@ -2813,7 +2877,7 @@ class DateColumnStatsData {
 
   DateColumnStatsData(const DateColumnStatsData&);
   DateColumnStatsData& operator=(const DateColumnStatsData&);
-  DateColumnStatsData() : numNulls(0), numDVs(0) {
+  DateColumnStatsData() : numNulls(0), numDVs(0), bitVectors() {
   }
 
   virtual ~DateColumnStatsData() throw();
@@ -2821,6 +2885,7 @@ class DateColumnStatsData {
   Date highValue;
   int64_t numNulls;
   int64_t numDVs;
+  std::string bitVectors;
 
   _DateColumnStatsData__isset __isset;
 
@@ -2832,6 +2897,8 @@ class DateColumnStatsData {
 
   void __set_numDVs(const int64_t val);
 
+  void __set_bitVectors(const std::string& val);
+
   bool operator == (const DateColumnStatsData & rhs) const
   {
     if (__isset.lowValue != rhs.__isset.lowValue)
@@ -2846,6 +2913,10 @@ class DateColumnStatsData {
       return false;
     if (!(numDVs == rhs.numDVs))
       return false;
+    if (__isset.bitVectors != rhs.__isset.bitVectors)
+      return false;
+    else if (__isset.bitVectors && !(bitVectors == rhs.bitVectors))
+      return false;
     return true;
   }
   bool operator != (const DateColumnStatsData &rhs) const {

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/BinaryColumnStatsData.java
----------------------------------------------------------------------
diff --git a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/BinaryColumnStatsData.java b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/BinaryColumnStatsData.java
index 84e393c..eeb5105 100644
--- a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/BinaryColumnStatsData.java
+++ b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/BinaryColumnStatsData.java
@@ -41,6 +41,7 @@ public class BinaryColumnStatsData implements org.apache.thrift.TBase<BinaryColu
   private static final org.apache.thrift.protocol.TField MAX_COL_LEN_FIELD_DESC = new org.apache.thrift.protocol.TField("maxColLen", org.apache.thrift.protocol.TType.I64, (short)1);
   private static final org.apache.thrift.protocol.TField AVG_COL_LEN_FIELD_DESC = new org.apache.thrift.protocol.TField("avgColLen", org.apache.thrift.protocol.TType.DOUBLE, (short)2);
   private static final org.apache.thrift.protocol.TField NUM_NULLS_FIELD_DESC = new org.apache.thrift.protocol.TField("numNulls", org.apache.thrift.protocol.TType.I64, (short)3);
+  private static final org.apache.thrift.protocol.TField BIT_VECTORS_FIELD_DESC = new org.apache.thrift.protocol.TField("bitVectors", org.apache.thrift.protocol.TType.STRING, (short)4);
 
   private static final Map<Class<? extends IScheme>, SchemeFactory> schemes = new HashMap<Class<? extends IScheme>, SchemeFactory>();
   static {
@@ -51,12 +52,14 @@ public class BinaryColumnStatsData implements org.apache.thrift.TBase<BinaryColu
   private long maxColLen; // required
   private double avgColLen; // required
   private long numNulls; // required
+  private String bitVectors; // optional
 
   /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */
   public enum _Fields implements org.apache.thrift.TFieldIdEnum {
     MAX_COL_LEN((short)1, "maxColLen"),
     AVG_COL_LEN((short)2, "avgColLen"),
-    NUM_NULLS((short)3, "numNulls");
+    NUM_NULLS((short)3, "numNulls"),
+    BIT_VECTORS((short)4, "bitVectors");
 
     private static final Map<String, _Fields> byName = new HashMap<String, _Fields>();
 
@@ -77,6 +80,8 @@ public class BinaryColumnStatsData implements org.apache.thrift.TBase<BinaryColu
           return AVG_COL_LEN;
         case 3: // NUM_NULLS
           return NUM_NULLS;
+        case 4: // BIT_VECTORS
+          return BIT_VECTORS;
         default:
           return null;
       }
@@ -121,6 +126,7 @@ public class BinaryColumnStatsData implements org.apache.thrift.TBase<BinaryColu
   private static final int __AVGCOLLEN_ISSET_ID = 1;
   private static final int __NUMNULLS_ISSET_ID = 2;
   private byte __isset_bitfield = 0;
+  private static final _Fields optionals[] = {_Fields.BIT_VECTORS};
   public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap;
   static {
     Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class);
@@ -130,6 +136,8 @@ public class BinaryColumnStatsData implements org.apache.thrift.TBase<BinaryColu
         new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.DOUBLE)));
     tmpMap.put(_Fields.NUM_NULLS, new org.apache.thrift.meta_data.FieldMetaData("numNulls", org.apache.thrift.TFieldRequirementType.REQUIRED, 
         new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64)));
+    tmpMap.put(_Fields.BIT_VECTORS, new org.apache.thrift.meta_data.FieldMetaData("bitVectors", org.apache.thrift.TFieldRequirementType.OPTIONAL, 
+        new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING)));
     metaDataMap = Collections.unmodifiableMap(tmpMap);
     org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(BinaryColumnStatsData.class, metaDataMap);
   }
@@ -159,6 +167,9 @@ public class BinaryColumnStatsData implements org.apache.thrift.TBase<BinaryColu
     this.maxColLen = other.maxColLen;
     this.avgColLen = other.avgColLen;
     this.numNulls = other.numNulls;
+    if (other.isSetBitVectors()) {
+      this.bitVectors = other.bitVectors;
+    }
   }
 
   public BinaryColumnStatsData deepCopy() {
@@ -173,6 +184,7 @@ public class BinaryColumnStatsData implements org.apache.thrift.TBase<BinaryColu
     this.avgColLen = 0.0;
     setNumNullsIsSet(false);
     this.numNulls = 0;
+    this.bitVectors = null;
   }
 
   public long getMaxColLen() {
@@ -241,6 +253,29 @@ public class BinaryColumnStatsData implements org.apache.thrift.TBase<BinaryColu
     __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __NUMNULLS_ISSET_ID, value);
   }
 
+  public String getBitVectors() {
+    return this.bitVectors;
+  }
+
+  public void setBitVectors(String bitVectors) {
+    this.bitVectors = bitVectors;
+  }
+
+  public void unsetBitVectors() {
+    this.bitVectors = null;
+  }
+
+  /** Returns true if field bitVectors is set (has been assigned a value) and false otherwise */
+  public boolean isSetBitVectors() {
+    return this.bitVectors != null;
+  }
+
+  public void setBitVectorsIsSet(boolean value) {
+    if (!value) {
+      this.bitVectors = null;
+    }
+  }
+
   public void setFieldValue(_Fields field, Object value) {
     switch (field) {
     case MAX_COL_LEN:
@@ -267,6 +302,14 @@ public class BinaryColumnStatsData implements org.apache.thrift.TBase<BinaryColu
       }
       break;
 
+    case BIT_VECTORS:
+      if (value == null) {
+        unsetBitVectors();
+      } else {
+        setBitVectors((String)value);
+      }
+      break;
+
     }
   }
 
@@ -281,6 +324,9 @@ public class BinaryColumnStatsData implements org.apache.thrift.TBase<BinaryColu
     case NUM_NULLS:
       return getNumNulls();
 
+    case BIT_VECTORS:
+      return getBitVectors();
+
     }
     throw new IllegalStateException();
   }
@@ -298,6 +344,8 @@ public class BinaryColumnStatsData implements org.apache.thrift.TBase<BinaryColu
       return isSetAvgColLen();
     case NUM_NULLS:
       return isSetNumNulls();
+    case BIT_VECTORS:
+      return isSetBitVectors();
     }
     throw new IllegalStateException();
   }
@@ -342,6 +390,15 @@ public class BinaryColumnStatsData implements org.apache.thrift.TBase<BinaryColu
         return false;
     }
 
+    boolean this_present_bitVectors = true && this.isSetBitVectors();
+    boolean that_present_bitVectors = true && that.isSetBitVectors();
+    if (this_present_bitVectors || that_present_bitVectors) {
+      if (!(this_present_bitVectors && that_present_bitVectors))
+        return false;
+      if (!this.bitVectors.equals(that.bitVectors))
+        return false;
+    }
+
     return true;
   }
 
@@ -364,6 +421,11 @@ public class BinaryColumnStatsData implements org.apache.thrift.TBase<BinaryColu
     if (present_numNulls)
       list.add(numNulls);
 
+    boolean present_bitVectors = true && (isSetBitVectors());
+    list.add(present_bitVectors);
+    if (present_bitVectors)
+      list.add(bitVectors);
+
     return list.hashCode();
   }
 
@@ -405,6 +467,16 @@ public class BinaryColumnStatsData implements org.apache.thrift.TBase<BinaryColu
         return lastComparison;
       }
     }
+    lastComparison = Boolean.valueOf(isSetBitVectors()).compareTo(other.isSetBitVectors());
+    if (lastComparison != 0) {
+      return lastComparison;
+    }
+    if (isSetBitVectors()) {
+      lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.bitVectors, other.bitVectors);
+      if (lastComparison != 0) {
+        return lastComparison;
+      }
+    }
     return 0;
   }
 
@@ -436,6 +508,16 @@ public class BinaryColumnStatsData implements org.apache.thrift.TBase<BinaryColu
     sb.append("numNulls:");
     sb.append(this.numNulls);
     first = false;
+    if (isSetBitVectors()) {
+      if (!first) sb.append(", ");
+      sb.append("bitVectors:");
+      if (this.bitVectors == null) {
+        sb.append("null");
+      } else {
+        sb.append(this.bitVectors);
+      }
+      first = false;
+    }
     sb.append(")");
     return sb.toString();
   }
@@ -517,6 +599,14 @@ public class BinaryColumnStatsData implements org.apache.thrift.TBase<BinaryColu
               org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
             }
             break;
+          case 4: // BIT_VECTORS
+            if (schemeField.type == org.apache.thrift.protocol.TType.STRING) {
+              struct.bitVectors = iprot.readString();
+              struct.setBitVectorsIsSet(true);
+            } else { 
+              org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
+            }
+            break;
           default:
             org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
         }
@@ -539,6 +629,13 @@ public class BinaryColumnStatsData implements org.apache.thrift.TBase<BinaryColu
       oprot.writeFieldBegin(NUM_NULLS_FIELD_DESC);
       oprot.writeI64(struct.numNulls);
       oprot.writeFieldEnd();
+      if (struct.bitVectors != null) {
+        if (struct.isSetBitVectors()) {
+          oprot.writeFieldBegin(BIT_VECTORS_FIELD_DESC);
+          oprot.writeString(struct.bitVectors);
+          oprot.writeFieldEnd();
+        }
+      }
       oprot.writeFieldStop();
       oprot.writeStructEnd();
     }
@@ -559,6 +656,14 @@ public class BinaryColumnStatsData implements org.apache.thrift.TBase<BinaryColu
       oprot.writeI64(struct.maxColLen);
       oprot.writeDouble(struct.avgColLen);
       oprot.writeI64(struct.numNulls);
+      BitSet optionals = new BitSet();
+      if (struct.isSetBitVectors()) {
+        optionals.set(0);
+      }
+      oprot.writeBitSet(optionals, 1);
+      if (struct.isSetBitVectors()) {
+        oprot.writeString(struct.bitVectors);
+      }
     }
 
     @Override
@@ -570,6 +675,11 @@ public class BinaryColumnStatsData implements org.apache.thrift.TBase<BinaryColu
       struct.setAvgColLenIsSet(true);
       struct.numNulls = iprot.readI64();
       struct.setNumNullsIsSet(true);
+      BitSet incoming = iprot.readBitSet(1);
+      if (incoming.get(0)) {
+        struct.bitVectors = iprot.readString();
+        struct.setBitVectorsIsSet(true);
+      }
     }
   }
 


[2/4] hive git commit: HIVE-12763: Use bit vector to track NDV (Pengcheng Xiong, reviewed by Laljo John Pullokkaran and Alan Gates)

Posted by px...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py
----------------------------------------------------------------------
diff --git a/metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py b/metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py
index 409c247..77dd9a6 100644
--- a/metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py
+++ b/metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py
@@ -3749,6 +3749,7 @@ class BooleanColumnStatsData:
    - numTrues
    - numFalses
    - numNulls
+   - bitVectors
   """
 
   thrift_spec = (
@@ -3756,12 +3757,14 @@ class BooleanColumnStatsData:
     (1, TType.I64, 'numTrues', None, None, ), # 1
     (2, TType.I64, 'numFalses', None, None, ), # 2
     (3, TType.I64, 'numNulls', None, None, ), # 3
+    (4, TType.STRING, 'bitVectors', None, None, ), # 4
   )
 
-  def __init__(self, numTrues=None, numFalses=None, numNulls=None,):
+  def __init__(self, numTrues=None, numFalses=None, numNulls=None, bitVectors=None,):
     self.numTrues = numTrues
     self.numFalses = numFalses
     self.numNulls = numNulls
+    self.bitVectors = bitVectors
 
   def read(self, iprot):
     if iprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None and fastbinary is not None:
@@ -3787,6 +3790,11 @@ class BooleanColumnStatsData:
           self.numNulls = iprot.readI64()
         else:
           iprot.skip(ftype)
+      elif fid == 4:
+        if ftype == TType.STRING:
+          self.bitVectors = iprot.readString()
+        else:
+          iprot.skip(ftype)
       else:
         iprot.skip(ftype)
       iprot.readFieldEnd()
@@ -3809,6 +3817,10 @@ class BooleanColumnStatsData:
       oprot.writeFieldBegin('numNulls', TType.I64, 3)
       oprot.writeI64(self.numNulls)
       oprot.writeFieldEnd()
+    if self.bitVectors is not None:
+      oprot.writeFieldBegin('bitVectors', TType.STRING, 4)
+      oprot.writeString(self.bitVectors)
+      oprot.writeFieldEnd()
     oprot.writeFieldStop()
     oprot.writeStructEnd()
 
@@ -3827,6 +3839,7 @@ class BooleanColumnStatsData:
     value = (value * 31) ^ hash(self.numTrues)
     value = (value * 31) ^ hash(self.numFalses)
     value = (value * 31) ^ hash(self.numNulls)
+    value = (value * 31) ^ hash(self.bitVectors)
     return value
 
   def __repr__(self):
@@ -3847,6 +3860,7 @@ class DoubleColumnStatsData:
    - highValue
    - numNulls
    - numDVs
+   - bitVectors
   """
 
   thrift_spec = (
@@ -3855,13 +3869,15 @@ class DoubleColumnStatsData:
     (2, TType.DOUBLE, 'highValue', None, None, ), # 2
     (3, TType.I64, 'numNulls', None, None, ), # 3
     (4, TType.I64, 'numDVs', None, None, ), # 4
+    (5, TType.STRING, 'bitVectors', None, None, ), # 5
   )
 
-  def __init__(self, lowValue=None, highValue=None, numNulls=None, numDVs=None,):
+  def __init__(self, lowValue=None, highValue=None, numNulls=None, numDVs=None, bitVectors=None,):
     self.lowValue = lowValue
     self.highValue = highValue
     self.numNulls = numNulls
     self.numDVs = numDVs
+    self.bitVectors = bitVectors
 
   def read(self, iprot):
     if iprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None and fastbinary is not None:
@@ -3892,6 +3908,11 @@ class DoubleColumnStatsData:
           self.numDVs = iprot.readI64()
         else:
           iprot.skip(ftype)
+      elif fid == 5:
+        if ftype == TType.STRING:
+          self.bitVectors = iprot.readString()
+        else:
+          iprot.skip(ftype)
       else:
         iprot.skip(ftype)
       iprot.readFieldEnd()
@@ -3918,6 +3939,10 @@ class DoubleColumnStatsData:
       oprot.writeFieldBegin('numDVs', TType.I64, 4)
       oprot.writeI64(self.numDVs)
       oprot.writeFieldEnd()
+    if self.bitVectors is not None:
+      oprot.writeFieldBegin('bitVectors', TType.STRING, 5)
+      oprot.writeString(self.bitVectors)
+      oprot.writeFieldEnd()
     oprot.writeFieldStop()
     oprot.writeStructEnd()
 
@@ -3935,6 +3960,7 @@ class DoubleColumnStatsData:
     value = (value * 31) ^ hash(self.highValue)
     value = (value * 31) ^ hash(self.numNulls)
     value = (value * 31) ^ hash(self.numDVs)
+    value = (value * 31) ^ hash(self.bitVectors)
     return value
 
   def __repr__(self):
@@ -3955,6 +3981,7 @@ class LongColumnStatsData:
    - highValue
    - numNulls
    - numDVs
+   - bitVectors
   """
 
   thrift_spec = (
@@ -3963,13 +3990,15 @@ class LongColumnStatsData:
     (2, TType.I64, 'highValue', None, None, ), # 2
     (3, TType.I64, 'numNulls', None, None, ), # 3
     (4, TType.I64, 'numDVs', None, None, ), # 4
+    (5, TType.STRING, 'bitVectors', None, None, ), # 5
   )
 
-  def __init__(self, lowValue=None, highValue=None, numNulls=None, numDVs=None,):
+  def __init__(self, lowValue=None, highValue=None, numNulls=None, numDVs=None, bitVectors=None,):
     self.lowValue = lowValue
     self.highValue = highValue
     self.numNulls = numNulls
     self.numDVs = numDVs
+    self.bitVectors = bitVectors
 
   def read(self, iprot):
     if iprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None and fastbinary is not None:
@@ -4000,6 +4029,11 @@ class LongColumnStatsData:
           self.numDVs = iprot.readI64()
         else:
           iprot.skip(ftype)
+      elif fid == 5:
+        if ftype == TType.STRING:
+          self.bitVectors = iprot.readString()
+        else:
+          iprot.skip(ftype)
       else:
         iprot.skip(ftype)
       iprot.readFieldEnd()
@@ -4026,6 +4060,10 @@ class LongColumnStatsData:
       oprot.writeFieldBegin('numDVs', TType.I64, 4)
       oprot.writeI64(self.numDVs)
       oprot.writeFieldEnd()
+    if self.bitVectors is not None:
+      oprot.writeFieldBegin('bitVectors', TType.STRING, 5)
+      oprot.writeString(self.bitVectors)
+      oprot.writeFieldEnd()
     oprot.writeFieldStop()
     oprot.writeStructEnd()
 
@@ -4043,6 +4081,7 @@ class LongColumnStatsData:
     value = (value * 31) ^ hash(self.highValue)
     value = (value * 31) ^ hash(self.numNulls)
     value = (value * 31) ^ hash(self.numDVs)
+    value = (value * 31) ^ hash(self.bitVectors)
     return value
 
   def __repr__(self):
@@ -4063,6 +4102,7 @@ class StringColumnStatsData:
    - avgColLen
    - numNulls
    - numDVs
+   - bitVectors
   """
 
   thrift_spec = (
@@ -4071,13 +4111,15 @@ class StringColumnStatsData:
     (2, TType.DOUBLE, 'avgColLen', None, None, ), # 2
     (3, TType.I64, 'numNulls', None, None, ), # 3
     (4, TType.I64, 'numDVs', None, None, ), # 4
+    (5, TType.STRING, 'bitVectors', None, None, ), # 5
   )
 
-  def __init__(self, maxColLen=None, avgColLen=None, numNulls=None, numDVs=None,):
+  def __init__(self, maxColLen=None, avgColLen=None, numNulls=None, numDVs=None, bitVectors=None,):
     self.maxColLen = maxColLen
     self.avgColLen = avgColLen
     self.numNulls = numNulls
     self.numDVs = numDVs
+    self.bitVectors = bitVectors
 
   def read(self, iprot):
     if iprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None and fastbinary is not None:
@@ -4108,6 +4150,11 @@ class StringColumnStatsData:
           self.numDVs = iprot.readI64()
         else:
           iprot.skip(ftype)
+      elif fid == 5:
+        if ftype == TType.STRING:
+          self.bitVectors = iprot.readString()
+        else:
+          iprot.skip(ftype)
       else:
         iprot.skip(ftype)
       iprot.readFieldEnd()
@@ -4134,6 +4181,10 @@ class StringColumnStatsData:
       oprot.writeFieldBegin('numDVs', TType.I64, 4)
       oprot.writeI64(self.numDVs)
       oprot.writeFieldEnd()
+    if self.bitVectors is not None:
+      oprot.writeFieldBegin('bitVectors', TType.STRING, 5)
+      oprot.writeString(self.bitVectors)
+      oprot.writeFieldEnd()
     oprot.writeFieldStop()
     oprot.writeStructEnd()
 
@@ -4155,6 +4206,7 @@ class StringColumnStatsData:
     value = (value * 31) ^ hash(self.avgColLen)
     value = (value * 31) ^ hash(self.numNulls)
     value = (value * 31) ^ hash(self.numDVs)
+    value = (value * 31) ^ hash(self.bitVectors)
     return value
 
   def __repr__(self):
@@ -4174,6 +4226,7 @@ class BinaryColumnStatsData:
    - maxColLen
    - avgColLen
    - numNulls
+   - bitVectors
   """
 
   thrift_spec = (
@@ -4181,12 +4234,14 @@ class BinaryColumnStatsData:
     (1, TType.I64, 'maxColLen', None, None, ), # 1
     (2, TType.DOUBLE, 'avgColLen', None, None, ), # 2
     (3, TType.I64, 'numNulls', None, None, ), # 3
+    (4, TType.STRING, 'bitVectors', None, None, ), # 4
   )
 
-  def __init__(self, maxColLen=None, avgColLen=None, numNulls=None,):
+  def __init__(self, maxColLen=None, avgColLen=None, numNulls=None, bitVectors=None,):
     self.maxColLen = maxColLen
     self.avgColLen = avgColLen
     self.numNulls = numNulls
+    self.bitVectors = bitVectors
 
   def read(self, iprot):
     if iprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None and fastbinary is not None:
@@ -4212,6 +4267,11 @@ class BinaryColumnStatsData:
           self.numNulls = iprot.readI64()
         else:
           iprot.skip(ftype)
+      elif fid == 4:
+        if ftype == TType.STRING:
+          self.bitVectors = iprot.readString()
+        else:
+          iprot.skip(ftype)
       else:
         iprot.skip(ftype)
       iprot.readFieldEnd()
@@ -4234,6 +4294,10 @@ class BinaryColumnStatsData:
       oprot.writeFieldBegin('numNulls', TType.I64, 3)
       oprot.writeI64(self.numNulls)
       oprot.writeFieldEnd()
+    if self.bitVectors is not None:
+      oprot.writeFieldBegin('bitVectors', TType.STRING, 4)
+      oprot.writeString(self.bitVectors)
+      oprot.writeFieldEnd()
     oprot.writeFieldStop()
     oprot.writeStructEnd()
 
@@ -4252,6 +4316,7 @@ class BinaryColumnStatsData:
     value = (value * 31) ^ hash(self.maxColLen)
     value = (value * 31) ^ hash(self.avgColLen)
     value = (value * 31) ^ hash(self.numNulls)
+    value = (value * 31) ^ hash(self.bitVectors)
     return value
 
   def __repr__(self):
@@ -4355,6 +4420,7 @@ class DecimalColumnStatsData:
    - highValue
    - numNulls
    - numDVs
+   - bitVectors
   """
 
   thrift_spec = (
@@ -4363,13 +4429,15 @@ class DecimalColumnStatsData:
     (2, TType.STRUCT, 'highValue', (Decimal, Decimal.thrift_spec), None, ), # 2
     (3, TType.I64, 'numNulls', None, None, ), # 3
     (4, TType.I64, 'numDVs', None, None, ), # 4
+    (5, TType.STRING, 'bitVectors', None, None, ), # 5
   )
 
-  def __init__(self, lowValue=None, highValue=None, numNulls=None, numDVs=None,):
+  def __init__(self, lowValue=None, highValue=None, numNulls=None, numDVs=None, bitVectors=None,):
     self.lowValue = lowValue
     self.highValue = highValue
     self.numNulls = numNulls
     self.numDVs = numDVs
+    self.bitVectors = bitVectors
 
   def read(self, iprot):
     if iprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None and fastbinary is not None:
@@ -4402,6 +4470,11 @@ class DecimalColumnStatsData:
           self.numDVs = iprot.readI64()
         else:
           iprot.skip(ftype)
+      elif fid == 5:
+        if ftype == TType.STRING:
+          self.bitVectors = iprot.readString()
+        else:
+          iprot.skip(ftype)
       else:
         iprot.skip(ftype)
       iprot.readFieldEnd()
@@ -4428,6 +4501,10 @@ class DecimalColumnStatsData:
       oprot.writeFieldBegin('numDVs', TType.I64, 4)
       oprot.writeI64(self.numDVs)
       oprot.writeFieldEnd()
+    if self.bitVectors is not None:
+      oprot.writeFieldBegin('bitVectors', TType.STRING, 5)
+      oprot.writeString(self.bitVectors)
+      oprot.writeFieldEnd()
     oprot.writeFieldStop()
     oprot.writeStructEnd()
 
@@ -4445,6 +4522,7 @@ class DecimalColumnStatsData:
     value = (value * 31) ^ hash(self.highValue)
     value = (value * 31) ^ hash(self.numNulls)
     value = (value * 31) ^ hash(self.numDVs)
+    value = (value * 31) ^ hash(self.bitVectors)
     return value
 
   def __repr__(self):
@@ -4532,6 +4610,7 @@ class DateColumnStatsData:
    - highValue
    - numNulls
    - numDVs
+   - bitVectors
   """
 
   thrift_spec = (
@@ -4540,13 +4619,15 @@ class DateColumnStatsData:
     (2, TType.STRUCT, 'highValue', (Date, Date.thrift_spec), None, ), # 2
     (3, TType.I64, 'numNulls', None, None, ), # 3
     (4, TType.I64, 'numDVs', None, None, ), # 4
+    (5, TType.STRING, 'bitVectors', None, None, ), # 5
   )
 
-  def __init__(self, lowValue=None, highValue=None, numNulls=None, numDVs=None,):
+  def __init__(self, lowValue=None, highValue=None, numNulls=None, numDVs=None, bitVectors=None,):
     self.lowValue = lowValue
     self.highValue = highValue
     self.numNulls = numNulls
     self.numDVs = numDVs
+    self.bitVectors = bitVectors
 
   def read(self, iprot):
     if iprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None and fastbinary is not None:
@@ -4579,6 +4660,11 @@ class DateColumnStatsData:
           self.numDVs = iprot.readI64()
         else:
           iprot.skip(ftype)
+      elif fid == 5:
+        if ftype == TType.STRING:
+          self.bitVectors = iprot.readString()
+        else:
+          iprot.skip(ftype)
       else:
         iprot.skip(ftype)
       iprot.readFieldEnd()
@@ -4605,6 +4691,10 @@ class DateColumnStatsData:
       oprot.writeFieldBegin('numDVs', TType.I64, 4)
       oprot.writeI64(self.numDVs)
       oprot.writeFieldEnd()
+    if self.bitVectors is not None:
+      oprot.writeFieldBegin('bitVectors', TType.STRING, 5)
+      oprot.writeString(self.bitVectors)
+      oprot.writeFieldEnd()
     oprot.writeFieldStop()
     oprot.writeStructEnd()
 
@@ -4622,6 +4712,7 @@ class DateColumnStatsData:
     value = (value * 31) ^ hash(self.highValue)
     value = (value * 31) ^ hash(self.numNulls)
     value = (value * 31) ^ hash(self.numDVs)
+    value = (value * 31) ^ hash(self.bitVectors)
     return value
 
   def __repr__(self):

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb
----------------------------------------------------------------------
diff --git a/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb b/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb
index a473611..2cf433b 100644
--- a/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb
+++ b/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb
@@ -824,11 +824,13 @@ class BooleanColumnStatsData
   NUMTRUES = 1
   NUMFALSES = 2
   NUMNULLS = 3
+  BITVECTORS = 4
 
   FIELDS = {
     NUMTRUES => {:type => ::Thrift::Types::I64, :name => 'numTrues'},
     NUMFALSES => {:type => ::Thrift::Types::I64, :name => 'numFalses'},
-    NUMNULLS => {:type => ::Thrift::Types::I64, :name => 'numNulls'}
+    NUMNULLS => {:type => ::Thrift::Types::I64, :name => 'numNulls'},
+    BITVECTORS => {:type => ::Thrift::Types::STRING, :name => 'bitVectors', :optional => true}
   }
 
   def struct_fields; FIELDS; end
@@ -848,12 +850,14 @@ class DoubleColumnStatsData
   HIGHVALUE = 2
   NUMNULLS = 3
   NUMDVS = 4
+  BITVECTORS = 5
 
   FIELDS = {
     LOWVALUE => {:type => ::Thrift::Types::DOUBLE, :name => 'lowValue', :optional => true},
     HIGHVALUE => {:type => ::Thrift::Types::DOUBLE, :name => 'highValue', :optional => true},
     NUMNULLS => {:type => ::Thrift::Types::I64, :name => 'numNulls'},
-    NUMDVS => {:type => ::Thrift::Types::I64, :name => 'numDVs'}
+    NUMDVS => {:type => ::Thrift::Types::I64, :name => 'numDVs'},
+    BITVECTORS => {:type => ::Thrift::Types::STRING, :name => 'bitVectors', :optional => true}
   }
 
   def struct_fields; FIELDS; end
@@ -872,12 +876,14 @@ class LongColumnStatsData
   HIGHVALUE = 2
   NUMNULLS = 3
   NUMDVS = 4
+  BITVECTORS = 5
 
   FIELDS = {
     LOWVALUE => {:type => ::Thrift::Types::I64, :name => 'lowValue', :optional => true},
     HIGHVALUE => {:type => ::Thrift::Types::I64, :name => 'highValue', :optional => true},
     NUMNULLS => {:type => ::Thrift::Types::I64, :name => 'numNulls'},
-    NUMDVS => {:type => ::Thrift::Types::I64, :name => 'numDVs'}
+    NUMDVS => {:type => ::Thrift::Types::I64, :name => 'numDVs'},
+    BITVECTORS => {:type => ::Thrift::Types::STRING, :name => 'bitVectors', :optional => true}
   }
 
   def struct_fields; FIELDS; end
@@ -896,12 +902,14 @@ class StringColumnStatsData
   AVGCOLLEN = 2
   NUMNULLS = 3
   NUMDVS = 4
+  BITVECTORS = 5
 
   FIELDS = {
     MAXCOLLEN => {:type => ::Thrift::Types::I64, :name => 'maxColLen'},
     AVGCOLLEN => {:type => ::Thrift::Types::DOUBLE, :name => 'avgColLen'},
     NUMNULLS => {:type => ::Thrift::Types::I64, :name => 'numNulls'},
-    NUMDVS => {:type => ::Thrift::Types::I64, :name => 'numDVs'}
+    NUMDVS => {:type => ::Thrift::Types::I64, :name => 'numDVs'},
+    BITVECTORS => {:type => ::Thrift::Types::STRING, :name => 'bitVectors', :optional => true}
   }
 
   def struct_fields; FIELDS; end
@@ -921,11 +929,13 @@ class BinaryColumnStatsData
   MAXCOLLEN = 1
   AVGCOLLEN = 2
   NUMNULLS = 3
+  BITVECTORS = 4
 
   FIELDS = {
     MAXCOLLEN => {:type => ::Thrift::Types::I64, :name => 'maxColLen'},
     AVGCOLLEN => {:type => ::Thrift::Types::DOUBLE, :name => 'avgColLen'},
-    NUMNULLS => {:type => ::Thrift::Types::I64, :name => 'numNulls'}
+    NUMNULLS => {:type => ::Thrift::Types::I64, :name => 'numNulls'},
+    BITVECTORS => {:type => ::Thrift::Types::STRING, :name => 'bitVectors', :optional => true}
   }
 
   def struct_fields; FIELDS; end
@@ -965,12 +975,14 @@ class DecimalColumnStatsData
   HIGHVALUE = 2
   NUMNULLS = 3
   NUMDVS = 4
+  BITVECTORS = 5
 
   FIELDS = {
     LOWVALUE => {:type => ::Thrift::Types::STRUCT, :name => 'lowValue', :class => ::Decimal, :optional => true},
     HIGHVALUE => {:type => ::Thrift::Types::STRUCT, :name => 'highValue', :class => ::Decimal, :optional => true},
     NUMNULLS => {:type => ::Thrift::Types::I64, :name => 'numNulls'},
-    NUMDVS => {:type => ::Thrift::Types::I64, :name => 'numDVs'}
+    NUMDVS => {:type => ::Thrift::Types::I64, :name => 'numDVs'},
+    BITVECTORS => {:type => ::Thrift::Types::STRING, :name => 'bitVectors', :optional => true}
   }
 
   def struct_fields; FIELDS; end
@@ -1006,12 +1018,14 @@ class DateColumnStatsData
   HIGHVALUE = 2
   NUMNULLS = 3
   NUMDVS = 4
+  BITVECTORS = 5
 
   FIELDS = {
     LOWVALUE => {:type => ::Thrift::Types::STRUCT, :name => 'lowValue', :class => ::Date, :optional => true},
     HIGHVALUE => {:type => ::Thrift::Types::STRUCT, :name => 'highValue', :class => ::Date, :optional => true},
     NUMNULLS => {:type => ::Thrift::Types::I64, :name => 'numNulls'},
-    NUMDVS => {:type => ::Thrift::Types::I64, :name => 'numDVs'}
+    NUMDVS => {:type => ::Thrift::Types::I64, :name => 'numDVs'},
+    BITVECTORS => {:type => ::Thrift::Types::STRING, :name => 'bitVectors', :optional => true}
   }
 
   def struct_fields; FIELDS; end

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/metastore/src/java/org/apache/hadoop/hive/metastore/NumDistinctValueEstimator.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/NumDistinctValueEstimator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/NumDistinctValueEstimator.java
new file mode 100644
index 0000000..92f9a84
--- /dev/null
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/NumDistinctValueEstimator.java
@@ -0,0 +1,367 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.metastore;
+import java.util.Random;
+
+import javolution.util.FastBitSet;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.io.Text;
+
+/*
+ * https://en.wikipedia.org/wiki/Flajolet%E2%80%93Martin_algorithm
+ * We implement Flajolet–Martin algorithm in this class.
+ * The Flajolet–Martin algorithm is an algorithm for approximating the number of distinct elements 
+ * in a stream with a single pass and space-consumption which is logarithmic in the maximum number 
+ * of possible distinct elements in the stream. The algorithm was introduced by Philippe Flajolet 
+ * and G. Nigel Martin in their 1984 paper "Probabilistic Counting Algorithms for Data Base Applications".
+ * Later it has been refined in the papers "LogLog counting of large cardinalities" by Marianne Durand 
+ * and Philippe Flajolet, and "HyperLogLog: The analysis of a near-optimal cardinality estimation 
+ * algorithm" by Philippe Flajolet et al.
+ */
+
+/*
+ * The algorithm works like this.
+ * (1) Set the number of bit vectors, i.e., numBitVectors, based on the precision.
+ * (2) For each bit vector, generate hash value of the long value and mod it by 2^bitVectorSize-1. (addToEstimator)
+ * (3) Set the index (addToEstimator)
+ * (4) Take the average of the index for all the bit vectors and get the estimated NDV (estimateNumDistinctValues).
+ */
+public class NumDistinctValueEstimator {
+
+  static final Log LOG = LogFactory.getLog(NumDistinctValueEstimator.class.getName());
+
+  /* We want a,b,x to come from a finite field of size 0 to k, where k is a prime number.
+   * 2^p - 1 is prime for p = 31. Hence bitvectorSize has to be 31. Pick k to be 2^p -1.
+   * If a,b,x didn't come from a finite field ax1 + b mod k and ax2 + b mod k will not be pair wise
+   * independent. As a consequence, the hash values will not distribute uniformly from 0 to 2^p-1
+   * thus introducing errors in the estimates.
+   */
+  private static final int BIT_VECTOR_SIZE = 31;
+  private final int numBitVectors;
+
+  // Refer to Flajolet-Martin'86 for the value of phi
+  private static final double PHI = 0.77351;
+
+  private final int[] a;
+  private final int[] b;
+  private final FastBitSet[] bitVector;
+
+  private final Random aValue;
+  private final Random bValue;
+
+  /* Create a new distinctValueEstimator
+   */
+  public NumDistinctValueEstimator(int numBitVectors) {
+    this.numBitVectors = numBitVectors;
+    bitVector = new FastBitSet[numBitVectors];
+    for (int i=0; i< numBitVectors; i++) {
+      bitVector[i] = new FastBitSet(BIT_VECTOR_SIZE);
+    }
+
+    a = new int[numBitVectors];
+    b = new int[numBitVectors];
+
+    /* Use a large prime number as a seed to the random number generator.
+     * Java's random number generator uses the Linear Congruential Generator to generate random
+     * numbers using the following recurrence relation,
+     *
+     * X(n+1) = (a X(n) + c ) mod m
+     *
+     *  where X0 is the seed. Java implementation uses m = 2^48. This is problematic because 2^48
+     *  is not a prime number and hence the set of numbers from 0 to m don't form a finite field.
+     *  If these numbers don't come from a finite field any given X(n) and X(n+1) may not be pair
+     *  wise independent.
+     *
+     *  However, empirically passing in prime numbers as seeds seems to work better than when passing
+     *  composite numbers as seeds. Ideally Java's Random should pick m such that m is prime.
+     *
+     */
+    aValue = new Random(99397);
+    bValue = new Random(9876413);
+
+    for (int i = 0; i < numBitVectors; i++) {
+      int randVal;
+      /* a and b shouldn't be even; If a and b are even, then none of the values
+       * will set bit 0 thus introducing errors in the estimate. Both a and b can be even
+       * 25% of the time and as a result 25% of the bit vectors could be inaccurate. To avoid this
+       * always pick odd values for a and b.
+       */
+      do {
+        randVal = aValue.nextInt();
+      } while (randVal % 2 == 0);
+
+      a[i] = randVal;
+
+      do {
+        randVal = bValue.nextInt();
+      } while (randVal % 2 == 0);
+
+      b[i] = randVal;
+
+      if (a[i] < 0) {
+        a[i] = a[i] + (1 << BIT_VECTOR_SIZE - 1);
+      }
+
+      if (b[i] < 0) {
+        b[i] = b[i] + (1 << BIT_VECTOR_SIZE - 1);
+      }
+    }
+  }
+
+  public NumDistinctValueEstimator(String s, int numBitVectors) {
+    this.numBitVectors = numBitVectors;
+    FastBitSet bitVectorDeser[] = deserialize(s, numBitVectors);
+    bitVector = new FastBitSet[numBitVectors];
+    for(int i=0; i <numBitVectors; i++) {
+       bitVector[i] = new FastBitSet(BIT_VECTOR_SIZE);
+       bitVector[i].clear();
+       bitVector[i].or(bitVectorDeser[i]);
+    }
+
+    a = null;
+    b = null;
+
+    aValue = null;
+    bValue = null;
+  }
+
+  /**
+   * Resets a distinctValueEstimator object to its original state.
+   */
+  public void reset() {
+    for (int i=0; i< numBitVectors; i++) {
+      bitVector[i].clear();
+    }
+  }
+
+  public FastBitSet getBitVector(int index) {
+    return bitVector[index];
+  }
+
+  public int getnumBitVectors() {
+    return numBitVectors;
+  }
+
+  public int getBitVectorSize() {
+    return BIT_VECTOR_SIZE;
+  }
+
+  public void printNumDistinctValueEstimator() {
+    String t = new String();
+
+    LOG.debug("NumDistinctValueEstimator");
+    LOG.debug("Number of Vectors:");
+    LOG.debug(numBitVectors);
+    LOG.debug("Vector Size: ");
+    LOG.debug(BIT_VECTOR_SIZE);
+
+    for (int i=0; i < numBitVectors; i++) {
+      t = t + bitVector[i].toString();
+    }
+
+    LOG.debug("Serialized Vectors: ");
+    LOG.debug(t);
+  }
+
+  /* Serializes a distinctValueEstimator object to Text for transport.
+   *
+   */
+  public Text serialize() {
+    String s = new String();
+    for(int i=0; i < numBitVectors; i++) {
+      s = s + (bitVector[i].toString());
+    }
+    return new Text(s);
+  }
+
+  /* Deserializes from string to FastBitSet; Creates a NumDistinctValueEstimator object and
+   * returns it.
+   */
+
+  private FastBitSet[] deserialize(String s, int numBitVectors) {
+    FastBitSet[] b = new FastBitSet[numBitVectors];
+    for (int j=0; j < numBitVectors; j++) {
+      b[j] = new FastBitSet(BIT_VECTOR_SIZE);
+      b[j].clear();
+    }
+
+    int vectorIndex =0;
+
+    /* Parse input string to obtain the indexes that are set in the bitvector.
+     * When a toString() is called on a FastBitSet object to serialize it, the serialization
+     * adds { and } to the beginning and end of the return String.
+     * Skip "{", "}", ",", " " in the input string.
+     */
+    for(int i=1; i < s.length()-1;) {
+      char c = s.charAt(i);
+      i = i + 1;
+
+      // Move on to the next bit vector
+      if (c == '}') {
+         vectorIndex = vectorIndex + 1;
+      }
+
+      // Encountered a numeric value; Extract out the entire number
+      if (c >= '0' && c <= '9') {
+        String t = new String();
+        t = t + c;
+        c = s.charAt(i);
+        i = i + 1;
+
+        while (c != ',' && c!= '}') {
+          t = t + c;
+          c = s.charAt(i);
+          i = i + 1;
+        }
+
+        int bitIndex = Integer.parseInt(t);
+        assert(bitIndex >= 0);
+        assert(vectorIndex < numBitVectors);
+        b[vectorIndex].set(bitIndex);
+        if (c == '}') {
+          vectorIndex =  vectorIndex + 1;
+        }
+      }
+    }
+    return b;
+  }
+
+  private int generateHash(long v, int hashNum) {
+    int mod = (1<<BIT_VECTOR_SIZE) - 1;
+    long tempHash = a[hashNum] * v  + b[hashNum];
+    tempHash %= mod;
+    int hash = (int) tempHash;
+
+    /* Hash function should map the long value to 0...2^L-1.
+     * Hence hash value has to be non-negative.
+     */
+    if (hash < 0) {
+      hash = hash + mod;
+    }
+    return hash;
+  }
+
+  private int generateHashForPCSA(long v) {
+    return generateHash(v, 0);
+  }
+
+  public void addToEstimator(long v) {
+    /* Update summary bitVector :
+     * Generate hash value of the long value and mod it by 2^bitVectorSize-1.
+     * In this implementation bitVectorSize is 31.
+     */
+
+    for (int i = 0; i<numBitVectors; i++) {
+      int hash = generateHash(v,i);
+      int index;
+
+      // Find the index of the least significant bit that is 1
+      for (index=0; index<BIT_VECTOR_SIZE; index++) {
+        if (hash % 2 != 0) {
+          break;
+        }
+        hash = hash >> 1;
+      }
+
+      // Set bitvector[index] := 1
+      bitVector[i].set(index);
+    }
+  }
+
+  public void addToEstimatorPCSA(long v) {
+    int hash = generateHashForPCSA(v);
+    int rho = hash/numBitVectors;
+    int index;
+
+    // Find the index of the least significant bit that is 1
+    for (index=0; index<BIT_VECTOR_SIZE; index++) {
+      if (rho % 2 != 0) {
+        break;
+      }
+      rho = rho >> 1;
+    }
+
+    // Set bitvector[index] := 1
+    bitVector[hash%numBitVectors].set(index);
+  }
+
+  public void addToEstimator(double d) {
+    int v = new Double(d).hashCode();
+    addToEstimator(v);
+  }
+
+  public void addToEstimatorPCSA(double d) {
+    int v = new Double(d).hashCode();
+    addToEstimatorPCSA(v);
+  }
+
+  public void addToEstimator(HiveDecimal decimal) {
+    int v = decimal.hashCode();
+    addToEstimator(v);
+  }
+
+  public void addToEstimatorPCSA(HiveDecimal decimal) {
+    int v = decimal.hashCode();
+    addToEstimatorPCSA(v);
+  }
+
+  public void mergeEstimators(NumDistinctValueEstimator o) {
+    // Bitwise OR the bitvector with the bitvector in the agg buffer
+    for (int i=0; i<numBitVectors; i++) {
+      bitVector[i].or(o.getBitVector(i));
+    }
+  }
+
+  public long estimateNumDistinctValuesPCSA() {
+    double numDistinctValues = 0.0;
+    long S = 0;
+
+    for (int i=0; i < numBitVectors; i++) {
+      int index = 0;
+      while (bitVector[i].get(index) && index < BIT_VECTOR_SIZE) {
+        index = index + 1;
+      }
+      S = S + index;
+    }
+
+    numDistinctValues = ((numBitVectors/PHI) * Math.pow(2.0, S/numBitVectors));
+    return ((long)numDistinctValues);
+  }
+
+  /* We use the Flajolet-Martin estimator to estimate the number of distinct values. FM uses the
+   * location of the least significant zero as an estimate of log2(phi*ndvs).
+   */
+  public long estimateNumDistinctValues() {
+    int sumLeastSigZero = 0;
+    double avgLeastSigZero;
+    double numDistinctValues;
+
+    for (int i=0; i< numBitVectors; i++) {
+      int leastSigZero = bitVector[i].nextClearBit(0);
+      sumLeastSigZero += leastSigZero;
+    }
+
+    avgLeastSigZero =
+        (double)(sumLeastSigZero/(numBitVectors * 1.0)) - (Math.log(PHI)/Math.log(2.0));
+    numDistinctValues = Math.pow(2.0, avgLeastSigZero);
+    return ((long)(numDistinctValues));
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseUtils.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseUtils.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseUtils.java
index f4df2e2..d6d01bd 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseUtils.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseUtils.java
@@ -1133,9 +1133,8 @@ class HBaseUtils {
     return proto.toByteArray();
   }
 
-  private static HbaseMetastoreProto.ColumnStats
-  protoBufStatsForOneColumn(ColumnStatistics partitionColumnStats, ColumnStatisticsObj colStats)
-      throws IOException {
+  private static HbaseMetastoreProto.ColumnStats protoBufStatsForOneColumn(
+      ColumnStatistics partitionColumnStats, ColumnStatisticsObj colStats) throws IOException {
     HbaseMetastoreProto.ColumnStats.Builder builder = HbaseMetastoreProto.ColumnStats.newBuilder();
     if (partitionColumnStats != null) {
       builder.setLastAnalyzed(partitionColumnStats.getStatsDesc().getLastAnalyzed());
@@ -1147,80 +1146,77 @@ class HBaseUtils {
 
     ColumnStatisticsData colData = colStats.getStatsData();
     switch (colData.getSetField()) {
-      case BOOLEAN_STATS:
-        BooleanColumnStatsData boolData = colData.getBooleanStats();
-        builder.setNumNulls(boolData.getNumNulls());
-        builder.setBoolStats(
-            HbaseMetastoreProto.ColumnStats.BooleanStats.newBuilder()
-                .setNumTrues(boolData.getNumTrues())
-                .setNumFalses(boolData.getNumFalses())
-                .build());
-        break;
-
-      case LONG_STATS:
-        LongColumnStatsData longData = colData.getLongStats();
-        builder.setNumNulls(longData.getNumNulls());
-        builder.setNumDistinctValues(longData.getNumDVs());
-        builder.setLongStats(
-            HbaseMetastoreProto.ColumnStats.LongStats.newBuilder()
-                .setLowValue(longData.getLowValue())
-                .setHighValue(longData.getHighValue())
-                .build());
-        break;
-
-      case DOUBLE_STATS:
-        DoubleColumnStatsData doubleData = colData.getDoubleStats();
-        builder.setNumNulls(doubleData.getNumNulls());
-        builder.setNumDistinctValues(doubleData.getNumDVs());
-        builder.setDoubleStats(
-            HbaseMetastoreProto.ColumnStats.DoubleStats.newBuilder()
-                .setLowValue(doubleData.getLowValue())
-                .setHighValue(doubleData.getHighValue())
-                .build());
-        break;
-
-      case STRING_STATS:
-        StringColumnStatsData stringData = colData.getStringStats();
-        builder.setNumNulls(stringData.getNumNulls());
-        builder.setNumDistinctValues(stringData.getNumDVs());
-        builder.setStringStats(
-            HbaseMetastoreProto.ColumnStats.StringStats.newBuilder()
-                .setMaxColLength(stringData.getMaxColLen())
-                .setAvgColLength(stringData.getAvgColLen())
-                .build());
-        break;
-
-      case BINARY_STATS:
-        BinaryColumnStatsData binaryData = colData.getBinaryStats();
-        builder.setNumNulls(binaryData.getNumNulls());
-        builder.setBinaryStats(
-            HbaseMetastoreProto.ColumnStats.StringStats.newBuilder()
-                .setMaxColLength(binaryData.getMaxColLen())
-                .setAvgColLength(binaryData.getAvgColLen())
-                .build());
-        break;
-
-      case DECIMAL_STATS:
-        DecimalColumnStatsData decimalData = colData.getDecimalStats();
-        builder.setNumNulls(decimalData.getNumNulls());
-        builder.setNumDistinctValues(decimalData.getNumDVs());
-        builder.setDecimalStats(
-            HbaseMetastoreProto.ColumnStats.DecimalStats.newBuilder()
-                .setLowValue(
-                    HbaseMetastoreProto.ColumnStats.DecimalStats.Decimal.newBuilder()
+    case BOOLEAN_STATS:
+      BooleanColumnStatsData boolData = colData.getBooleanStats();
+      builder.setNumNulls(boolData.getNumNulls());
+      builder.setBoolStats(HbaseMetastoreProto.ColumnStats.BooleanStats.newBuilder()
+          .setNumTrues(boolData.getNumTrues()).setNumFalses(boolData.getNumFalses()).build());
+      break;
+
+    case LONG_STATS:
+      LongColumnStatsData longData = colData.getLongStats();
+      builder.setNumNulls(longData.getNumNulls());
+      builder.setNumDistinctValues(longData.getNumDVs());
+      if (longData.isSetBitVectors()) {
+        builder.setBitVectors(longData.getBitVectors());
+      }
+      builder.setLongStats(HbaseMetastoreProto.ColumnStats.LongStats.newBuilder()
+          .setLowValue(longData.getLowValue()).setHighValue(longData.getHighValue()).build());
+      break;
+
+    case DOUBLE_STATS:
+      DoubleColumnStatsData doubleData = colData.getDoubleStats();
+      builder.setNumNulls(doubleData.getNumNulls());
+      builder.setNumDistinctValues(doubleData.getNumDVs());
+      if (doubleData.isSetBitVectors()) {
+        builder.setBitVectors(doubleData.getBitVectors());
+      }
+      builder.setDoubleStats(HbaseMetastoreProto.ColumnStats.DoubleStats.newBuilder()
+          .setLowValue(doubleData.getLowValue()).setHighValue(doubleData.getHighValue()).build());
+      break;
+
+    case STRING_STATS:
+      StringColumnStatsData stringData = colData.getStringStats();
+      builder.setNumNulls(stringData.getNumNulls());
+      builder.setNumDistinctValues(stringData.getNumDVs());
+      if (stringData.isSetBitVectors()) {
+        builder.setBitVectors(stringData.getBitVectors());
+      }
+      builder.setStringStats(HbaseMetastoreProto.ColumnStats.StringStats.newBuilder()
+          .setMaxColLength(stringData.getMaxColLen()).setAvgColLength(stringData.getAvgColLen())
+          .build());
+      break;
+
+    case BINARY_STATS:
+      BinaryColumnStatsData binaryData = colData.getBinaryStats();
+      builder.setNumNulls(binaryData.getNumNulls());
+      builder.setBinaryStats(HbaseMetastoreProto.ColumnStats.StringStats.newBuilder()
+          .setMaxColLength(binaryData.getMaxColLen()).setAvgColLength(binaryData.getAvgColLen())
+          .build());
+      break;
+
+    case DECIMAL_STATS:
+      DecimalColumnStatsData decimalData = colData.getDecimalStats();
+      builder.setNumNulls(decimalData.getNumNulls());
+      builder.setNumDistinctValues(decimalData.getNumDVs());
+      if (decimalData.isSetBitVectors()) {
+        builder.setBitVectors(decimalData.getBitVectors());
+      }
+      builder.setDecimalStats(
+          HbaseMetastoreProto.ColumnStats.DecimalStats
+              .newBuilder()
+              .setLowValue(
+                  HbaseMetastoreProto.ColumnStats.DecimalStats.Decimal.newBuilder()
                       .setUnscaled(ByteString.copyFrom(decimalData.getLowValue().getUnscaled()))
-                      .setScale(decimalData.getLowValue().getScale())
-                      .build())
-                .setHighValue(
-                    HbaseMetastoreProto.ColumnStats.DecimalStats.Decimal.newBuilder()
-                    .setUnscaled(ByteString.copyFrom(decimalData.getHighValue().getUnscaled()))
-                    .setScale(decimalData.getHighValue().getScale())
-                    .build()))
-                .build();
-        break;
-
-      default:
-        throw new RuntimeException("Woh, bad.  Unknown stats type!");
+                      .setScale(decimalData.getLowValue().getScale()).build())
+              .setHighValue(
+                  HbaseMetastoreProto.ColumnStats.DecimalStats.Decimal.newBuilder()
+                      .setUnscaled(ByteString.copyFrom(decimalData.getHighValue().getUnscaled()))
+                      .setScale(decimalData.getHighValue().getScale()).build())).build();
+      break;
+
+    default:
+      throw new RuntimeException("Woh, bad.  Unknown stats type!");
     }
     return builder.build();
   }
@@ -1265,6 +1261,7 @@ class HBaseUtils {
       }
       longData.setNumNulls(proto.getNumNulls());
       longData.setNumDVs(proto.getNumDistinctValues());
+      longData.setBitVectors(proto.getBitVectors());
       colData.setLongStats(longData);
     } else if (proto.hasDoubleStats()) {
       DoubleColumnStatsData doubleData = new DoubleColumnStatsData();
@@ -1276,6 +1273,7 @@ class HBaseUtils {
       }
       doubleData.setNumNulls(proto.getNumNulls());
       doubleData.setNumDVs(proto.getNumDistinctValues());
+      doubleData.setBitVectors(proto.getBitVectors());
       colData.setDoubleStats(doubleData);
     } else if (proto.hasStringStats()) {
       StringColumnStatsData stringData = new StringColumnStatsData();
@@ -1283,6 +1281,7 @@ class HBaseUtils {
       stringData.setAvgColLen(proto.getStringStats().getAvgColLength());
       stringData.setNumNulls(proto.getNumNulls());
       stringData.setNumDVs(proto.getNumDistinctValues());
+      stringData.setBitVectors(proto.getBitVectors());
       colData.setStringStats(stringData);
     } else if (proto.hasBinaryStats()) {
       BinaryColumnStatsData binaryData = new BinaryColumnStatsData();
@@ -1306,6 +1305,7 @@ class HBaseUtils {
       }
       decimalData.setNumNulls(proto.getNumNulls());
       decimalData.setNumDVs(proto.getNumDistinctValues());
+      decimalData.setBitVectors(proto.getBitVectors());
       colData.setDecimalStats(decimalData);
     } else {
       throw new RuntimeException("Woh, bad.  Unknown stats type!");

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/StatsCache.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/StatsCache.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/StatsCache.java
index 5ec60be..f1d2e50 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/StatsCache.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/StatsCache.java
@@ -23,9 +23,11 @@ import com.google.common.cache.CacheBuilder;
 import com.google.common.cache.CacheLoader;
 import com.google.common.cache.LoadingCache;
 import com.google.protobuf.ByteString;
+
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.common.HiveStatsUtils;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.api.AggrStats;
 import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
@@ -72,7 +74,7 @@ class StatsCache {
     return self;
   }
 
-  private StatsCache(Configuration conf) {
+  private StatsCache(final Configuration conf) {
     final StatsCache me = this;
     cache = CacheBuilder.newBuilder()
         .maximumSize(
@@ -82,6 +84,7 @@ class StatsCache {
         .build(new CacheLoader<StatsCacheKey, AggrStats>() {
           @Override
           public AggrStats load(StatsCacheKey key) throws Exception {
+            int numBitVectors = HiveStatsUtils.getNumBitVectorsForNDVEstimation(conf);
             HBaseReadWrite hrw = HBaseReadWrite.getInstance();
             AggrStats aggrStats = hrw.getAggregatedStats(key.hashed);
             if (aggrStats == null) {
@@ -103,7 +106,7 @@ class StatsCache {
                     }
                     if (aggregator == null) {
                       aggregator = ColumnStatsAggregatorFactory.getColumnStatsAggregator(
-                          cso.getStatsData().getSetField());
+                          cso.getStatsData().getSetField(), numBitVectors);
                     }
                     aggregator.aggregate(statsObj, cso);
                   }

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/BinaryColumnStatsAggregator.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/BinaryColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/BinaryColumnStatsAggregator.java
index bbd2c7b..40340dd 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/BinaryColumnStatsAggregator.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/BinaryColumnStatsAggregator.java
@@ -22,7 +22,7 @@ package org.apache.hadoop.hive.metastore.hbase.stats;
 import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
 
-public class BinaryColumnStatsAggregator implements ColumnStatsAggregator{
+public class BinaryColumnStatsAggregator extends ColumnStatsAggregator{
 
   @Override
   public void aggregate(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/BooleanColumnStatsAggregator.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/BooleanColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/BooleanColumnStatsAggregator.java
index 9047f68..735d965 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/BooleanColumnStatsAggregator.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/BooleanColumnStatsAggregator.java
@@ -22,7 +22,7 @@ package org.apache.hadoop.hive.metastore.hbase.stats;
 import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
 
-public class BooleanColumnStatsAggregator implements ColumnStatsAggregator {
+public class BooleanColumnStatsAggregator extends ColumnStatsAggregator {
 
   @Override
   public void aggregate(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregator.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregator.java
index 217b654..694e53b 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregator.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregator.java
@@ -19,8 +19,10 @@
 
 package org.apache.hadoop.hive.metastore.hbase.stats;
 
+import org.apache.hadoop.hive.metastore.NumDistinctValueEstimator;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
 
-public interface ColumnStatsAggregator {
-  public void aggregate(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats);
+public abstract class ColumnStatsAggregator {
+  NumDistinctValueEstimator ndvEstimator = null;
+  public abstract void aggregate(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats);
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregatorFactory.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregatorFactory.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregatorFactory.java
index a8dbc1f..8eb127b 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregatorFactory.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregatorFactory.java
@@ -19,6 +19,7 @@
 
 package org.apache.hadoop.hive.metastore.hbase.stats;
 
+import org.apache.hadoop.hive.metastore.NumDistinctValueEstimator;
 import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData;
 import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
@@ -34,23 +35,34 @@ public class ColumnStatsAggregatorFactory {
   private ColumnStatsAggregatorFactory() {
   }
 
-  public static ColumnStatsAggregator getColumnStatsAggregator(_Fields type) {
+  public static ColumnStatsAggregator getColumnStatsAggregator(_Fields type, int numBitVectors) {
+    ColumnStatsAggregator agg;
     switch (type) {
     case BOOLEAN_STATS:
-      return new BooleanColumnStatsAggregator();
+      agg = new BooleanColumnStatsAggregator();
+      break;
     case LONG_STATS:
-      return new LongColumnStatsAggregator();
+      agg = new LongColumnStatsAggregator();
+      break;
     case DOUBLE_STATS:
-      return new DoubleColumnStatsAggregator();
+      agg = new DoubleColumnStatsAggregator();
+      break;
     case STRING_STATS:
-      return new StringColumnStatsAggregator();
+      agg = new StringColumnStatsAggregator();
+      break;
     case BINARY_STATS:
-      return new BinaryColumnStatsAggregator();
+      agg = new BinaryColumnStatsAggregator();
+      break;
     case DECIMAL_STATS:
-      return new DecimalColumnStatsAggregator();
+      agg = new DecimalColumnStatsAggregator();
+      break;
     default:
       throw new RuntimeException("Woh, bad.  Unknown stats type " + type.toString());
     }
+    if (numBitVectors > 0) {
+      agg.ndvEstimator = new NumDistinctValueEstimator(numBitVectors);
+    }
+    return agg;
   }
 
   public static ColumnStatisticsObj newColumnStaticsObj(String colName, String colType, _Fields type) {

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DecimalColumnStatsAggregator.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DecimalColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DecimalColumnStatsAggregator.java
index ec25b31..50f4325 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DecimalColumnStatsAggregator.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DecimalColumnStatsAggregator.java
@@ -19,25 +19,33 @@
 
 package org.apache.hadoop.hive.metastore.hbase.stats;
 
+import org.apache.hadoop.hive.metastore.NumDistinctValueEstimator;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
 import org.apache.hadoop.hive.metastore.api.Decimal;
 import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData;
 
-public class DecimalColumnStatsAggregator implements ColumnStatsAggregator {
+public class DecimalColumnStatsAggregator extends ColumnStatsAggregator {
 
   @Override
   public void aggregate(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
     DecimalColumnStatsData aggregateData = aggregateColStats.getStatsData().getDecimalStats();
     DecimalColumnStatsData newData = newColStats.getStatsData().getDecimalStats();
-    Decimal lowValue =
-        (aggregateData.getLowValue().compareTo(newData.getLowValue()) > 0) ? aggregateData
-            .getLowValue() : newData.getLowValue();
+    Decimal lowValue = aggregateData.getLowValue() != null
+        && (aggregateData.getLowValue().compareTo(newData.getLowValue()) > 0) ? aggregateData
+        .getLowValue() : newData.getLowValue();
     aggregateData.setLowValue(lowValue);
-    Decimal highValue =
-        (aggregateData.getHighValue().compareTo(newData.getHighValue()) > 0) ? aggregateData
-            .getHighValue() : newData.getHighValue();
+    Decimal highValue = aggregateData.getHighValue() != null
+        && (aggregateData.getHighValue().compareTo(newData.getHighValue()) > 0) ? aggregateData
+        .getHighValue() : newData.getHighValue();
     aggregateData.setHighValue(highValue);
     aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());
-    aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs()));
+    if (ndvEstimator == null || !newData.isSetBitVectors() || newData.getBitVectors().length() == 0) {
+      aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs()));
+    } else {
+      ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(newData.getBitVectors(),
+          ndvEstimator.getnumBitVectors()));
+      aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues());
+      aggregateData.setBitVectors(ndvEstimator.serialize().toString());
+    }
   }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DoubleColumnStatsAggregator.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DoubleColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DoubleColumnStatsAggregator.java
index 71af0ac..d945ec2 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DoubleColumnStatsAggregator.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DoubleColumnStatsAggregator.java
@@ -19,10 +19,11 @@
 
 package org.apache.hadoop.hive.metastore.hbase.stats;
 
+import org.apache.hadoop.hive.metastore.NumDistinctValueEstimator;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
 import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;
 
-public class DoubleColumnStatsAggregator implements ColumnStatsAggregator {
+public class DoubleColumnStatsAggregator extends ColumnStatsAggregator {
 
   @Override
   public void aggregate(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
@@ -31,6 +32,13 @@ public class DoubleColumnStatsAggregator implements ColumnStatsAggregator {
     aggregateData.setLowValue(Math.min(aggregateData.getLowValue(), newData.getLowValue()));
     aggregateData.setHighValue(Math.max(aggregateData.getHighValue(), newData.getHighValue()));
     aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());
-    aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs()));
+    if (ndvEstimator == null || !newData.isSetBitVectors() || newData.getBitVectors().length() == 0) {
+      aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs()));
+    } else {
+      ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(newData.getBitVectors(),
+          ndvEstimator.getnumBitVectors()));
+      aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues());
+      aggregateData.setBitVectors(ndvEstimator.serialize().toString());
+    }
   }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/LongColumnStatsAggregator.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/LongColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/LongColumnStatsAggregator.java
index 15b8cf7..068dd00 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/LongColumnStatsAggregator.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/LongColumnStatsAggregator.java
@@ -19,10 +19,11 @@
 
 package org.apache.hadoop.hive.metastore.hbase.stats;
 
+import org.apache.hadoop.hive.metastore.NumDistinctValueEstimator;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
 import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
 
-public class LongColumnStatsAggregator implements ColumnStatsAggregator {
+public class LongColumnStatsAggregator extends ColumnStatsAggregator {
 
   @Override
   public void aggregate(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
@@ -31,6 +32,13 @@ public class LongColumnStatsAggregator implements ColumnStatsAggregator {
     aggregateData.setLowValue(Math.min(aggregateData.getLowValue(), newData.getLowValue()));
     aggregateData.setHighValue(Math.max(aggregateData.getHighValue(), newData.getHighValue()));
     aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());
-    aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs()));
+    if (ndvEstimator == null || !newData.isSetBitVectors() || newData.getBitVectors().length() == 0) {
+      aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs()));
+    } else {
+      ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(newData.getBitVectors(),
+          ndvEstimator.getnumBitVectors()));
+      aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues());
+      aggregateData.setBitVectors(ndvEstimator.serialize().toString());
+    }
   }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/StringColumnStatsAggregator.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/StringColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/StringColumnStatsAggregator.java
index fe1a04c..aeb6c39 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/StringColumnStatsAggregator.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/StringColumnStatsAggregator.java
@@ -19,10 +19,11 @@
 
 package org.apache.hadoop.hive.metastore.hbase.stats;
 
+import org.apache.hadoop.hive.metastore.NumDistinctValueEstimator;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
 import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
 
-public class StringColumnStatsAggregator implements ColumnStatsAggregator {
+public class StringColumnStatsAggregator extends ColumnStatsAggregator {
 
   @Override
   public void aggregate(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
@@ -31,6 +32,13 @@ public class StringColumnStatsAggregator implements ColumnStatsAggregator {
     aggregateData.setMaxColLen(Math.max(aggregateData.getMaxColLen(), newData.getMaxColLen()));
     aggregateData.setAvgColLen(Math.max(aggregateData.getAvgColLen(), newData.getAvgColLen()));
     aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());
-    aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs()));
+    if (ndvEstimator == null || !newData.isSetBitVectors() || newData.getBitVectors().length() == 0) {
+      aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs()));
+    } else {
+      ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(newData.getBitVectors(),
+          ndvEstimator.getnumBitVectors()));
+      aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues());
+      aggregateData.setBitVectors(ndvEstimator.serialize().toString());
+    }
   }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/metastore/src/protobuf/org/apache/hadoop/hive/metastore/hbase/hbase_metastore_proto.proto
----------------------------------------------------------------------
diff --git a/metastore/src/protobuf/org/apache/hadoop/hive/metastore/hbase/hbase_metastore_proto.proto b/metastore/src/protobuf/org/apache/hadoop/hive/metastore/hbase/hbase_metastore_proto.proto
index 0d0ef89..466fdf9 100644
--- a/metastore/src/protobuf/org/apache/hadoop/hive/metastore/hbase/hbase_metastore_proto.proto
+++ b/metastore/src/protobuf/org/apache/hadoop/hive/metastore/hbase/hbase_metastore_proto.proto
@@ -93,6 +93,7 @@ message ColumnStats {
   optional StringStats binary_stats = 9;
   optional DecimalStats decimal_stats = 10;
   optional string column_name = 11;
+  optional string bit_vectors = 12;
 }
 
 message Database {

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsCacheWithBitVector.java
----------------------------------------------------------------------
diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsCacheWithBitVector.java b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsCacheWithBitVector.java
new file mode 100644
index 0000000..36c7984
--- /dev/null
+++ b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsCacheWithBitVector.java
@@ -0,0 +1,187 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hadoop.hive.metastore.hbase;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.client.HTableInterface;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.api.AggrStats;
+import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
+import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.Partition;
+import org.apache.hadoop.hive.metastore.api.SerDeInfo;
+import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+import org.mockito.Mock;
+import org.mockito.MockitoAnnotations;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+public class TestHBaseAggregateStatsCacheWithBitVector {
+  private static final Logger LOG = LoggerFactory
+      .getLogger(TestHBaseAggregateStatsCacheWithBitVector.class.getName());
+
+  @Mock
+  HTableInterface htable;
+  private HBaseStore store;
+  SortedMap<String, Cell> rows = new TreeMap<>();
+
+  @Before
+  public void before() throws IOException {
+    MockitoAnnotations.initMocks(this);
+    HiveConf conf = new HiveConf();
+    conf.setBoolean(HBaseReadWrite.NO_CACHE_CONF, true);
+    store = MockUtils.init(conf, htable, rows);
+    store.backdoor().getStatsCache().resetCounters();
+  }
+
+  private static interface Checker {
+    void checkStats(AggrStats aggrStats) throws Exception;
+  }
+
+  @Test
+  public void allPartitions() throws Exception {
+    String dbName = "default";
+    String tableName = "snp";
+    List<String> partVals1 = Arrays.asList("today");
+    List<String> partVals2 = Arrays.asList("yesterday");
+    long now = System.currentTimeMillis();
+
+    List<FieldSchema> cols = new ArrayList<>();
+    cols.add(new FieldSchema("col1", "boolean", "nocomment"));
+    SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
+    StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0,
+        serde, null, null, Collections.<String, String> emptyMap());
+    List<FieldSchema> partCols = new ArrayList<>();
+    partCols.add(new FieldSchema("ds", "string", ""));
+    Table table = new Table(tableName, dbName, "me", (int) now, (int) now, 0, sd, partCols,
+        Collections.<String, String> emptyMap(), null, null, null);
+    store.createTable(table);
+
+    StorageDescriptor psd = new StorageDescriptor(sd);
+    psd.setLocation("file:/tmp/default/hit/ds=" + partVals1.get(0));
+    Partition part = new Partition(partVals1, dbName, tableName, (int) now, (int) now, psd,
+        Collections.<String, String> emptyMap());
+    store.addPartition(part);
+
+    psd = new StorageDescriptor(sd);
+    psd.setLocation("file:/tmp/default/hit/ds=" + partVals2.get(0));
+    part = new Partition(partVals2, dbName, tableName, (int) now, (int) now, psd,
+        Collections.<String, String> emptyMap());
+    store.addPartition(part);
+
+    ColumnStatistics cs = new ColumnStatistics();
+    ColumnStatisticsDesc desc = new ColumnStatisticsDesc(false, dbName, tableName);
+    desc.setLastAnalyzed(now);
+    desc.setPartName("ds=" + partVals1.get(0));
+    cs.setStatsDesc(desc);
+    ColumnStatisticsObj obj = new ColumnStatisticsObj();
+    obj.setColName("col1");
+    obj.setColType("double");
+    ColumnStatisticsData data = new ColumnStatisticsData();
+    DoubleColumnStatsData dcsd = new DoubleColumnStatsData();
+    dcsd.setHighValue(1000.2342343);
+    dcsd.setLowValue(-20.1234213423);
+    dcsd.setNumNulls(30);
+    dcsd.setNumDVs(12342);
+    dcsd.setBitVectors("{0, 4, 5, 7}{0, 1}{0, 1, 2}{0, 1, 4}{0}{0, 2}{0, 3}{0, 2, 3, 4}{0, 1, 4}{0, 1}{0}{0, 1, 3, 8}{0, 2}{0, 2}{0, 9}{0, 1, 4}");
+    data.setDoubleStats(dcsd);
+    obj.setStatsData(data);
+    cs.addToStatsObj(obj);
+    store.updatePartitionColumnStatistics(cs, partVals1);
+
+    cs = new ColumnStatistics();
+    desc = new ColumnStatisticsDesc(false, dbName, tableName);
+    desc.setLastAnalyzed(now);
+    desc.setPartName("ds=" + partVals2.get(0));
+    cs.setStatsDesc(desc);
+    obj = new ColumnStatisticsObj();
+    obj.setColName("col1");
+    obj.setColType("double");
+    data = new ColumnStatisticsData();
+    dcsd = new DoubleColumnStatsData();
+    dcsd.setHighValue(1000.2342343);
+    dcsd.setLowValue(-20.1234213423);
+    dcsd.setNumNulls(30);
+    dcsd.setNumDVs(12342);
+    dcsd.setBitVectors("{0, 1}{0, 1}{1, 2, 4}{0, 1, 2}{0, 1, 2}{0, 2}{0, 1, 3, 4}{0, 1}{0, 1}{3, 4, 6}{2}{0, 1}{0, 3}{0}{0, 1}{0, 1, 4}");
+    data.setDoubleStats(dcsd);
+    obj.setStatsData(data);
+    cs.addToStatsObj(obj);
+
+    store.updatePartitionColumnStatistics(cs, partVals2);
+
+    Checker statChecker = new Checker() {
+      @Override
+      public void checkStats(AggrStats aggrStats) throws Exception {
+        Assert.assertEquals(2, aggrStats.getPartsFound());
+        Assert.assertEquals(1, aggrStats.getColStatsSize());
+        ColumnStatisticsObj cso = aggrStats.getColStats().get(0);
+        Assert.assertEquals("col1", cso.getColName());
+        Assert.assertEquals("double", cso.getColType());
+        DoubleColumnStatsData dcsd = cso.getStatsData().getDoubleStats();
+        Assert.assertEquals(1000.23, dcsd.getHighValue(), 0.01);
+        Assert.assertEquals(-20.12, dcsd.getLowValue(), 0.01);
+        Assert.assertEquals(60, dcsd.getNumNulls());
+        Assert.assertEquals(5, dcsd.getNumDVs());
+        Assert
+            .assertEquals(
+                "{0, 1, 4, 5, 7}{0, 1}{0, 1, 2, 4}{0, 1, 2, 4}{0, 1, 2}{0, 2}{0, 1, 3, 4}{0, 1, 2, 3, 4}{0, 1, 4}{0, 1, 3, 4, 6}{0, 2}{0, 1, 3, 8}{0, 2, 3}{0, 2}{0, 1, 9}{0, 1, 4}",
+                dcsd.getBitVectors());
+      }
+    };
+
+    AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName,
+        Arrays.asList("ds=today", "ds=yesterday"), Arrays.asList("col1"));
+    statChecker.checkStats(aggrStats);
+
+    // Check that we had to build it from the stats
+    Assert.assertEquals(0, store.backdoor().getStatsCache().hbaseHits.getCnt());
+    Assert.assertEquals(1, store.backdoor().getStatsCache().totalGets.getCnt());
+    Assert.assertEquals(1, store.backdoor().getStatsCache().misses.getCnt());
+
+    // Call again, this time it should come from memory. Also, reverse the name
+    // order this time
+    // to assure that we still hit.
+    aggrStats = store.get_aggr_stats_for(dbName, tableName,
+        Arrays.asList("ds=yesterday", "ds=today"), Arrays.asList("col1"));
+    statChecker.checkStats(aggrStats);
+
+    Assert.assertEquals(0, store.backdoor().getStatsCache().hbaseHits.getCnt());
+    Assert.assertEquals(2, store.backdoor().getStatsCache().totalGets.getCnt());
+    Assert.assertEquals(1, store.backdoor().getStatsCache().misses.getCnt());
+  }
+
+}


[3/4] hive git commit: HIVE-12763: Use bit vector to track NDV (Pengcheng Xiong, reviewed by Laljo John Pullokkaran and Alan Gates)

Posted by px...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/BooleanColumnStatsData.java
----------------------------------------------------------------------
diff --git a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/BooleanColumnStatsData.java b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/BooleanColumnStatsData.java
index 6aa4668..de39d21 100644
--- a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/BooleanColumnStatsData.java
+++ b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/BooleanColumnStatsData.java
@@ -41,6 +41,7 @@ public class BooleanColumnStatsData implements org.apache.thrift.TBase<BooleanCo
   private static final org.apache.thrift.protocol.TField NUM_TRUES_FIELD_DESC = new org.apache.thrift.protocol.TField("numTrues", org.apache.thrift.protocol.TType.I64, (short)1);
   private static final org.apache.thrift.protocol.TField NUM_FALSES_FIELD_DESC = new org.apache.thrift.protocol.TField("numFalses", org.apache.thrift.protocol.TType.I64, (short)2);
   private static final org.apache.thrift.protocol.TField NUM_NULLS_FIELD_DESC = new org.apache.thrift.protocol.TField("numNulls", org.apache.thrift.protocol.TType.I64, (short)3);
+  private static final org.apache.thrift.protocol.TField BIT_VECTORS_FIELD_DESC = new org.apache.thrift.protocol.TField("bitVectors", org.apache.thrift.protocol.TType.STRING, (short)4);
 
   private static final Map<Class<? extends IScheme>, SchemeFactory> schemes = new HashMap<Class<? extends IScheme>, SchemeFactory>();
   static {
@@ -51,12 +52,14 @@ public class BooleanColumnStatsData implements org.apache.thrift.TBase<BooleanCo
   private long numTrues; // required
   private long numFalses; // required
   private long numNulls; // required
+  private String bitVectors; // optional
 
   /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */
   public enum _Fields implements org.apache.thrift.TFieldIdEnum {
     NUM_TRUES((short)1, "numTrues"),
     NUM_FALSES((short)2, "numFalses"),
-    NUM_NULLS((short)3, "numNulls");
+    NUM_NULLS((short)3, "numNulls"),
+    BIT_VECTORS((short)4, "bitVectors");
 
     private static final Map<String, _Fields> byName = new HashMap<String, _Fields>();
 
@@ -77,6 +80,8 @@ public class BooleanColumnStatsData implements org.apache.thrift.TBase<BooleanCo
           return NUM_FALSES;
         case 3: // NUM_NULLS
           return NUM_NULLS;
+        case 4: // BIT_VECTORS
+          return BIT_VECTORS;
         default:
           return null;
       }
@@ -121,6 +126,7 @@ public class BooleanColumnStatsData implements org.apache.thrift.TBase<BooleanCo
   private static final int __NUMFALSES_ISSET_ID = 1;
   private static final int __NUMNULLS_ISSET_ID = 2;
   private byte __isset_bitfield = 0;
+  private static final _Fields optionals[] = {_Fields.BIT_VECTORS};
   public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap;
   static {
     Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class);
@@ -130,6 +136,8 @@ public class BooleanColumnStatsData implements org.apache.thrift.TBase<BooleanCo
         new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64)));
     tmpMap.put(_Fields.NUM_NULLS, new org.apache.thrift.meta_data.FieldMetaData("numNulls", org.apache.thrift.TFieldRequirementType.REQUIRED, 
         new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64)));
+    tmpMap.put(_Fields.BIT_VECTORS, new org.apache.thrift.meta_data.FieldMetaData("bitVectors", org.apache.thrift.TFieldRequirementType.OPTIONAL, 
+        new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING)));
     metaDataMap = Collections.unmodifiableMap(tmpMap);
     org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(BooleanColumnStatsData.class, metaDataMap);
   }
@@ -159,6 +167,9 @@ public class BooleanColumnStatsData implements org.apache.thrift.TBase<BooleanCo
     this.numTrues = other.numTrues;
     this.numFalses = other.numFalses;
     this.numNulls = other.numNulls;
+    if (other.isSetBitVectors()) {
+      this.bitVectors = other.bitVectors;
+    }
   }
 
   public BooleanColumnStatsData deepCopy() {
@@ -173,6 +184,7 @@ public class BooleanColumnStatsData implements org.apache.thrift.TBase<BooleanCo
     this.numFalses = 0;
     setNumNullsIsSet(false);
     this.numNulls = 0;
+    this.bitVectors = null;
   }
 
   public long getNumTrues() {
@@ -241,6 +253,29 @@ public class BooleanColumnStatsData implements org.apache.thrift.TBase<BooleanCo
     __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __NUMNULLS_ISSET_ID, value);
   }
 
+  public String getBitVectors() {
+    return this.bitVectors;
+  }
+
+  public void setBitVectors(String bitVectors) {
+    this.bitVectors = bitVectors;
+  }
+
+  public void unsetBitVectors() {
+    this.bitVectors = null;
+  }
+
+  /** Returns true if field bitVectors is set (has been assigned a value) and false otherwise */
+  public boolean isSetBitVectors() {
+    return this.bitVectors != null;
+  }
+
+  public void setBitVectorsIsSet(boolean value) {
+    if (!value) {
+      this.bitVectors = null;
+    }
+  }
+
   public void setFieldValue(_Fields field, Object value) {
     switch (field) {
     case NUM_TRUES:
@@ -267,6 +302,14 @@ public class BooleanColumnStatsData implements org.apache.thrift.TBase<BooleanCo
       }
       break;
 
+    case BIT_VECTORS:
+      if (value == null) {
+        unsetBitVectors();
+      } else {
+        setBitVectors((String)value);
+      }
+      break;
+
     }
   }
 
@@ -281,6 +324,9 @@ public class BooleanColumnStatsData implements org.apache.thrift.TBase<BooleanCo
     case NUM_NULLS:
       return getNumNulls();
 
+    case BIT_VECTORS:
+      return getBitVectors();
+
     }
     throw new IllegalStateException();
   }
@@ -298,6 +344,8 @@ public class BooleanColumnStatsData implements org.apache.thrift.TBase<BooleanCo
       return isSetNumFalses();
     case NUM_NULLS:
       return isSetNumNulls();
+    case BIT_VECTORS:
+      return isSetBitVectors();
     }
     throw new IllegalStateException();
   }
@@ -342,6 +390,15 @@ public class BooleanColumnStatsData implements org.apache.thrift.TBase<BooleanCo
         return false;
     }
 
+    boolean this_present_bitVectors = true && this.isSetBitVectors();
+    boolean that_present_bitVectors = true && that.isSetBitVectors();
+    if (this_present_bitVectors || that_present_bitVectors) {
+      if (!(this_present_bitVectors && that_present_bitVectors))
+        return false;
+      if (!this.bitVectors.equals(that.bitVectors))
+        return false;
+    }
+
     return true;
   }
 
@@ -364,6 +421,11 @@ public class BooleanColumnStatsData implements org.apache.thrift.TBase<BooleanCo
     if (present_numNulls)
       list.add(numNulls);
 
+    boolean present_bitVectors = true && (isSetBitVectors());
+    list.add(present_bitVectors);
+    if (present_bitVectors)
+      list.add(bitVectors);
+
     return list.hashCode();
   }
 
@@ -405,6 +467,16 @@ public class BooleanColumnStatsData implements org.apache.thrift.TBase<BooleanCo
         return lastComparison;
       }
     }
+    lastComparison = Boolean.valueOf(isSetBitVectors()).compareTo(other.isSetBitVectors());
+    if (lastComparison != 0) {
+      return lastComparison;
+    }
+    if (isSetBitVectors()) {
+      lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.bitVectors, other.bitVectors);
+      if (lastComparison != 0) {
+        return lastComparison;
+      }
+    }
     return 0;
   }
 
@@ -436,6 +508,16 @@ public class BooleanColumnStatsData implements org.apache.thrift.TBase<BooleanCo
     sb.append("numNulls:");
     sb.append(this.numNulls);
     first = false;
+    if (isSetBitVectors()) {
+      if (!first) sb.append(", ");
+      sb.append("bitVectors:");
+      if (this.bitVectors == null) {
+        sb.append("null");
+      } else {
+        sb.append(this.bitVectors);
+      }
+      first = false;
+    }
     sb.append(")");
     return sb.toString();
   }
@@ -517,6 +599,14 @@ public class BooleanColumnStatsData implements org.apache.thrift.TBase<BooleanCo
               org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
             }
             break;
+          case 4: // BIT_VECTORS
+            if (schemeField.type == org.apache.thrift.protocol.TType.STRING) {
+              struct.bitVectors = iprot.readString();
+              struct.setBitVectorsIsSet(true);
+            } else { 
+              org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
+            }
+            break;
           default:
             org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
         }
@@ -539,6 +629,13 @@ public class BooleanColumnStatsData implements org.apache.thrift.TBase<BooleanCo
       oprot.writeFieldBegin(NUM_NULLS_FIELD_DESC);
       oprot.writeI64(struct.numNulls);
       oprot.writeFieldEnd();
+      if (struct.bitVectors != null) {
+        if (struct.isSetBitVectors()) {
+          oprot.writeFieldBegin(BIT_VECTORS_FIELD_DESC);
+          oprot.writeString(struct.bitVectors);
+          oprot.writeFieldEnd();
+        }
+      }
       oprot.writeFieldStop();
       oprot.writeStructEnd();
     }
@@ -559,6 +656,14 @@ public class BooleanColumnStatsData implements org.apache.thrift.TBase<BooleanCo
       oprot.writeI64(struct.numTrues);
       oprot.writeI64(struct.numFalses);
       oprot.writeI64(struct.numNulls);
+      BitSet optionals = new BitSet();
+      if (struct.isSetBitVectors()) {
+        optionals.set(0);
+      }
+      oprot.writeBitSet(optionals, 1);
+      if (struct.isSetBitVectors()) {
+        oprot.writeString(struct.bitVectors);
+      }
     }
 
     @Override
@@ -570,6 +675,11 @@ public class BooleanColumnStatsData implements org.apache.thrift.TBase<BooleanCo
       struct.setNumFalsesIsSet(true);
       struct.numNulls = iprot.readI64();
       struct.setNumNullsIsSet(true);
+      BitSet incoming = iprot.readBitSet(1);
+      if (incoming.get(0)) {
+        struct.bitVectors = iprot.readString();
+        struct.setBitVectorsIsSet(true);
+      }
     }
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DateColumnStatsData.java
----------------------------------------------------------------------
diff --git a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DateColumnStatsData.java b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DateColumnStatsData.java
index 2ebb811..edc87a1 100644
--- a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DateColumnStatsData.java
+++ b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DateColumnStatsData.java
@@ -42,6 +42,7 @@ public class DateColumnStatsData implements org.apache.thrift.TBase<DateColumnSt
   private static final org.apache.thrift.protocol.TField HIGH_VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("highValue", org.apache.thrift.protocol.TType.STRUCT, (short)2);
   private static final org.apache.thrift.protocol.TField NUM_NULLS_FIELD_DESC = new org.apache.thrift.protocol.TField("numNulls", org.apache.thrift.protocol.TType.I64, (short)3);
   private static final org.apache.thrift.protocol.TField NUM_DVS_FIELD_DESC = new org.apache.thrift.protocol.TField("numDVs", org.apache.thrift.protocol.TType.I64, (short)4);
+  private static final org.apache.thrift.protocol.TField BIT_VECTORS_FIELD_DESC = new org.apache.thrift.protocol.TField("bitVectors", org.apache.thrift.protocol.TType.STRING, (short)5);
 
   private static final Map<Class<? extends IScheme>, SchemeFactory> schemes = new HashMap<Class<? extends IScheme>, SchemeFactory>();
   static {
@@ -53,13 +54,15 @@ public class DateColumnStatsData implements org.apache.thrift.TBase<DateColumnSt
   private Date highValue; // optional
   private long numNulls; // required
   private long numDVs; // required
+  private String bitVectors; // optional
 
   /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */
   public enum _Fields implements org.apache.thrift.TFieldIdEnum {
     LOW_VALUE((short)1, "lowValue"),
     HIGH_VALUE((short)2, "highValue"),
     NUM_NULLS((short)3, "numNulls"),
-    NUM_DVS((short)4, "numDVs");
+    NUM_DVS((short)4, "numDVs"),
+    BIT_VECTORS((short)5, "bitVectors");
 
     private static final Map<String, _Fields> byName = new HashMap<String, _Fields>();
 
@@ -82,6 +85,8 @@ public class DateColumnStatsData implements org.apache.thrift.TBase<DateColumnSt
           return NUM_NULLS;
         case 4: // NUM_DVS
           return NUM_DVS;
+        case 5: // BIT_VECTORS
+          return BIT_VECTORS;
         default:
           return null;
       }
@@ -125,7 +130,7 @@ public class DateColumnStatsData implements org.apache.thrift.TBase<DateColumnSt
   private static final int __NUMNULLS_ISSET_ID = 0;
   private static final int __NUMDVS_ISSET_ID = 1;
   private byte __isset_bitfield = 0;
-  private static final _Fields optionals[] = {_Fields.LOW_VALUE,_Fields.HIGH_VALUE};
+  private static final _Fields optionals[] = {_Fields.LOW_VALUE,_Fields.HIGH_VALUE,_Fields.BIT_VECTORS};
   public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap;
   static {
     Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class);
@@ -137,6 +142,8 @@ public class DateColumnStatsData implements org.apache.thrift.TBase<DateColumnSt
         new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64)));
     tmpMap.put(_Fields.NUM_DVS, new org.apache.thrift.meta_data.FieldMetaData("numDVs", org.apache.thrift.TFieldRequirementType.REQUIRED, 
         new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64)));
+    tmpMap.put(_Fields.BIT_VECTORS, new org.apache.thrift.meta_data.FieldMetaData("bitVectors", org.apache.thrift.TFieldRequirementType.OPTIONAL, 
+        new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING)));
     metaDataMap = Collections.unmodifiableMap(tmpMap);
     org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(DateColumnStatsData.class, metaDataMap);
   }
@@ -168,6 +175,9 @@ public class DateColumnStatsData implements org.apache.thrift.TBase<DateColumnSt
     }
     this.numNulls = other.numNulls;
     this.numDVs = other.numDVs;
+    if (other.isSetBitVectors()) {
+      this.bitVectors = other.bitVectors;
+    }
   }
 
   public DateColumnStatsData deepCopy() {
@@ -182,6 +192,7 @@ public class DateColumnStatsData implements org.apache.thrift.TBase<DateColumnSt
     this.numNulls = 0;
     setNumDVsIsSet(false);
     this.numDVs = 0;
+    this.bitVectors = null;
   }
 
   public Date getLowValue() {
@@ -274,6 +285,29 @@ public class DateColumnStatsData implements org.apache.thrift.TBase<DateColumnSt
     __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __NUMDVS_ISSET_ID, value);
   }
 
+  public String getBitVectors() {
+    return this.bitVectors;
+  }
+
+  public void setBitVectors(String bitVectors) {
+    this.bitVectors = bitVectors;
+  }
+
+  public void unsetBitVectors() {
+    this.bitVectors = null;
+  }
+
+  /** Returns true if field bitVectors is set (has been assigned a value) and false otherwise */
+  public boolean isSetBitVectors() {
+    return this.bitVectors != null;
+  }
+
+  public void setBitVectorsIsSet(boolean value) {
+    if (!value) {
+      this.bitVectors = null;
+    }
+  }
+
   public void setFieldValue(_Fields field, Object value) {
     switch (field) {
     case LOW_VALUE:
@@ -308,6 +342,14 @@ public class DateColumnStatsData implements org.apache.thrift.TBase<DateColumnSt
       }
       break;
 
+    case BIT_VECTORS:
+      if (value == null) {
+        unsetBitVectors();
+      } else {
+        setBitVectors((String)value);
+      }
+      break;
+
     }
   }
 
@@ -325,6 +367,9 @@ public class DateColumnStatsData implements org.apache.thrift.TBase<DateColumnSt
     case NUM_DVS:
       return getNumDVs();
 
+    case BIT_VECTORS:
+      return getBitVectors();
+
     }
     throw new IllegalStateException();
   }
@@ -344,6 +389,8 @@ public class DateColumnStatsData implements org.apache.thrift.TBase<DateColumnSt
       return isSetNumNulls();
     case NUM_DVS:
       return isSetNumDVs();
+    case BIT_VECTORS:
+      return isSetBitVectors();
     }
     throw new IllegalStateException();
   }
@@ -397,6 +444,15 @@ public class DateColumnStatsData implements org.apache.thrift.TBase<DateColumnSt
         return false;
     }
 
+    boolean this_present_bitVectors = true && this.isSetBitVectors();
+    boolean that_present_bitVectors = true && that.isSetBitVectors();
+    if (this_present_bitVectors || that_present_bitVectors) {
+      if (!(this_present_bitVectors && that_present_bitVectors))
+        return false;
+      if (!this.bitVectors.equals(that.bitVectors))
+        return false;
+    }
+
     return true;
   }
 
@@ -424,6 +480,11 @@ public class DateColumnStatsData implements org.apache.thrift.TBase<DateColumnSt
     if (present_numDVs)
       list.add(numDVs);
 
+    boolean present_bitVectors = true && (isSetBitVectors());
+    list.add(present_bitVectors);
+    if (present_bitVectors)
+      list.add(bitVectors);
+
     return list.hashCode();
   }
 
@@ -475,6 +536,16 @@ public class DateColumnStatsData implements org.apache.thrift.TBase<DateColumnSt
         return lastComparison;
       }
     }
+    lastComparison = Boolean.valueOf(isSetBitVectors()).compareTo(other.isSetBitVectors());
+    if (lastComparison != 0) {
+      return lastComparison;
+    }
+    if (isSetBitVectors()) {
+      lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.bitVectors, other.bitVectors);
+      if (lastComparison != 0) {
+        return lastComparison;
+      }
+    }
     return 0;
   }
 
@@ -522,6 +593,16 @@ public class DateColumnStatsData implements org.apache.thrift.TBase<DateColumnSt
     sb.append("numDVs:");
     sb.append(this.numDVs);
     first = false;
+    if (isSetBitVectors()) {
+      if (!first) sb.append(", ");
+      sb.append("bitVectors:");
+      if (this.bitVectors == null) {
+        sb.append("null");
+      } else {
+        sb.append(this.bitVectors);
+      }
+      first = false;
+    }
     sb.append(")");
     return sb.toString();
   }
@@ -615,6 +696,14 @@ public class DateColumnStatsData implements org.apache.thrift.TBase<DateColumnSt
               org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
             }
             break;
+          case 5: // BIT_VECTORS
+            if (schemeField.type == org.apache.thrift.protocol.TType.STRING) {
+              struct.bitVectors = iprot.readString();
+              struct.setBitVectorsIsSet(true);
+            } else { 
+              org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
+            }
+            break;
           default:
             org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
         }
@@ -648,6 +737,13 @@ public class DateColumnStatsData implements org.apache.thrift.TBase<DateColumnSt
       oprot.writeFieldBegin(NUM_DVS_FIELD_DESC);
       oprot.writeI64(struct.numDVs);
       oprot.writeFieldEnd();
+      if (struct.bitVectors != null) {
+        if (struct.isSetBitVectors()) {
+          oprot.writeFieldBegin(BIT_VECTORS_FIELD_DESC);
+          oprot.writeString(struct.bitVectors);
+          oprot.writeFieldEnd();
+        }
+      }
       oprot.writeFieldStop();
       oprot.writeStructEnd();
     }
@@ -674,13 +770,19 @@ public class DateColumnStatsData implements org.apache.thrift.TBase<DateColumnSt
       if (struct.isSetHighValue()) {
         optionals.set(1);
       }
-      oprot.writeBitSet(optionals, 2);
+      if (struct.isSetBitVectors()) {
+        optionals.set(2);
+      }
+      oprot.writeBitSet(optionals, 3);
       if (struct.isSetLowValue()) {
         struct.lowValue.write(oprot);
       }
       if (struct.isSetHighValue()) {
         struct.highValue.write(oprot);
       }
+      if (struct.isSetBitVectors()) {
+        oprot.writeString(struct.bitVectors);
+      }
     }
 
     @Override
@@ -690,7 +792,7 @@ public class DateColumnStatsData implements org.apache.thrift.TBase<DateColumnSt
       struct.setNumNullsIsSet(true);
       struct.numDVs = iprot.readI64();
       struct.setNumDVsIsSet(true);
-      BitSet incoming = iprot.readBitSet(2);
+      BitSet incoming = iprot.readBitSet(3);
       if (incoming.get(0)) {
         struct.lowValue = new Date();
         struct.lowValue.read(iprot);
@@ -701,6 +803,10 @@ public class DateColumnStatsData implements org.apache.thrift.TBase<DateColumnSt
         struct.highValue.read(iprot);
         struct.setHighValueIsSet(true);
       }
+      if (incoming.get(2)) {
+        struct.bitVectors = iprot.readString();
+        struct.setBitVectorsIsSet(true);
+      }
     }
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DecimalColumnStatsData.java
----------------------------------------------------------------------
diff --git a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DecimalColumnStatsData.java b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DecimalColumnStatsData.java
index 720176a..ec363dc 100644
--- a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DecimalColumnStatsData.java
+++ b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DecimalColumnStatsData.java
@@ -42,6 +42,7 @@ public class DecimalColumnStatsData implements org.apache.thrift.TBase<DecimalCo
   private static final org.apache.thrift.protocol.TField HIGH_VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("highValue", org.apache.thrift.protocol.TType.STRUCT, (short)2);
   private static final org.apache.thrift.protocol.TField NUM_NULLS_FIELD_DESC = new org.apache.thrift.protocol.TField("numNulls", org.apache.thrift.protocol.TType.I64, (short)3);
   private static final org.apache.thrift.protocol.TField NUM_DVS_FIELD_DESC = new org.apache.thrift.protocol.TField("numDVs", org.apache.thrift.protocol.TType.I64, (short)4);
+  private static final org.apache.thrift.protocol.TField BIT_VECTORS_FIELD_DESC = new org.apache.thrift.protocol.TField("bitVectors", org.apache.thrift.protocol.TType.STRING, (short)5);
 
   private static final Map<Class<? extends IScheme>, SchemeFactory> schemes = new HashMap<Class<? extends IScheme>, SchemeFactory>();
   static {
@@ -53,13 +54,15 @@ public class DecimalColumnStatsData implements org.apache.thrift.TBase<DecimalCo
   private Decimal highValue; // optional
   private long numNulls; // required
   private long numDVs; // required
+  private String bitVectors; // optional
 
   /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */
   public enum _Fields implements org.apache.thrift.TFieldIdEnum {
     LOW_VALUE((short)1, "lowValue"),
     HIGH_VALUE((short)2, "highValue"),
     NUM_NULLS((short)3, "numNulls"),
-    NUM_DVS((short)4, "numDVs");
+    NUM_DVS((short)4, "numDVs"),
+    BIT_VECTORS((short)5, "bitVectors");
 
     private static final Map<String, _Fields> byName = new HashMap<String, _Fields>();
 
@@ -82,6 +85,8 @@ public class DecimalColumnStatsData implements org.apache.thrift.TBase<DecimalCo
           return NUM_NULLS;
         case 4: // NUM_DVS
           return NUM_DVS;
+        case 5: // BIT_VECTORS
+          return BIT_VECTORS;
         default:
           return null;
       }
@@ -125,7 +130,7 @@ public class DecimalColumnStatsData implements org.apache.thrift.TBase<DecimalCo
   private static final int __NUMNULLS_ISSET_ID = 0;
   private static final int __NUMDVS_ISSET_ID = 1;
   private byte __isset_bitfield = 0;
-  private static final _Fields optionals[] = {_Fields.LOW_VALUE,_Fields.HIGH_VALUE};
+  private static final _Fields optionals[] = {_Fields.LOW_VALUE,_Fields.HIGH_VALUE,_Fields.BIT_VECTORS};
   public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap;
   static {
     Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class);
@@ -137,6 +142,8 @@ public class DecimalColumnStatsData implements org.apache.thrift.TBase<DecimalCo
         new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64)));
     tmpMap.put(_Fields.NUM_DVS, new org.apache.thrift.meta_data.FieldMetaData("numDVs", org.apache.thrift.TFieldRequirementType.REQUIRED, 
         new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64)));
+    tmpMap.put(_Fields.BIT_VECTORS, new org.apache.thrift.meta_data.FieldMetaData("bitVectors", org.apache.thrift.TFieldRequirementType.OPTIONAL, 
+        new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING)));
     metaDataMap = Collections.unmodifiableMap(tmpMap);
     org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(DecimalColumnStatsData.class, metaDataMap);
   }
@@ -168,6 +175,9 @@ public class DecimalColumnStatsData implements org.apache.thrift.TBase<DecimalCo
     }
     this.numNulls = other.numNulls;
     this.numDVs = other.numDVs;
+    if (other.isSetBitVectors()) {
+      this.bitVectors = other.bitVectors;
+    }
   }
 
   public DecimalColumnStatsData deepCopy() {
@@ -182,6 +192,7 @@ public class DecimalColumnStatsData implements org.apache.thrift.TBase<DecimalCo
     this.numNulls = 0;
     setNumDVsIsSet(false);
     this.numDVs = 0;
+    this.bitVectors = null;
   }
 
   public Decimal getLowValue() {
@@ -274,6 +285,29 @@ public class DecimalColumnStatsData implements org.apache.thrift.TBase<DecimalCo
     __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __NUMDVS_ISSET_ID, value);
   }
 
+  public String getBitVectors() {
+    return this.bitVectors;
+  }
+
+  public void setBitVectors(String bitVectors) {
+    this.bitVectors = bitVectors;
+  }
+
+  public void unsetBitVectors() {
+    this.bitVectors = null;
+  }
+
+  /** Returns true if field bitVectors is set (has been assigned a value) and false otherwise */
+  public boolean isSetBitVectors() {
+    return this.bitVectors != null;
+  }
+
+  public void setBitVectorsIsSet(boolean value) {
+    if (!value) {
+      this.bitVectors = null;
+    }
+  }
+
   public void setFieldValue(_Fields field, Object value) {
     switch (field) {
     case LOW_VALUE:
@@ -308,6 +342,14 @@ public class DecimalColumnStatsData implements org.apache.thrift.TBase<DecimalCo
       }
       break;
 
+    case BIT_VECTORS:
+      if (value == null) {
+        unsetBitVectors();
+      } else {
+        setBitVectors((String)value);
+      }
+      break;
+
     }
   }
 
@@ -325,6 +367,9 @@ public class DecimalColumnStatsData implements org.apache.thrift.TBase<DecimalCo
     case NUM_DVS:
       return getNumDVs();
 
+    case BIT_VECTORS:
+      return getBitVectors();
+
     }
     throw new IllegalStateException();
   }
@@ -344,6 +389,8 @@ public class DecimalColumnStatsData implements org.apache.thrift.TBase<DecimalCo
       return isSetNumNulls();
     case NUM_DVS:
       return isSetNumDVs();
+    case BIT_VECTORS:
+      return isSetBitVectors();
     }
     throw new IllegalStateException();
   }
@@ -397,6 +444,15 @@ public class DecimalColumnStatsData implements org.apache.thrift.TBase<DecimalCo
         return false;
     }
 
+    boolean this_present_bitVectors = true && this.isSetBitVectors();
+    boolean that_present_bitVectors = true && that.isSetBitVectors();
+    if (this_present_bitVectors || that_present_bitVectors) {
+      if (!(this_present_bitVectors && that_present_bitVectors))
+        return false;
+      if (!this.bitVectors.equals(that.bitVectors))
+        return false;
+    }
+
     return true;
   }
 
@@ -424,6 +480,11 @@ public class DecimalColumnStatsData implements org.apache.thrift.TBase<DecimalCo
     if (present_numDVs)
       list.add(numDVs);
 
+    boolean present_bitVectors = true && (isSetBitVectors());
+    list.add(present_bitVectors);
+    if (present_bitVectors)
+      list.add(bitVectors);
+
     return list.hashCode();
   }
 
@@ -475,6 +536,16 @@ public class DecimalColumnStatsData implements org.apache.thrift.TBase<DecimalCo
         return lastComparison;
       }
     }
+    lastComparison = Boolean.valueOf(isSetBitVectors()).compareTo(other.isSetBitVectors());
+    if (lastComparison != 0) {
+      return lastComparison;
+    }
+    if (isSetBitVectors()) {
+      lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.bitVectors, other.bitVectors);
+      if (lastComparison != 0) {
+        return lastComparison;
+      }
+    }
     return 0;
   }
 
@@ -522,6 +593,16 @@ public class DecimalColumnStatsData implements org.apache.thrift.TBase<DecimalCo
     sb.append("numDVs:");
     sb.append(this.numDVs);
     first = false;
+    if (isSetBitVectors()) {
+      if (!first) sb.append(", ");
+      sb.append("bitVectors:");
+      if (this.bitVectors == null) {
+        sb.append("null");
+      } else {
+        sb.append(this.bitVectors);
+      }
+      first = false;
+    }
     sb.append(")");
     return sb.toString();
   }
@@ -615,6 +696,14 @@ public class DecimalColumnStatsData implements org.apache.thrift.TBase<DecimalCo
               org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
             }
             break;
+          case 5: // BIT_VECTORS
+            if (schemeField.type == org.apache.thrift.protocol.TType.STRING) {
+              struct.bitVectors = iprot.readString();
+              struct.setBitVectorsIsSet(true);
+            } else { 
+              org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
+            }
+            break;
           default:
             org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
         }
@@ -648,6 +737,13 @@ public class DecimalColumnStatsData implements org.apache.thrift.TBase<DecimalCo
       oprot.writeFieldBegin(NUM_DVS_FIELD_DESC);
       oprot.writeI64(struct.numDVs);
       oprot.writeFieldEnd();
+      if (struct.bitVectors != null) {
+        if (struct.isSetBitVectors()) {
+          oprot.writeFieldBegin(BIT_VECTORS_FIELD_DESC);
+          oprot.writeString(struct.bitVectors);
+          oprot.writeFieldEnd();
+        }
+      }
       oprot.writeFieldStop();
       oprot.writeStructEnd();
     }
@@ -674,13 +770,19 @@ public class DecimalColumnStatsData implements org.apache.thrift.TBase<DecimalCo
       if (struct.isSetHighValue()) {
         optionals.set(1);
       }
-      oprot.writeBitSet(optionals, 2);
+      if (struct.isSetBitVectors()) {
+        optionals.set(2);
+      }
+      oprot.writeBitSet(optionals, 3);
       if (struct.isSetLowValue()) {
         struct.lowValue.write(oprot);
       }
       if (struct.isSetHighValue()) {
         struct.highValue.write(oprot);
       }
+      if (struct.isSetBitVectors()) {
+        oprot.writeString(struct.bitVectors);
+      }
     }
 
     @Override
@@ -690,7 +792,7 @@ public class DecimalColumnStatsData implements org.apache.thrift.TBase<DecimalCo
       struct.setNumNullsIsSet(true);
       struct.numDVs = iprot.readI64();
       struct.setNumDVsIsSet(true);
-      BitSet incoming = iprot.readBitSet(2);
+      BitSet incoming = iprot.readBitSet(3);
       if (incoming.get(0)) {
         struct.lowValue = new Decimal();
         struct.lowValue.read(iprot);
@@ -701,6 +803,10 @@ public class DecimalColumnStatsData implements org.apache.thrift.TBase<DecimalCo
         struct.highValue.read(iprot);
         struct.setHighValueIsSet(true);
       }
+      if (incoming.get(2)) {
+        struct.bitVectors = iprot.readString();
+        struct.setBitVectorsIsSet(true);
+      }
     }
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DoubleColumnStatsData.java
----------------------------------------------------------------------
diff --git a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DoubleColumnStatsData.java b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DoubleColumnStatsData.java
index 5d48b5d..e3340e4 100644
--- a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DoubleColumnStatsData.java
+++ b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DoubleColumnStatsData.java
@@ -42,6 +42,7 @@ public class DoubleColumnStatsData implements org.apache.thrift.TBase<DoubleColu
   private static final org.apache.thrift.protocol.TField HIGH_VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("highValue", org.apache.thrift.protocol.TType.DOUBLE, (short)2);
   private static final org.apache.thrift.protocol.TField NUM_NULLS_FIELD_DESC = new org.apache.thrift.protocol.TField("numNulls", org.apache.thrift.protocol.TType.I64, (short)3);
   private static final org.apache.thrift.protocol.TField NUM_DVS_FIELD_DESC = new org.apache.thrift.protocol.TField("numDVs", org.apache.thrift.protocol.TType.I64, (short)4);
+  private static final org.apache.thrift.protocol.TField BIT_VECTORS_FIELD_DESC = new org.apache.thrift.protocol.TField("bitVectors", org.apache.thrift.protocol.TType.STRING, (short)5);
 
   private static final Map<Class<? extends IScheme>, SchemeFactory> schemes = new HashMap<Class<? extends IScheme>, SchemeFactory>();
   static {
@@ -53,13 +54,15 @@ public class DoubleColumnStatsData implements org.apache.thrift.TBase<DoubleColu
   private double highValue; // optional
   private long numNulls; // required
   private long numDVs; // required
+  private String bitVectors; // optional
 
   /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */
   public enum _Fields implements org.apache.thrift.TFieldIdEnum {
     LOW_VALUE((short)1, "lowValue"),
     HIGH_VALUE((short)2, "highValue"),
     NUM_NULLS((short)3, "numNulls"),
-    NUM_DVS((short)4, "numDVs");
+    NUM_DVS((short)4, "numDVs"),
+    BIT_VECTORS((short)5, "bitVectors");
 
     private static final Map<String, _Fields> byName = new HashMap<String, _Fields>();
 
@@ -82,6 +85,8 @@ public class DoubleColumnStatsData implements org.apache.thrift.TBase<DoubleColu
           return NUM_NULLS;
         case 4: // NUM_DVS
           return NUM_DVS;
+        case 5: // BIT_VECTORS
+          return BIT_VECTORS;
         default:
           return null;
       }
@@ -127,7 +132,7 @@ public class DoubleColumnStatsData implements org.apache.thrift.TBase<DoubleColu
   private static final int __NUMNULLS_ISSET_ID = 2;
   private static final int __NUMDVS_ISSET_ID = 3;
   private byte __isset_bitfield = 0;
-  private static final _Fields optionals[] = {_Fields.LOW_VALUE,_Fields.HIGH_VALUE};
+  private static final _Fields optionals[] = {_Fields.LOW_VALUE,_Fields.HIGH_VALUE,_Fields.BIT_VECTORS};
   public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap;
   static {
     Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class);
@@ -139,6 +144,8 @@ public class DoubleColumnStatsData implements org.apache.thrift.TBase<DoubleColu
         new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64)));
     tmpMap.put(_Fields.NUM_DVS, new org.apache.thrift.meta_data.FieldMetaData("numDVs", org.apache.thrift.TFieldRequirementType.REQUIRED, 
         new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64)));
+    tmpMap.put(_Fields.BIT_VECTORS, new org.apache.thrift.meta_data.FieldMetaData("bitVectors", org.apache.thrift.TFieldRequirementType.OPTIONAL, 
+        new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING)));
     metaDataMap = Collections.unmodifiableMap(tmpMap);
     org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(DoubleColumnStatsData.class, metaDataMap);
   }
@@ -166,6 +173,9 @@ public class DoubleColumnStatsData implements org.apache.thrift.TBase<DoubleColu
     this.highValue = other.highValue;
     this.numNulls = other.numNulls;
     this.numDVs = other.numDVs;
+    if (other.isSetBitVectors()) {
+      this.bitVectors = other.bitVectors;
+    }
   }
 
   public DoubleColumnStatsData deepCopy() {
@@ -182,6 +192,7 @@ public class DoubleColumnStatsData implements org.apache.thrift.TBase<DoubleColu
     this.numNulls = 0;
     setNumDVsIsSet(false);
     this.numDVs = 0;
+    this.bitVectors = null;
   }
 
   public double getLowValue() {
@@ -272,6 +283,29 @@ public class DoubleColumnStatsData implements org.apache.thrift.TBase<DoubleColu
     __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __NUMDVS_ISSET_ID, value);
   }
 
+  public String getBitVectors() {
+    return this.bitVectors;
+  }
+
+  public void setBitVectors(String bitVectors) {
+    this.bitVectors = bitVectors;
+  }
+
+  public void unsetBitVectors() {
+    this.bitVectors = null;
+  }
+
+  /** Returns true if field bitVectors is set (has been assigned a value) and false otherwise */
+  public boolean isSetBitVectors() {
+    return this.bitVectors != null;
+  }
+
+  public void setBitVectorsIsSet(boolean value) {
+    if (!value) {
+      this.bitVectors = null;
+    }
+  }
+
   public void setFieldValue(_Fields field, Object value) {
     switch (field) {
     case LOW_VALUE:
@@ -306,6 +340,14 @@ public class DoubleColumnStatsData implements org.apache.thrift.TBase<DoubleColu
       }
       break;
 
+    case BIT_VECTORS:
+      if (value == null) {
+        unsetBitVectors();
+      } else {
+        setBitVectors((String)value);
+      }
+      break;
+
     }
   }
 
@@ -323,6 +365,9 @@ public class DoubleColumnStatsData implements org.apache.thrift.TBase<DoubleColu
     case NUM_DVS:
       return getNumDVs();
 
+    case BIT_VECTORS:
+      return getBitVectors();
+
     }
     throw new IllegalStateException();
   }
@@ -342,6 +387,8 @@ public class DoubleColumnStatsData implements org.apache.thrift.TBase<DoubleColu
       return isSetNumNulls();
     case NUM_DVS:
       return isSetNumDVs();
+    case BIT_VECTORS:
+      return isSetBitVectors();
     }
     throw new IllegalStateException();
   }
@@ -395,6 +442,15 @@ public class DoubleColumnStatsData implements org.apache.thrift.TBase<DoubleColu
         return false;
     }
 
+    boolean this_present_bitVectors = true && this.isSetBitVectors();
+    boolean that_present_bitVectors = true && that.isSetBitVectors();
+    if (this_present_bitVectors || that_present_bitVectors) {
+      if (!(this_present_bitVectors && that_present_bitVectors))
+        return false;
+      if (!this.bitVectors.equals(that.bitVectors))
+        return false;
+    }
+
     return true;
   }
 
@@ -422,6 +478,11 @@ public class DoubleColumnStatsData implements org.apache.thrift.TBase<DoubleColu
     if (present_numDVs)
       list.add(numDVs);
 
+    boolean present_bitVectors = true && (isSetBitVectors());
+    list.add(present_bitVectors);
+    if (present_bitVectors)
+      list.add(bitVectors);
+
     return list.hashCode();
   }
 
@@ -473,6 +534,16 @@ public class DoubleColumnStatsData implements org.apache.thrift.TBase<DoubleColu
         return lastComparison;
       }
     }
+    lastComparison = Boolean.valueOf(isSetBitVectors()).compareTo(other.isSetBitVectors());
+    if (lastComparison != 0) {
+      return lastComparison;
+    }
+    if (isSetBitVectors()) {
+      lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.bitVectors, other.bitVectors);
+      if (lastComparison != 0) {
+        return lastComparison;
+      }
+    }
     return 0;
   }
 
@@ -512,6 +583,16 @@ public class DoubleColumnStatsData implements org.apache.thrift.TBase<DoubleColu
     sb.append("numDVs:");
     sb.append(this.numDVs);
     first = false;
+    if (isSetBitVectors()) {
+      if (!first) sb.append(", ");
+      sb.append("bitVectors:");
+      if (this.bitVectors == null) {
+        sb.append("null");
+      } else {
+        sb.append(this.bitVectors);
+      }
+      first = false;
+    }
     sb.append(")");
     return sb.toString();
   }
@@ -597,6 +678,14 @@ public class DoubleColumnStatsData implements org.apache.thrift.TBase<DoubleColu
               org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
             }
             break;
+          case 5: // BIT_VECTORS
+            if (schemeField.type == org.apache.thrift.protocol.TType.STRING) {
+              struct.bitVectors = iprot.readString();
+              struct.setBitVectorsIsSet(true);
+            } else { 
+              org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
+            }
+            break;
           default:
             org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
         }
@@ -626,6 +715,13 @@ public class DoubleColumnStatsData implements org.apache.thrift.TBase<DoubleColu
       oprot.writeFieldBegin(NUM_DVS_FIELD_DESC);
       oprot.writeI64(struct.numDVs);
       oprot.writeFieldEnd();
+      if (struct.bitVectors != null) {
+        if (struct.isSetBitVectors()) {
+          oprot.writeFieldBegin(BIT_VECTORS_FIELD_DESC);
+          oprot.writeString(struct.bitVectors);
+          oprot.writeFieldEnd();
+        }
+      }
       oprot.writeFieldStop();
       oprot.writeStructEnd();
     }
@@ -652,13 +748,19 @@ public class DoubleColumnStatsData implements org.apache.thrift.TBase<DoubleColu
       if (struct.isSetHighValue()) {
         optionals.set(1);
       }
-      oprot.writeBitSet(optionals, 2);
+      if (struct.isSetBitVectors()) {
+        optionals.set(2);
+      }
+      oprot.writeBitSet(optionals, 3);
       if (struct.isSetLowValue()) {
         oprot.writeDouble(struct.lowValue);
       }
       if (struct.isSetHighValue()) {
         oprot.writeDouble(struct.highValue);
       }
+      if (struct.isSetBitVectors()) {
+        oprot.writeString(struct.bitVectors);
+      }
     }
 
     @Override
@@ -668,7 +770,7 @@ public class DoubleColumnStatsData implements org.apache.thrift.TBase<DoubleColu
       struct.setNumNullsIsSet(true);
       struct.numDVs = iprot.readI64();
       struct.setNumDVsIsSet(true);
-      BitSet incoming = iprot.readBitSet(2);
+      BitSet incoming = iprot.readBitSet(3);
       if (incoming.get(0)) {
         struct.lowValue = iprot.readDouble();
         struct.setLowValueIsSet(true);
@@ -677,6 +779,10 @@ public class DoubleColumnStatsData implements org.apache.thrift.TBase<DoubleColu
         struct.highValue = iprot.readDouble();
         struct.setHighValueIsSet(true);
       }
+      if (incoming.get(2)) {
+        struct.bitVectors = iprot.readString();
+        struct.setBitVectorsIsSet(true);
+      }
     }
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/LongColumnStatsData.java
----------------------------------------------------------------------
diff --git a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/LongColumnStatsData.java b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/LongColumnStatsData.java
index 2f41c5a..4404706 100644
--- a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/LongColumnStatsData.java
+++ b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/LongColumnStatsData.java
@@ -42,6 +42,7 @@ public class LongColumnStatsData implements org.apache.thrift.TBase<LongColumnSt
   private static final org.apache.thrift.protocol.TField HIGH_VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("highValue", org.apache.thrift.protocol.TType.I64, (short)2);
   private static final org.apache.thrift.protocol.TField NUM_NULLS_FIELD_DESC = new org.apache.thrift.protocol.TField("numNulls", org.apache.thrift.protocol.TType.I64, (short)3);
   private static final org.apache.thrift.protocol.TField NUM_DVS_FIELD_DESC = new org.apache.thrift.protocol.TField("numDVs", org.apache.thrift.protocol.TType.I64, (short)4);
+  private static final org.apache.thrift.protocol.TField BIT_VECTORS_FIELD_DESC = new org.apache.thrift.protocol.TField("bitVectors", org.apache.thrift.protocol.TType.STRING, (short)5);
 
   private static final Map<Class<? extends IScheme>, SchemeFactory> schemes = new HashMap<Class<? extends IScheme>, SchemeFactory>();
   static {
@@ -53,13 +54,15 @@ public class LongColumnStatsData implements org.apache.thrift.TBase<LongColumnSt
   private long highValue; // optional
   private long numNulls; // required
   private long numDVs; // required
+  private String bitVectors; // optional
 
   /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */
   public enum _Fields implements org.apache.thrift.TFieldIdEnum {
     LOW_VALUE((short)1, "lowValue"),
     HIGH_VALUE((short)2, "highValue"),
     NUM_NULLS((short)3, "numNulls"),
-    NUM_DVS((short)4, "numDVs");
+    NUM_DVS((short)4, "numDVs"),
+    BIT_VECTORS((short)5, "bitVectors");
 
     private static final Map<String, _Fields> byName = new HashMap<String, _Fields>();
 
@@ -82,6 +85,8 @@ public class LongColumnStatsData implements org.apache.thrift.TBase<LongColumnSt
           return NUM_NULLS;
         case 4: // NUM_DVS
           return NUM_DVS;
+        case 5: // BIT_VECTORS
+          return BIT_VECTORS;
         default:
           return null;
       }
@@ -127,7 +132,7 @@ public class LongColumnStatsData implements org.apache.thrift.TBase<LongColumnSt
   private static final int __NUMNULLS_ISSET_ID = 2;
   private static final int __NUMDVS_ISSET_ID = 3;
   private byte __isset_bitfield = 0;
-  private static final _Fields optionals[] = {_Fields.LOW_VALUE,_Fields.HIGH_VALUE};
+  private static final _Fields optionals[] = {_Fields.LOW_VALUE,_Fields.HIGH_VALUE,_Fields.BIT_VECTORS};
   public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap;
   static {
     Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class);
@@ -139,6 +144,8 @@ public class LongColumnStatsData implements org.apache.thrift.TBase<LongColumnSt
         new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64)));
     tmpMap.put(_Fields.NUM_DVS, new org.apache.thrift.meta_data.FieldMetaData("numDVs", org.apache.thrift.TFieldRequirementType.REQUIRED, 
         new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64)));
+    tmpMap.put(_Fields.BIT_VECTORS, new org.apache.thrift.meta_data.FieldMetaData("bitVectors", org.apache.thrift.TFieldRequirementType.OPTIONAL, 
+        new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING)));
     metaDataMap = Collections.unmodifiableMap(tmpMap);
     org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(LongColumnStatsData.class, metaDataMap);
   }
@@ -166,6 +173,9 @@ public class LongColumnStatsData implements org.apache.thrift.TBase<LongColumnSt
     this.highValue = other.highValue;
     this.numNulls = other.numNulls;
     this.numDVs = other.numDVs;
+    if (other.isSetBitVectors()) {
+      this.bitVectors = other.bitVectors;
+    }
   }
 
   public LongColumnStatsData deepCopy() {
@@ -182,6 +192,7 @@ public class LongColumnStatsData implements org.apache.thrift.TBase<LongColumnSt
     this.numNulls = 0;
     setNumDVsIsSet(false);
     this.numDVs = 0;
+    this.bitVectors = null;
   }
 
   public long getLowValue() {
@@ -272,6 +283,29 @@ public class LongColumnStatsData implements org.apache.thrift.TBase<LongColumnSt
     __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __NUMDVS_ISSET_ID, value);
   }
 
+  public String getBitVectors() {
+    return this.bitVectors;
+  }
+
+  public void setBitVectors(String bitVectors) {
+    this.bitVectors = bitVectors;
+  }
+
+  public void unsetBitVectors() {
+    this.bitVectors = null;
+  }
+
+  /** Returns true if field bitVectors is set (has been assigned a value) and false otherwise */
+  public boolean isSetBitVectors() {
+    return this.bitVectors != null;
+  }
+
+  public void setBitVectorsIsSet(boolean value) {
+    if (!value) {
+      this.bitVectors = null;
+    }
+  }
+
   public void setFieldValue(_Fields field, Object value) {
     switch (field) {
     case LOW_VALUE:
@@ -306,6 +340,14 @@ public class LongColumnStatsData implements org.apache.thrift.TBase<LongColumnSt
       }
       break;
 
+    case BIT_VECTORS:
+      if (value == null) {
+        unsetBitVectors();
+      } else {
+        setBitVectors((String)value);
+      }
+      break;
+
     }
   }
 
@@ -323,6 +365,9 @@ public class LongColumnStatsData implements org.apache.thrift.TBase<LongColumnSt
     case NUM_DVS:
       return getNumDVs();
 
+    case BIT_VECTORS:
+      return getBitVectors();
+
     }
     throw new IllegalStateException();
   }
@@ -342,6 +387,8 @@ public class LongColumnStatsData implements org.apache.thrift.TBase<LongColumnSt
       return isSetNumNulls();
     case NUM_DVS:
       return isSetNumDVs();
+    case BIT_VECTORS:
+      return isSetBitVectors();
     }
     throw new IllegalStateException();
   }
@@ -395,6 +442,15 @@ public class LongColumnStatsData implements org.apache.thrift.TBase<LongColumnSt
         return false;
     }
 
+    boolean this_present_bitVectors = true && this.isSetBitVectors();
+    boolean that_present_bitVectors = true && that.isSetBitVectors();
+    if (this_present_bitVectors || that_present_bitVectors) {
+      if (!(this_present_bitVectors && that_present_bitVectors))
+        return false;
+      if (!this.bitVectors.equals(that.bitVectors))
+        return false;
+    }
+
     return true;
   }
 
@@ -422,6 +478,11 @@ public class LongColumnStatsData implements org.apache.thrift.TBase<LongColumnSt
     if (present_numDVs)
       list.add(numDVs);
 
+    boolean present_bitVectors = true && (isSetBitVectors());
+    list.add(present_bitVectors);
+    if (present_bitVectors)
+      list.add(bitVectors);
+
     return list.hashCode();
   }
 
@@ -473,6 +534,16 @@ public class LongColumnStatsData implements org.apache.thrift.TBase<LongColumnSt
         return lastComparison;
       }
     }
+    lastComparison = Boolean.valueOf(isSetBitVectors()).compareTo(other.isSetBitVectors());
+    if (lastComparison != 0) {
+      return lastComparison;
+    }
+    if (isSetBitVectors()) {
+      lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.bitVectors, other.bitVectors);
+      if (lastComparison != 0) {
+        return lastComparison;
+      }
+    }
     return 0;
   }
 
@@ -512,6 +583,16 @@ public class LongColumnStatsData implements org.apache.thrift.TBase<LongColumnSt
     sb.append("numDVs:");
     sb.append(this.numDVs);
     first = false;
+    if (isSetBitVectors()) {
+      if (!first) sb.append(", ");
+      sb.append("bitVectors:");
+      if (this.bitVectors == null) {
+        sb.append("null");
+      } else {
+        sb.append(this.bitVectors);
+      }
+      first = false;
+    }
     sb.append(")");
     return sb.toString();
   }
@@ -597,6 +678,14 @@ public class LongColumnStatsData implements org.apache.thrift.TBase<LongColumnSt
               org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
             }
             break;
+          case 5: // BIT_VECTORS
+            if (schemeField.type == org.apache.thrift.protocol.TType.STRING) {
+              struct.bitVectors = iprot.readString();
+              struct.setBitVectorsIsSet(true);
+            } else { 
+              org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
+            }
+            break;
           default:
             org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
         }
@@ -626,6 +715,13 @@ public class LongColumnStatsData implements org.apache.thrift.TBase<LongColumnSt
       oprot.writeFieldBegin(NUM_DVS_FIELD_DESC);
       oprot.writeI64(struct.numDVs);
       oprot.writeFieldEnd();
+      if (struct.bitVectors != null) {
+        if (struct.isSetBitVectors()) {
+          oprot.writeFieldBegin(BIT_VECTORS_FIELD_DESC);
+          oprot.writeString(struct.bitVectors);
+          oprot.writeFieldEnd();
+        }
+      }
       oprot.writeFieldStop();
       oprot.writeStructEnd();
     }
@@ -652,13 +748,19 @@ public class LongColumnStatsData implements org.apache.thrift.TBase<LongColumnSt
       if (struct.isSetHighValue()) {
         optionals.set(1);
       }
-      oprot.writeBitSet(optionals, 2);
+      if (struct.isSetBitVectors()) {
+        optionals.set(2);
+      }
+      oprot.writeBitSet(optionals, 3);
       if (struct.isSetLowValue()) {
         oprot.writeI64(struct.lowValue);
       }
       if (struct.isSetHighValue()) {
         oprot.writeI64(struct.highValue);
       }
+      if (struct.isSetBitVectors()) {
+        oprot.writeString(struct.bitVectors);
+      }
     }
 
     @Override
@@ -668,7 +770,7 @@ public class LongColumnStatsData implements org.apache.thrift.TBase<LongColumnSt
       struct.setNumNullsIsSet(true);
       struct.numDVs = iprot.readI64();
       struct.setNumDVsIsSet(true);
-      BitSet incoming = iprot.readBitSet(2);
+      BitSet incoming = iprot.readBitSet(3);
       if (incoming.get(0)) {
         struct.lowValue = iprot.readI64();
         struct.setLowValueIsSet(true);
@@ -677,6 +779,10 @@ public class LongColumnStatsData implements org.apache.thrift.TBase<LongColumnSt
         struct.highValue = iprot.readI64();
         struct.setHighValueIsSet(true);
       }
+      if (incoming.get(2)) {
+        struct.bitVectors = iprot.readString();
+        struct.setBitVectorsIsSet(true);
+      }
     }
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/StringColumnStatsData.java
----------------------------------------------------------------------
diff --git a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/StringColumnStatsData.java b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/StringColumnStatsData.java
index bd8a922..c9afe87 100644
--- a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/StringColumnStatsData.java
+++ b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/StringColumnStatsData.java
@@ -42,6 +42,7 @@ public class StringColumnStatsData implements org.apache.thrift.TBase<StringColu
   private static final org.apache.thrift.protocol.TField AVG_COL_LEN_FIELD_DESC = new org.apache.thrift.protocol.TField("avgColLen", org.apache.thrift.protocol.TType.DOUBLE, (short)2);
   private static final org.apache.thrift.protocol.TField NUM_NULLS_FIELD_DESC = new org.apache.thrift.protocol.TField("numNulls", org.apache.thrift.protocol.TType.I64, (short)3);
   private static final org.apache.thrift.protocol.TField NUM_DVS_FIELD_DESC = new org.apache.thrift.protocol.TField("numDVs", org.apache.thrift.protocol.TType.I64, (short)4);
+  private static final org.apache.thrift.protocol.TField BIT_VECTORS_FIELD_DESC = new org.apache.thrift.protocol.TField("bitVectors", org.apache.thrift.protocol.TType.STRING, (short)5);
 
   private static final Map<Class<? extends IScheme>, SchemeFactory> schemes = new HashMap<Class<? extends IScheme>, SchemeFactory>();
   static {
@@ -53,13 +54,15 @@ public class StringColumnStatsData implements org.apache.thrift.TBase<StringColu
   private double avgColLen; // required
   private long numNulls; // required
   private long numDVs; // required
+  private String bitVectors; // optional
 
   /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */
   public enum _Fields implements org.apache.thrift.TFieldIdEnum {
     MAX_COL_LEN((short)1, "maxColLen"),
     AVG_COL_LEN((short)2, "avgColLen"),
     NUM_NULLS((short)3, "numNulls"),
-    NUM_DVS((short)4, "numDVs");
+    NUM_DVS((short)4, "numDVs"),
+    BIT_VECTORS((short)5, "bitVectors");
 
     private static final Map<String, _Fields> byName = new HashMap<String, _Fields>();
 
@@ -82,6 +85,8 @@ public class StringColumnStatsData implements org.apache.thrift.TBase<StringColu
           return NUM_NULLS;
         case 4: // NUM_DVS
           return NUM_DVS;
+        case 5: // BIT_VECTORS
+          return BIT_VECTORS;
         default:
           return null;
       }
@@ -127,6 +132,7 @@ public class StringColumnStatsData implements org.apache.thrift.TBase<StringColu
   private static final int __NUMNULLS_ISSET_ID = 2;
   private static final int __NUMDVS_ISSET_ID = 3;
   private byte __isset_bitfield = 0;
+  private static final _Fields optionals[] = {_Fields.BIT_VECTORS};
   public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap;
   static {
     Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class);
@@ -138,6 +144,8 @@ public class StringColumnStatsData implements org.apache.thrift.TBase<StringColu
         new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64)));
     tmpMap.put(_Fields.NUM_DVS, new org.apache.thrift.meta_data.FieldMetaData("numDVs", org.apache.thrift.TFieldRequirementType.REQUIRED, 
         new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64)));
+    tmpMap.put(_Fields.BIT_VECTORS, new org.apache.thrift.meta_data.FieldMetaData("bitVectors", org.apache.thrift.TFieldRequirementType.OPTIONAL, 
+        new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING)));
     metaDataMap = Collections.unmodifiableMap(tmpMap);
     org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(StringColumnStatsData.class, metaDataMap);
   }
@@ -171,6 +179,9 @@ public class StringColumnStatsData implements org.apache.thrift.TBase<StringColu
     this.avgColLen = other.avgColLen;
     this.numNulls = other.numNulls;
     this.numDVs = other.numDVs;
+    if (other.isSetBitVectors()) {
+      this.bitVectors = other.bitVectors;
+    }
   }
 
   public StringColumnStatsData deepCopy() {
@@ -187,6 +198,7 @@ public class StringColumnStatsData implements org.apache.thrift.TBase<StringColu
     this.numNulls = 0;
     setNumDVsIsSet(false);
     this.numDVs = 0;
+    this.bitVectors = null;
   }
 
   public long getMaxColLen() {
@@ -277,6 +289,29 @@ public class StringColumnStatsData implements org.apache.thrift.TBase<StringColu
     __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __NUMDVS_ISSET_ID, value);
   }
 
+  public String getBitVectors() {
+    return this.bitVectors;
+  }
+
+  public void setBitVectors(String bitVectors) {
+    this.bitVectors = bitVectors;
+  }
+
+  public void unsetBitVectors() {
+    this.bitVectors = null;
+  }
+
+  /** Returns true if field bitVectors is set (has been assigned a value) and false otherwise */
+  public boolean isSetBitVectors() {
+    return this.bitVectors != null;
+  }
+
+  public void setBitVectorsIsSet(boolean value) {
+    if (!value) {
+      this.bitVectors = null;
+    }
+  }
+
   public void setFieldValue(_Fields field, Object value) {
     switch (field) {
     case MAX_COL_LEN:
@@ -311,6 +346,14 @@ public class StringColumnStatsData implements org.apache.thrift.TBase<StringColu
       }
       break;
 
+    case BIT_VECTORS:
+      if (value == null) {
+        unsetBitVectors();
+      } else {
+        setBitVectors((String)value);
+      }
+      break;
+
     }
   }
 
@@ -328,6 +371,9 @@ public class StringColumnStatsData implements org.apache.thrift.TBase<StringColu
     case NUM_DVS:
       return getNumDVs();
 
+    case BIT_VECTORS:
+      return getBitVectors();
+
     }
     throw new IllegalStateException();
   }
@@ -347,6 +393,8 @@ public class StringColumnStatsData implements org.apache.thrift.TBase<StringColu
       return isSetNumNulls();
     case NUM_DVS:
       return isSetNumDVs();
+    case BIT_VECTORS:
+      return isSetBitVectors();
     }
     throw new IllegalStateException();
   }
@@ -400,6 +448,15 @@ public class StringColumnStatsData implements org.apache.thrift.TBase<StringColu
         return false;
     }
 
+    boolean this_present_bitVectors = true && this.isSetBitVectors();
+    boolean that_present_bitVectors = true && that.isSetBitVectors();
+    if (this_present_bitVectors || that_present_bitVectors) {
+      if (!(this_present_bitVectors && that_present_bitVectors))
+        return false;
+      if (!this.bitVectors.equals(that.bitVectors))
+        return false;
+    }
+
     return true;
   }
 
@@ -427,6 +484,11 @@ public class StringColumnStatsData implements org.apache.thrift.TBase<StringColu
     if (present_numDVs)
       list.add(numDVs);
 
+    boolean present_bitVectors = true && (isSetBitVectors());
+    list.add(present_bitVectors);
+    if (present_bitVectors)
+      list.add(bitVectors);
+
     return list.hashCode();
   }
 
@@ -478,6 +540,16 @@ public class StringColumnStatsData implements org.apache.thrift.TBase<StringColu
         return lastComparison;
       }
     }
+    lastComparison = Boolean.valueOf(isSetBitVectors()).compareTo(other.isSetBitVectors());
+    if (lastComparison != 0) {
+      return lastComparison;
+    }
+    if (isSetBitVectors()) {
+      lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.bitVectors, other.bitVectors);
+      if (lastComparison != 0) {
+        return lastComparison;
+      }
+    }
     return 0;
   }
 
@@ -513,6 +585,16 @@ public class StringColumnStatsData implements org.apache.thrift.TBase<StringColu
     sb.append("numDVs:");
     sb.append(this.numDVs);
     first = false;
+    if (isSetBitVectors()) {
+      if (!first) sb.append(", ");
+      sb.append("bitVectors:");
+      if (this.bitVectors == null) {
+        sb.append("null");
+      } else {
+        sb.append(this.bitVectors);
+      }
+      first = false;
+    }
     sb.append(")");
     return sb.toString();
   }
@@ -606,6 +688,14 @@ public class StringColumnStatsData implements org.apache.thrift.TBase<StringColu
               org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
             }
             break;
+          case 5: // BIT_VECTORS
+            if (schemeField.type == org.apache.thrift.protocol.TType.STRING) {
+              struct.bitVectors = iprot.readString();
+              struct.setBitVectorsIsSet(true);
+            } else { 
+              org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
+            }
+            break;
           default:
             org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
         }
@@ -631,6 +721,13 @@ public class StringColumnStatsData implements org.apache.thrift.TBase<StringColu
       oprot.writeFieldBegin(NUM_DVS_FIELD_DESC);
       oprot.writeI64(struct.numDVs);
       oprot.writeFieldEnd();
+      if (struct.bitVectors != null) {
+        if (struct.isSetBitVectors()) {
+          oprot.writeFieldBegin(BIT_VECTORS_FIELD_DESC);
+          oprot.writeString(struct.bitVectors);
+          oprot.writeFieldEnd();
+        }
+      }
       oprot.writeFieldStop();
       oprot.writeStructEnd();
     }
@@ -652,6 +749,14 @@ public class StringColumnStatsData implements org.apache.thrift.TBase<StringColu
       oprot.writeDouble(struct.avgColLen);
       oprot.writeI64(struct.numNulls);
       oprot.writeI64(struct.numDVs);
+      BitSet optionals = new BitSet();
+      if (struct.isSetBitVectors()) {
+        optionals.set(0);
+      }
+      oprot.writeBitSet(optionals, 1);
+      if (struct.isSetBitVectors()) {
+        oprot.writeString(struct.bitVectors);
+      }
     }
 
     @Override
@@ -665,6 +770,11 @@ public class StringColumnStatsData implements org.apache.thrift.TBase<StringColu
       struct.setNumNullsIsSet(true);
       struct.numDVs = iprot.readI64();
       struct.setNumDVsIsSet(true);
+      BitSet incoming = iprot.readBitSet(1);
+      if (incoming.get(0)) {
+        struct.bitVectors = iprot.readString();
+        struct.setBitVectorsIsSet(true);
+      }
     }
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/metastore/src/gen/thrift/gen-php/metastore/Types.php
----------------------------------------------------------------------
diff --git a/metastore/src/gen/thrift/gen-php/metastore/Types.php b/metastore/src/gen/thrift/gen-php/metastore/Types.php
index 380e6d0..57d1daf 100644
--- a/metastore/src/gen/thrift/gen-php/metastore/Types.php
+++ b/metastore/src/gen/thrift/gen-php/metastore/Types.php
@@ -5624,6 +5624,10 @@ class BooleanColumnStatsData {
    * @var int
    */
   public $numNulls = null;
+  /**
+   * @var string
+   */
+  public $bitVectors = null;
 
   public function __construct($vals=null) {
     if (!isset(self::$_TSPEC)) {
@@ -5640,6 +5644,10 @@ class BooleanColumnStatsData {
           'var' => 'numNulls',
           'type' => TType::I64,
           ),
+        4 => array(
+          'var' => 'bitVectors',
+          'type' => TType::STRING,
+          ),
         );
     }
     if (is_array($vals)) {
@@ -5652,6 +5660,9 @@ class BooleanColumnStatsData {
       if (isset($vals['numNulls'])) {
         $this->numNulls = $vals['numNulls'];
       }
+      if (isset($vals['bitVectors'])) {
+        $this->bitVectors = $vals['bitVectors'];
+      }
     }
   }
 
@@ -5695,6 +5706,13 @@ class BooleanColumnStatsData {
             $xfer += $input->skip($ftype);
           }
           break;
+        case 4:
+          if ($ftype == TType::STRING) {
+            $xfer += $input->readString($this->bitVectors);
+          } else {
+            $xfer += $input->skip($ftype);
+          }
+          break;
         default:
           $xfer += $input->skip($ftype);
           break;
@@ -5723,6 +5741,11 @@ class BooleanColumnStatsData {
       $xfer += $output->writeI64($this->numNulls);
       $xfer += $output->writeFieldEnd();
     }
+    if ($this->bitVectors !== null) {
+      $xfer += $output->writeFieldBegin('bitVectors', TType::STRING, 4);
+      $xfer += $output->writeString($this->bitVectors);
+      $xfer += $output->writeFieldEnd();
+    }
     $xfer += $output->writeFieldStop();
     $xfer += $output->writeStructEnd();
     return $xfer;
@@ -5749,6 +5772,10 @@ class DoubleColumnStatsData {
    * @var int
    */
   public $numDVs = null;
+  /**
+   * @var string
+   */
+  public $bitVectors = null;
 
   public function __construct($vals=null) {
     if (!isset(self::$_TSPEC)) {
@@ -5769,6 +5796,10 @@ class DoubleColumnStatsData {
           'var' => 'numDVs',
           'type' => TType::I64,
           ),
+        5 => array(
+          'var' => 'bitVectors',
+          'type' => TType::STRING,
+          ),
         );
     }
     if (is_array($vals)) {
@@ -5784,6 +5815,9 @@ class DoubleColumnStatsData {
       if (isset($vals['numDVs'])) {
         $this->numDVs = $vals['numDVs'];
       }
+      if (isset($vals['bitVectors'])) {
+        $this->bitVectors = $vals['bitVectors'];
+      }
     }
   }
 
@@ -5834,6 +5868,13 @@ class DoubleColumnStatsData {
             $xfer += $input->skip($ftype);
           }
           break;
+        case 5:
+          if ($ftype == TType::STRING) {
+            $xfer += $input->readString($this->bitVectors);
+          } else {
+            $xfer += $input->skip($ftype);
+          }
+          break;
         default:
           $xfer += $input->skip($ftype);
           break;
@@ -5867,6 +5908,11 @@ class DoubleColumnStatsData {
       $xfer += $output->writeI64($this->numDVs);
       $xfer += $output->writeFieldEnd();
     }
+    if ($this->bitVectors !== null) {
+      $xfer += $output->writeFieldBegin('bitVectors', TType::STRING, 5);
+      $xfer += $output->writeString($this->bitVectors);
+      $xfer += $output->writeFieldEnd();
+    }
     $xfer += $output->writeFieldStop();
     $xfer += $output->writeStructEnd();
     return $xfer;
@@ -5893,6 +5939,10 @@ class LongColumnStatsData {
    * @var int
    */
   public $numDVs = null;
+  /**
+   * @var string
+   */
+  public $bitVectors = null;
 
   public function __construct($vals=null) {
     if (!isset(self::$_TSPEC)) {
@@ -5913,6 +5963,10 @@ class LongColumnStatsData {
           'var' => 'numDVs',
           'type' => TType::I64,
           ),
+        5 => array(
+          'var' => 'bitVectors',
+          'type' => TType::STRING,
+          ),
         );
     }
     if (is_array($vals)) {
@@ -5928,6 +5982,9 @@ class LongColumnStatsData {
       if (isset($vals['numDVs'])) {
         $this->numDVs = $vals['numDVs'];
       }
+      if (isset($vals['bitVectors'])) {
+        $this->bitVectors = $vals['bitVectors'];
+      }
     }
   }
 
@@ -5978,6 +6035,13 @@ class LongColumnStatsData {
             $xfer += $input->skip($ftype);
           }
           break;
+        case 5:
+          if ($ftype == TType::STRING) {
+            $xfer += $input->readString($this->bitVectors);
+          } else {
+            $xfer += $input->skip($ftype);
+          }
+          break;
         default:
           $xfer += $input->skip($ftype);
           break;
@@ -6011,6 +6075,11 @@ class LongColumnStatsData {
       $xfer += $output->writeI64($this->numDVs);
       $xfer += $output->writeFieldEnd();
     }
+    if ($this->bitVectors !== null) {
+      $xfer += $output->writeFieldBegin('bitVectors', TType::STRING, 5);
+      $xfer += $output->writeString($this->bitVectors);
+      $xfer += $output->writeFieldEnd();
+    }
     $xfer += $output->writeFieldStop();
     $xfer += $output->writeStructEnd();
     return $xfer;
@@ -6037,6 +6106,10 @@ class StringColumnStatsData {
    * @var int
    */
   public $numDVs = null;
+  /**
+   * @var string
+   */
+  public $bitVectors = null;
 
   public function __construct($vals=null) {
     if (!isset(self::$_TSPEC)) {
@@ -6057,6 +6130,10 @@ class StringColumnStatsData {
           'var' => 'numDVs',
           'type' => TType::I64,
           ),
+        5 => array(
+          'var' => 'bitVectors',
+          'type' => TType::STRING,
+          ),
         );
     }
     if (is_array($vals)) {
@@ -6072,6 +6149,9 @@ class StringColumnStatsData {
       if (isset($vals['numDVs'])) {
         $this->numDVs = $vals['numDVs'];
       }
+      if (isset($vals['bitVectors'])) {
+        $this->bitVectors = $vals['bitVectors'];
+      }
     }
   }
 
@@ -6122,6 +6202,13 @@ class StringColumnStatsData {
             $xfer += $input->skip($ftype);
           }
           break;
+        case 5:
+          if ($ftype == TType::STRING) {
+            $xfer += $input->readString($this->bitVectors);
+          } else {
+            $xfer += $input->skip($ftype);
+          }
+          break;
         default:
           $xfer += $input->skip($ftype);
           break;
@@ -6155,6 +6242,11 @@ class StringColumnStatsData {
       $xfer += $output->writeI64($this->numDVs);
       $xfer += $output->writeFieldEnd();
     }
+    if ($this->bitVectors !== null) {
+      $xfer += $output->writeFieldBegin('bitVectors', TType::STRING, 5);
+      $xfer += $output->writeString($this->bitVectors);
+      $xfer += $output->writeFieldEnd();
+    }
     $xfer += $output->writeFieldStop();
     $xfer += $output->writeStructEnd();
     return $xfer;
@@ -6177,6 +6269,10 @@ class BinaryColumnStatsData {
    * @var int
    */
   public $numNulls = null;
+  /**
+   * @var string
+   */
+  public $bitVectors = null;
 
   public function __construct($vals=null) {
     if (!isset(self::$_TSPEC)) {
@@ -6193,6 +6289,10 @@ class BinaryColumnStatsData {
           'var' => 'numNulls',
           'type' => TType::I64,
           ),
+        4 => array(
+          'var' => 'bitVectors',
+          'type' => TType::STRING,
+          ),
         );
     }
     if (is_array($vals)) {
@@ -6205,6 +6305,9 @@ class BinaryColumnStatsData {
       if (isset($vals['numNulls'])) {
         $this->numNulls = $vals['numNulls'];
       }
+      if (isset($vals['bitVectors'])) {
+        $this->bitVectors = $vals['bitVectors'];
+      }
     }
   }
 
@@ -6248,6 +6351,13 @@ class BinaryColumnStatsData {
             $xfer += $input->skip($ftype);
           }
           break;
+        case 4:
+          if ($ftype == TType::STRING) {
+            $xfer += $input->readString($this->bitVectors);
+          } else {
+            $xfer += $input->skip($ftype);
+          }
+          break;
         default:
           $xfer += $input->skip($ftype);
           break;
@@ -6276,6 +6386,11 @@ class BinaryColumnStatsData {
       $xfer += $output->writeI64($this->numNulls);
       $xfer += $output->writeFieldEnd();
     }
+    if ($this->bitVectors !== null) {
+      $xfer += $output->writeFieldBegin('bitVectors', TType::STRING, 4);
+      $xfer += $output->writeString($this->bitVectors);
+      $xfer += $output->writeFieldEnd();
+    }
     $xfer += $output->writeFieldStop();
     $xfer += $output->writeStructEnd();
     return $xfer;
@@ -6400,6 +6515,10 @@ class DecimalColumnStatsData {
    * @var int
    */
   public $numDVs = null;
+  /**
+   * @var string
+   */
+  public $bitVectors = null;
 
   public function __construct($vals=null) {
     if (!isset(self::$_TSPEC)) {
@@ -6422,6 +6541,10 @@ class DecimalColumnStatsData {
           'var' => 'numDVs',
           'type' => TType::I64,
           ),
+        5 => array(
+          'var' => 'bitVectors',
+          'type' => TType::STRING,
+          ),
         );
     }
     if (is_array($vals)) {
@@ -6437,6 +6560,9 @@ class DecimalColumnStatsData {
       if (isset($vals['numDVs'])) {
         $this->numDVs = $vals['numDVs'];
       }
+      if (isset($vals['bitVectors'])) {
+        $this->bitVectors = $vals['bitVectors'];
+      }
     }
   }
 
@@ -6489,6 +6615,13 @@ class DecimalColumnStatsData {
             $xfer += $input->skip($ftype);
           }
           break;
+        case 5:
+          if ($ftype == TType::STRING) {
+            $xfer += $input->readString($this->bitVectors);
+          } else {
+            $xfer += $input->skip($ftype);
+          }
+          break;
         default:
           $xfer += $input->skip($ftype);
           break;
@@ -6528,6 +6661,11 @@ class DecimalColumnStatsData {
       $xfer += $output->writeI64($this->numDVs);
       $xfer += $output->writeFieldEnd();
     }
+    if ($this->bitVectors !== null) {
+      $xfer += $output->writeFieldBegin('bitVectors', TType::STRING, 5);
+      $xfer += $output->writeString($this->bitVectors);
+      $xfer += $output->writeFieldEnd();
+    }
     $xfer += $output->writeFieldStop();
     $xfer += $output->writeStructEnd();
     return $xfer;
@@ -6629,6 +6767,10 @@ class DateColumnStatsData {
    * @var int
    */
   public $numDVs = null;
+  /**
+   * @var string
+   */
+  public $bitVectors = null;
 
   public function __construct($vals=null) {
     if (!isset(self::$_TSPEC)) {
@@ -6651,6 +6793,10 @@ class DateColumnStatsData {
           'var' => 'numDVs',
           'type' => TType::I64,
           ),
+        5 => array(
+          'var' => 'bitVectors',
+          'type' => TType::STRING,
+          ),
         );
     }
     if (is_array($vals)) {
@@ -6666,6 +6812,9 @@ class DateColumnStatsData {
       if (isset($vals['numDVs'])) {
         $this->numDVs = $vals['numDVs'];
       }
+      if (isset($vals['bitVectors'])) {
+        $this->bitVectors = $vals['bitVectors'];
+      }
     }
   }
 
@@ -6718,6 +6867,13 @@ class DateColumnStatsData {
             $xfer += $input->skip($ftype);
           }
           break;
+        case 5:
+          if ($ftype == TType::STRING) {
+            $xfer += $input->readString($this->bitVectors);
+          } else {
+            $xfer += $input->skip($ftype);
+          }
+          break;
         default:
           $xfer += $input->skip($ftype);
           break;
@@ -6757,6 +6913,11 @@ class DateColumnStatsData {
       $xfer += $output->writeI64($this->numDVs);
       $xfer += $output->writeFieldEnd();
     }
+    if ($this->bitVectors !== null) {
+      $xfer += $output->writeFieldBegin('bitVectors', TType::STRING, 5);
+      $xfer += $output->writeString($this->bitVectors);
+      $xfer += $output->writeFieldEnd();
+    }
     $xfer += $output->writeFieldStop();
     $xfer += $output->writeStructEnd();
     return $xfer;