You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by px...@apache.org on 2016/01/29 06:25:54 UTC
[1/4] hive git commit: HIVE-12763: Use bit vector to track NDV
(Pengcheng Xiong, reviewed by Laljo John Pullokkaran and Alan Gates)
Repository: hive
Updated Branches:
refs/heads/master 0c7f2d66b -> 7b2f6703f
http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseStoreBitVector.java
----------------------------------------------------------------------
diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseStoreBitVector.java b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseStoreBitVector.java
new file mode 100644
index 0000000..b0d7662
--- /dev/null
+++ b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseStoreBitVector.java
@@ -0,0 +1,634 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hadoop.hive.metastore.hbase;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.client.HTableInterface;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
+import org.apache.hadoop.hive.metastore.api.Database;
+import org.apache.hadoop.hive.metastore.api.Decimal;
+import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.Function;
+import org.apache.hadoop.hive.metastore.api.FunctionType;
+import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
+import org.apache.hadoop.hive.metastore.api.Order;
+import org.apache.hadoop.hive.metastore.api.Partition;
+import org.apache.hadoop.hive.metastore.api.PrincipalType;
+import org.apache.hadoop.hive.metastore.api.ResourceType;
+import org.apache.hadoop.hive.metastore.api.ResourceUri;
+import org.apache.hadoop.hive.metastore.api.Role;
+import org.apache.hadoop.hive.metastore.api.SerDeInfo;
+import org.apache.hadoop.hive.metastore.api.SkewedInfo;
+import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
+import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.ExpectedException;
+import org.mockito.Mock;
+import org.mockito.MockitoAnnotations;
+
+import java.io.IOException;
+import java.security.MessageDigest;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+/**
+ *
+ */
+public class TestHBaseStoreBitVector {
+ private static final Logger LOG = LoggerFactory.getLogger(TestHBaseStoreBitVector.class.getName());
+ static Map<String, String> emptyParameters = new HashMap<String, String>();
+ // Table with NUM_PART_KEYS partitioning keys and NUM_PARTITIONS values per key
+ static final int NUM_PART_KEYS = 1;
+ static final int NUM_PARTITIONS = 5;
+ static final String DB = "db";
+ static final String TBL = "tbl";
+ static final String COL = "col";
+ static final String PART_KEY_PREFIX = "part";
+ static final String PART_VAL_PREFIX = "val";
+ static final String PART_KV_SEPARATOR = "=";
+ static final List<String> PART_KEYS = new ArrayList<String>();
+ static final List<String> PART_VALS = new ArrayList<String>();
+ // Initialize mock partitions
+ static {
+ for (int i = 1; i <= NUM_PART_KEYS; i++) {
+ PART_KEYS.add(PART_KEY_PREFIX + i);
+ }
+ for (int i = 1; i <= NUM_PARTITIONS; i++) {
+ PART_VALS.add(PART_VAL_PREFIX + i);
+ }
+ }
+ static final long DEFAULT_TIME = System.currentTimeMillis();
+ static final String PART_KEY = "part";
+ static final String LONG_COL = "longCol";
+ static final String LONG_TYPE = "long";
+ static final String INT_TYPE = "int";
+ static final String INT_VAL = "1234";
+ static final String DOUBLE_COL = "doubleCol";
+ static final String DOUBLE_TYPE = "double";
+ static final String DOUBLE_VAL = "3.1415";
+ static final String STRING_COL = "stringCol";
+ static final String STRING_TYPE = "string";
+ static final String STRING_VAL = "stringval";
+ static final String DECIMAL_COL = "decimalCol";
+ static final String DECIMAL_TYPE = "decimal(5,3)";
+ static final String DECIMAL_VAL = "12.123";
+ static List<ColumnStatisticsObj> longColStatsObjs = new ArrayList<ColumnStatisticsObj>(
+ NUM_PARTITIONS);
+ static List<ColumnStatisticsObj> doubleColStatsObjs = new ArrayList<ColumnStatisticsObj>(
+ NUM_PARTITIONS);
+ static List<ColumnStatisticsObj> stringColStatsObjs = new ArrayList<ColumnStatisticsObj>(
+ NUM_PARTITIONS);
+ static List<ColumnStatisticsObj> decimalColStatsObjs = new ArrayList<ColumnStatisticsObj>(
+ NUM_PARTITIONS);
+
+ @Rule public ExpectedException thrown = ExpectedException.none();
+ @Mock HTableInterface htable;
+ SortedMap<String, Cell> rows = new TreeMap<>();
+ HBaseStore store;
+
+
+ @BeforeClass
+ public static void beforeTest() {
+ // All data intitializations
+ populateMockStats();
+ }
+
+ private static void populateMockStats() {
+ ColumnStatisticsObj statsObj;
+ // Add NUM_PARTITIONS ColumnStatisticsObj of each type
+ // For aggregate stats test, we'll treat each ColumnStatisticsObj as stats for 1 partition
+ // For the rest, we'll just pick the 1st ColumnStatisticsObj from this list and use it
+ for (int i = 0; i < NUM_PARTITIONS; i++) {
+ statsObj = mockLongStats(i);
+ longColStatsObjs.add(statsObj);
+ statsObj = mockDoubleStats(i);
+ doubleColStatsObjs.add(statsObj);
+ statsObj = mockStringStats(i);
+ stringColStatsObjs.add(statsObj);
+ statsObj = mockDecimalStats(i);
+ decimalColStatsObjs.add(statsObj);
+ }
+ }
+
+ private static ColumnStatisticsObj mockLongStats(int i) {
+ long high = 120938479124L + 100*i;
+ long low = -12341243213412124L - 50*i;
+ long nulls = 23 + i;
+ long dVs = 213L + 10*i;
+ String bitVectors = "{0, 1, 2, 3, 4, 5, 6, 7, 8}{1, 2, 3, 4, 5, 6, 7, 8}";
+ ColumnStatisticsObj colStatsObj = new ColumnStatisticsObj();
+ colStatsObj.setColName(LONG_COL);
+ colStatsObj.setColType(LONG_TYPE);
+ ColumnStatisticsData data = new ColumnStatisticsData();
+ LongColumnStatsData longData = new LongColumnStatsData();
+ longData.setHighValue(high);
+ longData.setLowValue(low);
+ longData.setNumNulls(nulls);
+ longData.setNumDVs(dVs);
+ longData.setBitVectors(bitVectors);
+ data.setLongStats(longData);
+ colStatsObj.setStatsData(data);
+ return colStatsObj;
+ }
+
+ private static ColumnStatisticsObj mockDoubleStats(int i) {
+ double high = 123423.23423 + 100*i;
+ double low = 0.00001234233 - 50*i;
+ long nulls = 92 + i;
+ long dVs = 1234123421L + 10*i;
+ String bitVectors = "{0, 1, 2, 3, 4, 5, 6, 7, 8}{0, 2, 3, 4, 5, 6, 7, 8}";
+ ColumnStatisticsObj colStatsObj = new ColumnStatisticsObj();
+ colStatsObj.setColName(DOUBLE_COL);
+ colStatsObj.setColType(DOUBLE_TYPE);
+ ColumnStatisticsData data = new ColumnStatisticsData();
+ DoubleColumnStatsData doubleData = new DoubleColumnStatsData();
+ doubleData.setHighValue(high);
+ doubleData.setLowValue(low);
+ doubleData.setNumNulls(nulls);
+ doubleData.setNumDVs(dVs);
+ doubleData.setBitVectors(bitVectors);
+ data.setDoubleStats(doubleData);
+ colStatsObj.setStatsData(data);
+ return colStatsObj;
+ }
+
+ private static ColumnStatisticsObj mockStringStats(int i) {
+ long maxLen = 1234 + 10*i;
+ double avgLen = 32.3 + i;
+ long nulls = 987 + 10*i;
+ long dVs = 906 + i;
+ String bitVectors = "{0, 1, 2, 3, 4, 5, 6, 7, 8}{0, 1, 3, 4, 5, 6, 7, 8}";
+ ColumnStatisticsObj colStatsObj = new ColumnStatisticsObj();
+ colStatsObj.setColName(STRING_COL);
+ colStatsObj.setColType(STRING_TYPE);
+ ColumnStatisticsData data = new ColumnStatisticsData();
+ StringColumnStatsData stringData = new StringColumnStatsData();
+ stringData.setMaxColLen(maxLen);
+ stringData.setAvgColLen(avgLen);
+ stringData.setNumNulls(nulls);
+ stringData.setNumDVs(dVs);
+ stringData.setBitVectors(bitVectors);
+ data.setStringStats(stringData);
+ colStatsObj.setStatsData(data);
+ return colStatsObj;
+ }
+
+ private static ColumnStatisticsObj mockDecimalStats(int i) {
+ Decimal high = new Decimal();
+ high.setScale((short)3);
+ String strHigh = String.valueOf(3876 + 100*i);
+ high.setUnscaled(strHigh.getBytes());
+ Decimal low = new Decimal();
+ low.setScale((short)3);
+ String strLow = String.valueOf(38 + i);
+ low.setUnscaled(strLow.getBytes());
+ long nulls = 13 + i;
+ long dVs = 923947293L + 100*i;
+ String bitVectors = "{0, 1, 2, 3, 4, 5, 6, 7, 8}{0, 1, 2, 4, 5, 6, 7, 8}";
+ ColumnStatisticsObj colStatsObj = new ColumnStatisticsObj();
+ colStatsObj.setColName(DECIMAL_COL);
+ colStatsObj.setColType(DECIMAL_TYPE);
+ ColumnStatisticsData data = new ColumnStatisticsData();
+ DecimalColumnStatsData decimalData = new DecimalColumnStatsData();
+ decimalData.setHighValue(high);
+ decimalData.setLowValue(low);
+ decimalData.setNumNulls(nulls);
+ decimalData.setNumDVs(dVs);
+ decimalData.setBitVectors(bitVectors);
+ data.setDecimalStats(decimalData);
+ colStatsObj.setStatsData(data);
+ return colStatsObj;
+ }
+
+ @AfterClass
+ public static void afterTest() {
+ }
+
+
+ @Before
+ public void init() throws IOException {
+ MockitoAnnotations.initMocks(this);
+ HiveConf conf = new HiveConf();
+ conf.setBoolean(HBaseReadWrite.NO_CACHE_CONF, true);
+ store = MockUtils.init(conf, htable, rows);
+ }
+
+ @Test
+ public void longTableStatistics() throws Exception {
+ createMockTable(LONG_COL, LONG_TYPE);
+ ColumnStatistics stats = new ColumnStatistics();
+ // Get a default ColumnStatisticsDesc for table level stats
+ ColumnStatisticsDesc desc = getMockTblColStatsDesc();
+ stats.setStatsDesc(desc);
+ // Get one of the pre-created ColumnStatisticsObj
+ ColumnStatisticsObj obj = longColStatsObjs.get(0);
+ LongColumnStatsData longData = obj.getStatsData().getLongStats();
+ // Add to DB
+ stats.addToStatsObj(obj);
+ store.updateTableColumnStatistics(stats);
+ // Get from DB
+ ColumnStatistics statsFromDB = store.getTableColumnStatistics(DB, TBL, Arrays.asList(LONG_COL));
+ // Compare ColumnStatisticsDesc
+ Assert.assertEquals(desc.getLastAnalyzed(), statsFromDB.getStatsDesc().getLastAnalyzed());
+ Assert.assertEquals(DB, statsFromDB.getStatsDesc().getDbName());
+ Assert.assertEquals(TBL, statsFromDB.getStatsDesc().getTableName());
+ Assert.assertTrue(statsFromDB.getStatsDesc().isIsTblLevel());
+ // Compare ColumnStatisticsObj
+ Assert.assertEquals(1, statsFromDB.getStatsObjSize());
+ ColumnStatisticsObj objFromDB = statsFromDB.getStatsObj().get(0);
+ ColumnStatisticsData dataFromDB = objFromDB.getStatsData();
+ // Compare ColumnStatisticsData
+ Assert.assertEquals(ColumnStatisticsData._Fields.LONG_STATS, dataFromDB.getSetField());
+ // Compare LongColumnStatsData
+ LongColumnStatsData longDataFromDB = dataFromDB.getLongStats();
+ Assert.assertEquals(longData.getHighValue(), longDataFromDB.getHighValue());
+ Assert.assertEquals(longData.getLowValue(), longDataFromDB.getLowValue());
+ Assert.assertEquals(longData.getNumNulls(), longDataFromDB.getNumNulls());
+ Assert.assertEquals(longData.getNumDVs(), longDataFromDB.getNumDVs());
+ Assert.assertEquals(longData.getBitVectors(), longDataFromDB.getBitVectors());
+ }
+
+ @Test
+ public void doubleTableStatistics() throws Exception {
+ createMockTable(DOUBLE_COL, DOUBLE_TYPE);
+ ColumnStatistics stats = new ColumnStatistics();
+ // Get a default ColumnStatisticsDesc for table level stats
+ ColumnStatisticsDesc desc = getMockTblColStatsDesc();
+ stats.setStatsDesc(desc);
+ // Get one of the pre-created ColumnStatisticsObj
+ ColumnStatisticsObj obj = doubleColStatsObjs.get(0);
+ DoubleColumnStatsData doubleData = obj.getStatsData().getDoubleStats();
+ // Add to DB
+ stats.addToStatsObj(obj);
+ store.updateTableColumnStatistics(stats);
+ // Get from DB
+ ColumnStatistics statsFromDB = store.getTableColumnStatistics(DB, TBL, Arrays.asList(DOUBLE_COL));
+ // Compare ColumnStatisticsDesc
+ Assert.assertEquals(desc.getLastAnalyzed(), statsFromDB.getStatsDesc().getLastAnalyzed());
+ Assert.assertEquals(DB, statsFromDB.getStatsDesc().getDbName());
+ Assert.assertEquals(TBL, statsFromDB.getStatsDesc().getTableName());
+ Assert.assertTrue(statsFromDB.getStatsDesc().isIsTblLevel());
+ // Compare ColumnStatisticsObj
+ Assert.assertEquals(1, statsFromDB.getStatsObjSize());
+ ColumnStatisticsObj objFromDB = statsFromDB.getStatsObj().get(0);
+ ColumnStatisticsData dataFromDB = objFromDB.getStatsData();
+ // Compare ColumnStatisticsData
+ Assert.assertEquals(ColumnStatisticsData._Fields.DOUBLE_STATS, dataFromDB.getSetField());
+ // Compare DoubleColumnStatsData
+ DoubleColumnStatsData doubleDataFromDB = dataFromDB.getDoubleStats();
+ Assert.assertEquals(doubleData.getHighValue(), doubleDataFromDB.getHighValue(), 0.01);
+ Assert.assertEquals(doubleData.getLowValue(), doubleDataFromDB.getLowValue(), 0.01);
+ Assert.assertEquals(doubleData.getNumNulls(), doubleDataFromDB.getNumNulls());
+ Assert.assertEquals(doubleData.getNumDVs(), doubleDataFromDB.getNumDVs());
+ Assert.assertEquals(doubleData.getBitVectors(), doubleDataFromDB.getBitVectors());
+ }
+
+ @Test
+ public void stringTableStatistics() throws Exception {
+ createMockTable(STRING_COL, STRING_TYPE);
+ ColumnStatistics stats = new ColumnStatistics();
+ // Get a default ColumnStatisticsDesc for table level stats
+ ColumnStatisticsDesc desc = getMockTblColStatsDesc();
+ stats.setStatsDesc(desc);
+ // Get one of the pre-created ColumnStatisticsObj
+ ColumnStatisticsObj obj = stringColStatsObjs.get(0);
+ StringColumnStatsData stringData = obj.getStatsData().getStringStats();
+ // Add to DB
+ stats.addToStatsObj(obj);
+ store.updateTableColumnStatistics(stats);
+ // Get from DB
+ ColumnStatistics statsFromDB = store.getTableColumnStatistics(DB, TBL, Arrays.asList(STRING_COL));
+ // Compare ColumnStatisticsDesc
+ Assert.assertEquals(desc.getLastAnalyzed(), statsFromDB.getStatsDesc().getLastAnalyzed());
+ Assert.assertEquals(DB, statsFromDB.getStatsDesc().getDbName());
+ Assert.assertEquals(TBL, statsFromDB.getStatsDesc().getTableName());
+ Assert.assertTrue(statsFromDB.getStatsDesc().isIsTblLevel());
+ // Compare ColumnStatisticsObj
+ Assert.assertEquals(1, statsFromDB.getStatsObjSize());
+ ColumnStatisticsObj objFromDB = statsFromDB.getStatsObj().get(0);
+ ColumnStatisticsData dataFromDB = objFromDB.getStatsData();
+ // Compare ColumnStatisticsData
+ Assert.assertEquals(ColumnStatisticsData._Fields.STRING_STATS, dataFromDB.getSetField());
+ // Compare StringColumnStatsData
+ StringColumnStatsData stringDataFromDB = dataFromDB.getStringStats();
+ Assert.assertEquals(stringData.getMaxColLen(), stringDataFromDB.getMaxColLen());
+ Assert.assertEquals(stringData.getAvgColLen(), stringDataFromDB.getAvgColLen(), 0.01);
+ Assert.assertEquals(stringData.getNumNulls(), stringDataFromDB.getNumNulls());
+ Assert.assertEquals(stringData.getNumDVs(), stringDataFromDB.getNumDVs());
+ Assert.assertEquals(stringData.getBitVectors(), stringDataFromDB.getBitVectors());
+ }
+
+ @Test
+ public void decimalTableStatistics() throws Exception {
+ createMockTable(DECIMAL_COL, DECIMAL_TYPE);
+ ColumnStatistics stats = new ColumnStatistics();
+ // Get a default ColumnStatisticsDesc for table level stats
+ ColumnStatisticsDesc desc = getMockTblColStatsDesc();
+ stats.setStatsDesc(desc);
+ // Get one of the pre-created ColumnStatisticsObj
+ ColumnStatisticsObj obj = decimalColStatsObjs.get(0);
+ DecimalColumnStatsData decimalData = obj.getStatsData().getDecimalStats();
+ // Add to DB
+ stats.addToStatsObj(obj);
+ store.updateTableColumnStatistics(stats);
+ // Get from DB
+ ColumnStatistics statsFromDB = store.getTableColumnStatistics(DB, TBL, Arrays.asList(DECIMAL_COL));
+ // Compare ColumnStatisticsDesc
+ Assert.assertEquals(desc.getLastAnalyzed(), statsFromDB.getStatsDesc().getLastAnalyzed());
+ Assert.assertEquals(DB, statsFromDB.getStatsDesc().getDbName());
+ Assert.assertEquals(TBL, statsFromDB.getStatsDesc().getTableName());
+ Assert.assertTrue(statsFromDB.getStatsDesc().isIsTblLevel());
+ // Compare ColumnStatisticsObj
+ Assert.assertEquals(1, statsFromDB.getStatsObjSize());
+ ColumnStatisticsObj objFromDB = statsFromDB.getStatsObj().get(0);
+ ColumnStatisticsData dataFromDB = objFromDB.getStatsData();
+ // Compare ColumnStatisticsData
+ Assert.assertEquals(ColumnStatisticsData._Fields.DECIMAL_STATS, dataFromDB.getSetField());
+ // Compare DecimalColumnStatsData
+ DecimalColumnStatsData decimalDataFromDB = dataFromDB.getDecimalStats();
+ Assert.assertEquals(decimalData.getHighValue(), decimalDataFromDB.getHighValue());
+ Assert.assertEquals(decimalData.getLowValue(), decimalDataFromDB.getLowValue());
+ Assert.assertEquals(decimalData.getNumNulls(), decimalDataFromDB.getNumNulls());
+ Assert.assertEquals(decimalData.getNumDVs(), decimalDataFromDB.getNumDVs());
+ Assert.assertEquals(decimalData.getBitVectors(), decimalDataFromDB.getBitVectors());
+ }
+
+ @Test
+ public void longPartitionStatistics() throws Exception {
+ createMockTableAndPartition(INT_TYPE, INT_VAL);
+ // Add partition stats for: LONG_COL and partition: {PART_KEY, INT_VAL} to DB
+ // Because of the way our mock implementation works we actually need to not create the table
+ // before we set statistics on it.
+ ColumnStatistics stats = new ColumnStatistics();
+ // Get a default ColumnStatisticsDesc for partition level stats
+ ColumnStatisticsDesc desc = getMockPartColStatsDesc(PART_KEY, INT_VAL);
+ stats.setStatsDesc(desc);
+ // Get one of the pre-created ColumnStatisticsObj
+ ColumnStatisticsObj obj = longColStatsObjs.get(0);
+ LongColumnStatsData longData = obj.getStatsData().getLongStats();
+ // Add to DB
+ stats.addToStatsObj(obj);
+ List<String> parVals = new ArrayList<String>();
+ parVals.add(INT_VAL);
+ store.updatePartitionColumnStatistics(stats, parVals);
+ // Get from DB
+ List<String> partNames = new ArrayList<String>();
+ partNames.add(desc.getPartName());
+ List<String> colNames = new ArrayList<String>();
+ colNames.add(obj.getColName());
+ List<ColumnStatistics> statsFromDB = store.getPartitionColumnStatistics(DB, TBL, partNames, colNames);
+ // Compare ColumnStatisticsDesc
+ Assert.assertEquals(1, statsFromDB.size());
+ Assert.assertEquals(desc.getLastAnalyzed(), statsFromDB.get(0).getStatsDesc().getLastAnalyzed());
+ Assert.assertEquals(DB, statsFromDB.get(0).getStatsDesc().getDbName());
+ Assert.assertEquals(TBL, statsFromDB.get(0).getStatsDesc().getTableName());
+ Assert.assertFalse(statsFromDB.get(0).getStatsDesc().isIsTblLevel());
+ // Compare ColumnStatisticsObj
+ Assert.assertEquals(1, statsFromDB.get(0).getStatsObjSize());
+ ColumnStatisticsObj objFromDB = statsFromDB.get(0).getStatsObj().get(0);
+ ColumnStatisticsData dataFromDB = objFromDB.getStatsData();
+ // Compare ColumnStatisticsData
+ Assert.assertEquals(ColumnStatisticsData._Fields.LONG_STATS, dataFromDB.getSetField());
+ // Compare LongColumnStatsData
+ LongColumnStatsData longDataFromDB = dataFromDB.getLongStats();
+ Assert.assertEquals(longData.getHighValue(), longDataFromDB.getHighValue());
+ Assert.assertEquals(longData.getLowValue(), longDataFromDB.getLowValue());
+ Assert.assertEquals(longData.getNumNulls(), longDataFromDB.getNumNulls());
+ Assert.assertEquals(longData.getNumDVs(), longDataFromDB.getNumDVs());
+ Assert.assertEquals(longData.getBitVectors(), longDataFromDB.getBitVectors());
+ }
+
+ @Test
+ public void doublePartitionStatistics() throws Exception {
+ createMockTableAndPartition(DOUBLE_TYPE, DOUBLE_VAL);
+ // Add partition stats for: DOUBLE_COL and partition: {PART_KEY, DOUBLE_VAL} to DB
+ // Because of the way our mock implementation works we actually need to not create the table
+ // before we set statistics on it.
+ ColumnStatistics stats = new ColumnStatistics();
+ // Get a default ColumnStatisticsDesc for partition level stats
+ ColumnStatisticsDesc desc = getMockPartColStatsDesc(PART_KEY, DOUBLE_VAL);
+ stats.setStatsDesc(desc);
+ // Get one of the pre-created ColumnStatisticsObj
+ ColumnStatisticsObj obj = doubleColStatsObjs.get(0);
+ DoubleColumnStatsData doubleData = obj.getStatsData().getDoubleStats();
+ // Add to DB
+ stats.addToStatsObj(obj);
+ List<String> parVals = new ArrayList<String>();
+ parVals.add(DOUBLE_VAL);
+ store.updatePartitionColumnStatistics(stats, parVals);
+ // Get from DB
+ List<String> partNames = new ArrayList<String>();
+ partNames.add(desc.getPartName());
+ List<String> colNames = new ArrayList<String>();
+ colNames.add(obj.getColName());
+ List<ColumnStatistics> statsFromDB = store.getPartitionColumnStatistics(DB, TBL, partNames, colNames);
+ // Compare ColumnStatisticsDesc
+ Assert.assertEquals(1, statsFromDB.size());
+ Assert.assertEquals(desc.getLastAnalyzed(), statsFromDB.get(0).getStatsDesc().getLastAnalyzed());
+ Assert.assertEquals(DB, statsFromDB.get(0).getStatsDesc().getDbName());
+ Assert.assertEquals(TBL, statsFromDB.get(0).getStatsDesc().getTableName());
+ Assert.assertFalse(statsFromDB.get(0).getStatsDesc().isIsTblLevel());
+ // Compare ColumnStatisticsObj
+ Assert.assertEquals(1, statsFromDB.get(0).getStatsObjSize());
+ ColumnStatisticsObj objFromDB = statsFromDB.get(0).getStatsObj().get(0);
+ ColumnStatisticsData dataFromDB = objFromDB.getStatsData();
+ // Compare ColumnStatisticsData
+ Assert.assertEquals(ColumnStatisticsData._Fields.DOUBLE_STATS, dataFromDB.getSetField());
+ // Compare DoubleColumnStatsData
+ DoubleColumnStatsData doubleDataFromDB = dataFromDB.getDoubleStats();
+ Assert.assertEquals(doubleData.getHighValue(), doubleDataFromDB.getHighValue(), 0.01);
+ Assert.assertEquals(doubleData.getLowValue(), doubleDataFromDB.getLowValue(), 0.01);
+ Assert.assertEquals(doubleData.getNumNulls(), doubleDataFromDB.getNumNulls());
+ Assert.assertEquals(doubleData.getNumDVs(), doubleDataFromDB.getNumDVs());
+ Assert.assertEquals(doubleData.getBitVectors(), doubleDataFromDB.getBitVectors());
+ }
+
+ @Test
+ public void stringPartitionStatistics() throws Exception {
+ createMockTableAndPartition(STRING_TYPE, STRING_VAL);
+ // Add partition stats for: STRING_COL and partition: {PART_KEY, STRING_VAL} to DB
+ // Because of the way our mock implementation works we actually need to not create the table
+ // before we set statistics on it.
+ ColumnStatistics stats = new ColumnStatistics();
+ // Get a default ColumnStatisticsDesc for partition level stats
+ ColumnStatisticsDesc desc = getMockPartColStatsDesc(PART_KEY, STRING_VAL);
+ stats.setStatsDesc(desc);
+ // Get one of the pre-created ColumnStatisticsObj
+ ColumnStatisticsObj obj = stringColStatsObjs.get(0);
+ StringColumnStatsData stringData = obj.getStatsData().getStringStats();
+ // Add to DB
+ stats.addToStatsObj(obj);
+ List<String> parVals = new ArrayList<String>();
+ parVals.add(STRING_VAL);
+ store.updatePartitionColumnStatistics(stats, parVals);
+ // Get from DB
+ List<String> partNames = new ArrayList<String>();
+ partNames.add(desc.getPartName());
+ List<String> colNames = new ArrayList<String>();
+ colNames.add(obj.getColName());
+ List<ColumnStatistics> statsFromDB = store.getPartitionColumnStatistics(DB, TBL, partNames, colNames);
+ // Compare ColumnStatisticsDesc
+ Assert.assertEquals(1, statsFromDB.size());
+ Assert.assertEquals(desc.getLastAnalyzed(), statsFromDB.get(0).getStatsDesc().getLastAnalyzed());
+ Assert.assertEquals(DB, statsFromDB.get(0).getStatsDesc().getDbName());
+ Assert.assertEquals(TBL, statsFromDB.get(0).getStatsDesc().getTableName());
+ Assert.assertFalse(statsFromDB.get(0).getStatsDesc().isIsTblLevel());
+ // Compare ColumnStatisticsObj
+ Assert.assertEquals(1, statsFromDB.get(0).getStatsObjSize());
+ ColumnStatisticsObj objFromDB = statsFromDB.get(0).getStatsObj().get(0);
+ ColumnStatisticsData dataFromDB = objFromDB.getStatsData();
+ // Compare ColumnStatisticsData
+ Assert.assertEquals(ColumnStatisticsData._Fields.STRING_STATS, dataFromDB.getSetField());
+ // Compare StringColumnStatsData
+ StringColumnStatsData stringDataFromDB = dataFromDB.getStringStats();
+ Assert.assertEquals(stringData.getMaxColLen(), stringDataFromDB.getMaxColLen());
+ Assert.assertEquals(stringData.getAvgColLen(), stringDataFromDB.getAvgColLen(), 0.01);
+ Assert.assertEquals(stringData.getNumNulls(), stringDataFromDB.getNumNulls());
+ Assert.assertEquals(stringData.getNumDVs(), stringDataFromDB.getNumDVs());
+ Assert.assertEquals(stringData.getBitVectors(), stringDataFromDB.getBitVectors());
+ }
+
+ @Test
+ public void decimalPartitionStatistics() throws Exception {
+ createMockTableAndPartition(DECIMAL_TYPE, DECIMAL_VAL);
+ // Add partition stats for: DECIMAL_COL and partition: {PART_KEY, DECIMAL_VAL} to DB
+ // Because of the way our mock implementation works we actually need to not create the table
+ // before we set statistics on it.
+ ColumnStatistics stats = new ColumnStatistics();
+ // Get a default ColumnStatisticsDesc for partition level stats
+ ColumnStatisticsDesc desc = getMockPartColStatsDesc(PART_KEY, DECIMAL_VAL);
+ stats.setStatsDesc(desc);
+ // Get one of the pre-created ColumnStatisticsObj
+ ColumnStatisticsObj obj = decimalColStatsObjs.get(0);
+ DecimalColumnStatsData decimalData = obj.getStatsData().getDecimalStats();
+ // Add to DB
+ stats.addToStatsObj(obj);
+ List<String> parVals = new ArrayList<String>();
+ parVals.add(DECIMAL_VAL);
+ store.updatePartitionColumnStatistics(stats, parVals);
+ // Get from DB
+ List<String> partNames = new ArrayList<String>();
+ partNames.add(desc.getPartName());
+ List<String> colNames = new ArrayList<String>();
+ colNames.add(obj.getColName());
+ List<ColumnStatistics> statsFromDB = store.getPartitionColumnStatistics(DB, TBL, partNames, colNames);
+ // Compare ColumnStatisticsDesc
+ Assert.assertEquals(1, statsFromDB.size());
+ Assert.assertEquals(desc.getLastAnalyzed(), statsFromDB.get(0).getStatsDesc().getLastAnalyzed());
+ Assert.assertEquals(DB, statsFromDB.get(0).getStatsDesc().getDbName());
+ Assert.assertEquals(TBL, statsFromDB.get(0).getStatsDesc().getTableName());
+ Assert.assertFalse(statsFromDB.get(0).getStatsDesc().isIsTblLevel());
+ // Compare ColumnStatisticsObj
+ Assert.assertEquals(1, statsFromDB.get(0).getStatsObjSize());
+ ColumnStatisticsObj objFromDB = statsFromDB.get(0).getStatsObj().get(0);
+ ColumnStatisticsData dataFromDB = objFromDB.getStatsData();
+ // Compare ColumnStatisticsData
+ Assert.assertEquals(ColumnStatisticsData._Fields.DECIMAL_STATS, dataFromDB.getSetField());
+ // Compare DecimalColumnStatsData
+ DecimalColumnStatsData decimalDataFromDB = dataFromDB.getDecimalStats();
+ Assert.assertEquals(decimalData.getHighValue(), decimalDataFromDB.getHighValue());
+ Assert.assertEquals(decimalData.getLowValue(), decimalDataFromDB.getLowValue());
+ Assert.assertEquals(decimalData.getNumNulls(), decimalDataFromDB.getNumNulls());
+ Assert.assertEquals(decimalData.getNumDVs(), decimalDataFromDB.getNumDVs());
+ Assert.assertEquals(decimalData.getBitVectors(), decimalDataFromDB.getBitVectors());
+ }
+
+ private Table createMockTable(String name, String type) throws Exception {
+ List<FieldSchema> cols = new ArrayList<FieldSchema>();
+ cols.add(new FieldSchema(name, type, ""));
+ SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
+ Map<String, String> params = new HashMap<String, String>();
+ params.put("key", "value");
+ StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 17,
+ serde, new ArrayList<String>(), new ArrayList<Order>(), params);
+ int currentTime = (int)(System.currentTimeMillis() / 1000);
+ Table table = new Table(TBL, DB, "me", currentTime, currentTime, 0, sd, cols,
+ emptyParameters, null, null, null);
+ store.createTable(table);
+ return table;
+ }
+
+ private Table createMockTableAndPartition(String partType, String partVal) throws Exception {
+ List<FieldSchema> cols = new ArrayList<FieldSchema>();
+ cols.add(new FieldSchema("col1", partType, ""));
+ List<String> vals = new ArrayList<String>();
+ vals.add(partVal);
+ SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
+ Map<String, String> params = new HashMap<String, String>();
+ params.put("key", "value");
+ StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 17,
+ serde, Arrays.asList("bucketcol"), Arrays.asList(new Order("sortcol", 1)), params);
+ int currentTime = (int)(System.currentTimeMillis() / 1000);
+ Table table = new Table(TBL, DB, "me", currentTime, currentTime, 0, sd, cols,
+ emptyParameters, null, null, null);
+ store.createTable(table);
+ Partition part = new Partition(vals, DB, TBL, currentTime, currentTime, sd,
+ emptyParameters);
+ store.addPartition(part);
+ return table;
+ }
+ /**
+ * Returns a dummy table level ColumnStatisticsDesc with default values
+ */
+ private ColumnStatisticsDesc getMockTblColStatsDesc() {
+ ColumnStatisticsDesc desc = new ColumnStatisticsDesc();
+ desc.setLastAnalyzed(DEFAULT_TIME);
+ desc.setDbName(DB);
+ desc.setTableName(TBL);
+ desc.setIsTblLevel(true);
+ return desc;
+ }
+
+ /**
+ * Returns a dummy partition level ColumnStatisticsDesc
+ */
+ private ColumnStatisticsDesc getMockPartColStatsDesc(String partKey, String partVal) {
+ ColumnStatisticsDesc desc = new ColumnStatisticsDesc();
+ desc.setLastAnalyzed(DEFAULT_TIME);
+ desc.setDbName(DB);
+ desc.setTableName(TBL);
+ // part1=val1
+ desc.setPartName(partKey + PART_KV_SEPARATOR + partVal);
+ desc.setIsTblLevel(false);
+ return desc;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/ql/pom.xml
----------------------------------------------------------------------
diff --git a/ql/pom.xml b/ql/pom.xml
index 358cd2a..f19a225 100644
--- a/ql/pom.xml
+++ b/ql/pom.xml
@@ -397,11 +397,6 @@
<version>${guava.version}</version>
</dependency>
<dependency>
- <groupId>com.google.protobuf</groupId>
- <artifactId>protobuf-java</artifactId>
- <version>${protobuf.version}</version>
- </dependency>
- <dependency>
<groupId>com.googlecode.javaewah</groupId>
<artifactId>JavaEWAH</artifactId>
<version>${javaewah.version}</version>
http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
index 7914471..f9a9fd2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
@@ -121,6 +121,10 @@ public class ColumnStatsTask extends Task<ColumnStatsWork> implements Serializab
} else if (fName.equals("min")) {
double d = ((DoubleObjectInspector) oi).get(o);
statsObj.getStatsData().getDoubleStats().setLowValue(d);
+ } else if (fName.equals("ndvbitvector")) {
+ PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
+ String v = ((StringObjectInspector) poi).getPrimitiveJavaObject(o);
+ statsObj.getStatsData().getDoubleStats().setBitVectors(v);
}
}
@@ -138,6 +142,10 @@ public class ColumnStatsTask extends Task<ColumnStatsWork> implements Serializab
} else if (fName.equals("min")) {
HiveDecimal d = ((HiveDecimalObjectInspector) oi).getPrimitiveJavaObject(o);
statsObj.getStatsData().getDecimalStats().setLowValue(convertToThriftDecimal(d));
+ } else if (fName.equals("ndvbitvector")) {
+ PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
+ String v = ((StringObjectInspector) poi).getPrimitiveJavaObject(o);
+ statsObj.getStatsData().getDecimalStats().setBitVectors(v);
}
}
@@ -159,6 +167,10 @@ public class ColumnStatsTask extends Task<ColumnStatsWork> implements Serializab
} else if (fName.equals("min")) {
long v = ((LongObjectInspector) oi).get(o);
statsObj.getStatsData().getLongStats().setLowValue(v);
+ } else if (fName.equals("ndvbitvector")) {
+ PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
+ String v = ((StringObjectInspector) poi).getPrimitiveJavaObject(o);
+ statsObj.getStatsData().getLongStats().setBitVectors(v);
}
}
@@ -176,6 +188,10 @@ public class ColumnStatsTask extends Task<ColumnStatsWork> implements Serializab
} else if (fName.equals("maxlength")) {
long v = ((LongObjectInspector) oi).get(o);
statsObj.getStatsData().getStringStats().setMaxColLen(v);
+ } else if (fName.equals("ndvbitvector")) {
+ PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
+ String v = ((StringObjectInspector) poi).getPrimitiveJavaObject(o);
+ statsObj.getStatsData().getStringStats().setBitVectors(v);
}
}
@@ -207,6 +223,10 @@ public class ColumnStatsTask extends Task<ColumnStatsWork> implements Serializab
} else if (fName.equals("min")) {
DateWritable v = ((DateObjectInspector) oi).getPrimitiveWritableObject(o);
statsObj.getStatsData().getDateStats().setLowValue(new Date(v.getDays()));
+ } else if (fName.equals("ndvbitvector")) {
+ PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
+ String v = ((StringObjectInspector) poi).getPrimitiveJavaObject(o);
+ statsObj.getStatsData().getDateStats().setBitVectors(v);
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
index 1f30cbd..bb1bbad 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
@@ -25,6 +25,8 @@ import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.common.HiveStatsUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.conf.HiveVariableSource;
@@ -201,60 +203,6 @@ public class ColumnStatsSemanticAnalyzer extends SemanticAnalyzer {
throw new SemanticException ("Unknown partition key : " + partKey);
}
- private int getNumBitVectorsForNDVEstimation(HiveConf conf) throws SemanticException {
- int numBitVectors;
- float percentageError = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVE_STATS_NDV_ERROR);
-
- if (percentageError < 0.0) {
- throw new SemanticException("hive.stats.ndv.error can't be negative");
- } else if (percentageError <= 2.4) {
- numBitVectors = 1024;
- LOG.info("Lowest error achievable is 2.4% but error requested is " + percentageError + "%");
- LOG.info("Choosing 1024 bit vectors..");
- } else if (percentageError <= 3.4 ) {
- numBitVectors = 1024;
- LOG.info("Error requested is " + percentageError + "%");
- LOG.info("Choosing 1024 bit vectors..");
- } else if (percentageError <= 4.8) {
- numBitVectors = 512;
- LOG.info("Error requested is " + percentageError + "%");
- LOG.info("Choosing 512 bit vectors..");
- } else if (percentageError <= 6.8) {
- numBitVectors = 256;
- LOG.info("Error requested is " + percentageError + "%");
- LOG.info("Choosing 256 bit vectors..");
- } else if (percentageError <= 9.7) {
- numBitVectors = 128;
- LOG.info("Error requested is " + percentageError + "%");
- LOG.info("Choosing 128 bit vectors..");
- } else if (percentageError <= 13.8) {
- numBitVectors = 64;
- LOG.info("Error requested is " + percentageError + "%");
- LOG.info("Choosing 64 bit vectors..");
- } else if (percentageError <= 19.6) {
- numBitVectors = 32;
- LOG.info("Error requested is " + percentageError + "%");
- LOG.info("Choosing 32 bit vectors..");
- } else if (percentageError <= 28.2) {
- numBitVectors = 16;
- LOG.info("Error requested is " + percentageError + "%");
- LOG.info("Choosing 16 bit vectors..");
- } else if (percentageError <= 40.9) {
- numBitVectors = 8;
- LOG.info("Error requested is " + percentageError + "%");
- LOG.info("Choosing 8 bit vectors..");
- } else if (percentageError <= 61.0) {
- numBitVectors = 4;
- LOG.info("Error requested is " + percentageError + "%");
- LOG.info("Choosing 4 bit vectors..");
- } else {
- numBitVectors = 2;
- LOG.info("Error requested is " + percentageError + "%");
- LOG.info("Choosing 2 bit vectors..");
- }
- return numBitVectors;
- }
-
private List<String> getColumnTypes(List<String> colNames)
throws SemanticException{
List<String> colTypes = new LinkedList<String>();
@@ -396,7 +344,12 @@ public class ColumnStatsSemanticAnalyzer extends SemanticAnalyzer {
isTableLevel = true;
}
colType = getColumnTypes(colNames);
- int numBitVectors = getNumBitVectorsForNDVEstimation(conf);
+ int numBitVectors;
+ try {
+ numBitVectors = HiveStatsUtils.getNumBitVectorsForNDVEstimation(conf);
+ } catch (Exception e) {
+ throw new SemanticException(e.getMessage(), e);
+ }
rewrittenQuery = genRewrittenQuery(colNames, numBitVectors, partSpec, isPartitionStats);
rewrittenTree = genRewrittenTree(rewrittenQuery);
} else {
http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
index b4cf58f..ea506fc 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
@@ -46,6 +46,7 @@ import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.optimizer.stats.annotation.StatsRulesProcFactory;
import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.ColStatistics;
import org.apache.hadoop.hive.ql.plan.ColStatistics.Range;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
@@ -1558,4 +1559,58 @@ public class StatsUtils {
return Long.MAX_VALUE;
}
}
+
+ public static int getNumBitVectorsForNDVEstimation(HiveConf conf) throws SemanticException {
+ int numBitVectors;
+ float percentageError = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVE_STATS_NDV_ERROR);
+
+ if (percentageError < 0.0) {
+ throw new SemanticException("hive.stats.ndv.error can't be negative");
+ } else if (percentageError <= 2.4) {
+ numBitVectors = 1024;
+ LOG.info("Lowest error achievable is 2.4% but error requested is " + percentageError + "%");
+ LOG.info("Choosing 1024 bit vectors..");
+ } else if (percentageError <= 3.4 ) {
+ numBitVectors = 1024;
+ LOG.info("Error requested is " + percentageError + "%");
+ LOG.info("Choosing 1024 bit vectors..");
+ } else if (percentageError <= 4.8) {
+ numBitVectors = 512;
+ LOG.info("Error requested is " + percentageError + "%");
+ LOG.info("Choosing 512 bit vectors..");
+ } else if (percentageError <= 6.8) {
+ numBitVectors = 256;
+ LOG.info("Error requested is " + percentageError + "%");
+ LOG.info("Choosing 256 bit vectors..");
+ } else if (percentageError <= 9.7) {
+ numBitVectors = 128;
+ LOG.info("Error requested is " + percentageError + "%");
+ LOG.info("Choosing 128 bit vectors..");
+ } else if (percentageError <= 13.8) {
+ numBitVectors = 64;
+ LOG.info("Error requested is " + percentageError + "%");
+ LOG.info("Choosing 64 bit vectors..");
+ } else if (percentageError <= 19.6) {
+ numBitVectors = 32;
+ LOG.info("Error requested is " + percentageError + "%");
+ LOG.info("Choosing 32 bit vectors..");
+ } else if (percentageError <= 28.2) {
+ numBitVectors = 16;
+ LOG.info("Error requested is " + percentageError + "%");
+ LOG.info("Choosing 16 bit vectors..");
+ } else if (percentageError <= 40.9) {
+ numBitVectors = 8;
+ LOG.info("Error requested is " + percentageError + "%");
+ LOG.info("Choosing 8 bit vectors..");
+ } else if (percentageError <= 61.0) {
+ numBitVectors = 4;
+ LOG.info("Error requested is " + percentageError + "%");
+ LOG.info("Choosing 4 bit vectors..");
+ } else {
+ numBitVectors = 2;
+ LOG.info("Error requested is " + percentageError + "%");
+ LOG.info("Choosing 2 bit vectors..");
+ }
+ return numBitVectors;
+ }
}
http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java
index 0e96f89..d6ca73f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java
@@ -43,8 +43,6 @@ import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.StringUtils;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
/**
* GenericUDAFComputeStats
@@ -401,6 +399,7 @@ public class GenericUDAFComputeStats extends AbstractGenericUDAFResolver {
foi.add(getValueObjectInspector());
foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
+ foi.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector);
List<String> fname = new ArrayList<String>();
fname.add("columnType");
@@ -408,11 +407,13 @@ public class GenericUDAFComputeStats extends AbstractGenericUDAFResolver {
fname.add("max");
fname.add("countnulls");
fname.add("numdistinctvalues");
+ fname.add("ndvbitvector");
- result = new Object[5];
+ result = new Object[6];
result[0] = new Text();
result[3] = new LongWritable(0);
result[4] = new LongWritable(0);
+ result[5] = new Text();
return ObjectInspectorFactory.getStandardStructObjectInspector(fname,
foi);
@@ -448,6 +449,9 @@ public class GenericUDAFComputeStats extends AbstractGenericUDAFResolver {
serializeCommon(result);
long dv = numDV != null ? numDV.estimateNumDistinctValues() : 0;
((LongWritable) result[4]).set(dv);
+ if (numDV != null) {
+ ((Text) result[5]).set(numDV.serialize());
+ }
return result;
}
@@ -795,6 +799,7 @@ public class GenericUDAFComputeStats extends AbstractGenericUDAFResolver {
foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
+ foi.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector);
List<String> fname = new ArrayList<String>();
fname.add("columntype");
@@ -802,13 +807,15 @@ public class GenericUDAFComputeStats extends AbstractGenericUDAFResolver {
fname.add("avglength");
fname.add("countnulls");
fname.add("numdistinctvalues");
+ fname.add("ndvbitvector");
- result = new Object[5];
+ result = new Object[6];
result[0] = new Text();
result[1] = new LongWritable(0);
result[2] = new DoubleWritable(0);
result[3] = new LongWritable(0);
result[4] = new LongWritable(0);
+ result[5] = new Text();
return ObjectInspectorFactory.getStandardStructObjectInspector(fname,
foi);
@@ -1003,7 +1010,9 @@ public class GenericUDAFComputeStats extends AbstractGenericUDAFResolver {
((DoubleWritable) result[2]).set(avgLength);
((LongWritable) result[3]).set(myagg.countNulls);
((LongWritable) result[4]).set(numDV);
-
+ if (myagg.numBitVectors != 0) {
+ ((Text) result[5]).set(myagg.numDV.serialize());
+ }
return result;
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/ql/src/test/results/clientpositive/char_udf1.q.java1.7.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/char_udf1.q.java1.7.out b/ql/src/test/results/clientpositive/char_udf1.q.java1.7.out
index bfed116..ee1c2ae 100644
--- a/ql/src/test/results/clientpositive/char_udf1.q.java1.7.out
+++ b/ql/src/test/results/clientpositive/char_udf1.q.java1.7.out
@@ -422,7 +422,7 @@ from char_udf_1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@char_udf_1
#### A masked pattern was here ####
-{"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1} {"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1}
+{"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{0}{3}{2}{3}{1}{0}{2}{0}{1}{0}{0}{1}{3}{2}{0}{3}"} {"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{0}{3}{2}{3}{1}{0}{2}{0}{1}{0}{0}{1}{3}{2}{0}{3}"}
PREHOOK: query: select
min(c2),
min(c4)
http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/ql/src/test/results/clientpositive/columnstats_partlvl.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/columnstats_partlvl.q.out b/ql/src/test/results/clientpositive/columnstats_partlvl.q.out
index b7c9075..2545c03 100644
--- a/ql/src/test/results/clientpositive/columnstats_partlvl.q.out
+++ b/ql/src/test/results/clientpositive/columnstats_partlvl.q.out
@@ -66,7 +66,7 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0, _col1
Select Operator
- expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint>), _col0 (type: double)
+ expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col0 (type: double)
outputColumnNames: _col0, _col1
File Output Operator
compressed: false
@@ -186,7 +186,7 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0, _col1
Select Operator
- expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint>), _col0 (type: double)
+ expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col0 (type: double)
outputColumnNames: _col0, _col1
File Output Operator
compressed: false
@@ -199,7 +199,7 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
columns _col0,_col1
- columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint>:double
+ columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>:double
escape.delim \
hive.serialization.extend.additional.nesting.levels true
serialization.escape.crlf true
@@ -264,7 +264,7 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0, _col1
Select Operator
- expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint>), _col0 (type: double)
+ expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col0 (type: double)
outputColumnNames: _col0, _col1
File Output Operator
compressed: false
@@ -384,7 +384,7 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0, _col1
Select Operator
- expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint>), _col0 (type: double)
+ expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col0 (type: double)
outputColumnNames: _col0, _col1
File Output Operator
compressed: false
@@ -397,7 +397,7 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
columns _col0,_col1
- columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint>:double
+ columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>:double
escape.delim \
hive.serialization.extend.additional.nesting.levels true
serialization.escape.crlf true
@@ -462,7 +462,7 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
Select Operator
- expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint>), _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint>), _col0 (type: double)
+ expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col0 (type: double)
outputColumnNames: _col0, _col1, _col2
File Output Operator
compressed: false
@@ -542,7 +542,7 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
Select Operator
- expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint>), _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint>), _col0 (type: double)
+ expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col0 (type: double)
outputColumnNames: _col0, _col1, _col2
File Output Operator
compressed: false
http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out b/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out
index 9685202..39f45ae 100644
--- a/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out
+++ b/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out
@@ -104,7 +104,7 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3
Select Operator
- expressions: _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint>), _col3 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint>), _col0 (type: double), _col1 (type: string)
+ expressions: _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col3 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col0 (type: double), _col1 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
File Output Operator
compressed: false
@@ -177,7 +177,7 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
Select Operator
- expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint>), _col0 (type: double), _col1 (type: string)
+ expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col0 (type: double), _col1 (type: string)
outputColumnNames: _col0, _col1, _col2
File Output Operator
compressed: false
@@ -261,7 +261,7 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
Select Operator
- expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint>), _col0 (type: double), _col1 (type: string)
+ expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col0 (type: double), _col1 (type: string)
outputColumnNames: _col0, _col1, _col2
File Output Operator
compressed: false
@@ -342,7 +342,7 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3
Select Operator
- expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint>), _col3 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint>), _col0 (type: double), _col1 (type: string)
+ expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col3 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col0 (type: double), _col1 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
File Output Operator
compressed: false
http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out b/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out
index 0aadae3..4cd12c4 100644
--- a/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out
+++ b/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out
@@ -187,7 +187,7 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
columns _col0,_col1,_col2
- columns.types struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint>:struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint>:struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint>
+ columns.types struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>:struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>:struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>
escape.delim \
hive.serialization.extend.additional.nesting.levels true
serialization.escape.crlf true
@@ -588,7 +588,7 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
columns _col0,_col1,_col2
- columns.types struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint>:struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint>:struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint>
+ columns.types struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>:struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>:struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>
escape.delim \
hive.serialization.extend.additional.nesting.levels true
serialization.escape.crlf true
http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/ql/src/test/results/clientpositive/compute_stats_date.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/compute_stats_date.q.out b/ql/src/test/results/clientpositive/compute_stats_date.q.out
index b57a862..d9c47d0 100644
--- a/ql/src/test/results/clientpositive/compute_stats_date.q.out
+++ b/ql/src/test/results/clientpositive/compute_stats_date.q.out
@@ -47,7 +47,7 @@ select compute_stats(fl_date, 16) from tab_date
POSTHOOK: type: QUERY
POSTHOOK: Input: default@tab_date
#### A masked pattern was here ####
-{"columntype":"Date","min":"2000-11-20","max":"2010-10-29","countnulls":0,"numdistinctvalues":18}
+{"columntype":"Date","min":"2000-11-20","max":"2010-10-29","countnulls":0,"numdistinctvalues":18,"ndvbitvector":"{0, 1, 2, 3, 4, 5}{0, 1, 2, 3}{0}{0, 1, 2, 6}{0, 1, 2, 3}{0, 1, 2, 3}{0, 1, 2}{0, 1, 2, 3}{0, 1, 2, 3}{0, 2}{0, 1, 2, 3, 4}{0, 1, 2, 4, 5}{0, 1, 2, 3}{0, 1, 2, 3, 5}{0, 1, 2, 3, 4, 5}{0, 1, 2, 3, 4}"}
PREHOOK: query: explain
analyze table tab_date compute statistics for columns fl_date
PREHOOK: type: QUERY
http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/ql/src/test/results/clientpositive/compute_stats_decimal.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/compute_stats_decimal.q.out b/ql/src/test/results/clientpositive/compute_stats_decimal.q.out
index 35abb37..c204ab6 100644
--- a/ql/src/test/results/clientpositive/compute_stats_decimal.q.out
+++ b/ql/src/test/results/clientpositive/compute_stats_decimal.q.out
@@ -35,4 +35,4 @@ select compute_stats(a, 18) from tab_decimal
POSTHOOK: type: QUERY
POSTHOOK: Input: default@tab_decimal
#### A masked pattern was here ####
-{"columntype":"Decimal","min":-87.2,"max":435.331,"countnulls":2,"numdistinctvalues":13}
+{"columntype":"Decimal","min":-87.2,"max":435.331,"countnulls":2,"numdistinctvalues":13,"ndvbitvector":"{0, 1, 2, 3, 4}{0, 1, 2, 3, 5}{0, 1, 2, 3}{0, 1, 2}{0, 1, 2}{0, 1, 2, 3, 5}{0, 1, 3}{0, 1, 2, 4}{0, 1, 2, 3, 5}{0, 1, 2, 3}{0, 1, 2}{0, 1}{0, 1, 2}{0, 1, 2, 3}{0, 1, 2, 6, 8}{0, 1, 2, 3}{0, 1, 2}{0, 1, 4, 5}"}
http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/ql/src/test/results/clientpositive/compute_stats_double.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/compute_stats_double.q.out b/ql/src/test/results/clientpositive/compute_stats_double.q.out
index f6b4052..0a67ecd 100644
--- a/ql/src/test/results/clientpositive/compute_stats_double.q.out
+++ b/ql/src/test/results/clientpositive/compute_stats_double.q.out
@@ -35,4 +35,4 @@ select compute_stats(a, 16) from tab_double
POSTHOOK: type: QUERY
POSTHOOK: Input: default@tab_double
#### A masked pattern was here ####
-{"columntype":"Double","min":-87.2,"max":435.33,"countnulls":2,"numdistinctvalues":11}
+{"columntype":"Double","min":-87.2,"max":435.33,"countnulls":2,"numdistinctvalues":11,"ndvbitvector":"{0, 1, 2, 3, 4}{0, 1, 2}{0, 1}{0, 1, 3, 4}{0, 1, 3}{0, 1, 2, 3, 8}{0, 1, 3}{0, 1, 2}{0, 1, 4}{0, 1, 2}{0, 1, 2, 3}{0, 1, 2, 3}{0, 1, 2, 3, 4}{0, 1, 2}{0, 1, 2, 3, 4}{0, 1, 3}"}
http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/ql/src/test/results/clientpositive/compute_stats_empty_table.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/compute_stats_empty_table.q.out b/ql/src/test/results/clientpositive/compute_stats_empty_table.q.out
index f76c760..a6cb9af 100644
--- a/ql/src/test/results/clientpositive/compute_stats_empty_table.q.out
+++ b/ql/src/test/results/clientpositive/compute_stats_empty_table.q.out
@@ -34,7 +34,7 @@ POSTHOOK: query: select compute_stats(b, 16) from tab_empty
POSTHOOK: type: QUERY
POSTHOOK: Input: default@tab_empty
#### A masked pattern was here ####
-{"columntype":"Long","min":null,"max":null,"countnulls":0,"numdistinctvalues":0}
+{"columntype":"Long","min":null,"max":null,"countnulls":0,"numdistinctvalues":0,"ndvbitvector":""}
PREHOOK: query: select compute_stats(c, 16) from tab_empty
PREHOOK: type: QUERY
PREHOOK: Input: default@tab_empty
@@ -43,7 +43,7 @@ POSTHOOK: query: select compute_stats(c, 16) from tab_empty
POSTHOOK: type: QUERY
POSTHOOK: Input: default@tab_empty
#### A masked pattern was here ####
-{"columntype":"Double","min":null,"max":null,"countnulls":0,"numdistinctvalues":0}
+{"columntype":"Double","min":null,"max":null,"countnulls":0,"numdistinctvalues":0,"ndvbitvector":""}
PREHOOK: query: select compute_stats(d, 16) from tab_empty
PREHOOK: type: QUERY
PREHOOK: Input: default@tab_empty
@@ -52,7 +52,7 @@ POSTHOOK: query: select compute_stats(d, 16) from tab_empty
POSTHOOK: type: QUERY
POSTHOOK: Input: default@tab_empty
#### A masked pattern was here ####
-{"columntype":"String","maxlength":0,"avglength":0.0,"countnulls":0,"numdistinctvalues":0}
+{"columntype":"String","maxlength":0,"avglength":0.0,"countnulls":0,"numdistinctvalues":0,"ndvbitvector":""}
PREHOOK: query: select compute_stats(e, 16) from tab_empty
PREHOOK: type: QUERY
PREHOOK: Input: default@tab_empty
http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/ql/src/test/results/clientpositive/compute_stats_long.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/compute_stats_long.q.out b/ql/src/test/results/clientpositive/compute_stats_long.q.out
index 2c6171d..b6f2b10 100644
--- a/ql/src/test/results/clientpositive/compute_stats_long.q.out
+++ b/ql/src/test/results/clientpositive/compute_stats_long.q.out
@@ -35,4 +35,4 @@ select compute_stats(a, 16) from tab_int
POSTHOOK: type: QUERY
POSTHOOK: Input: default@tab_int
#### A masked pattern was here ####
-{"columntype":"Long","min":4,"max":344,"countnulls":1,"numdistinctvalues":11}
+{"columntype":"Long","min":4,"max":344,"countnulls":1,"numdistinctvalues":11,"ndvbitvector":"{0, 1, 2, 3}{0, 2, 5}{0, 1, 2, 3, 4}{0, 1, 2, 4, 6, 7}{0, 1, 2, 4}{0, 1, 2, 4, 5}{0, 1, 2, 5}{0, 1, 2}{0, 1, 2, 3}{0, 1, 3, 4}{0, 1, 2, 5, 6}{0, 1, 2, 3}{0, 1, 3}{0, 1, 2, 3}{0, 1, 2, 3, 10}{0, 1, 2, 4}"}
http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/ql/src/test/results/clientpositive/compute_stats_string.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/compute_stats_string.q.out b/ql/src/test/results/clientpositive/compute_stats_string.q.out
index bdf9d85..fbd0e6d 100644
--- a/ql/src/test/results/clientpositive/compute_stats_string.q.out
+++ b/ql/src/test/results/clientpositive/compute_stats_string.q.out
@@ -35,4 +35,4 @@ select compute_stats(a, 16) from tab_string
POSTHOOK: type: QUERY
POSTHOOK: Input: default@tab_string
#### A masked pattern was here ####
-{"columntype":"String","maxlength":11,"avglength":3.9,"countnulls":0,"numdistinctvalues":7}
+{"columntype":"String","maxlength":11,"avglength":3.9,"countnulls":0,"numdistinctvalues":7,"ndvbitvector":"{0, 1, 2, 3}{0, 1}{0, 1, 3}{0, 2}{0, 1, 2, 3}{0, 1, 3}{0, 1, 2, 3}{0, 1, 3}{0, 1}{0, 1}{0, 1, 2, 4}{0, 1, 4}{0, 2, 4}{0, 1, 2, 3}{0, 1, 2}{0, 1, 2}"}
http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out b/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out
index 7fa3089..8f50a43 100644
--- a/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out
+++ b/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out
@@ -203,7 +203,7 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
columns _col0,_col1,_col2
- columns.types struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint>:struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint>:struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint>
+ columns.types struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>:struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>:struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>
escape.delim \
hive.serialization.extend.additional.nesting.levels true
serialization.escape.crlf true
http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out b/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out
index ae39d18..b46f509 100644
--- a/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out
+++ b/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out
@@ -211,7 +211,7 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
columns _col0,_col1,_col2
- columns.types struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint>:struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint>:struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint>
+ columns.types struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>:struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>:struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>
escape.delim \
hive.serialization.extend.additional.nesting.levels true
serialization.escape.crlf true
http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/ql/src/test/results/clientpositive/varchar_udf1.q.java1.7.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/varchar_udf1.q.java1.7.out b/ql/src/test/results/clientpositive/varchar_udf1.q.java1.7.out
index 853bc4a..459d93b 100644
--- a/ql/src/test/results/clientpositive/varchar_udf1.q.java1.7.out
+++ b/ql/src/test/results/clientpositive/varchar_udf1.q.java1.7.out
@@ -416,7 +416,7 @@ from varchar_udf_1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@varchar_udf_1
#### A masked pattern was here ####
-{"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1} {"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1}
+{"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{0}{3}{2}{3}{1}{0}{2}{0}{1}{0}{0}{1}{3}{2}{0}{3}"} {"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{0}{3}{2}{3}{1}{0}{2}{0}{1}{0}{0}{1}{3}{2}{0}{3}"}
PREHOOK: query: select
min(c2),
min(c4)
[4/4] hive git commit: HIVE-12763: Use bit vector to track NDV
(Pengcheng Xiong, reviewed by Laljo John Pullokkaran and Alan Gates)
Posted by px...@apache.org.
HIVE-12763: Use bit vector to track NDV (Pengcheng Xiong, reviewed by Laljo John Pullokkaran and Alan Gates)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/7b2f6703
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/7b2f6703
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/7b2f6703
Branch: refs/heads/master
Commit: 7b2f6703f172a71d595159c4f395f942583d66b9
Parents: 0c7f2d6
Author: Pengcheng Xiong <px...@apache.org>
Authored: Thu Jan 28 21:25:33 2016 -0800
Committer: Pengcheng Xiong <px...@apache.org>
Committed: Thu Jan 28 21:25:33 2016 -0800
----------------------------------------------------------------------
.../hadoop/hive/common/HiveStatsUtils.java | 59 ++
.../metastore/hbase/TestHBaseSchemaTool.java | 12 +-
.../test/resources/testconfiguration.properties | 1 +
metastore/if/hive_metastore.thrift | 21 +-
metastore/pom.xml | 5 +
.../metastore/hbase/HbaseMetastoreProto.java | 411 ++++++++----
.../gen/thrift/gen-cpp/hive_metastore_types.cpp | 163 +++++
.../gen/thrift/gen-cpp/hive_metastore_types.h | 93 ++-
.../metastore/api/BinaryColumnStatsData.java | 112 +++-
.../metastore/api/BooleanColumnStatsData.java | 112 +++-
.../hive/metastore/api/DateColumnStatsData.java | 114 +++-
.../metastore/api/DecimalColumnStatsData.java | 114 +++-
.../metastore/api/DoubleColumnStatsData.java | 114 +++-
.../hive/metastore/api/LongColumnStatsData.java | 114 +++-
.../metastore/api/StringColumnStatsData.java | 112 +++-
.../src/gen/thrift/gen-php/metastore/Types.php | 161 +++++
.../gen/thrift/gen-py/hive_metastore/ttypes.py | 105 ++-
.../gen/thrift/gen-rb/hive_metastore_types.rb | 28 +-
.../metastore/NumDistinctValueEstimator.java | 367 +++++++++++
.../hadoop/hive/metastore/hbase/HBaseUtils.java | 152 ++---
.../hadoop/hive/metastore/hbase/StatsCache.java | 7 +-
.../stats/BinaryColumnStatsAggregator.java | 2 +-
.../stats/BooleanColumnStatsAggregator.java | 2 +-
.../hbase/stats/ColumnStatsAggregator.java | 6 +-
.../stats/ColumnStatsAggregatorFactory.java | 26 +-
.../stats/DecimalColumnStatsAggregator.java | 24 +-
.../stats/DoubleColumnStatsAggregator.java | 12 +-
.../hbase/stats/LongColumnStatsAggregator.java | 12 +-
.../stats/StringColumnStatsAggregator.java | 12 +-
.../metastore/hbase/hbase_metastore_proto.proto | 1 +
...stHBaseAggregateStatsCacheWithBitVector.java | 187 ++++++
.../hbase/TestHBaseStoreBitVector.java | 634 +++++++++++++++++++
ql/pom.xml | 5 -
.../hadoop/hive/ql/exec/ColumnStatsTask.java | 20 +
.../ql/parse/ColumnStatsSemanticAnalyzer.java | 63 +-
.../apache/hadoop/hive/ql/stats/StatsUtils.java | 55 ++
.../ql/udf/generic/GenericUDAFComputeStats.java | 19 +-
.../clientpositive/char_udf1.q.java1.7.out | 2 +-
.../clientpositive/columnstats_partlvl.q.out | 16 +-
.../clientpositive/columnstats_partlvl_dp.q.out | 8 +-
.../clientpositive/columnstats_tbllvl.q.out | 4 +-
.../clientpositive/compute_stats_date.q.out | 2 +-
.../clientpositive/compute_stats_decimal.q.out | 2 +-
.../clientpositive/compute_stats_double.q.out | 2 +-
.../compute_stats_empty_table.q.out | 6 +-
.../clientpositive/compute_stats_long.q.out | 2 +-
.../clientpositive/compute_stats_string.q.out | 2 +-
.../display_colstats_tbllvl.q.out | 2 +-
.../temp_table_display_colstats_tbllvl.q.out | 2 +-
.../clientpositive/varchar_udf1.q.java1.7.out | 2 +-
50 files changed, 3131 insertions(+), 378 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/common/src/java/org/apache/hadoop/hive/common/HiveStatsUtils.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/common/HiveStatsUtils.java b/common/src/java/org/apache/hadoop/hive/common/HiveStatsUtils.java
index 9193f80..7c9d72f 100644
--- a/common/src/java/org/apache/hadoop/hive/common/HiveStatsUtils.java
+++ b/common/src/java/org/apache/hadoop/hive/common/HiveStatsUtils.java
@@ -21,9 +21,13 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
+import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
* HiveStatsUtils.
@@ -32,6 +36,7 @@ import org.apache.hadoop.fs.Path;
*/
public class HiveStatsUtils {
+ private static final Logger LOG = LoggerFactory.getLogger(HiveStatsUtils.class);
/**
* Get all file status from a root path and recursively go deep into certain levels.
@@ -73,4 +78,58 @@ public class HiveStatsUtils {
return fs.globStatus(pathPattern, FileUtils.HIDDEN_FILES_PATH_FILTER);
}
+ public static int getNumBitVectorsForNDVEstimation(Configuration conf) throws Exception {
+ int numBitVectors;
+ float percentageError = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVE_STATS_NDV_ERROR);
+
+ if (percentageError < 0.0) {
+ throw new Exception("hive.stats.ndv.error can't be negative");
+ } else if (percentageError <= 2.4) {
+ numBitVectors = 1024;
+ LOG.info("Lowest error achievable is 2.4% but error requested is " + percentageError + "%");
+ LOG.info("Choosing 1024 bit vectors..");
+ } else if (percentageError <= 3.4 ) {
+ numBitVectors = 1024;
+ LOG.info("Error requested is " + percentageError + "%");
+ LOG.info("Choosing 1024 bit vectors..");
+ } else if (percentageError <= 4.8) {
+ numBitVectors = 512;
+ LOG.info("Error requested is " + percentageError + "%");
+ LOG.info("Choosing 512 bit vectors..");
+ } else if (percentageError <= 6.8) {
+ numBitVectors = 256;
+ LOG.info("Error requested is " + percentageError + "%");
+ LOG.info("Choosing 256 bit vectors..");
+ } else if (percentageError <= 9.7) {
+ numBitVectors = 128;
+ LOG.info("Error requested is " + percentageError + "%");
+ LOG.info("Choosing 128 bit vectors..");
+ } else if (percentageError <= 13.8) {
+ numBitVectors = 64;
+ LOG.info("Error requested is " + percentageError + "%");
+ LOG.info("Choosing 64 bit vectors..");
+ } else if (percentageError <= 19.6) {
+ numBitVectors = 32;
+ LOG.info("Error requested is " + percentageError + "%");
+ LOG.info("Choosing 32 bit vectors..");
+ } else if (percentageError <= 28.2) {
+ numBitVectors = 16;
+ LOG.info("Error requested is " + percentageError + "%");
+ LOG.info("Choosing 16 bit vectors..");
+ } else if (percentageError <= 40.9) {
+ numBitVectors = 8;
+ LOG.info("Error requested is " + percentageError + "%");
+ LOG.info("Choosing 8 bit vectors..");
+ } else if (percentageError <= 61.0) {
+ numBitVectors = 4;
+ LOG.info("Error requested is " + percentageError + "%");
+ LOG.info("Choosing 4 bit vectors..");
+ } else {
+ numBitVectors = 2;
+ LOG.info("Error requested is " + percentageError + "%");
+ LOG.info("Choosing 2 bit vectors..");
+ }
+ return numBitVectors;
+ }
+
}
http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/hbase/TestHBaseSchemaTool.java
----------------------------------------------------------------------
diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/hbase/TestHBaseSchemaTool.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/hbase/TestHBaseSchemaTool.java
index 9fbbf90..79c9e08 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/hbase/TestHBaseSchemaTool.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/hbase/TestHBaseSchemaTool.java
@@ -468,9 +468,9 @@ public class TestHBaseSchemaTool extends HBaseIntegrationTests {
"\"tableType\":\"\"} sdHash: qQTgZAi5VzgpozzFGmIVTQ stats: column " +
"col1: {\"colName\":\"col1\",\"colType\":\"int\"," +
"\"statsData\":{\"longStats\":{\"lowValue\":-95,\"highValue\":95,\"numNulls\":1," +
- "\"numDVs\":2}}} column col2: {\"colName\":\"col2\",\"colType\":\"varchar(32)\"," +
+ "\"numDVs\":2,\"bitVectors\":\"\"}}} column col2: {\"colName\":\"col2\",\"colType\":\"varchar(32)\"," +
"\"statsData\":{\"stringStats\":{\"maxColLen\":97,\"avgColLen\":18.78," +
- "\"numNulls\":29,\"numDVs\":397}}}" + lsep +
+ "\"numNulls\":29,\"numDVs\":397,\"bitVectors\":\"\"}}}" + lsep +
"{\"tableName\":\"tab1\",\"dbName\":\"db0\",\"owner\":\"me\",\"createTime\":0," +
"\"lastAccessTime\":0,\"retention\":0,\"partitionKeys\":[{\"name\":\"pcol1\"," +
"\"type\":\"string\",\"comment\":\"\"},{\"name\":\"pcol2\",\"type\":\"string\"," +
@@ -519,9 +519,9 @@ public class TestHBaseSchemaTool extends HBaseIntegrationTests {
"\"createTime\":0,\"lastAccessTime\":0,\"parameters\":{\"COLUMN_STATS_ACCURATE\":\"{\\\"COLUMN_STATS\\\":{\\\"col1\\\":\\\"true\\\",\\\"col2\\\":\\\"true\\\"}}\"}} sdHash: qQTgZAi5VzgpozzFGmIVTQ " +
"stats: column col1: {\"colName\":\"col1\",\"colType\":\"int\"," +
"\"statsData\":{\"longStats\":{\"lowValue\":-95,\"highValue\":95,\"numNulls\":1," +
- "\"numDVs\":2}}} column col2: {\"colName\":\"col2\",\"colType\":\"varchar(32)\"," +
+ "\"numDVs\":2,\"bitVectors\":\"\"}}} column col2: {\"colName\":\"col2\",\"colType\":\"varchar(32)\"," +
"\"statsData\":{\"stringStats\":{\"maxColLen\":97,\"avgColLen\":18.78,\"numNulls\":29," +
- "\"numDVs\":397}}}" + lsep, outStr.toString());
+ "\"numDVs\":397,\"bitVectors\":\"\"}}}" + lsep, outStr.toString());
outStr = new ByteArrayOutputStream();
out = new PrintStream(outStr);
@@ -533,9 +533,9 @@ public class TestHBaseSchemaTool extends HBaseIntegrationTests {
"\"lastAccessTime\":0,\"parameters\":{\"COLUMN_STATS_ACCURATE\":\"{\\\"COLUMN_STATS\\\":{\\\"col1\\\":\\\"true\\\",\\\"col2\\\":\\\"true\\\"}}\"}} sdHash: qQTgZAi5VzgpozzFGmIVTQ stats: column " +
"col1: {\"colName\":\"col1\",\"colType\":\"int\"," +
"\"statsData\":{\"longStats\":{\"lowValue\":-95,\"highValue\":95,\"numNulls\":1," +
- "\"numDVs\":2}}} column col2: {\"colName\":\"col2\",\"colType\":\"varchar(32)\"," +
+ "\"numDVs\":2,\"bitVectors\":\"\"}}} column col2: {\"colName\":\"col2\",\"colType\":\"varchar(32)\"," +
"\"statsData\":{\"stringStats\":{\"maxColLen\":97,\"avgColLen\":18.78,\"numNulls\":29," +
- "\"numDVs\":397}}}" + lsep, outStr.toString());
+ "\"numDVs\":397,\"bitVectors\":\"\"}}}" + lsep, outStr.toString());
outStr = new ByteArrayOutputStream();
out = new PrintStream(outStr);
http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index ec6a2c7..f8aa146 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -393,6 +393,7 @@ minitez.query.files=bucket_map_join_tez1.q,\
orc_ppd_basic.q,\
orc_merge_diff_fs.q,\
stats_filemetadata.q,\
+ tez_aggr_part_stats.q,\
tez_bmj_schema_evolution.q,\
tez_dml.q,\
tez_fsstat.q,\
http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/metastore/if/hive_metastore.thrift
----------------------------------------------------------------------
diff --git a/metastore/if/hive_metastore.thrift b/metastore/if/hive_metastore.thrift
index 81837e6..9d8c092 100755
--- a/metastore/if/hive_metastore.thrift
+++ b/metastore/if/hive_metastore.thrift
@@ -325,34 +325,39 @@ struct Index {
struct BooleanColumnStatsData {
1: required i64 numTrues,
2: required i64 numFalses,
-3: required i64 numNulls
+3: required i64 numNulls,
+4: optional string bitVectors
}
struct DoubleColumnStatsData {
1: optional double lowValue,
2: optional double highValue,
3: required i64 numNulls,
-4: required i64 numDVs
+4: required i64 numDVs,
+5: optional string bitVectors
}
struct LongColumnStatsData {
1: optional i64 lowValue,
2: optional i64 highValue,
3: required i64 numNulls,
-4: required i64 numDVs
+4: required i64 numDVs,
+5: optional string bitVectors
}
struct StringColumnStatsData {
1: required i64 maxColLen,
2: required double avgColLen,
3: required i64 numNulls,
-4: required i64 numDVs
+4: required i64 numDVs,
+5: optional string bitVectors
}
struct BinaryColumnStatsData {
1: required i64 maxColLen,
2: required double avgColLen,
-3: required i64 numNulls
+3: required i64 numNulls,
+4: optional string bitVectors
}
@@ -365,7 +370,8 @@ struct DecimalColumnStatsData {
1: optional Decimal lowValue,
2: optional Decimal highValue,
3: required i64 numNulls,
-4: required i64 numDVs
+4: required i64 numDVs,
+5: optional string bitVectors
}
struct Date {
@@ -376,7 +382,8 @@ struct DateColumnStatsData {
1: optional Date lowValue,
2: optional Date highValue,
3: required i64 numNulls,
-4: required i64 numDVs
+4: required i64 numDVs,
+5: optional string bitVectors
}
union ColumnStatisticsData {
http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/metastore/pom.xml
----------------------------------------------------------------------
diff --git a/metastore/pom.xml b/metastore/pom.xml
index a8e84a1..18c1f9c 100644
--- a/metastore/pom.xml
+++ b/metastore/pom.xml
@@ -44,6 +44,11 @@
<artifactId>hive-shims</artifactId>
<version>${project.version}</version>
</dependency>
+ <dependency>
+ <groupId>javolution</groupId>
+ <artifactId>javolution</artifactId>
+ <version>${javolution.version}</version>
+ </dependency>
<!-- inter-project -->
<dependency>
<groupId>com.google.guava</groupId>
http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/metastore/src/gen/protobuf/gen-java/org/apache/hadoop/hive/metastore/hbase/HbaseMetastoreProto.java
----------------------------------------------------------------------
diff --git a/metastore/src/gen/protobuf/gen-java/org/apache/hadoop/hive/metastore/hbase/HbaseMetastoreProto.java b/metastore/src/gen/protobuf/gen-java/org/apache/hadoop/hive/metastore/hbase/HbaseMetastoreProto.java
index 39a7278..3b2d7b5 100644
--- a/metastore/src/gen/protobuf/gen-java/org/apache/hadoop/hive/metastore/hbase/HbaseMetastoreProto.java
+++ b/metastore/src/gen/protobuf/gen-java/org/apache/hadoop/hive/metastore/hbase/HbaseMetastoreProto.java
@@ -3918,6 +3918,21 @@ public final class HbaseMetastoreProto {
*/
com.google.protobuf.ByteString
getColumnNameBytes();
+
+ // optional string bit_vectors = 12;
+ /**
+ * <code>optional string bit_vectors = 12;</code>
+ */
+ boolean hasBitVectors();
+ /**
+ * <code>optional string bit_vectors = 12;</code>
+ */
+ java.lang.String getBitVectors();
+ /**
+ * <code>optional string bit_vectors = 12;</code>
+ */
+ com.google.protobuf.ByteString
+ getBitVectorsBytes();
}
/**
* Protobuf type {@code org.apache.hadoop.hive.metastore.hbase.ColumnStats}
@@ -4073,6 +4088,11 @@ public final class HbaseMetastoreProto {
columnName_ = input.readBytes();
break;
}
+ case 98: {
+ bitField0_ |= 0x00000800;
+ bitVectors_ = input.readBytes();
+ break;
+ }
}
}
} catch (com.google.protobuf.InvalidProtocolBufferException e) {
@@ -7506,6 +7526,49 @@ public final class HbaseMetastoreProto {
}
}
+ // optional string bit_vectors = 12;
+ public static final int BIT_VECTORS_FIELD_NUMBER = 12;
+ private java.lang.Object bitVectors_;
+ /**
+ * <code>optional string bit_vectors = 12;</code>
+ */
+ public boolean hasBitVectors() {
+ return ((bitField0_ & 0x00000800) == 0x00000800);
+ }
+ /**
+ * <code>optional string bit_vectors = 12;</code>
+ */
+ public java.lang.String getBitVectors() {
+ java.lang.Object ref = bitVectors_;
+ if (ref instanceof java.lang.String) {
+ return (java.lang.String) ref;
+ } else {
+ com.google.protobuf.ByteString bs =
+ (com.google.protobuf.ByteString) ref;
+ java.lang.String s = bs.toStringUtf8();
+ if (bs.isValidUtf8()) {
+ bitVectors_ = s;
+ }
+ return s;
+ }
+ }
+ /**
+ * <code>optional string bit_vectors = 12;</code>
+ */
+ public com.google.protobuf.ByteString
+ getBitVectorsBytes() {
+ java.lang.Object ref = bitVectors_;
+ if (ref instanceof java.lang.String) {
+ com.google.protobuf.ByteString b =
+ com.google.protobuf.ByteString.copyFromUtf8(
+ (java.lang.String) ref);
+ bitVectors_ = b;
+ return b;
+ } else {
+ return (com.google.protobuf.ByteString) ref;
+ }
+ }
+
private void initFields() {
lastAnalyzed_ = 0L;
columnType_ = "";
@@ -7518,6 +7581,7 @@ public final class HbaseMetastoreProto {
binaryStats_ = org.apache.hadoop.hive.metastore.hbase.HbaseMetastoreProto.ColumnStats.StringStats.getDefaultInstance();
decimalStats_ = org.apache.hadoop.hive.metastore.hbase.HbaseMetastoreProto.ColumnStats.DecimalStats.getDefaultInstance();
columnName_ = "";
+ bitVectors_ = "";
}
private byte memoizedIsInitialized = -1;
public final boolean isInitialized() {
@@ -7574,6 +7638,9 @@ public final class HbaseMetastoreProto {
if (((bitField0_ & 0x00000400) == 0x00000400)) {
output.writeBytes(11, getColumnNameBytes());
}
+ if (((bitField0_ & 0x00000800) == 0x00000800)) {
+ output.writeBytes(12, getBitVectorsBytes());
+ }
getUnknownFields().writeTo(output);
}
@@ -7627,6 +7694,10 @@ public final class HbaseMetastoreProto {
size += com.google.protobuf.CodedOutputStream
.computeBytesSize(11, getColumnNameBytes());
}
+ if (((bitField0_ & 0x00000800) == 0x00000800)) {
+ size += com.google.protobuf.CodedOutputStream
+ .computeBytesSize(12, getBitVectorsBytes());
+ }
size += getUnknownFields().getSerializedSize();
memoizedSerializedSize = size;
return size;
@@ -7795,6 +7866,8 @@ public final class HbaseMetastoreProto {
bitField0_ = (bitField0_ & ~0x00000200);
columnName_ = "";
bitField0_ = (bitField0_ & ~0x00000400);
+ bitVectors_ = "";
+ bitField0_ = (bitField0_ & ~0x00000800);
return this;
}
@@ -7891,6 +7964,10 @@ public final class HbaseMetastoreProto {
to_bitField0_ |= 0x00000400;
}
result.columnName_ = columnName_;
+ if (((from_bitField0_ & 0x00000800) == 0x00000800)) {
+ to_bitField0_ |= 0x00000800;
+ }
+ result.bitVectors_ = bitVectors_;
result.bitField0_ = to_bitField0_;
onBuilt();
return result;
@@ -7944,6 +8021,11 @@ public final class HbaseMetastoreProto {
columnName_ = other.columnName_;
onChanged();
}
+ if (other.hasBitVectors()) {
+ bitField0_ |= 0x00000800;
+ bitVectors_ = other.bitVectors_;
+ onChanged();
+ }
this.mergeUnknownFields(other.getUnknownFields());
return this;
}
@@ -8930,6 +9012,80 @@ public final class HbaseMetastoreProto {
return this;
}
+ // optional string bit_vectors = 12;
+ private java.lang.Object bitVectors_ = "";
+ /**
+ * <code>optional string bit_vectors = 12;</code>
+ */
+ public boolean hasBitVectors() {
+ return ((bitField0_ & 0x00000800) == 0x00000800);
+ }
+ /**
+ * <code>optional string bit_vectors = 12;</code>
+ */
+ public java.lang.String getBitVectors() {
+ java.lang.Object ref = bitVectors_;
+ if (!(ref instanceof java.lang.String)) {
+ java.lang.String s = ((com.google.protobuf.ByteString) ref)
+ .toStringUtf8();
+ bitVectors_ = s;
+ return s;
+ } else {
+ return (java.lang.String) ref;
+ }
+ }
+ /**
+ * <code>optional string bit_vectors = 12;</code>
+ */
+ public com.google.protobuf.ByteString
+ getBitVectorsBytes() {
+ java.lang.Object ref = bitVectors_;
+ if (ref instanceof String) {
+ com.google.protobuf.ByteString b =
+ com.google.protobuf.ByteString.copyFromUtf8(
+ (java.lang.String) ref);
+ bitVectors_ = b;
+ return b;
+ } else {
+ return (com.google.protobuf.ByteString) ref;
+ }
+ }
+ /**
+ * <code>optional string bit_vectors = 12;</code>
+ */
+ public Builder setBitVectors(
+ java.lang.String value) {
+ if (value == null) {
+ throw new NullPointerException();
+ }
+ bitField0_ |= 0x00000800;
+ bitVectors_ = value;
+ onChanged();
+ return this;
+ }
+ /**
+ * <code>optional string bit_vectors = 12;</code>
+ */
+ public Builder clearBitVectors() {
+ bitField0_ = (bitField0_ & ~0x00000800);
+ bitVectors_ = getDefaultInstance().getBitVectors();
+ onChanged();
+ return this;
+ }
+ /**
+ * <code>optional string bit_vectors = 12;</code>
+ */
+ public Builder setBitVectorsBytes(
+ com.google.protobuf.ByteString value) {
+ if (value == null) {
+ throw new NullPointerException();
+ }
+ bitField0_ |= 0x00000800;
+ bitVectors_ = value;
+ onChanged();
+ return this;
+ }
+
// @@protoc_insertion_point(builder_scope:org.apache.hadoop.hive.metastore.hbase.ColumnStats)
}
@@ -34506,7 +34662,7 @@ public final class HbaseMetastoreProto {
"grStatsInvalidatorFilter.Entry\022\021\n\trun_ev" +
"ery\030\002 \002(\003\022\034\n\024max_cache_entry_life\030\003 \002(\003\032" +
"?\n\005Entry\022\017\n\007db_name\030\001 \002(\014\022\022\n\ntable_name\030" +
- "\002 \002(\014\022\021\n\tpart_name\030\003 \002(\014\"\335\010\n\013ColumnStats" +
+ "\002 \002(\014\022\021\n\tpart_name\030\003 \002(\014\"\362\010\n\013ColumnStats" +
"\022\025\n\rlast_analyzed\030\001 \001(\003\022\023\n\013column_type\030\002" +
" \002(\t\022\021\n\tnum_nulls\030\003 \001(\003\022\033\n\023num_distinct_" +
"values\030\004 \001(\003\022T\n\nbool_stats\030\005 \001(\0132@.org.a",
@@ -34522,132 +34678,133 @@ public final class HbaseMetastoreProto {
"ve.metastore.hbase.ColumnStats.StringSta",
"ts\022W\n\rdecimal_stats\030\n \001(\0132@.org.apache.h" +
"adoop.hive.metastore.hbase.ColumnStats.D" +
- "ecimalStats\022\023\n\013column_name\030\013 \001(\t\0325\n\014Bool" +
- "eanStats\022\021\n\tnum_trues\030\001 \001(\003\022\022\n\nnum_false" +
- "s\030\002 \001(\003\0322\n\tLongStats\022\021\n\tlow_value\030\001 \001(\022\022" +
- "\022\n\nhigh_value\030\002 \001(\022\0324\n\013DoubleStats\022\021\n\tlo" +
- "w_value\030\001 \001(\001\022\022\n\nhigh_value\030\002 \001(\001\032=\n\013Str" +
- "ingStats\022\026\n\016max_col_length\030\001 \001(\003\022\026\n\016avg_" +
- "col_length\030\002 \001(\001\032\365\001\n\014DecimalStats\022[\n\tlow" +
- "_value\030\001 \001(\0132H.org.apache.hadoop.hive.me",
- "tastore.hbase.ColumnStats.DecimalStats.D" +
- "ecimal\022\\\n\nhigh_value\030\002 \001(\0132H.org.apache." +
- "hadoop.hive.metastore.hbase.ColumnStats." +
- "DecimalStats.Decimal\032*\n\007Decimal\022\020\n\010unsca" +
- "led\030\001 \002(\014\022\r\n\005scale\030\002 \002(\005\"\246\002\n\010Database\022\023\n" +
- "\013description\030\001 \001(\t\022\013\n\003uri\030\002 \001(\t\022F\n\nparam" +
- "eters\030\003 \001(\01322.org.apache.hadoop.hive.met" +
- "astore.hbase.Parameters\022Q\n\nprivileges\030\004 " +
- "\001(\0132=.org.apache.hadoop.hive.metastore.h" +
- "base.PrincipalPrivilegeSet\022\022\n\nowner_name",
- "\030\005 \001(\t\022I\n\nowner_type\030\006 \001(\01625.org.apache." +
- "hadoop.hive.metastore.hbase.PrincipalTyp" +
- "e\"$\n\017DelegationToken\022\021\n\ttoken_str\030\001 \002(\t\"" +
- ":\n\013FieldSchema\022\014\n\004name\030\001 \002(\t\022\014\n\004type\030\002 \002" +
- "(\t\022\017\n\007comment\030\003 \001(\t\"\206\004\n\010Function\022\022\n\nclas" +
- "s_name\030\001 \001(\t\022\022\n\nowner_name\030\002 \001(\t\022I\n\nowne" +
- "r_type\030\003 \001(\01625.org.apache.hadoop.hive.me" +
- "tastore.hbase.PrincipalType\022\023\n\013create_ti" +
- "me\030\004 \001(\022\022T\n\rfunction_type\030\005 \001(\0162=.org.ap" +
- "ache.hadoop.hive.metastore.hbase.Functio",
- "n.FunctionType\022S\n\rresource_uris\030\006 \003(\0132<." +
- "org.apache.hadoop.hive.metastore.hbase.F" +
- "unction.ResourceUri\032\254\001\n\013ResourceUri\022`\n\rr" +
- "esource_type\030\001 \002(\0162I.org.apache.hadoop.h" +
+ "ecimalStats\022\023\n\013column_name\030\013 \001(\t\022\023\n\013bit_" +
+ "vectors\030\014 \001(\t\0325\n\014BooleanStats\022\021\n\tnum_tru" +
+ "es\030\001 \001(\003\022\022\n\nnum_falses\030\002 \001(\003\0322\n\tLongStat" +
+ "s\022\021\n\tlow_value\030\001 \001(\022\022\022\n\nhigh_value\030\002 \001(\022" +
+ "\0324\n\013DoubleStats\022\021\n\tlow_value\030\001 \001(\001\022\022\n\nhi" +
+ "gh_value\030\002 \001(\001\032=\n\013StringStats\022\026\n\016max_col" +
+ "_length\030\001 \001(\003\022\026\n\016avg_col_length\030\002 \001(\001\032\365\001" +
+ "\n\014DecimalStats\022[\n\tlow_value\030\001 \001(\0132H.org.",
+ "apache.hadoop.hive.metastore.hbase.Colum" +
+ "nStats.DecimalStats.Decimal\022\\\n\nhigh_valu" +
+ "e\030\002 \001(\0132H.org.apache.hadoop.hive.metasto" +
+ "re.hbase.ColumnStats.DecimalStats.Decima" +
+ "l\032*\n\007Decimal\022\020\n\010unscaled\030\001 \002(\014\022\r\n\005scale\030" +
+ "\002 \002(\005\"\246\002\n\010Database\022\023\n\013description\030\001 \001(\t\022" +
+ "\013\n\003uri\030\002 \001(\t\022F\n\nparameters\030\003 \001(\01322.org.a" +
+ "pache.hadoop.hive.metastore.hbase.Parame" +
+ "ters\022Q\n\nprivileges\030\004 \001(\0132=.org.apache.ha" +
+ "doop.hive.metastore.hbase.PrincipalPrivi",
+ "legeSet\022\022\n\nowner_name\030\005 \001(\t\022I\n\nowner_typ" +
+ "e\030\006 \001(\01625.org.apache.hadoop.hive.metasto" +
+ "re.hbase.PrincipalType\"$\n\017DelegationToke" +
+ "n\022\021\n\ttoken_str\030\001 \002(\t\":\n\013FieldSchema\022\014\n\004n" +
+ "ame\030\001 \002(\t\022\014\n\004type\030\002 \002(\t\022\017\n\007comment\030\003 \001(\t" +
+ "\"\206\004\n\010Function\022\022\n\nclass_name\030\001 \001(\t\022\022\n\nown" +
+ "er_name\030\002 \001(\t\022I\n\nowner_type\030\003 \001(\01625.org." +
+ "apache.hadoop.hive.metastore.hbase.Princ" +
+ "ipalType\022\023\n\013create_time\030\004 \001(\022\022T\n\rfunctio" +
+ "n_type\030\005 \001(\0162=.org.apache.hadoop.hive.me",
+ "tastore.hbase.Function.FunctionType\022S\n\rr" +
+ "esource_uris\030\006 \003(\0132<.org.apache.hadoop.h" +
"ive.metastore.hbase.Function.ResourceUri" +
- ".ResourceType\022\013\n\003uri\030\002 \002(\t\".\n\014ResourceTy" +
- "pe\022\007\n\003JAR\020\001\022\010\n\004FILE\020\002\022\013\n\007ARCHIVE\020\003\"\030\n\014Fu" +
- "nctionType\022\010\n\004JAVA\020\001\"\037\n\tMasterKey\022\022\n\nmas" +
- "ter_key\030\001 \002(\t\",\n\016ParameterEntry\022\013\n\003key\030\001" +
- " \002(\t\022\r\n\005value\030\002 \002(\t\"W\n\nParameters\022I\n\tpar",
- "ameter\030\001 \003(\01326.org.apache.hadoop.hive.me" +
- "tastore.hbase.ParameterEntry\"\360\001\n\tPartiti" +
- "on\022\023\n\013create_time\030\001 \001(\003\022\030\n\020last_access_t" +
- "ime\030\002 \001(\003\022\020\n\010location\030\003 \001(\t\022I\n\rsd_parame" +
- "ters\030\004 \001(\01322.org.apache.hadoop.hive.meta" +
- "store.hbase.Parameters\022\017\n\007sd_hash\030\005 \002(\014\022" +
- "F\n\nparameters\030\006 \001(\01322.org.apache.hadoop." +
- "hive.metastore.hbase.Parameters\"\204\001\n\032Prin" +
- "cipalPrivilegeSetEntry\022\026\n\016principal_name" +
- "\030\001 \002(\t\022N\n\nprivileges\030\002 \003(\0132:.org.apache.",
- "hadoop.hive.metastore.hbase.PrivilegeGra" +
- "ntInfo\"\275\001\n\025PrincipalPrivilegeSet\022Q\n\005user" +
- "s\030\001 \003(\0132B.org.apache.hadoop.hive.metasto" +
- "re.hbase.PrincipalPrivilegeSetEntry\022Q\n\005r" +
- "oles\030\002 \003(\0132B.org.apache.hadoop.hive.meta" +
- "store.hbase.PrincipalPrivilegeSetEntry\"\260" +
- "\001\n\022PrivilegeGrantInfo\022\021\n\tprivilege\030\001 \001(\t" +
- "\022\023\n\013create_time\030\002 \001(\003\022\017\n\007grantor\030\003 \001(\t\022K" +
- "\n\014grantor_type\030\004 \001(\01625.org.apache.hadoop" +
- ".hive.metastore.hbase.PrincipalType\022\024\n\014g",
- "rant_option\030\005 \001(\010\"\374\001\n\rRoleGrantInfo\022\026\n\016p" +
- "rincipal_name\030\001 \002(\t\022M\n\016principal_type\030\002 " +
- "\002(\01625.org.apache.hadoop.hive.metastore.h" +
- "base.PrincipalType\022\020\n\010add_time\030\003 \001(\003\022\017\n\007" +
- "grantor\030\004 \001(\t\022K\n\014grantor_type\030\005 \001(\01625.or" +
- "g.apache.hadoop.hive.metastore.hbase.Pri" +
- "ncipalType\022\024\n\014grant_option\030\006 \001(\010\"^\n\021Role" +
- "GrantInfoList\022I\n\ngrant_info\030\001 \003(\01325.org." +
- "apache.hadoop.hive.metastore.hbase.RoleG" +
- "rantInfo\"\030\n\010RoleList\022\014\n\004role\030\001 \003(\t\"/\n\004Ro",
- "le\022\023\n\013create_time\030\001 \001(\003\022\022\n\nowner_name\030\002 " +
- "\001(\t\"\254\010\n\021StorageDescriptor\022A\n\004cols\030\001 \003(\0132" +
- "3.org.apache.hadoop.hive.metastore.hbase" +
- ".FieldSchema\022\024\n\014input_format\030\002 \001(\t\022\025\n\rou" +
- "tput_format\030\003 \001(\t\022\025\n\ris_compressed\030\004 \001(\010" +
- "\022\023\n\013num_buckets\030\005 \001(\021\022W\n\nserde_info\030\006 \001(" +
- "\0132C.org.apache.hadoop.hive.metastore.hba" +
- "se.StorageDescriptor.SerDeInfo\022\023\n\013bucket" +
- "_cols\030\007 \003(\t\022R\n\tsort_cols\030\010 \003(\0132?.org.apa" +
- "che.hadoop.hive.metastore.hbase.StorageD",
- "escriptor.Order\022Y\n\013skewed_info\030\t \001(\0132D.o" +
- "rg.apache.hadoop.hive.metastore.hbase.St" +
- "orageDescriptor.SkewedInfo\022!\n\031stored_as_" +
- "sub_directories\030\n \001(\010\032.\n\005Order\022\023\n\013column" +
- "_name\030\001 \002(\t\022\020\n\005order\030\002 \001(\021:\0011\032|\n\tSerDeIn" +
- "fo\022\014\n\004name\030\001 \001(\t\022\031\n\021serialization_lib\030\002 " +
- "\001(\t\022F\n\nparameters\030\003 \001(\01322.org.apache.had" +
- "oop.hive.metastore.hbase.Parameters\032\214\003\n\n" +
- "SkewedInfo\022\030\n\020skewed_col_names\030\001 \003(\t\022r\n\021" +
- "skewed_col_values\030\002 \003(\0132W.org.apache.had",
- "oop.hive.metastore.hbase.StorageDescript" +
- "or.SkewedInfo.SkewedColValueList\022\206\001\n\036ske" +
- "wed_col_value_location_maps\030\003 \003(\0132^.org." +
- "apache.hadoop.hive.metastore.hbase.Stora" +
- "geDescriptor.SkewedInfo.SkewedColValueLo" +
- "cationMap\032.\n\022SkewedColValueList\022\030\n\020skewe" +
- "d_col_value\030\001 \003(\t\0327\n\031SkewedColValueLocat" +
- "ionMap\022\013\n\003key\030\001 \003(\t\022\r\n\005value\030\002 \002(\t\"\220\004\n\005T" +
- "able\022\r\n\005owner\030\001 \001(\t\022\023\n\013create_time\030\002 \001(\003" +
- "\022\030\n\020last_access_time\030\003 \001(\003\022\021\n\tretention\030",
- "\004 \001(\003\022\020\n\010location\030\005 \001(\t\022I\n\rsd_parameters" +
- "\030\006 \001(\01322.org.apache.hadoop.hive.metastor" +
- "e.hbase.Parameters\022\017\n\007sd_hash\030\007 \002(\014\022K\n\016p" +
- "artition_keys\030\010 \003(\01323.org.apache.hadoop." +
- "hive.metastore.hbase.FieldSchema\022F\n\npara" +
- "meters\030\t \001(\01322.org.apache.hadoop.hive.me" +
- "tastore.hbase.Parameters\022\032\n\022view_origina" +
- "l_text\030\n \001(\t\022\032\n\022view_expanded_text\030\013 \001(\t" +
- "\022\022\n\ntable_type\030\014 \001(\t\022Q\n\nprivileges\030\r \001(\013" +
- "2=.org.apache.hadoop.hive.metastore.hbas",
- "e.PrincipalPrivilegeSet\022\024\n\014is_temporary\030" +
- "\016 \001(\010\"\353\004\n\026PartitionKeyComparator\022\r\n\005name" +
- "s\030\001 \002(\t\022\r\n\005types\030\002 \002(\t\022S\n\002op\030\003 \003(\0132G.org" +
- ".apache.hadoop.hive.metastore.hbase.Part" +
- "itionKeyComparator.Operator\022S\n\005range\030\004 \003" +
- "(\0132D.org.apache.hadoop.hive.metastore.hb" +
- "ase.PartitionKeyComparator.Range\032(\n\004Mark" +
- "\022\r\n\005value\030\001 \002(\t\022\021\n\tinclusive\030\002 \002(\010\032\272\001\n\005R" +
- "ange\022\013\n\003key\030\001 \002(\t\022R\n\005start\030\002 \001(\0132C.org.a" +
- "pache.hadoop.hive.metastore.hbase.Partit",
- "ionKeyComparator.Mark\022P\n\003end\030\003 \001(\0132C.org" +
- ".apache.hadoop.hive.metastore.hbase.Part" +
- "itionKeyComparator.Mark\032\241\001\n\010Operator\022Z\n\004" +
- "type\030\001 \002(\0162L.org.apache.hadoop.hive.meta" +
- "store.hbase.PartitionKeyComparator.Opera" +
- "tor.Type\022\013\n\003key\030\002 \002(\t\022\013\n\003val\030\003 \002(\t\"\037\n\004Ty" +
- "pe\022\010\n\004LIKE\020\000\022\r\n\tNOTEQUALS\020\001*#\n\rPrincipal" +
- "Type\022\010\n\004USER\020\000\022\010\n\004ROLE\020\001"
+ "\032\254\001\n\013ResourceUri\022`\n\rresource_type\030\001 \002(\0162" +
+ "I.org.apache.hadoop.hive.metastore.hbase" +
+ ".Function.ResourceUri.ResourceType\022\013\n\003ur" +
+ "i\030\002 \002(\t\".\n\014ResourceType\022\007\n\003JAR\020\001\022\010\n\004FILE" +
+ "\020\002\022\013\n\007ARCHIVE\020\003\"\030\n\014FunctionType\022\010\n\004JAVA\020" +
+ "\001\"\037\n\tMasterKey\022\022\n\nmaster_key\030\001 \002(\t\",\n\016Pa" +
+ "rameterEntry\022\013\n\003key\030\001 \002(\t\022\r\n\005value\030\002 \002(\t",
+ "\"W\n\nParameters\022I\n\tparameter\030\001 \003(\01326.org." +
+ "apache.hadoop.hive.metastore.hbase.Param" +
+ "eterEntry\"\360\001\n\tPartition\022\023\n\013create_time\030\001" +
+ " \001(\003\022\030\n\020last_access_time\030\002 \001(\003\022\020\n\010locati" +
+ "on\030\003 \001(\t\022I\n\rsd_parameters\030\004 \001(\01322.org.ap" +
+ "ache.hadoop.hive.metastore.hbase.Paramet" +
+ "ers\022\017\n\007sd_hash\030\005 \002(\014\022F\n\nparameters\030\006 \001(\013" +
+ "22.org.apache.hadoop.hive.metastore.hbas" +
+ "e.Parameters\"\204\001\n\032PrincipalPrivilegeSetEn" +
+ "try\022\026\n\016principal_name\030\001 \002(\t\022N\n\nprivilege",
+ "s\030\002 \003(\0132:.org.apache.hadoop.hive.metasto" +
+ "re.hbase.PrivilegeGrantInfo\"\275\001\n\025Principa" +
+ "lPrivilegeSet\022Q\n\005users\030\001 \003(\0132B.org.apach" +
+ "e.hadoop.hive.metastore.hbase.PrincipalP" +
+ "rivilegeSetEntry\022Q\n\005roles\030\002 \003(\0132B.org.ap" +
+ "ache.hadoop.hive.metastore.hbase.Princip" +
+ "alPrivilegeSetEntry\"\260\001\n\022PrivilegeGrantIn" +
+ "fo\022\021\n\tprivilege\030\001 \001(\t\022\023\n\013create_time\030\002 \001" +
+ "(\003\022\017\n\007grantor\030\003 \001(\t\022K\n\014grantor_type\030\004 \001(" +
+ "\01625.org.apache.hadoop.hive.metastore.hba",
+ "se.PrincipalType\022\024\n\014grant_option\030\005 \001(\010\"\374" +
+ "\001\n\rRoleGrantInfo\022\026\n\016principal_name\030\001 \002(\t" +
+ "\022M\n\016principal_type\030\002 \002(\01625.org.apache.ha" +
+ "doop.hive.metastore.hbase.PrincipalType\022" +
+ "\020\n\010add_time\030\003 \001(\003\022\017\n\007grantor\030\004 \001(\t\022K\n\014gr" +
+ "antor_type\030\005 \001(\01625.org.apache.hadoop.hiv" +
+ "e.metastore.hbase.PrincipalType\022\024\n\014grant" +
+ "_option\030\006 \001(\010\"^\n\021RoleGrantInfoList\022I\n\ngr" +
+ "ant_info\030\001 \003(\01325.org.apache.hadoop.hive." +
+ "metastore.hbase.RoleGrantInfo\"\030\n\010RoleLis",
+ "t\022\014\n\004role\030\001 \003(\t\"/\n\004Role\022\023\n\013create_time\030\001" +
+ " \001(\003\022\022\n\nowner_name\030\002 \001(\t\"\254\010\n\021StorageDesc" +
+ "riptor\022A\n\004cols\030\001 \003(\01323.org.apache.hadoop" +
+ ".hive.metastore.hbase.FieldSchema\022\024\n\014inp" +
+ "ut_format\030\002 \001(\t\022\025\n\routput_format\030\003 \001(\t\022\025" +
+ "\n\ris_compressed\030\004 \001(\010\022\023\n\013num_buckets\030\005 \001" +
+ "(\021\022W\n\nserde_info\030\006 \001(\0132C.org.apache.hado" +
+ "op.hive.metastore.hbase.StorageDescripto" +
+ "r.SerDeInfo\022\023\n\013bucket_cols\030\007 \003(\t\022R\n\tsort" +
+ "_cols\030\010 \003(\0132?.org.apache.hadoop.hive.met",
+ "astore.hbase.StorageDescriptor.Order\022Y\n\013" +
+ "skewed_info\030\t \001(\0132D.org.apache.hadoop.hi" +
+ "ve.metastore.hbase.StorageDescriptor.Ske" +
+ "wedInfo\022!\n\031stored_as_sub_directories\030\n \001" +
+ "(\010\032.\n\005Order\022\023\n\013column_name\030\001 \002(\t\022\020\n\005orde" +
+ "r\030\002 \001(\021:\0011\032|\n\tSerDeInfo\022\014\n\004name\030\001 \001(\t\022\031\n" +
+ "\021serialization_lib\030\002 \001(\t\022F\n\nparameters\030\003" +
+ " \001(\01322.org.apache.hadoop.hive.metastore." +
+ "hbase.Parameters\032\214\003\n\nSkewedInfo\022\030\n\020skewe" +
+ "d_col_names\030\001 \003(\t\022r\n\021skewed_col_values\030\002",
+ " \003(\0132W.org.apache.hadoop.hive.metastore." +
+ "hbase.StorageDescriptor.SkewedInfo.Skewe" +
+ "dColValueList\022\206\001\n\036skewed_col_value_locat" +
+ "ion_maps\030\003 \003(\0132^.org.apache.hadoop.hive." +
+ "metastore.hbase.StorageDescriptor.Skewed" +
+ "Info.SkewedColValueLocationMap\032.\n\022Skewed" +
+ "ColValueList\022\030\n\020skewed_col_value\030\001 \003(\t\0327" +
+ "\n\031SkewedColValueLocationMap\022\013\n\003key\030\001 \003(\t" +
+ "\022\r\n\005value\030\002 \002(\t\"\220\004\n\005Table\022\r\n\005owner\030\001 \001(\t" +
+ "\022\023\n\013create_time\030\002 \001(\003\022\030\n\020last_access_tim",
+ "e\030\003 \001(\003\022\021\n\tretention\030\004 \001(\003\022\020\n\010location\030\005" +
+ " \001(\t\022I\n\rsd_parameters\030\006 \001(\01322.org.apache" +
+ ".hadoop.hive.metastore.hbase.Parameters\022" +
+ "\017\n\007sd_hash\030\007 \002(\014\022K\n\016partition_keys\030\010 \003(\013" +
+ "23.org.apache.hadoop.hive.metastore.hbas" +
+ "e.FieldSchema\022F\n\nparameters\030\t \001(\01322.org." +
+ "apache.hadoop.hive.metastore.hbase.Param" +
+ "eters\022\032\n\022view_original_text\030\n \001(\t\022\032\n\022vie" +
+ "w_expanded_text\030\013 \001(\t\022\022\n\ntable_type\030\014 \001(" +
+ "\t\022Q\n\nprivileges\030\r \001(\0132=.org.apache.hadoo",
+ "p.hive.metastore.hbase.PrincipalPrivileg" +
+ "eSet\022\024\n\014is_temporary\030\016 \001(\010\"\353\004\n\026Partition" +
+ "KeyComparator\022\r\n\005names\030\001 \002(\t\022\r\n\005types\030\002 " +
+ "\002(\t\022S\n\002op\030\003 \003(\0132G.org.apache.hadoop.hive" +
+ ".metastore.hbase.PartitionKeyComparator." +
+ "Operator\022S\n\005range\030\004 \003(\0132D.org.apache.had" +
+ "oop.hive.metastore.hbase.PartitionKeyCom" +
+ "parator.Range\032(\n\004Mark\022\r\n\005value\030\001 \002(\t\022\021\n\t" +
+ "inclusive\030\002 \002(\010\032\272\001\n\005Range\022\013\n\003key\030\001 \002(\t\022R" +
+ "\n\005start\030\002 \001(\0132C.org.apache.hadoop.hive.m",
+ "etastore.hbase.PartitionKeyComparator.Ma" +
+ "rk\022P\n\003end\030\003 \001(\0132C.org.apache.hadoop.hive" +
+ ".metastore.hbase.PartitionKeyComparator." +
+ "Mark\032\241\001\n\010Operator\022Z\n\004type\030\001 \002(\0162L.org.ap" +
+ "ache.hadoop.hive.metastore.hbase.Partiti" +
+ "onKeyComparator.Operator.Type\022\013\n\003key\030\002 \002" +
+ "(\t\022\013\n\003val\030\003 \002(\t\"\037\n\004Type\022\010\n\004LIKE\020\000\022\r\n\tNOT" +
+ "EQUALS\020\001*#\n\rPrincipalType\022\010\n\004USER\020\000\022\010\n\004R" +
+ "OLE\020\001"
};
com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner =
new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() {
@@ -34689,7 +34846,7 @@ public final class HbaseMetastoreProto {
internal_static_org_apache_hadoop_hive_metastore_hbase_ColumnStats_fieldAccessorTable = new
com.google.protobuf.GeneratedMessage.FieldAccessorTable(
internal_static_org_apache_hadoop_hive_metastore_hbase_ColumnStats_descriptor,
- new java.lang.String[] { "LastAnalyzed", "ColumnType", "NumNulls", "NumDistinctValues", "BoolStats", "LongStats", "DoubleStats", "StringStats", "BinaryStats", "DecimalStats", "ColumnName", });
+ new java.lang.String[] { "LastAnalyzed", "ColumnType", "NumNulls", "NumDistinctValues", "BoolStats", "LongStats", "DoubleStats", "StringStats", "BinaryStats", "DecimalStats", "ColumnName", "BitVectors", });
internal_static_org_apache_hadoop_hive_metastore_hbase_ColumnStats_BooleanStats_descriptor =
internal_static_org_apache_hadoop_hive_metastore_hbase_ColumnStats_descriptor.getNestedTypes().get(0);
internal_static_org_apache_hadoop_hive_metastore_hbase_ColumnStats_BooleanStats_fieldAccessorTable = new
http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp
----------------------------------------------------------------------
diff --git a/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp b/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp
index 0203b06..81577b6 100644
--- a/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp
+++ b/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp
@@ -5425,6 +5425,11 @@ void BooleanColumnStatsData::__set_numNulls(const int64_t val) {
this->numNulls = val;
}
+void BooleanColumnStatsData::__set_bitVectors(const std::string& val) {
+ this->bitVectors = val;
+__isset.bitVectors = true;
+}
+
uint32_t BooleanColumnStatsData::read(::apache::thrift::protocol::TProtocol* iprot) {
apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
@@ -5473,6 +5478,14 @@ uint32_t BooleanColumnStatsData::read(::apache::thrift::protocol::TProtocol* ipr
xfer += iprot->skip(ftype);
}
break;
+ case 4:
+ if (ftype == ::apache::thrift::protocol::T_STRING) {
+ xfer += iprot->readString(this->bitVectors);
+ this->__isset.bitVectors = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
default:
xfer += iprot->skip(ftype);
break;
@@ -5508,6 +5521,11 @@ uint32_t BooleanColumnStatsData::write(::apache::thrift::protocol::TProtocol* op
xfer += oprot->writeI64(this->numNulls);
xfer += oprot->writeFieldEnd();
+ if (this->__isset.bitVectors) {
+ xfer += oprot->writeFieldBegin("bitVectors", ::apache::thrift::protocol::T_STRING, 4);
+ xfer += oprot->writeString(this->bitVectors);
+ xfer += oprot->writeFieldEnd();
+ }
xfer += oprot->writeFieldStop();
xfer += oprot->writeStructEnd();
return xfer;
@@ -5518,17 +5536,23 @@ void swap(BooleanColumnStatsData &a, BooleanColumnStatsData &b) {
swap(a.numTrues, b.numTrues);
swap(a.numFalses, b.numFalses);
swap(a.numNulls, b.numNulls);
+ swap(a.bitVectors, b.bitVectors);
+ swap(a.__isset, b.__isset);
}
BooleanColumnStatsData::BooleanColumnStatsData(const BooleanColumnStatsData& other279) {
numTrues = other279.numTrues;
numFalses = other279.numFalses;
numNulls = other279.numNulls;
+ bitVectors = other279.bitVectors;
+ __isset = other279.__isset;
}
BooleanColumnStatsData& BooleanColumnStatsData::operator=(const BooleanColumnStatsData& other280) {
numTrues = other280.numTrues;
numFalses = other280.numFalses;
numNulls = other280.numNulls;
+ bitVectors = other280.bitVectors;
+ __isset = other280.__isset;
return *this;
}
void BooleanColumnStatsData::printTo(std::ostream& out) const {
@@ -5537,6 +5561,7 @@ void BooleanColumnStatsData::printTo(std::ostream& out) const {
out << "numTrues=" << to_string(numTrues);
out << ", " << "numFalses=" << to_string(numFalses);
out << ", " << "numNulls=" << to_string(numNulls);
+ out << ", " << "bitVectors="; (__isset.bitVectors ? (out << to_string(bitVectors)) : (out << "<null>"));
out << ")";
}
@@ -5563,6 +5588,11 @@ void DoubleColumnStatsData::__set_numDVs(const int64_t val) {
this->numDVs = val;
}
+void DoubleColumnStatsData::__set_bitVectors(const std::string& val) {
+ this->bitVectors = val;
+__isset.bitVectors = true;
+}
+
uint32_t DoubleColumnStatsData::read(::apache::thrift::protocol::TProtocol* iprot) {
apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
@@ -5618,6 +5648,14 @@ uint32_t DoubleColumnStatsData::read(::apache::thrift::protocol::TProtocol* ipro
xfer += iprot->skip(ftype);
}
break;
+ case 5:
+ if (ftype == ::apache::thrift::protocol::T_STRING) {
+ xfer += iprot->readString(this->bitVectors);
+ this->__isset.bitVectors = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
default:
xfer += iprot->skip(ftype);
break;
@@ -5657,6 +5695,11 @@ uint32_t DoubleColumnStatsData::write(::apache::thrift::protocol::TProtocol* opr
xfer += oprot->writeI64(this->numDVs);
xfer += oprot->writeFieldEnd();
+ if (this->__isset.bitVectors) {
+ xfer += oprot->writeFieldBegin("bitVectors", ::apache::thrift::protocol::T_STRING, 5);
+ xfer += oprot->writeString(this->bitVectors);
+ xfer += oprot->writeFieldEnd();
+ }
xfer += oprot->writeFieldStop();
xfer += oprot->writeStructEnd();
return xfer;
@@ -5668,6 +5711,7 @@ void swap(DoubleColumnStatsData &a, DoubleColumnStatsData &b) {
swap(a.highValue, b.highValue);
swap(a.numNulls, b.numNulls);
swap(a.numDVs, b.numDVs);
+ swap(a.bitVectors, b.bitVectors);
swap(a.__isset, b.__isset);
}
@@ -5676,6 +5720,7 @@ DoubleColumnStatsData::DoubleColumnStatsData(const DoubleColumnStatsData& other2
highValue = other281.highValue;
numNulls = other281.numNulls;
numDVs = other281.numDVs;
+ bitVectors = other281.bitVectors;
__isset = other281.__isset;
}
DoubleColumnStatsData& DoubleColumnStatsData::operator=(const DoubleColumnStatsData& other282) {
@@ -5683,6 +5728,7 @@ DoubleColumnStatsData& DoubleColumnStatsData::operator=(const DoubleColumnStatsD
highValue = other282.highValue;
numNulls = other282.numNulls;
numDVs = other282.numDVs;
+ bitVectors = other282.bitVectors;
__isset = other282.__isset;
return *this;
}
@@ -5693,6 +5739,7 @@ void DoubleColumnStatsData::printTo(std::ostream& out) const {
out << ", " << "highValue="; (__isset.highValue ? (out << to_string(highValue)) : (out << "<null>"));
out << ", " << "numNulls=" << to_string(numNulls);
out << ", " << "numDVs=" << to_string(numDVs);
+ out << ", " << "bitVectors="; (__isset.bitVectors ? (out << to_string(bitVectors)) : (out << "<null>"));
out << ")";
}
@@ -5719,6 +5766,11 @@ void LongColumnStatsData::__set_numDVs(const int64_t val) {
this->numDVs = val;
}
+void LongColumnStatsData::__set_bitVectors(const std::string& val) {
+ this->bitVectors = val;
+__isset.bitVectors = true;
+}
+
uint32_t LongColumnStatsData::read(::apache::thrift::protocol::TProtocol* iprot) {
apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
@@ -5774,6 +5826,14 @@ uint32_t LongColumnStatsData::read(::apache::thrift::protocol::TProtocol* iprot)
xfer += iprot->skip(ftype);
}
break;
+ case 5:
+ if (ftype == ::apache::thrift::protocol::T_STRING) {
+ xfer += iprot->readString(this->bitVectors);
+ this->__isset.bitVectors = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
default:
xfer += iprot->skip(ftype);
break;
@@ -5813,6 +5873,11 @@ uint32_t LongColumnStatsData::write(::apache::thrift::protocol::TProtocol* oprot
xfer += oprot->writeI64(this->numDVs);
xfer += oprot->writeFieldEnd();
+ if (this->__isset.bitVectors) {
+ xfer += oprot->writeFieldBegin("bitVectors", ::apache::thrift::protocol::T_STRING, 5);
+ xfer += oprot->writeString(this->bitVectors);
+ xfer += oprot->writeFieldEnd();
+ }
xfer += oprot->writeFieldStop();
xfer += oprot->writeStructEnd();
return xfer;
@@ -5824,6 +5889,7 @@ void swap(LongColumnStatsData &a, LongColumnStatsData &b) {
swap(a.highValue, b.highValue);
swap(a.numNulls, b.numNulls);
swap(a.numDVs, b.numDVs);
+ swap(a.bitVectors, b.bitVectors);
swap(a.__isset, b.__isset);
}
@@ -5832,6 +5898,7 @@ LongColumnStatsData::LongColumnStatsData(const LongColumnStatsData& other283) {
highValue = other283.highValue;
numNulls = other283.numNulls;
numDVs = other283.numDVs;
+ bitVectors = other283.bitVectors;
__isset = other283.__isset;
}
LongColumnStatsData& LongColumnStatsData::operator=(const LongColumnStatsData& other284) {
@@ -5839,6 +5906,7 @@ LongColumnStatsData& LongColumnStatsData::operator=(const LongColumnStatsData& o
highValue = other284.highValue;
numNulls = other284.numNulls;
numDVs = other284.numDVs;
+ bitVectors = other284.bitVectors;
__isset = other284.__isset;
return *this;
}
@@ -5849,6 +5917,7 @@ void LongColumnStatsData::printTo(std::ostream& out) const {
out << ", " << "highValue="; (__isset.highValue ? (out << to_string(highValue)) : (out << "<null>"));
out << ", " << "numNulls=" << to_string(numNulls);
out << ", " << "numDVs=" << to_string(numDVs);
+ out << ", " << "bitVectors="; (__isset.bitVectors ? (out << to_string(bitVectors)) : (out << "<null>"));
out << ")";
}
@@ -5873,6 +5942,11 @@ void StringColumnStatsData::__set_numDVs(const int64_t val) {
this->numDVs = val;
}
+void StringColumnStatsData::__set_bitVectors(const std::string& val) {
+ this->bitVectors = val;
+__isset.bitVectors = true;
+}
+
uint32_t StringColumnStatsData::read(::apache::thrift::protocol::TProtocol* iprot) {
apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
@@ -5930,6 +6004,14 @@ uint32_t StringColumnStatsData::read(::apache::thrift::protocol::TProtocol* ipro
xfer += iprot->skip(ftype);
}
break;
+ case 5:
+ if (ftype == ::apache::thrift::protocol::T_STRING) {
+ xfer += iprot->readString(this->bitVectors);
+ this->__isset.bitVectors = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
default:
xfer += iprot->skip(ftype);
break;
@@ -5971,6 +6053,11 @@ uint32_t StringColumnStatsData::write(::apache::thrift::protocol::TProtocol* opr
xfer += oprot->writeI64(this->numDVs);
xfer += oprot->writeFieldEnd();
+ if (this->__isset.bitVectors) {
+ xfer += oprot->writeFieldBegin("bitVectors", ::apache::thrift::protocol::T_STRING, 5);
+ xfer += oprot->writeString(this->bitVectors);
+ xfer += oprot->writeFieldEnd();
+ }
xfer += oprot->writeFieldStop();
xfer += oprot->writeStructEnd();
return xfer;
@@ -5982,6 +6069,8 @@ void swap(StringColumnStatsData &a, StringColumnStatsData &b) {
swap(a.avgColLen, b.avgColLen);
swap(a.numNulls, b.numNulls);
swap(a.numDVs, b.numDVs);
+ swap(a.bitVectors, b.bitVectors);
+ swap(a.__isset, b.__isset);
}
StringColumnStatsData::StringColumnStatsData(const StringColumnStatsData& other285) {
@@ -5989,12 +6078,16 @@ StringColumnStatsData::StringColumnStatsData(const StringColumnStatsData& other2
avgColLen = other285.avgColLen;
numNulls = other285.numNulls;
numDVs = other285.numDVs;
+ bitVectors = other285.bitVectors;
+ __isset = other285.__isset;
}
StringColumnStatsData& StringColumnStatsData::operator=(const StringColumnStatsData& other286) {
maxColLen = other286.maxColLen;
avgColLen = other286.avgColLen;
numNulls = other286.numNulls;
numDVs = other286.numDVs;
+ bitVectors = other286.bitVectors;
+ __isset = other286.__isset;
return *this;
}
void StringColumnStatsData::printTo(std::ostream& out) const {
@@ -6004,6 +6097,7 @@ void StringColumnStatsData::printTo(std::ostream& out) const {
out << ", " << "avgColLen=" << to_string(avgColLen);
out << ", " << "numNulls=" << to_string(numNulls);
out << ", " << "numDVs=" << to_string(numDVs);
+ out << ", " << "bitVectors="; (__isset.bitVectors ? (out << to_string(bitVectors)) : (out << "<null>"));
out << ")";
}
@@ -6024,6 +6118,11 @@ void BinaryColumnStatsData::__set_numNulls(const int64_t val) {
this->numNulls = val;
}
+void BinaryColumnStatsData::__set_bitVectors(const std::string& val) {
+ this->bitVectors = val;
+__isset.bitVectors = true;
+}
+
uint32_t BinaryColumnStatsData::read(::apache::thrift::protocol::TProtocol* iprot) {
apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
@@ -6072,6 +6171,14 @@ uint32_t BinaryColumnStatsData::read(::apache::thrift::protocol::TProtocol* ipro
xfer += iprot->skip(ftype);
}
break;
+ case 4:
+ if (ftype == ::apache::thrift::protocol::T_STRING) {
+ xfer += iprot->readString(this->bitVectors);
+ this->__isset.bitVectors = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
default:
xfer += iprot->skip(ftype);
break;
@@ -6107,6 +6214,11 @@ uint32_t BinaryColumnStatsData::write(::apache::thrift::protocol::TProtocol* opr
xfer += oprot->writeI64(this->numNulls);
xfer += oprot->writeFieldEnd();
+ if (this->__isset.bitVectors) {
+ xfer += oprot->writeFieldBegin("bitVectors", ::apache::thrift::protocol::T_STRING, 4);
+ xfer += oprot->writeString(this->bitVectors);
+ xfer += oprot->writeFieldEnd();
+ }
xfer += oprot->writeFieldStop();
xfer += oprot->writeStructEnd();
return xfer;
@@ -6117,17 +6229,23 @@ void swap(BinaryColumnStatsData &a, BinaryColumnStatsData &b) {
swap(a.maxColLen, b.maxColLen);
swap(a.avgColLen, b.avgColLen);
swap(a.numNulls, b.numNulls);
+ swap(a.bitVectors, b.bitVectors);
+ swap(a.__isset, b.__isset);
}
BinaryColumnStatsData::BinaryColumnStatsData(const BinaryColumnStatsData& other287) {
maxColLen = other287.maxColLen;
avgColLen = other287.avgColLen;
numNulls = other287.numNulls;
+ bitVectors = other287.bitVectors;
+ __isset = other287.__isset;
}
BinaryColumnStatsData& BinaryColumnStatsData::operator=(const BinaryColumnStatsData& other288) {
maxColLen = other288.maxColLen;
avgColLen = other288.avgColLen;
numNulls = other288.numNulls;
+ bitVectors = other288.bitVectors;
+ __isset = other288.__isset;
return *this;
}
void BinaryColumnStatsData::printTo(std::ostream& out) const {
@@ -6136,6 +6254,7 @@ void BinaryColumnStatsData::printTo(std::ostream& out) const {
out << "maxColLen=" << to_string(maxColLen);
out << ", " << "avgColLen=" << to_string(avgColLen);
out << ", " << "numNulls=" << to_string(numNulls);
+ out << ", " << "bitVectors="; (__isset.bitVectors ? (out << to_string(bitVectors)) : (out << "<null>"));
out << ")";
}
@@ -6271,6 +6390,11 @@ void DecimalColumnStatsData::__set_numDVs(const int64_t val) {
this->numDVs = val;
}
+void DecimalColumnStatsData::__set_bitVectors(const std::string& val) {
+ this->bitVectors = val;
+__isset.bitVectors = true;
+}
+
uint32_t DecimalColumnStatsData::read(::apache::thrift::protocol::TProtocol* iprot) {
apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
@@ -6326,6 +6450,14 @@ uint32_t DecimalColumnStatsData::read(::apache::thrift::protocol::TProtocol* ipr
xfer += iprot->skip(ftype);
}
break;
+ case 5:
+ if (ftype == ::apache::thrift::protocol::T_STRING) {
+ xfer += iprot->readString(this->bitVectors);
+ this->__isset.bitVectors = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
default:
xfer += iprot->skip(ftype);
break;
@@ -6365,6 +6497,11 @@ uint32_t DecimalColumnStatsData::write(::apache::thrift::protocol::TProtocol* op
xfer += oprot->writeI64(this->numDVs);
xfer += oprot->writeFieldEnd();
+ if (this->__isset.bitVectors) {
+ xfer += oprot->writeFieldBegin("bitVectors", ::apache::thrift::protocol::T_STRING, 5);
+ xfer += oprot->writeString(this->bitVectors);
+ xfer += oprot->writeFieldEnd();
+ }
xfer += oprot->writeFieldStop();
xfer += oprot->writeStructEnd();
return xfer;
@@ -6376,6 +6513,7 @@ void swap(DecimalColumnStatsData &a, DecimalColumnStatsData &b) {
swap(a.highValue, b.highValue);
swap(a.numNulls, b.numNulls);
swap(a.numDVs, b.numDVs);
+ swap(a.bitVectors, b.bitVectors);
swap(a.__isset, b.__isset);
}
@@ -6384,6 +6522,7 @@ DecimalColumnStatsData::DecimalColumnStatsData(const DecimalColumnStatsData& oth
highValue = other291.highValue;
numNulls = other291.numNulls;
numDVs = other291.numDVs;
+ bitVectors = other291.bitVectors;
__isset = other291.__isset;
}
DecimalColumnStatsData& DecimalColumnStatsData::operator=(const DecimalColumnStatsData& other292) {
@@ -6391,6 +6530,7 @@ DecimalColumnStatsData& DecimalColumnStatsData::operator=(const DecimalColumnSta
highValue = other292.highValue;
numNulls = other292.numNulls;
numDVs = other292.numDVs;
+ bitVectors = other292.bitVectors;
__isset = other292.__isset;
return *this;
}
@@ -6401,6 +6541,7 @@ void DecimalColumnStatsData::printTo(std::ostream& out) const {
out << ", " << "highValue="; (__isset.highValue ? (out << to_string(highValue)) : (out << "<null>"));
out << ", " << "numNulls=" << to_string(numNulls);
out << ", " << "numDVs=" << to_string(numDVs);
+ out << ", " << "bitVectors="; (__isset.bitVectors ? (out << to_string(bitVectors)) : (out << "<null>"));
out << ")";
}
@@ -6513,6 +6654,11 @@ void DateColumnStatsData::__set_numDVs(const int64_t val) {
this->numDVs = val;
}
+void DateColumnStatsData::__set_bitVectors(const std::string& val) {
+ this->bitVectors = val;
+__isset.bitVectors = true;
+}
+
uint32_t DateColumnStatsData::read(::apache::thrift::protocol::TProtocol* iprot) {
apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
@@ -6568,6 +6714,14 @@ uint32_t DateColumnStatsData::read(::apache::thrift::protocol::TProtocol* iprot)
xfer += iprot->skip(ftype);
}
break;
+ case 5:
+ if (ftype == ::apache::thrift::protocol::T_STRING) {
+ xfer += iprot->readString(this->bitVectors);
+ this->__isset.bitVectors = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
default:
xfer += iprot->skip(ftype);
break;
@@ -6607,6 +6761,11 @@ uint32_t DateColumnStatsData::write(::apache::thrift::protocol::TProtocol* oprot
xfer += oprot->writeI64(this->numDVs);
xfer += oprot->writeFieldEnd();
+ if (this->__isset.bitVectors) {
+ xfer += oprot->writeFieldBegin("bitVectors", ::apache::thrift::protocol::T_STRING, 5);
+ xfer += oprot->writeString(this->bitVectors);
+ xfer += oprot->writeFieldEnd();
+ }
xfer += oprot->writeFieldStop();
xfer += oprot->writeStructEnd();
return xfer;
@@ -6618,6 +6777,7 @@ void swap(DateColumnStatsData &a, DateColumnStatsData &b) {
swap(a.highValue, b.highValue);
swap(a.numNulls, b.numNulls);
swap(a.numDVs, b.numDVs);
+ swap(a.bitVectors, b.bitVectors);
swap(a.__isset, b.__isset);
}
@@ -6626,6 +6786,7 @@ DateColumnStatsData::DateColumnStatsData(const DateColumnStatsData& other295) {
highValue = other295.highValue;
numNulls = other295.numNulls;
numDVs = other295.numDVs;
+ bitVectors = other295.bitVectors;
__isset = other295.__isset;
}
DateColumnStatsData& DateColumnStatsData::operator=(const DateColumnStatsData& other296) {
@@ -6633,6 +6794,7 @@ DateColumnStatsData& DateColumnStatsData::operator=(const DateColumnStatsData& o
highValue = other296.highValue;
numNulls = other296.numNulls;
numDVs = other296.numDVs;
+ bitVectors = other296.bitVectors;
__isset = other296.__isset;
return *this;
}
@@ -6643,6 +6805,7 @@ void DateColumnStatsData::printTo(std::ostream& out) const {
out << ", " << "highValue="; (__isset.highValue ? (out << to_string(highValue)) : (out << "<null>"));
out << ", " << "numNulls=" << to_string(numNulls);
out << ", " << "numDVs=" << to_string(numDVs);
+ out << ", " << "bitVectors="; (__isset.bitVectors ? (out << to_string(bitVectors)) : (out << "<null>"));
out << ")";
}
http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h
----------------------------------------------------------------------
diff --git a/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h b/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h
index ce1d7da..c501ac0 100644
--- a/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h
+++ b/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h
@@ -2364,19 +2364,26 @@ inline std::ostream& operator<<(std::ostream& out, const Index& obj)
return out;
}
+typedef struct _BooleanColumnStatsData__isset {
+ _BooleanColumnStatsData__isset() : bitVectors(false) {}
+ bool bitVectors :1;
+} _BooleanColumnStatsData__isset;
class BooleanColumnStatsData {
public:
BooleanColumnStatsData(const BooleanColumnStatsData&);
BooleanColumnStatsData& operator=(const BooleanColumnStatsData&);
- BooleanColumnStatsData() : numTrues(0), numFalses(0), numNulls(0) {
+ BooleanColumnStatsData() : numTrues(0), numFalses(0), numNulls(0), bitVectors() {
}
virtual ~BooleanColumnStatsData() throw();
int64_t numTrues;
int64_t numFalses;
int64_t numNulls;
+ std::string bitVectors;
+
+ _BooleanColumnStatsData__isset __isset;
void __set_numTrues(const int64_t val);
@@ -2384,6 +2391,8 @@ class BooleanColumnStatsData {
void __set_numNulls(const int64_t val);
+ void __set_bitVectors(const std::string& val);
+
bool operator == (const BooleanColumnStatsData & rhs) const
{
if (!(numTrues == rhs.numTrues))
@@ -2392,6 +2401,10 @@ class BooleanColumnStatsData {
return false;
if (!(numNulls == rhs.numNulls))
return false;
+ if (__isset.bitVectors != rhs.__isset.bitVectors)
+ return false;
+ else if (__isset.bitVectors && !(bitVectors == rhs.bitVectors))
+ return false;
return true;
}
bool operator != (const BooleanColumnStatsData &rhs) const {
@@ -2415,9 +2428,10 @@ inline std::ostream& operator<<(std::ostream& out, const BooleanColumnStatsData&
}
typedef struct _DoubleColumnStatsData__isset {
- _DoubleColumnStatsData__isset() : lowValue(false), highValue(false) {}
+ _DoubleColumnStatsData__isset() : lowValue(false), highValue(false), bitVectors(false) {}
bool lowValue :1;
bool highValue :1;
+ bool bitVectors :1;
} _DoubleColumnStatsData__isset;
class DoubleColumnStatsData {
@@ -2425,7 +2439,7 @@ class DoubleColumnStatsData {
DoubleColumnStatsData(const DoubleColumnStatsData&);
DoubleColumnStatsData& operator=(const DoubleColumnStatsData&);
- DoubleColumnStatsData() : lowValue(0), highValue(0), numNulls(0), numDVs(0) {
+ DoubleColumnStatsData() : lowValue(0), highValue(0), numNulls(0), numDVs(0), bitVectors() {
}
virtual ~DoubleColumnStatsData() throw();
@@ -2433,6 +2447,7 @@ class DoubleColumnStatsData {
double highValue;
int64_t numNulls;
int64_t numDVs;
+ std::string bitVectors;
_DoubleColumnStatsData__isset __isset;
@@ -2444,6 +2459,8 @@ class DoubleColumnStatsData {
void __set_numDVs(const int64_t val);
+ void __set_bitVectors(const std::string& val);
+
bool operator == (const DoubleColumnStatsData & rhs) const
{
if (__isset.lowValue != rhs.__isset.lowValue)
@@ -2458,6 +2475,10 @@ class DoubleColumnStatsData {
return false;
if (!(numDVs == rhs.numDVs))
return false;
+ if (__isset.bitVectors != rhs.__isset.bitVectors)
+ return false;
+ else if (__isset.bitVectors && !(bitVectors == rhs.bitVectors))
+ return false;
return true;
}
bool operator != (const DoubleColumnStatsData &rhs) const {
@@ -2481,9 +2502,10 @@ inline std::ostream& operator<<(std::ostream& out, const DoubleColumnStatsData&
}
typedef struct _LongColumnStatsData__isset {
- _LongColumnStatsData__isset() : lowValue(false), highValue(false) {}
+ _LongColumnStatsData__isset() : lowValue(false), highValue(false), bitVectors(false) {}
bool lowValue :1;
bool highValue :1;
+ bool bitVectors :1;
} _LongColumnStatsData__isset;
class LongColumnStatsData {
@@ -2491,7 +2513,7 @@ class LongColumnStatsData {
LongColumnStatsData(const LongColumnStatsData&);
LongColumnStatsData& operator=(const LongColumnStatsData&);
- LongColumnStatsData() : lowValue(0), highValue(0), numNulls(0), numDVs(0) {
+ LongColumnStatsData() : lowValue(0), highValue(0), numNulls(0), numDVs(0), bitVectors() {
}
virtual ~LongColumnStatsData() throw();
@@ -2499,6 +2521,7 @@ class LongColumnStatsData {
int64_t highValue;
int64_t numNulls;
int64_t numDVs;
+ std::string bitVectors;
_LongColumnStatsData__isset __isset;
@@ -2510,6 +2533,8 @@ class LongColumnStatsData {
void __set_numDVs(const int64_t val);
+ void __set_bitVectors(const std::string& val);
+
bool operator == (const LongColumnStatsData & rhs) const
{
if (__isset.lowValue != rhs.__isset.lowValue)
@@ -2524,6 +2549,10 @@ class LongColumnStatsData {
return false;
if (!(numDVs == rhs.numDVs))
return false;
+ if (__isset.bitVectors != rhs.__isset.bitVectors)
+ return false;
+ else if (__isset.bitVectors && !(bitVectors == rhs.bitVectors))
+ return false;
return true;
}
bool operator != (const LongColumnStatsData &rhs) const {
@@ -2546,13 +2575,17 @@ inline std::ostream& operator<<(std::ostream& out, const LongColumnStatsData& ob
return out;
}
+typedef struct _StringColumnStatsData__isset {
+ _StringColumnStatsData__isset() : bitVectors(false) {}
+ bool bitVectors :1;
+} _StringColumnStatsData__isset;
class StringColumnStatsData {
public:
StringColumnStatsData(const StringColumnStatsData&);
StringColumnStatsData& operator=(const StringColumnStatsData&);
- StringColumnStatsData() : maxColLen(0), avgColLen(0), numNulls(0), numDVs(0) {
+ StringColumnStatsData() : maxColLen(0), avgColLen(0), numNulls(0), numDVs(0), bitVectors() {
}
virtual ~StringColumnStatsData() throw();
@@ -2560,6 +2593,9 @@ class StringColumnStatsData {
double avgColLen;
int64_t numNulls;
int64_t numDVs;
+ std::string bitVectors;
+
+ _StringColumnStatsData__isset __isset;
void __set_maxColLen(const int64_t val);
@@ -2569,6 +2605,8 @@ class StringColumnStatsData {
void __set_numDVs(const int64_t val);
+ void __set_bitVectors(const std::string& val);
+
bool operator == (const StringColumnStatsData & rhs) const
{
if (!(maxColLen == rhs.maxColLen))
@@ -2579,6 +2617,10 @@ class StringColumnStatsData {
return false;
if (!(numDVs == rhs.numDVs))
return false;
+ if (__isset.bitVectors != rhs.__isset.bitVectors)
+ return false;
+ else if (__isset.bitVectors && !(bitVectors == rhs.bitVectors))
+ return false;
return true;
}
bool operator != (const StringColumnStatsData &rhs) const {
@@ -2601,19 +2643,26 @@ inline std::ostream& operator<<(std::ostream& out, const StringColumnStatsData&
return out;
}
+typedef struct _BinaryColumnStatsData__isset {
+ _BinaryColumnStatsData__isset() : bitVectors(false) {}
+ bool bitVectors :1;
+} _BinaryColumnStatsData__isset;
class BinaryColumnStatsData {
public:
BinaryColumnStatsData(const BinaryColumnStatsData&);
BinaryColumnStatsData& operator=(const BinaryColumnStatsData&);
- BinaryColumnStatsData() : maxColLen(0), avgColLen(0), numNulls(0) {
+ BinaryColumnStatsData() : maxColLen(0), avgColLen(0), numNulls(0), bitVectors() {
}
virtual ~BinaryColumnStatsData() throw();
int64_t maxColLen;
double avgColLen;
int64_t numNulls;
+ std::string bitVectors;
+
+ _BinaryColumnStatsData__isset __isset;
void __set_maxColLen(const int64_t val);
@@ -2621,6 +2670,8 @@ class BinaryColumnStatsData {
void __set_numNulls(const int64_t val);
+ void __set_bitVectors(const std::string& val);
+
bool operator == (const BinaryColumnStatsData & rhs) const
{
if (!(maxColLen == rhs.maxColLen))
@@ -2629,6 +2680,10 @@ class BinaryColumnStatsData {
return false;
if (!(numNulls == rhs.numNulls))
return false;
+ if (__isset.bitVectors != rhs.__isset.bitVectors)
+ return false;
+ else if (__isset.bitVectors && !(bitVectors == rhs.bitVectors))
+ return false;
return true;
}
bool operator != (const BinaryColumnStatsData &rhs) const {
@@ -2697,9 +2752,10 @@ inline std::ostream& operator<<(std::ostream& out, const Decimal& obj)
}
typedef struct _DecimalColumnStatsData__isset {
- _DecimalColumnStatsData__isset() : lowValue(false), highValue(false) {}
+ _DecimalColumnStatsData__isset() : lowValue(false), highValue(false), bitVectors(false) {}
bool lowValue :1;
bool highValue :1;
+ bool bitVectors :1;
} _DecimalColumnStatsData__isset;
class DecimalColumnStatsData {
@@ -2707,7 +2763,7 @@ class DecimalColumnStatsData {
DecimalColumnStatsData(const DecimalColumnStatsData&);
DecimalColumnStatsData& operator=(const DecimalColumnStatsData&);
- DecimalColumnStatsData() : numNulls(0), numDVs(0) {
+ DecimalColumnStatsData() : numNulls(0), numDVs(0), bitVectors() {
}
virtual ~DecimalColumnStatsData() throw();
@@ -2715,6 +2771,7 @@ class DecimalColumnStatsData {
Decimal highValue;
int64_t numNulls;
int64_t numDVs;
+ std::string bitVectors;
_DecimalColumnStatsData__isset __isset;
@@ -2726,6 +2783,8 @@ class DecimalColumnStatsData {
void __set_numDVs(const int64_t val);
+ void __set_bitVectors(const std::string& val);
+
bool operator == (const DecimalColumnStatsData & rhs) const
{
if (__isset.lowValue != rhs.__isset.lowValue)
@@ -2740,6 +2799,10 @@ class DecimalColumnStatsData {
return false;
if (!(numDVs == rhs.numDVs))
return false;
+ if (__isset.bitVectors != rhs.__isset.bitVectors)
+ return false;
+ else if (__isset.bitVectors && !(bitVectors == rhs.bitVectors))
+ return false;
return true;
}
bool operator != (const DecimalColumnStatsData &rhs) const {
@@ -2803,9 +2866,10 @@ inline std::ostream& operator<<(std::ostream& out, const Date& obj)
}
typedef struct _DateColumnStatsData__isset {
- _DateColumnStatsData__isset() : lowValue(false), highValue(false) {}
+ _DateColumnStatsData__isset() : lowValue(false), highValue(false), bitVectors(false) {}
bool lowValue :1;
bool highValue :1;
+ bool bitVectors :1;
} _DateColumnStatsData__isset;
class DateColumnStatsData {
@@ -2813,7 +2877,7 @@ class DateColumnStatsData {
DateColumnStatsData(const DateColumnStatsData&);
DateColumnStatsData& operator=(const DateColumnStatsData&);
- DateColumnStatsData() : numNulls(0), numDVs(0) {
+ DateColumnStatsData() : numNulls(0), numDVs(0), bitVectors() {
}
virtual ~DateColumnStatsData() throw();
@@ -2821,6 +2885,7 @@ class DateColumnStatsData {
Date highValue;
int64_t numNulls;
int64_t numDVs;
+ std::string bitVectors;
_DateColumnStatsData__isset __isset;
@@ -2832,6 +2897,8 @@ class DateColumnStatsData {
void __set_numDVs(const int64_t val);
+ void __set_bitVectors(const std::string& val);
+
bool operator == (const DateColumnStatsData & rhs) const
{
if (__isset.lowValue != rhs.__isset.lowValue)
@@ -2846,6 +2913,10 @@ class DateColumnStatsData {
return false;
if (!(numDVs == rhs.numDVs))
return false;
+ if (__isset.bitVectors != rhs.__isset.bitVectors)
+ return false;
+ else if (__isset.bitVectors && !(bitVectors == rhs.bitVectors))
+ return false;
return true;
}
bool operator != (const DateColumnStatsData &rhs) const {
http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/BinaryColumnStatsData.java
----------------------------------------------------------------------
diff --git a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/BinaryColumnStatsData.java b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/BinaryColumnStatsData.java
index 84e393c..eeb5105 100644
--- a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/BinaryColumnStatsData.java
+++ b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/BinaryColumnStatsData.java
@@ -41,6 +41,7 @@ public class BinaryColumnStatsData implements org.apache.thrift.TBase<BinaryColu
private static final org.apache.thrift.protocol.TField MAX_COL_LEN_FIELD_DESC = new org.apache.thrift.protocol.TField("maxColLen", org.apache.thrift.protocol.TType.I64, (short)1);
private static final org.apache.thrift.protocol.TField AVG_COL_LEN_FIELD_DESC = new org.apache.thrift.protocol.TField("avgColLen", org.apache.thrift.protocol.TType.DOUBLE, (short)2);
private static final org.apache.thrift.protocol.TField NUM_NULLS_FIELD_DESC = new org.apache.thrift.protocol.TField("numNulls", org.apache.thrift.protocol.TType.I64, (short)3);
+ private static final org.apache.thrift.protocol.TField BIT_VECTORS_FIELD_DESC = new org.apache.thrift.protocol.TField("bitVectors", org.apache.thrift.protocol.TType.STRING, (short)4);
private static final Map<Class<? extends IScheme>, SchemeFactory> schemes = new HashMap<Class<? extends IScheme>, SchemeFactory>();
static {
@@ -51,12 +52,14 @@ public class BinaryColumnStatsData implements org.apache.thrift.TBase<BinaryColu
private long maxColLen; // required
private double avgColLen; // required
private long numNulls; // required
+ private String bitVectors; // optional
/** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */
public enum _Fields implements org.apache.thrift.TFieldIdEnum {
MAX_COL_LEN((short)1, "maxColLen"),
AVG_COL_LEN((short)2, "avgColLen"),
- NUM_NULLS((short)3, "numNulls");
+ NUM_NULLS((short)3, "numNulls"),
+ BIT_VECTORS((short)4, "bitVectors");
private static final Map<String, _Fields> byName = new HashMap<String, _Fields>();
@@ -77,6 +80,8 @@ public class BinaryColumnStatsData implements org.apache.thrift.TBase<BinaryColu
return AVG_COL_LEN;
case 3: // NUM_NULLS
return NUM_NULLS;
+ case 4: // BIT_VECTORS
+ return BIT_VECTORS;
default:
return null;
}
@@ -121,6 +126,7 @@ public class BinaryColumnStatsData implements org.apache.thrift.TBase<BinaryColu
private static final int __AVGCOLLEN_ISSET_ID = 1;
private static final int __NUMNULLS_ISSET_ID = 2;
private byte __isset_bitfield = 0;
+ private static final _Fields optionals[] = {_Fields.BIT_VECTORS};
public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap;
static {
Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class);
@@ -130,6 +136,8 @@ public class BinaryColumnStatsData implements org.apache.thrift.TBase<BinaryColu
new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.DOUBLE)));
tmpMap.put(_Fields.NUM_NULLS, new org.apache.thrift.meta_data.FieldMetaData("numNulls", org.apache.thrift.TFieldRequirementType.REQUIRED,
new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64)));
+ tmpMap.put(_Fields.BIT_VECTORS, new org.apache.thrift.meta_data.FieldMetaData("bitVectors", org.apache.thrift.TFieldRequirementType.OPTIONAL,
+ new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING)));
metaDataMap = Collections.unmodifiableMap(tmpMap);
org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(BinaryColumnStatsData.class, metaDataMap);
}
@@ -159,6 +167,9 @@ public class BinaryColumnStatsData implements org.apache.thrift.TBase<BinaryColu
this.maxColLen = other.maxColLen;
this.avgColLen = other.avgColLen;
this.numNulls = other.numNulls;
+ if (other.isSetBitVectors()) {
+ this.bitVectors = other.bitVectors;
+ }
}
public BinaryColumnStatsData deepCopy() {
@@ -173,6 +184,7 @@ public class BinaryColumnStatsData implements org.apache.thrift.TBase<BinaryColu
this.avgColLen = 0.0;
setNumNullsIsSet(false);
this.numNulls = 0;
+ this.bitVectors = null;
}
public long getMaxColLen() {
@@ -241,6 +253,29 @@ public class BinaryColumnStatsData implements org.apache.thrift.TBase<BinaryColu
__isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __NUMNULLS_ISSET_ID, value);
}
+ public String getBitVectors() {
+ return this.bitVectors;
+ }
+
+ public void setBitVectors(String bitVectors) {
+ this.bitVectors = bitVectors;
+ }
+
+ public void unsetBitVectors() {
+ this.bitVectors = null;
+ }
+
+ /** Returns true if field bitVectors is set (has been assigned a value) and false otherwise */
+ public boolean isSetBitVectors() {
+ return this.bitVectors != null;
+ }
+
+ public void setBitVectorsIsSet(boolean value) {
+ if (!value) {
+ this.bitVectors = null;
+ }
+ }
+
public void setFieldValue(_Fields field, Object value) {
switch (field) {
case MAX_COL_LEN:
@@ -267,6 +302,14 @@ public class BinaryColumnStatsData implements org.apache.thrift.TBase<BinaryColu
}
break;
+ case BIT_VECTORS:
+ if (value == null) {
+ unsetBitVectors();
+ } else {
+ setBitVectors((String)value);
+ }
+ break;
+
}
}
@@ -281,6 +324,9 @@ public class BinaryColumnStatsData implements org.apache.thrift.TBase<BinaryColu
case NUM_NULLS:
return getNumNulls();
+ case BIT_VECTORS:
+ return getBitVectors();
+
}
throw new IllegalStateException();
}
@@ -298,6 +344,8 @@ public class BinaryColumnStatsData implements org.apache.thrift.TBase<BinaryColu
return isSetAvgColLen();
case NUM_NULLS:
return isSetNumNulls();
+ case BIT_VECTORS:
+ return isSetBitVectors();
}
throw new IllegalStateException();
}
@@ -342,6 +390,15 @@ public class BinaryColumnStatsData implements org.apache.thrift.TBase<BinaryColu
return false;
}
+ boolean this_present_bitVectors = true && this.isSetBitVectors();
+ boolean that_present_bitVectors = true && that.isSetBitVectors();
+ if (this_present_bitVectors || that_present_bitVectors) {
+ if (!(this_present_bitVectors && that_present_bitVectors))
+ return false;
+ if (!this.bitVectors.equals(that.bitVectors))
+ return false;
+ }
+
return true;
}
@@ -364,6 +421,11 @@ public class BinaryColumnStatsData implements org.apache.thrift.TBase<BinaryColu
if (present_numNulls)
list.add(numNulls);
+ boolean present_bitVectors = true && (isSetBitVectors());
+ list.add(present_bitVectors);
+ if (present_bitVectors)
+ list.add(bitVectors);
+
return list.hashCode();
}
@@ -405,6 +467,16 @@ public class BinaryColumnStatsData implements org.apache.thrift.TBase<BinaryColu
return lastComparison;
}
}
+ lastComparison = Boolean.valueOf(isSetBitVectors()).compareTo(other.isSetBitVectors());
+ if (lastComparison != 0) {
+ return lastComparison;
+ }
+ if (isSetBitVectors()) {
+ lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.bitVectors, other.bitVectors);
+ if (lastComparison != 0) {
+ return lastComparison;
+ }
+ }
return 0;
}
@@ -436,6 +508,16 @@ public class BinaryColumnStatsData implements org.apache.thrift.TBase<BinaryColu
sb.append("numNulls:");
sb.append(this.numNulls);
first = false;
+ if (isSetBitVectors()) {
+ if (!first) sb.append(", ");
+ sb.append("bitVectors:");
+ if (this.bitVectors == null) {
+ sb.append("null");
+ } else {
+ sb.append(this.bitVectors);
+ }
+ first = false;
+ }
sb.append(")");
return sb.toString();
}
@@ -517,6 +599,14 @@ public class BinaryColumnStatsData implements org.apache.thrift.TBase<BinaryColu
org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
}
break;
+ case 4: // BIT_VECTORS
+ if (schemeField.type == org.apache.thrift.protocol.TType.STRING) {
+ struct.bitVectors = iprot.readString();
+ struct.setBitVectorsIsSet(true);
+ } else {
+ org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
+ }
+ break;
default:
org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
}
@@ -539,6 +629,13 @@ public class BinaryColumnStatsData implements org.apache.thrift.TBase<BinaryColu
oprot.writeFieldBegin(NUM_NULLS_FIELD_DESC);
oprot.writeI64(struct.numNulls);
oprot.writeFieldEnd();
+ if (struct.bitVectors != null) {
+ if (struct.isSetBitVectors()) {
+ oprot.writeFieldBegin(BIT_VECTORS_FIELD_DESC);
+ oprot.writeString(struct.bitVectors);
+ oprot.writeFieldEnd();
+ }
+ }
oprot.writeFieldStop();
oprot.writeStructEnd();
}
@@ -559,6 +656,14 @@ public class BinaryColumnStatsData implements org.apache.thrift.TBase<BinaryColu
oprot.writeI64(struct.maxColLen);
oprot.writeDouble(struct.avgColLen);
oprot.writeI64(struct.numNulls);
+ BitSet optionals = new BitSet();
+ if (struct.isSetBitVectors()) {
+ optionals.set(0);
+ }
+ oprot.writeBitSet(optionals, 1);
+ if (struct.isSetBitVectors()) {
+ oprot.writeString(struct.bitVectors);
+ }
}
@Override
@@ -570,6 +675,11 @@ public class BinaryColumnStatsData implements org.apache.thrift.TBase<BinaryColu
struct.setAvgColLenIsSet(true);
struct.numNulls = iprot.readI64();
struct.setNumNullsIsSet(true);
+ BitSet incoming = iprot.readBitSet(1);
+ if (incoming.get(0)) {
+ struct.bitVectors = iprot.readString();
+ struct.setBitVectorsIsSet(true);
+ }
}
}
[2/4] hive git commit: HIVE-12763: Use bit vector to track NDV
(Pengcheng Xiong, reviewed by Laljo John Pullokkaran and Alan Gates)
Posted by px...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py
----------------------------------------------------------------------
diff --git a/metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py b/metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py
index 409c247..77dd9a6 100644
--- a/metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py
+++ b/metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py
@@ -3749,6 +3749,7 @@ class BooleanColumnStatsData:
- numTrues
- numFalses
- numNulls
+ - bitVectors
"""
thrift_spec = (
@@ -3756,12 +3757,14 @@ class BooleanColumnStatsData:
(1, TType.I64, 'numTrues', None, None, ), # 1
(2, TType.I64, 'numFalses', None, None, ), # 2
(3, TType.I64, 'numNulls', None, None, ), # 3
+ (4, TType.STRING, 'bitVectors', None, None, ), # 4
)
- def __init__(self, numTrues=None, numFalses=None, numNulls=None,):
+ def __init__(self, numTrues=None, numFalses=None, numNulls=None, bitVectors=None,):
self.numTrues = numTrues
self.numFalses = numFalses
self.numNulls = numNulls
+ self.bitVectors = bitVectors
def read(self, iprot):
if iprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None and fastbinary is not None:
@@ -3787,6 +3790,11 @@ class BooleanColumnStatsData:
self.numNulls = iprot.readI64()
else:
iprot.skip(ftype)
+ elif fid == 4:
+ if ftype == TType.STRING:
+ self.bitVectors = iprot.readString()
+ else:
+ iprot.skip(ftype)
else:
iprot.skip(ftype)
iprot.readFieldEnd()
@@ -3809,6 +3817,10 @@ class BooleanColumnStatsData:
oprot.writeFieldBegin('numNulls', TType.I64, 3)
oprot.writeI64(self.numNulls)
oprot.writeFieldEnd()
+ if self.bitVectors is not None:
+ oprot.writeFieldBegin('bitVectors', TType.STRING, 4)
+ oprot.writeString(self.bitVectors)
+ oprot.writeFieldEnd()
oprot.writeFieldStop()
oprot.writeStructEnd()
@@ -3827,6 +3839,7 @@ class BooleanColumnStatsData:
value = (value * 31) ^ hash(self.numTrues)
value = (value * 31) ^ hash(self.numFalses)
value = (value * 31) ^ hash(self.numNulls)
+ value = (value * 31) ^ hash(self.bitVectors)
return value
def __repr__(self):
@@ -3847,6 +3860,7 @@ class DoubleColumnStatsData:
- highValue
- numNulls
- numDVs
+ - bitVectors
"""
thrift_spec = (
@@ -3855,13 +3869,15 @@ class DoubleColumnStatsData:
(2, TType.DOUBLE, 'highValue', None, None, ), # 2
(3, TType.I64, 'numNulls', None, None, ), # 3
(4, TType.I64, 'numDVs', None, None, ), # 4
+ (5, TType.STRING, 'bitVectors', None, None, ), # 5
)
- def __init__(self, lowValue=None, highValue=None, numNulls=None, numDVs=None,):
+ def __init__(self, lowValue=None, highValue=None, numNulls=None, numDVs=None, bitVectors=None,):
self.lowValue = lowValue
self.highValue = highValue
self.numNulls = numNulls
self.numDVs = numDVs
+ self.bitVectors = bitVectors
def read(self, iprot):
if iprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None and fastbinary is not None:
@@ -3892,6 +3908,11 @@ class DoubleColumnStatsData:
self.numDVs = iprot.readI64()
else:
iprot.skip(ftype)
+ elif fid == 5:
+ if ftype == TType.STRING:
+ self.bitVectors = iprot.readString()
+ else:
+ iprot.skip(ftype)
else:
iprot.skip(ftype)
iprot.readFieldEnd()
@@ -3918,6 +3939,10 @@ class DoubleColumnStatsData:
oprot.writeFieldBegin('numDVs', TType.I64, 4)
oprot.writeI64(self.numDVs)
oprot.writeFieldEnd()
+ if self.bitVectors is not None:
+ oprot.writeFieldBegin('bitVectors', TType.STRING, 5)
+ oprot.writeString(self.bitVectors)
+ oprot.writeFieldEnd()
oprot.writeFieldStop()
oprot.writeStructEnd()
@@ -3935,6 +3960,7 @@ class DoubleColumnStatsData:
value = (value * 31) ^ hash(self.highValue)
value = (value * 31) ^ hash(self.numNulls)
value = (value * 31) ^ hash(self.numDVs)
+ value = (value * 31) ^ hash(self.bitVectors)
return value
def __repr__(self):
@@ -3955,6 +3981,7 @@ class LongColumnStatsData:
- highValue
- numNulls
- numDVs
+ - bitVectors
"""
thrift_spec = (
@@ -3963,13 +3990,15 @@ class LongColumnStatsData:
(2, TType.I64, 'highValue', None, None, ), # 2
(3, TType.I64, 'numNulls', None, None, ), # 3
(4, TType.I64, 'numDVs', None, None, ), # 4
+ (5, TType.STRING, 'bitVectors', None, None, ), # 5
)
- def __init__(self, lowValue=None, highValue=None, numNulls=None, numDVs=None,):
+ def __init__(self, lowValue=None, highValue=None, numNulls=None, numDVs=None, bitVectors=None,):
self.lowValue = lowValue
self.highValue = highValue
self.numNulls = numNulls
self.numDVs = numDVs
+ self.bitVectors = bitVectors
def read(self, iprot):
if iprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None and fastbinary is not None:
@@ -4000,6 +4029,11 @@ class LongColumnStatsData:
self.numDVs = iprot.readI64()
else:
iprot.skip(ftype)
+ elif fid == 5:
+ if ftype == TType.STRING:
+ self.bitVectors = iprot.readString()
+ else:
+ iprot.skip(ftype)
else:
iprot.skip(ftype)
iprot.readFieldEnd()
@@ -4026,6 +4060,10 @@ class LongColumnStatsData:
oprot.writeFieldBegin('numDVs', TType.I64, 4)
oprot.writeI64(self.numDVs)
oprot.writeFieldEnd()
+ if self.bitVectors is not None:
+ oprot.writeFieldBegin('bitVectors', TType.STRING, 5)
+ oprot.writeString(self.bitVectors)
+ oprot.writeFieldEnd()
oprot.writeFieldStop()
oprot.writeStructEnd()
@@ -4043,6 +4081,7 @@ class LongColumnStatsData:
value = (value * 31) ^ hash(self.highValue)
value = (value * 31) ^ hash(self.numNulls)
value = (value * 31) ^ hash(self.numDVs)
+ value = (value * 31) ^ hash(self.bitVectors)
return value
def __repr__(self):
@@ -4063,6 +4102,7 @@ class StringColumnStatsData:
- avgColLen
- numNulls
- numDVs
+ - bitVectors
"""
thrift_spec = (
@@ -4071,13 +4111,15 @@ class StringColumnStatsData:
(2, TType.DOUBLE, 'avgColLen', None, None, ), # 2
(3, TType.I64, 'numNulls', None, None, ), # 3
(4, TType.I64, 'numDVs', None, None, ), # 4
+ (5, TType.STRING, 'bitVectors', None, None, ), # 5
)
- def __init__(self, maxColLen=None, avgColLen=None, numNulls=None, numDVs=None,):
+ def __init__(self, maxColLen=None, avgColLen=None, numNulls=None, numDVs=None, bitVectors=None,):
self.maxColLen = maxColLen
self.avgColLen = avgColLen
self.numNulls = numNulls
self.numDVs = numDVs
+ self.bitVectors = bitVectors
def read(self, iprot):
if iprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None and fastbinary is not None:
@@ -4108,6 +4150,11 @@ class StringColumnStatsData:
self.numDVs = iprot.readI64()
else:
iprot.skip(ftype)
+ elif fid == 5:
+ if ftype == TType.STRING:
+ self.bitVectors = iprot.readString()
+ else:
+ iprot.skip(ftype)
else:
iprot.skip(ftype)
iprot.readFieldEnd()
@@ -4134,6 +4181,10 @@ class StringColumnStatsData:
oprot.writeFieldBegin('numDVs', TType.I64, 4)
oprot.writeI64(self.numDVs)
oprot.writeFieldEnd()
+ if self.bitVectors is not None:
+ oprot.writeFieldBegin('bitVectors', TType.STRING, 5)
+ oprot.writeString(self.bitVectors)
+ oprot.writeFieldEnd()
oprot.writeFieldStop()
oprot.writeStructEnd()
@@ -4155,6 +4206,7 @@ class StringColumnStatsData:
value = (value * 31) ^ hash(self.avgColLen)
value = (value * 31) ^ hash(self.numNulls)
value = (value * 31) ^ hash(self.numDVs)
+ value = (value * 31) ^ hash(self.bitVectors)
return value
def __repr__(self):
@@ -4174,6 +4226,7 @@ class BinaryColumnStatsData:
- maxColLen
- avgColLen
- numNulls
+ - bitVectors
"""
thrift_spec = (
@@ -4181,12 +4234,14 @@ class BinaryColumnStatsData:
(1, TType.I64, 'maxColLen', None, None, ), # 1
(2, TType.DOUBLE, 'avgColLen', None, None, ), # 2
(3, TType.I64, 'numNulls', None, None, ), # 3
+ (4, TType.STRING, 'bitVectors', None, None, ), # 4
)
- def __init__(self, maxColLen=None, avgColLen=None, numNulls=None,):
+ def __init__(self, maxColLen=None, avgColLen=None, numNulls=None, bitVectors=None,):
self.maxColLen = maxColLen
self.avgColLen = avgColLen
self.numNulls = numNulls
+ self.bitVectors = bitVectors
def read(self, iprot):
if iprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None and fastbinary is not None:
@@ -4212,6 +4267,11 @@ class BinaryColumnStatsData:
self.numNulls = iprot.readI64()
else:
iprot.skip(ftype)
+ elif fid == 4:
+ if ftype == TType.STRING:
+ self.bitVectors = iprot.readString()
+ else:
+ iprot.skip(ftype)
else:
iprot.skip(ftype)
iprot.readFieldEnd()
@@ -4234,6 +4294,10 @@ class BinaryColumnStatsData:
oprot.writeFieldBegin('numNulls', TType.I64, 3)
oprot.writeI64(self.numNulls)
oprot.writeFieldEnd()
+ if self.bitVectors is not None:
+ oprot.writeFieldBegin('bitVectors', TType.STRING, 4)
+ oprot.writeString(self.bitVectors)
+ oprot.writeFieldEnd()
oprot.writeFieldStop()
oprot.writeStructEnd()
@@ -4252,6 +4316,7 @@ class BinaryColumnStatsData:
value = (value * 31) ^ hash(self.maxColLen)
value = (value * 31) ^ hash(self.avgColLen)
value = (value * 31) ^ hash(self.numNulls)
+ value = (value * 31) ^ hash(self.bitVectors)
return value
def __repr__(self):
@@ -4355,6 +4420,7 @@ class DecimalColumnStatsData:
- highValue
- numNulls
- numDVs
+ - bitVectors
"""
thrift_spec = (
@@ -4363,13 +4429,15 @@ class DecimalColumnStatsData:
(2, TType.STRUCT, 'highValue', (Decimal, Decimal.thrift_spec), None, ), # 2
(3, TType.I64, 'numNulls', None, None, ), # 3
(4, TType.I64, 'numDVs', None, None, ), # 4
+ (5, TType.STRING, 'bitVectors', None, None, ), # 5
)
- def __init__(self, lowValue=None, highValue=None, numNulls=None, numDVs=None,):
+ def __init__(self, lowValue=None, highValue=None, numNulls=None, numDVs=None, bitVectors=None,):
self.lowValue = lowValue
self.highValue = highValue
self.numNulls = numNulls
self.numDVs = numDVs
+ self.bitVectors = bitVectors
def read(self, iprot):
if iprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None and fastbinary is not None:
@@ -4402,6 +4470,11 @@ class DecimalColumnStatsData:
self.numDVs = iprot.readI64()
else:
iprot.skip(ftype)
+ elif fid == 5:
+ if ftype == TType.STRING:
+ self.bitVectors = iprot.readString()
+ else:
+ iprot.skip(ftype)
else:
iprot.skip(ftype)
iprot.readFieldEnd()
@@ -4428,6 +4501,10 @@ class DecimalColumnStatsData:
oprot.writeFieldBegin('numDVs', TType.I64, 4)
oprot.writeI64(self.numDVs)
oprot.writeFieldEnd()
+ if self.bitVectors is not None:
+ oprot.writeFieldBegin('bitVectors', TType.STRING, 5)
+ oprot.writeString(self.bitVectors)
+ oprot.writeFieldEnd()
oprot.writeFieldStop()
oprot.writeStructEnd()
@@ -4445,6 +4522,7 @@ class DecimalColumnStatsData:
value = (value * 31) ^ hash(self.highValue)
value = (value * 31) ^ hash(self.numNulls)
value = (value * 31) ^ hash(self.numDVs)
+ value = (value * 31) ^ hash(self.bitVectors)
return value
def __repr__(self):
@@ -4532,6 +4610,7 @@ class DateColumnStatsData:
- highValue
- numNulls
- numDVs
+ - bitVectors
"""
thrift_spec = (
@@ -4540,13 +4619,15 @@ class DateColumnStatsData:
(2, TType.STRUCT, 'highValue', (Date, Date.thrift_spec), None, ), # 2
(3, TType.I64, 'numNulls', None, None, ), # 3
(4, TType.I64, 'numDVs', None, None, ), # 4
+ (5, TType.STRING, 'bitVectors', None, None, ), # 5
)
- def __init__(self, lowValue=None, highValue=None, numNulls=None, numDVs=None,):
+ def __init__(self, lowValue=None, highValue=None, numNulls=None, numDVs=None, bitVectors=None,):
self.lowValue = lowValue
self.highValue = highValue
self.numNulls = numNulls
self.numDVs = numDVs
+ self.bitVectors = bitVectors
def read(self, iprot):
if iprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None and fastbinary is not None:
@@ -4579,6 +4660,11 @@ class DateColumnStatsData:
self.numDVs = iprot.readI64()
else:
iprot.skip(ftype)
+ elif fid == 5:
+ if ftype == TType.STRING:
+ self.bitVectors = iprot.readString()
+ else:
+ iprot.skip(ftype)
else:
iprot.skip(ftype)
iprot.readFieldEnd()
@@ -4605,6 +4691,10 @@ class DateColumnStatsData:
oprot.writeFieldBegin('numDVs', TType.I64, 4)
oprot.writeI64(self.numDVs)
oprot.writeFieldEnd()
+ if self.bitVectors is not None:
+ oprot.writeFieldBegin('bitVectors', TType.STRING, 5)
+ oprot.writeString(self.bitVectors)
+ oprot.writeFieldEnd()
oprot.writeFieldStop()
oprot.writeStructEnd()
@@ -4622,6 +4712,7 @@ class DateColumnStatsData:
value = (value * 31) ^ hash(self.highValue)
value = (value * 31) ^ hash(self.numNulls)
value = (value * 31) ^ hash(self.numDVs)
+ value = (value * 31) ^ hash(self.bitVectors)
return value
def __repr__(self):
http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb
----------------------------------------------------------------------
diff --git a/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb b/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb
index a473611..2cf433b 100644
--- a/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb
+++ b/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb
@@ -824,11 +824,13 @@ class BooleanColumnStatsData
NUMTRUES = 1
NUMFALSES = 2
NUMNULLS = 3
+ BITVECTORS = 4
FIELDS = {
NUMTRUES => {:type => ::Thrift::Types::I64, :name => 'numTrues'},
NUMFALSES => {:type => ::Thrift::Types::I64, :name => 'numFalses'},
- NUMNULLS => {:type => ::Thrift::Types::I64, :name => 'numNulls'}
+ NUMNULLS => {:type => ::Thrift::Types::I64, :name => 'numNulls'},
+ BITVECTORS => {:type => ::Thrift::Types::STRING, :name => 'bitVectors', :optional => true}
}
def struct_fields; FIELDS; end
@@ -848,12 +850,14 @@ class DoubleColumnStatsData
HIGHVALUE = 2
NUMNULLS = 3
NUMDVS = 4
+ BITVECTORS = 5
FIELDS = {
LOWVALUE => {:type => ::Thrift::Types::DOUBLE, :name => 'lowValue', :optional => true},
HIGHVALUE => {:type => ::Thrift::Types::DOUBLE, :name => 'highValue', :optional => true},
NUMNULLS => {:type => ::Thrift::Types::I64, :name => 'numNulls'},
- NUMDVS => {:type => ::Thrift::Types::I64, :name => 'numDVs'}
+ NUMDVS => {:type => ::Thrift::Types::I64, :name => 'numDVs'},
+ BITVECTORS => {:type => ::Thrift::Types::STRING, :name => 'bitVectors', :optional => true}
}
def struct_fields; FIELDS; end
@@ -872,12 +876,14 @@ class LongColumnStatsData
HIGHVALUE = 2
NUMNULLS = 3
NUMDVS = 4
+ BITVECTORS = 5
FIELDS = {
LOWVALUE => {:type => ::Thrift::Types::I64, :name => 'lowValue', :optional => true},
HIGHVALUE => {:type => ::Thrift::Types::I64, :name => 'highValue', :optional => true},
NUMNULLS => {:type => ::Thrift::Types::I64, :name => 'numNulls'},
- NUMDVS => {:type => ::Thrift::Types::I64, :name => 'numDVs'}
+ NUMDVS => {:type => ::Thrift::Types::I64, :name => 'numDVs'},
+ BITVECTORS => {:type => ::Thrift::Types::STRING, :name => 'bitVectors', :optional => true}
}
def struct_fields; FIELDS; end
@@ -896,12 +902,14 @@ class StringColumnStatsData
AVGCOLLEN = 2
NUMNULLS = 3
NUMDVS = 4
+ BITVECTORS = 5
FIELDS = {
MAXCOLLEN => {:type => ::Thrift::Types::I64, :name => 'maxColLen'},
AVGCOLLEN => {:type => ::Thrift::Types::DOUBLE, :name => 'avgColLen'},
NUMNULLS => {:type => ::Thrift::Types::I64, :name => 'numNulls'},
- NUMDVS => {:type => ::Thrift::Types::I64, :name => 'numDVs'}
+ NUMDVS => {:type => ::Thrift::Types::I64, :name => 'numDVs'},
+ BITVECTORS => {:type => ::Thrift::Types::STRING, :name => 'bitVectors', :optional => true}
}
def struct_fields; FIELDS; end
@@ -921,11 +929,13 @@ class BinaryColumnStatsData
MAXCOLLEN = 1
AVGCOLLEN = 2
NUMNULLS = 3
+ BITVECTORS = 4
FIELDS = {
MAXCOLLEN => {:type => ::Thrift::Types::I64, :name => 'maxColLen'},
AVGCOLLEN => {:type => ::Thrift::Types::DOUBLE, :name => 'avgColLen'},
- NUMNULLS => {:type => ::Thrift::Types::I64, :name => 'numNulls'}
+ NUMNULLS => {:type => ::Thrift::Types::I64, :name => 'numNulls'},
+ BITVECTORS => {:type => ::Thrift::Types::STRING, :name => 'bitVectors', :optional => true}
}
def struct_fields; FIELDS; end
@@ -965,12 +975,14 @@ class DecimalColumnStatsData
HIGHVALUE = 2
NUMNULLS = 3
NUMDVS = 4
+ BITVECTORS = 5
FIELDS = {
LOWVALUE => {:type => ::Thrift::Types::STRUCT, :name => 'lowValue', :class => ::Decimal, :optional => true},
HIGHVALUE => {:type => ::Thrift::Types::STRUCT, :name => 'highValue', :class => ::Decimal, :optional => true},
NUMNULLS => {:type => ::Thrift::Types::I64, :name => 'numNulls'},
- NUMDVS => {:type => ::Thrift::Types::I64, :name => 'numDVs'}
+ NUMDVS => {:type => ::Thrift::Types::I64, :name => 'numDVs'},
+ BITVECTORS => {:type => ::Thrift::Types::STRING, :name => 'bitVectors', :optional => true}
}
def struct_fields; FIELDS; end
@@ -1006,12 +1018,14 @@ class DateColumnStatsData
HIGHVALUE = 2
NUMNULLS = 3
NUMDVS = 4
+ BITVECTORS = 5
FIELDS = {
LOWVALUE => {:type => ::Thrift::Types::STRUCT, :name => 'lowValue', :class => ::Date, :optional => true},
HIGHVALUE => {:type => ::Thrift::Types::STRUCT, :name => 'highValue', :class => ::Date, :optional => true},
NUMNULLS => {:type => ::Thrift::Types::I64, :name => 'numNulls'},
- NUMDVS => {:type => ::Thrift::Types::I64, :name => 'numDVs'}
+ NUMDVS => {:type => ::Thrift::Types::I64, :name => 'numDVs'},
+ BITVECTORS => {:type => ::Thrift::Types::STRING, :name => 'bitVectors', :optional => true}
}
def struct_fields; FIELDS; end
http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/metastore/src/java/org/apache/hadoop/hive/metastore/NumDistinctValueEstimator.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/NumDistinctValueEstimator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/NumDistinctValueEstimator.java
new file mode 100644
index 0000000..92f9a84
--- /dev/null
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/NumDistinctValueEstimator.java
@@ -0,0 +1,367 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.metastore;
+import java.util.Random;
+
+import javolution.util.FastBitSet;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.io.Text;
+
+/*
+ * https://en.wikipedia.org/wiki/Flajolet%E2%80%93Martin_algorithm
+ * We implement Flajolet–Martin algorithm in this class.
+ * The Flajolet–Martin algorithm is an algorithm for approximating the number of distinct elements
+ * in a stream with a single pass and space-consumption which is logarithmic in the maximum number
+ * of possible distinct elements in the stream. The algorithm was introduced by Philippe Flajolet
+ * and G. Nigel Martin in their 1984 paper "Probabilistic Counting Algorithms for Data Base Applications".
+ * Later it has been refined in the papers "LogLog counting of large cardinalities" by Marianne Durand
+ * and Philippe Flajolet, and "HyperLogLog: The analysis of a near-optimal cardinality estimation
+ * algorithm" by Philippe Flajolet et al.
+ */
+
+/*
+ * The algorithm works like this.
+ * (1) Set the number of bit vectors, i.e., numBitVectors, based on the precision.
+ * (2) For each bit vector, generate hash value of the long value and mod it by 2^bitVectorSize-1. (addToEstimator)
+ * (3) Set the index (addToEstimator)
+ * (4) Take the average of the index for all the bit vectors and get the estimated NDV (estimateNumDistinctValues).
+ */
+public class NumDistinctValueEstimator {
+
+ static final Log LOG = LogFactory.getLog(NumDistinctValueEstimator.class.getName());
+
+ /* We want a,b,x to come from a finite field of size 0 to k, where k is a prime number.
+ * 2^p - 1 is prime for p = 31. Hence bitvectorSize has to be 31. Pick k to be 2^p -1.
+ * If a,b,x didn't come from a finite field ax1 + b mod k and ax2 + b mod k will not be pair wise
+ * independent. As a consequence, the hash values will not distribute uniformly from 0 to 2^p-1
+ * thus introducing errors in the estimates.
+ */
+ private static final int BIT_VECTOR_SIZE = 31;
+ private final int numBitVectors;
+
+ // Refer to Flajolet-Martin'86 for the value of phi
+ private static final double PHI = 0.77351;
+
+ // Per-vector hash parameters: hash_i(v) = (a[i]*v + b[i]) mod (2^31 - 1). Null when this
+ // instance was built from a serialized string (see the (String, int) constructor).
+ private final int[] a;
+ private final int[] b;
+ private final FastBitSet[] bitVector;
+
+ // Deterministically-seeded generators for a[] and b[], so every estimator instance uses
+ // the same hash parameters (required for mergeEstimators to be meaningful across JVMs).
+ private final Random aValue;
+ private final Random bValue;
+
+ /* Create a new distinctValueEstimator
+ */
+ public NumDistinctValueEstimator(int numBitVectors) {
+ this.numBitVectors = numBitVectors;
+ bitVector = new FastBitSet[numBitVectors];
+ for (int i=0; i< numBitVectors; i++) {
+ bitVector[i] = new FastBitSet(BIT_VECTOR_SIZE);
+ }
+
+ a = new int[numBitVectors];
+ b = new int[numBitVectors];
+
+ /* Use a large prime number as a seed to the random number generator.
+ * Java's random number generator uses the Linear Congruential Generator to generate random
+ * numbers using the following recurrence relation,
+ *
+ * X(n+1) = (a X(n) + c ) mod m
+ *
+ * where X0 is the seed. Java implementation uses m = 2^48. This is problematic because 2^48
+ * is not a prime number and hence the set of numbers from 0 to m don't form a finite field.
+ * If these numbers don't come from a finite field any give X(n) and X(n+1) may not be pair
+ * wise independent.
+ *
+ * However, empirically passing in prime numbers as seeds seems to work better than when passing
+ * composite numbers as seeds. Ideally Java's Random should pick m such that m is prime.
+ *
+ */
+ aValue = new Random(99397);
+ bValue = new Random(9876413);
+
+ for (int i = 0; i < numBitVectors; i++) {
+ int randVal;
+ /* a and b shouldn't be even; If a and b are even, then none of the values
+ * will set bit 0 thus introducing errors in the estimate. Both a and b can be even
+ * 25% of the times and as a result 25% of the bit vectors could be inaccurate. To avoid this
+ * always pick odd values for a and b.
+ */
+ do {
+ randVal = aValue.nextInt();
+ } while (randVal % 2 == 0);
+
+ a[i] = randVal;
+
+ do {
+ randVal = bValue.nextInt();
+ } while (randVal % 2 == 0);
+
+ b[i] = randVal;
+
+ // NOTE(review): "1 << BIT_VECTOR_SIZE - 1" parses as 1 << (BIT_VECTOR_SIZE - 1) = 2^30,
+ // NOT (1 << BIT_VECTOR_SIZE) - 1 (the 2^31-1 modulus used in generateHash), so adding it
+ // does not guarantee a[i]/b[i] end up non-negative. generateHash tolerates a negative
+ // parameter because it re-adds the modulus when the reduced hash is negative, but confirm
+ // the intended constant before touching this: any change alters every hash value and
+ // therefore invalidates previously serialized estimators.
+ if (a[i] < 0) {
+ a[i] = a[i] + (1 << BIT_VECTOR_SIZE - 1);
+ }
+
+ if (b[i] < 0) {
+ b[i] = b[i] + (1 << BIT_VECTOR_SIZE - 1);
+ }
+ }
+ }
+
+ /* Reconstruct an estimator from a serialized bit-vector string (see serialize()).
+ * a, b, aValue and bValue are left null, so an instance built this way supports merging
+ * and estimation but NOT addToEstimator — calling it would NPE inside generateHash.
+ */
+ public NumDistinctValueEstimator(String s, int numBitVectors) {
+ this.numBitVectors = numBitVectors;
+ FastBitSet bitVectorDeser[] = deserialize(s, numBitVectors);
+ bitVector = new FastBitSet[numBitVectors];
+ for(int i=0; i <numBitVectors; i++) {
+ bitVector[i] = new FastBitSet(BIT_VECTOR_SIZE);
+ bitVector[i].clear();
+ bitVector[i].or(bitVectorDeser[i]);
+ }
+
+ a = null;
+ b = null;
+
+ aValue = null;
+ bValue = null;
+ }
+
+ /**
+ * Resets a distinctValueEstimator object to its original state.
+ */
+ public void reset() {
+ for (int i=0; i< numBitVectors; i++) {
+ bitVector[i].clear();
+ }
+ }
+
+ public FastBitSet getBitVector(int index) {
+ return bitVector[index];
+ }
+
+ // Non-standard method casing kept as-is: callers elsewhere depend on this exact name.
+ public int getnumBitVectors() {
+ return numBitVectors;
+ }
+
+ public int getBitVectorSize() {
+ return BIT_VECTOR_SIZE;
+ }
+
+ // Debug-logs the vector count, vector size and the serialized form of all bit vectors.
+ public void printNumDistinctValueEstimator() {
+ String t = new String();
+
+ LOG.debug("NumDistinctValueEstimator");
+ LOG.debug("Number of Vectors:");
+ LOG.debug(numBitVectors);
+ LOG.debug("Vector Size: ");
+ LOG.debug(BIT_VECTOR_SIZE);
+
+ for (int i=0; i < numBitVectors; i++) {
+ t = t + bitVector[i].toString();
+ }
+
+ LOG.debug("Serialized Vectors: ");
+ LOG.debug(t);
+ }
+
+ /* Serializes a distinctValueEstimator object to Text for transport.
+ * The format is the concatenation of FastBitSet.toString() for each vector, i.e. one
+ * "{i, j, ...}" group of set-bit indexes per vector; deserialize() parses exactly this.
+ */
+ public Text serialize() {
+ String s = new String();
+ for(int i=0; i < numBitVectors; i++) {
+ s = s + (bitVector[i].toString());
+ }
+ return new Text(s);
+ }
+
+ /* Deserializes from string to FastBitSet; Creates a NumDistinctValueEstimator object and
+ * returns it.
+ */
+
+ private FastBitSet[] deserialize(String s, int numBitVectors) {
+ FastBitSet[] b = new FastBitSet[numBitVectors];
+ for (int j=0; j < numBitVectors; j++) {
+ b[j] = new FastBitSet(BIT_VECTOR_SIZE);
+ b[j].clear();
+ }
+
+ int vectorIndex =0;
+
+ /* Parse input string to obtain the indexes that are set in the bitvector.
+ * When a toString() is called on a FastBitSet object to serialize it, the serialization
+ * adds { and } to the beginning and end of the return String.
+ * Skip "{", "}", ",", " " in the input string.
+ */
+ for(int i=1; i < s.length()-1;) {
+ char c = s.charAt(i);
+ i = i + 1;
+
+ // Move on to the next bit vector
+ if (c == '}') {
+ vectorIndex = vectorIndex + 1;
+ }
+
+ // Encountered a numeric value; Extract out the entire number
+ if (c >= '0' && c <= '9') {
+ String t = new String();
+ t = t + c;
+ c = s.charAt(i);
+ i = i + 1;
+
+ while (c != ',' && c!= '}') {
+ t = t + c;
+ c = s.charAt(i);
+ i = i + 1;
+ }
+
+ int bitIndex = Integer.parseInt(t);
+ assert(bitIndex >= 0);
+ assert(vectorIndex < numBitVectors);
+ b[vectorIndex].set(bitIndex);
+ if (c == '}') {
+ vectorIndex = vectorIndex + 1;
+ }
+ }
+ }
+ return b;
+ }
+
+ /* Pairwise-independent hash: (a[hashNum]*v + b[hashNum]) mod (2^31 - 1), folded into
+ * [0, 2^31 - 1). The product promotes to long, and since the % result lies in
+ * (-mod, mod), a single "+ mod" suffices to make a negative result non-negative.
+ */
+ private int generateHash(long v, int hashNum) {
+ int mod = (1<<BIT_VECTOR_SIZE) - 1;
+ long tempHash = a[hashNum] * v + b[hashNum];
+ tempHash %= mod;
+ int hash = (int) tempHash;
+
+ /* Hash function should map the long value to 0...2^L-1.
+ * Hence hash value has to be non-negative.
+ */
+ if (hash < 0) {
+ hash = hash + mod;
+ }
+ return hash;
+ }
+
+ // PCSA uses a single hash function (index 0); the vector is chosen from the hash itself.
+ private int generateHashForPCSA(long v) {
+ return generateHash(v, 0);
+ }
+
+ public void addToEstimator(long v) {
+ /* Update summary bitVector :
+ * Generate hash value of the long value and mod it by 2^bitVectorSize-1.
+ * In this implementation bitVectorSize is 31.
+ */
+
+ for (int i = 0; i<numBitVectors; i++) {
+ int hash = generateHash(v,i);
+ int index;
+
+ // Find the index of the least significant bit that is 1
+ for (index=0; index<BIT_VECTOR_SIZE; index++) {
+ if (hash % 2 != 0) {
+ break;
+ }
+ hash = hash >> 1;
+ }
+
+ // Set bitvector[index] := 1
+ bitVector[i].set(index);
+ }
+ }
+
+ /* PCSA variant: hash % numBitVectors picks the vector, hash / numBitVectors supplies the
+ * bits whose trailing-zero count gives the index to set.
+ */
+ public void addToEstimatorPCSA(long v) {
+ int hash = generateHashForPCSA(v);
+ int rho = hash/numBitVectors;
+ int index;
+
+ // Find the index of the least significant bit that is 1
+ for (index=0; index<BIT_VECTOR_SIZE; index++) {
+ if (rho % 2 != 0) {
+ break;
+ }
+ rho = rho >> 1;
+ }
+
+ // Set bitvector[index] := 1
+ bitVector[hash%numBitVectors].set(index);
+ }
+
+ // Only the 32-bit Double hash feeds the estimator; the int is then widened back to long.
+ public void addToEstimator(double d) {
+ int v = new Double(d).hashCode();
+ addToEstimator(v);
+ }
+
+ public void addToEstimatorPCSA(double d) {
+ int v = new Double(d).hashCode();
+ addToEstimatorPCSA(v);
+ }
+
+ public void addToEstimator(HiveDecimal decimal) {
+ int v = decimal.hashCode();
+ addToEstimator(v);
+ }
+
+ public void addToEstimatorPCSA(HiveDecimal decimal) {
+ int v = decimal.hashCode();
+ addToEstimatorPCSA(v);
+ }
+
+ // Union of FM sketches: ORing bit vectors yields the sketch of the combined value stream.
+ // Both estimators are assumed to use the same numBitVectors (and hence hash parameters).
+ public void mergeEstimators(NumDistinctValueEstimator o) {
+ // Bitwise OR the bitvector with the bitvector in the agg buffer
+ for (int i=0; i<numBitVectors; i++) {
+ bitVector[i].or(o.getBitVector(i));
+ }
+ }
+
+ public long estimateNumDistinctValuesPCSA() {
+ double numDistinctValues = 0.0;
+ long S = 0;
+
+ // S accumulates, per vector, the position of the lowest clear bit.
+ for (int i=0; i < numBitVectors; i++) {
+ int index = 0;
+ // NOTE(review): the bounds check runs AFTER get(); the safer ordering is
+ // "index < BIT_VECTOR_SIZE && bitVector[i].get(index)". Confirm that
+ // FastBitSet.get on an out-of-range index is benign before relying on this.
+ while (bitVector[i].get(index) && index < BIT_VECTOR_SIZE) {
+ index = index + 1;
+ }
+ S = S + index;
+ }
+
+ // NOTE(review): S/numBitVectors is long/int integer division, so the fractional part of
+ // the mean is truncated before Math.pow — confirm whether a double mean was intended.
+ numDistinctValues = ((numBitVectors/PHI) * Math.pow(2.0, S/numBitVectors));
+ return ((long)numDistinctValues);
+ }
+
+ /* We use the Flajolet-Martin estimator to estimate the number of distinct values.FM uses the
+ * location of the least significant zero as an estimate of log2(phi*ndvs).
+ */
+ public long estimateNumDistinctValues() {
+ int sumLeastSigZero = 0;
+ double avgLeastSigZero;
+ double numDistinctValues;
+
+ for (int i=0; i< numBitVectors; i++) {
+ int leastSigZero = bitVector[i].nextClearBit(0);
+ sumLeastSigZero += leastSigZero;
+ }
+
+ // Mean least-significant-zero position, corrected by log2(PHI), gives log2(NDV).
+ avgLeastSigZero =
+ (double)(sumLeastSigZero/(numBitVectors * 1.0)) - (Math.log(PHI)/Math.log(2.0));
+ numDistinctValues = Math.pow(2.0, avgLeastSigZero);
+ return ((long)(numDistinctValues));
+ }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseUtils.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseUtils.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseUtils.java
index f4df2e2..d6d01bd 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseUtils.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseUtils.java
@@ -1133,9 +1133,8 @@ class HBaseUtils {
return proto.toByteArray();
}
- private static HbaseMetastoreProto.ColumnStats
- protoBufStatsForOneColumn(ColumnStatistics partitionColumnStats, ColumnStatisticsObj colStats)
- throws IOException {
+ private static HbaseMetastoreProto.ColumnStats protoBufStatsForOneColumn(
+ ColumnStatistics partitionColumnStats, ColumnStatisticsObj colStats) throws IOException {
HbaseMetastoreProto.ColumnStats.Builder builder = HbaseMetastoreProto.ColumnStats.newBuilder();
if (partitionColumnStats != null) {
builder.setLastAnalyzed(partitionColumnStats.getStatsDesc().getLastAnalyzed());
@@ -1147,80 +1146,77 @@ class HBaseUtils {
ColumnStatisticsData colData = colStats.getStatsData();
switch (colData.getSetField()) {
- case BOOLEAN_STATS:
- BooleanColumnStatsData boolData = colData.getBooleanStats();
- builder.setNumNulls(boolData.getNumNulls());
- builder.setBoolStats(
- HbaseMetastoreProto.ColumnStats.BooleanStats.newBuilder()
- .setNumTrues(boolData.getNumTrues())
- .setNumFalses(boolData.getNumFalses())
- .build());
- break;
-
- case LONG_STATS:
- LongColumnStatsData longData = colData.getLongStats();
- builder.setNumNulls(longData.getNumNulls());
- builder.setNumDistinctValues(longData.getNumDVs());
- builder.setLongStats(
- HbaseMetastoreProto.ColumnStats.LongStats.newBuilder()
- .setLowValue(longData.getLowValue())
- .setHighValue(longData.getHighValue())
- .build());
- break;
-
- case DOUBLE_STATS:
- DoubleColumnStatsData doubleData = colData.getDoubleStats();
- builder.setNumNulls(doubleData.getNumNulls());
- builder.setNumDistinctValues(doubleData.getNumDVs());
- builder.setDoubleStats(
- HbaseMetastoreProto.ColumnStats.DoubleStats.newBuilder()
- .setLowValue(doubleData.getLowValue())
- .setHighValue(doubleData.getHighValue())
- .build());
- break;
-
- case STRING_STATS:
- StringColumnStatsData stringData = colData.getStringStats();
- builder.setNumNulls(stringData.getNumNulls());
- builder.setNumDistinctValues(stringData.getNumDVs());
- builder.setStringStats(
- HbaseMetastoreProto.ColumnStats.StringStats.newBuilder()
- .setMaxColLength(stringData.getMaxColLen())
- .setAvgColLength(stringData.getAvgColLen())
- .build());
- break;
-
- case BINARY_STATS:
- BinaryColumnStatsData binaryData = colData.getBinaryStats();
- builder.setNumNulls(binaryData.getNumNulls());
- builder.setBinaryStats(
- HbaseMetastoreProto.ColumnStats.StringStats.newBuilder()
- .setMaxColLength(binaryData.getMaxColLen())
- .setAvgColLength(binaryData.getAvgColLen())
- .build());
- break;
-
- case DECIMAL_STATS:
- DecimalColumnStatsData decimalData = colData.getDecimalStats();
- builder.setNumNulls(decimalData.getNumNulls());
- builder.setNumDistinctValues(decimalData.getNumDVs());
- builder.setDecimalStats(
- HbaseMetastoreProto.ColumnStats.DecimalStats.newBuilder()
- .setLowValue(
- HbaseMetastoreProto.ColumnStats.DecimalStats.Decimal.newBuilder()
+ case BOOLEAN_STATS:
+ BooleanColumnStatsData boolData = colData.getBooleanStats();
+ builder.setNumNulls(boolData.getNumNulls());
+ builder.setBoolStats(HbaseMetastoreProto.ColumnStats.BooleanStats.newBuilder()
+ .setNumTrues(boolData.getNumTrues()).setNumFalses(boolData.getNumFalses()).build());
+ break;
+
+ case LONG_STATS:
+ LongColumnStatsData longData = colData.getLongStats();
+ builder.setNumNulls(longData.getNumNulls());
+ builder.setNumDistinctValues(longData.getNumDVs());
+ if (longData.isSetBitVectors()) {
+ builder.setBitVectors(longData.getBitVectors());
+ }
+ builder.setLongStats(HbaseMetastoreProto.ColumnStats.LongStats.newBuilder()
+ .setLowValue(longData.getLowValue()).setHighValue(longData.getHighValue()).build());
+ break;
+
+ case DOUBLE_STATS:
+ DoubleColumnStatsData doubleData = colData.getDoubleStats();
+ builder.setNumNulls(doubleData.getNumNulls());
+ builder.setNumDistinctValues(doubleData.getNumDVs());
+ if (doubleData.isSetBitVectors()) {
+ builder.setBitVectors(doubleData.getBitVectors());
+ }
+ builder.setDoubleStats(HbaseMetastoreProto.ColumnStats.DoubleStats.newBuilder()
+ .setLowValue(doubleData.getLowValue()).setHighValue(doubleData.getHighValue()).build());
+ break;
+
+ case STRING_STATS:
+ StringColumnStatsData stringData = colData.getStringStats();
+ builder.setNumNulls(stringData.getNumNulls());
+ builder.setNumDistinctValues(stringData.getNumDVs());
+ if (stringData.isSetBitVectors()) {
+ builder.setBitVectors(stringData.getBitVectors());
+ }
+ builder.setStringStats(HbaseMetastoreProto.ColumnStats.StringStats.newBuilder()
+ .setMaxColLength(stringData.getMaxColLen()).setAvgColLength(stringData.getAvgColLen())
+ .build());
+ break;
+
+ case BINARY_STATS:
+ BinaryColumnStatsData binaryData = colData.getBinaryStats();
+ builder.setNumNulls(binaryData.getNumNulls());
+ builder.setBinaryStats(HbaseMetastoreProto.ColumnStats.StringStats.newBuilder()
+ .setMaxColLength(binaryData.getMaxColLen()).setAvgColLength(binaryData.getAvgColLen())
+ .build());
+ break;
+
+ case DECIMAL_STATS:
+ DecimalColumnStatsData decimalData = colData.getDecimalStats();
+ builder.setNumNulls(decimalData.getNumNulls());
+ builder.setNumDistinctValues(decimalData.getNumDVs());
+ if (decimalData.isSetBitVectors()) {
+ builder.setBitVectors(decimalData.getBitVectors());
+ }
+ builder.setDecimalStats(
+ HbaseMetastoreProto.ColumnStats.DecimalStats
+ .newBuilder()
+ .setLowValue(
+ HbaseMetastoreProto.ColumnStats.DecimalStats.Decimal.newBuilder()
.setUnscaled(ByteString.copyFrom(decimalData.getLowValue().getUnscaled()))
- .setScale(decimalData.getLowValue().getScale())
- .build())
- .setHighValue(
- HbaseMetastoreProto.ColumnStats.DecimalStats.Decimal.newBuilder()
- .setUnscaled(ByteString.copyFrom(decimalData.getHighValue().getUnscaled()))
- .setScale(decimalData.getHighValue().getScale())
- .build()))
- .build();
- break;
-
- default:
- throw new RuntimeException("Woh, bad. Unknown stats type!");
+ .setScale(decimalData.getLowValue().getScale()).build())
+ .setHighValue(
+ HbaseMetastoreProto.ColumnStats.DecimalStats.Decimal.newBuilder()
+ .setUnscaled(ByteString.copyFrom(decimalData.getHighValue().getUnscaled()))
+ .setScale(decimalData.getHighValue().getScale()).build())).build();
+ break;
+
+ default:
+ throw new RuntimeException("Woh, bad. Unknown stats type!");
}
return builder.build();
}
@@ -1265,6 +1261,7 @@ class HBaseUtils {
}
longData.setNumNulls(proto.getNumNulls());
longData.setNumDVs(proto.getNumDistinctValues());
+ longData.setBitVectors(proto.getBitVectors());
colData.setLongStats(longData);
} else if (proto.hasDoubleStats()) {
DoubleColumnStatsData doubleData = new DoubleColumnStatsData();
@@ -1276,6 +1273,7 @@ class HBaseUtils {
}
doubleData.setNumNulls(proto.getNumNulls());
doubleData.setNumDVs(proto.getNumDistinctValues());
+ doubleData.setBitVectors(proto.getBitVectors());
colData.setDoubleStats(doubleData);
} else if (proto.hasStringStats()) {
StringColumnStatsData stringData = new StringColumnStatsData();
@@ -1283,6 +1281,7 @@ class HBaseUtils {
stringData.setAvgColLen(proto.getStringStats().getAvgColLength());
stringData.setNumNulls(proto.getNumNulls());
stringData.setNumDVs(proto.getNumDistinctValues());
+ stringData.setBitVectors(proto.getBitVectors());
colData.setStringStats(stringData);
} else if (proto.hasBinaryStats()) {
BinaryColumnStatsData binaryData = new BinaryColumnStatsData();
@@ -1306,6 +1305,7 @@ class HBaseUtils {
}
decimalData.setNumNulls(proto.getNumNulls());
decimalData.setNumDVs(proto.getNumDistinctValues());
+ decimalData.setBitVectors(proto.getBitVectors());
colData.setDecimalStats(decimalData);
} else {
throw new RuntimeException("Woh, bad. Unknown stats type!");
http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/StatsCache.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/StatsCache.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/StatsCache.java
index 5ec60be..f1d2e50 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/StatsCache.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/StatsCache.java
@@ -23,9 +23,11 @@ import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import com.google.protobuf.ByteString;
+
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.common.HiveStatsUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.AggrStats;
import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
@@ -72,7 +74,7 @@ class StatsCache {
return self;
}
- private StatsCache(Configuration conf) {
+ private StatsCache(final Configuration conf) {
final StatsCache me = this;
cache = CacheBuilder.newBuilder()
.maximumSize(
@@ -82,6 +84,7 @@ class StatsCache {
.build(new CacheLoader<StatsCacheKey, AggrStats>() {
@Override
public AggrStats load(StatsCacheKey key) throws Exception {
+ int numBitVectors = HiveStatsUtils.getNumBitVectorsForNDVEstimation(conf);
HBaseReadWrite hrw = HBaseReadWrite.getInstance();
AggrStats aggrStats = hrw.getAggregatedStats(key.hashed);
if (aggrStats == null) {
@@ -103,7 +106,7 @@ class StatsCache {
}
if (aggregator == null) {
aggregator = ColumnStatsAggregatorFactory.getColumnStatsAggregator(
- cso.getStatsData().getSetField());
+ cso.getStatsData().getSetField(), numBitVectors);
}
aggregator.aggregate(statsObj, cso);
}
http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/BinaryColumnStatsAggregator.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/BinaryColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/BinaryColumnStatsAggregator.java
index bbd2c7b..40340dd 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/BinaryColumnStatsAggregator.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/BinaryColumnStatsAggregator.java
@@ -22,7 +22,7 @@ package org.apache.hadoop.hive.metastore.hbase.stats;
import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
-public class BinaryColumnStatsAggregator implements ColumnStatsAggregator{
+public class BinaryColumnStatsAggregator extends ColumnStatsAggregator{
@Override
public void aggregate(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/BooleanColumnStatsAggregator.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/BooleanColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/BooleanColumnStatsAggregator.java
index 9047f68..735d965 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/BooleanColumnStatsAggregator.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/BooleanColumnStatsAggregator.java
@@ -22,7 +22,7 @@ package org.apache.hadoop.hive.metastore.hbase.stats;
import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
-public class BooleanColumnStatsAggregator implements ColumnStatsAggregator {
+public class BooleanColumnStatsAggregator extends ColumnStatsAggregator {
@Override
public void aggregate(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregator.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregator.java
index 217b654..694e53b 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregator.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregator.java
@@ -19,8 +19,10 @@
package org.apache.hadoop.hive.metastore.hbase.stats;
+import org.apache.hadoop.hive.metastore.NumDistinctValueEstimator;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
-public interface ColumnStatsAggregator {
- public void aggregate(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats);
+public abstract class ColumnStatsAggregator {
+ NumDistinctValueEstimator ndvEstimator = null;
+ public abstract void aggregate(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats);
}
http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregatorFactory.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregatorFactory.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregatorFactory.java
index a8dbc1f..8eb127b 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregatorFactory.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregatorFactory.java
@@ -19,6 +19,7 @@
package org.apache.hadoop.hive.metastore.hbase.stats;
+import org.apache.hadoop.hive.metastore.NumDistinctValueEstimator;
import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData;
import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
@@ -34,23 +35,34 @@ public class ColumnStatsAggregatorFactory {
private ColumnStatsAggregatorFactory() {
}
- public static ColumnStatsAggregator getColumnStatsAggregator(_Fields type) {
+ public static ColumnStatsAggregator getColumnStatsAggregator(_Fields type, int numBitVectors) {
+ ColumnStatsAggregator agg;
switch (type) {
case BOOLEAN_STATS:
- return new BooleanColumnStatsAggregator();
+ agg = new BooleanColumnStatsAggregator();
+ break;
case LONG_STATS:
- return new LongColumnStatsAggregator();
+ agg = new LongColumnStatsAggregator();
+ break;
case DOUBLE_STATS:
- return new DoubleColumnStatsAggregator();
+ agg = new DoubleColumnStatsAggregator();
+ break;
case STRING_STATS:
- return new StringColumnStatsAggregator();
+ agg = new StringColumnStatsAggregator();
+ break;
case BINARY_STATS:
- return new BinaryColumnStatsAggregator();
+ agg = new BinaryColumnStatsAggregator();
+ break;
case DECIMAL_STATS:
- return new DecimalColumnStatsAggregator();
+ agg = new DecimalColumnStatsAggregator();
+ break;
default:
throw new RuntimeException("Woh, bad. Unknown stats type " + type.toString());
}
+ if (numBitVectors > 0) {
+ agg.ndvEstimator = new NumDistinctValueEstimator(numBitVectors);
+ }
+ return agg;
}
public static ColumnStatisticsObj newColumnStaticsObj(String colName, String colType, _Fields type) {
http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DecimalColumnStatsAggregator.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DecimalColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DecimalColumnStatsAggregator.java
index ec25b31..50f4325 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DecimalColumnStatsAggregator.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DecimalColumnStatsAggregator.java
@@ -19,25 +19,33 @@
package org.apache.hadoop.hive.metastore.hbase.stats;
+import org.apache.hadoop.hive.metastore.NumDistinctValueEstimator;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.api.Decimal;
import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData;
-public class DecimalColumnStatsAggregator implements ColumnStatsAggregator {
+public class DecimalColumnStatsAggregator extends ColumnStatsAggregator {
@Override
public void aggregate(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
DecimalColumnStatsData aggregateData = aggregateColStats.getStatsData().getDecimalStats();
DecimalColumnStatsData newData = newColStats.getStatsData().getDecimalStats();
- Decimal lowValue =
- (aggregateData.getLowValue().compareTo(newData.getLowValue()) > 0) ? aggregateData
- .getLowValue() : newData.getLowValue();
+ Decimal lowValue = aggregateData.getLowValue() != null
+ && (aggregateData.getLowValue().compareTo(newData.getLowValue()) > 0) ? aggregateData
+ .getLowValue() : newData.getLowValue();
aggregateData.setLowValue(lowValue);
- Decimal highValue =
- (aggregateData.getHighValue().compareTo(newData.getHighValue()) > 0) ? aggregateData
- .getHighValue() : newData.getHighValue();
+ Decimal highValue = aggregateData.getHighValue() != null
+ && (aggregateData.getHighValue().compareTo(newData.getHighValue()) > 0) ? aggregateData
+ .getHighValue() : newData.getHighValue();
aggregateData.setHighValue(highValue);
aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());
- aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs()));
+ if (ndvEstimator == null || !newData.isSetBitVectors() || newData.getBitVectors().length() == 0) {
+ aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs()));
+ } else {
+ ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(newData.getBitVectors(),
+ ndvEstimator.getnumBitVectors()));
+ aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues());
+ aggregateData.setBitVectors(ndvEstimator.serialize().toString());
+ }
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DoubleColumnStatsAggregator.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DoubleColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DoubleColumnStatsAggregator.java
index 71af0ac..d945ec2 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DoubleColumnStatsAggregator.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DoubleColumnStatsAggregator.java
@@ -19,10 +19,11 @@
package org.apache.hadoop.hive.metastore.hbase.stats;
+import org.apache.hadoop.hive.metastore.NumDistinctValueEstimator;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;
-public class DoubleColumnStatsAggregator implements ColumnStatsAggregator {
+public class DoubleColumnStatsAggregator extends ColumnStatsAggregator {
@Override
public void aggregate(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
@@ -31,6 +32,13 @@ public class DoubleColumnStatsAggregator implements ColumnStatsAggregator {
aggregateData.setLowValue(Math.min(aggregateData.getLowValue(), newData.getLowValue()));
aggregateData.setHighValue(Math.max(aggregateData.getHighValue(), newData.getHighValue()));
aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());
- aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs()));
+ if (ndvEstimator == null || !newData.isSetBitVectors() || newData.getBitVectors().length() == 0) {
+ aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs()));
+ } else {
+ ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(newData.getBitVectors(),
+ ndvEstimator.getnumBitVectors()));
+ aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues());
+ aggregateData.setBitVectors(ndvEstimator.serialize().toString());
+ }
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/LongColumnStatsAggregator.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/LongColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/LongColumnStatsAggregator.java
index 15b8cf7..068dd00 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/LongColumnStatsAggregator.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/LongColumnStatsAggregator.java
@@ -19,10 +19,11 @@
package org.apache.hadoop.hive.metastore.hbase.stats;
+import org.apache.hadoop.hive.metastore.NumDistinctValueEstimator;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
-public class LongColumnStatsAggregator implements ColumnStatsAggregator {
+public class LongColumnStatsAggregator extends ColumnStatsAggregator {
@Override
public void aggregate(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
@@ -31,6 +32,13 @@ public class LongColumnStatsAggregator implements ColumnStatsAggregator {
aggregateData.setLowValue(Math.min(aggregateData.getLowValue(), newData.getLowValue()));
aggregateData.setHighValue(Math.max(aggregateData.getHighValue(), newData.getHighValue()));
aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());
- aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs()));
+ if (ndvEstimator == null || !newData.isSetBitVectors() || newData.getBitVectors().length() == 0) {
+ aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs()));
+ } else {
+ ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(newData.getBitVectors(),
+ ndvEstimator.getnumBitVectors()));
+ aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues());
+ aggregateData.setBitVectors(ndvEstimator.serialize().toString());
+ }
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/StringColumnStatsAggregator.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/StringColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/StringColumnStatsAggregator.java
index fe1a04c..aeb6c39 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/StringColumnStatsAggregator.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/StringColumnStatsAggregator.java
@@ -19,10 +19,11 @@
package org.apache.hadoop.hive.metastore.hbase.stats;
+import org.apache.hadoop.hive.metastore.NumDistinctValueEstimator;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
-public class StringColumnStatsAggregator implements ColumnStatsAggregator {
+public class StringColumnStatsAggregator extends ColumnStatsAggregator {
@Override
public void aggregate(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
@@ -31,6 +32,13 @@ public class StringColumnStatsAggregator implements ColumnStatsAggregator {
aggregateData.setMaxColLen(Math.max(aggregateData.getMaxColLen(), newData.getMaxColLen()));
aggregateData.setAvgColLen(Math.max(aggregateData.getAvgColLen(), newData.getAvgColLen()));
aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());
- aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs()));
+ if (ndvEstimator == null || !newData.isSetBitVectors() || newData.getBitVectors().length() == 0) {
+ aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs()));
+ } else {
+ ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(newData.getBitVectors(),
+ ndvEstimator.getnumBitVectors()));
+ aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues());
+ aggregateData.setBitVectors(ndvEstimator.serialize().toString());
+ }
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/metastore/src/protobuf/org/apache/hadoop/hive/metastore/hbase/hbase_metastore_proto.proto
----------------------------------------------------------------------
diff --git a/metastore/src/protobuf/org/apache/hadoop/hive/metastore/hbase/hbase_metastore_proto.proto b/metastore/src/protobuf/org/apache/hadoop/hive/metastore/hbase/hbase_metastore_proto.proto
index 0d0ef89..466fdf9 100644
--- a/metastore/src/protobuf/org/apache/hadoop/hive/metastore/hbase/hbase_metastore_proto.proto
+++ b/metastore/src/protobuf/org/apache/hadoop/hive/metastore/hbase/hbase_metastore_proto.proto
@@ -93,6 +93,7 @@ message ColumnStats {
optional StringStats binary_stats = 9;
optional DecimalStats decimal_stats = 10;
optional string column_name = 11;
+ optional string bit_vectors = 12;
}
message Database {
http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsCacheWithBitVector.java
----------------------------------------------------------------------
diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsCacheWithBitVector.java b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsCacheWithBitVector.java
new file mode 100644
index 0000000..36c7984
--- /dev/null
+++ b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsCacheWithBitVector.java
@@ -0,0 +1,187 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hadoop.hive.metastore.hbase;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.client.HTableInterface;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.api.AggrStats;
+import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
+import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.Partition;
+import org.apache.hadoop.hive.metastore.api.SerDeInfo;
+import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+import org.mockito.Mock;
+import org.mockito.MockitoAnnotations;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+public class TestHBaseAggregateStatsCacheWithBitVector {
+ private static final Logger LOG = LoggerFactory
+ .getLogger(TestHBaseAggregateStatsCacheWithBitVector.class.getName());
+
+ @Mock
+ HTableInterface htable;
+ private HBaseStore store;
+ SortedMap<String, Cell> rows = new TreeMap<>();
+
+ @Before
+ public void before() throws IOException {
+ MockitoAnnotations.initMocks(this);
+ HiveConf conf = new HiveConf();
+ conf.setBoolean(HBaseReadWrite.NO_CACHE_CONF, true);
+ store = MockUtils.init(conf, htable, rows);
+ store.backdoor().getStatsCache().resetCounters();
+ }
+
+ private static interface Checker {
+ void checkStats(AggrStats aggrStats) throws Exception;
+ }
+
+ @Test
+ public void allPartitions() throws Exception {
+ String dbName = "default";
+ String tableName = "snp";
+ List<String> partVals1 = Arrays.asList("today");
+ List<String> partVals2 = Arrays.asList("yesterday");
+ long now = System.currentTimeMillis();
+
+ List<FieldSchema> cols = new ArrayList<>();
+ cols.add(new FieldSchema("col1", "boolean", "nocomment"));
+ SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
+ StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0,
+ serde, null, null, Collections.<String, String> emptyMap());
+ List<FieldSchema> partCols = new ArrayList<>();
+ partCols.add(new FieldSchema("ds", "string", ""));
+ Table table = new Table(tableName, dbName, "me", (int) now, (int) now, 0, sd, partCols,
+ Collections.<String, String> emptyMap(), null, null, null);
+ store.createTable(table);
+
+ StorageDescriptor psd = new StorageDescriptor(sd);
+ psd.setLocation("file:/tmp/default/hit/ds=" + partVals1.get(0));
+ Partition part = new Partition(partVals1, dbName, tableName, (int) now, (int) now, psd,
+ Collections.<String, String> emptyMap());
+ store.addPartition(part);
+
+ psd = new StorageDescriptor(sd);
+ psd.setLocation("file:/tmp/default/hit/ds=" + partVals2.get(0));
+ part = new Partition(partVals2, dbName, tableName, (int) now, (int) now, psd,
+ Collections.<String, String> emptyMap());
+ store.addPartition(part);
+
+ ColumnStatistics cs = new ColumnStatistics();
+ ColumnStatisticsDesc desc = new ColumnStatisticsDesc(false, dbName, tableName);
+ desc.setLastAnalyzed(now);
+ desc.setPartName("ds=" + partVals1.get(0));
+ cs.setStatsDesc(desc);
+ ColumnStatisticsObj obj = new ColumnStatisticsObj();
+ obj.setColName("col1");
+ obj.setColType("double");
+ ColumnStatisticsData data = new ColumnStatisticsData();
+ DoubleColumnStatsData dcsd = new DoubleColumnStatsData();
+ dcsd.setHighValue(1000.2342343);
+ dcsd.setLowValue(-20.1234213423);
+ dcsd.setNumNulls(30);
+ dcsd.setNumDVs(12342);
+ dcsd.setBitVectors("{0, 4, 5, 7}{0, 1}{0, 1, 2}{0, 1, 4}{0}{0, 2}{0, 3}{0, 2, 3, 4}{0, 1, 4}{0, 1}{0}{0, 1, 3, 8}{0, 2}{0, 2}{0, 9}{0, 1, 4}");
+ data.setDoubleStats(dcsd);
+ obj.setStatsData(data);
+ cs.addToStatsObj(obj);
+ store.updatePartitionColumnStatistics(cs, partVals1);
+
+ cs = new ColumnStatistics();
+ desc = new ColumnStatisticsDesc(false, dbName, tableName);
+ desc.setLastAnalyzed(now);
+ desc.setPartName("ds=" + partVals2.get(0));
+ cs.setStatsDesc(desc);
+ obj = new ColumnStatisticsObj();
+ obj.setColName("col1");
+ obj.setColType("double");
+ data = new ColumnStatisticsData();
+ dcsd = new DoubleColumnStatsData();
+ dcsd.setHighValue(1000.2342343);
+ dcsd.setLowValue(-20.1234213423);
+ dcsd.setNumNulls(30);
+ dcsd.setNumDVs(12342);
+ dcsd.setBitVectors("{0, 1}{0, 1}{1, 2, 4}{0, 1, 2}{0, 1, 2}{0, 2}{0, 1, 3, 4}{0, 1}{0, 1}{3, 4, 6}{2}{0, 1}{0, 3}{0}{0, 1}{0, 1, 4}");
+ data.setDoubleStats(dcsd);
+ obj.setStatsData(data);
+ cs.addToStatsObj(obj);
+
+ store.updatePartitionColumnStatistics(cs, partVals2);
+
+ Checker statChecker = new Checker() {
+ @Override
+ public void checkStats(AggrStats aggrStats) throws Exception {
+ Assert.assertEquals(2, aggrStats.getPartsFound());
+ Assert.assertEquals(1, aggrStats.getColStatsSize());
+ ColumnStatisticsObj cso = aggrStats.getColStats().get(0);
+ Assert.assertEquals("col1", cso.getColName());
+ Assert.assertEquals("double", cso.getColType());
+ DoubleColumnStatsData dcsd = cso.getStatsData().getDoubleStats();
+ Assert.assertEquals(1000.23, dcsd.getHighValue(), 0.01);
+ Assert.assertEquals(-20.12, dcsd.getLowValue(), 0.01);
+ Assert.assertEquals(60, dcsd.getNumNulls());
+ Assert.assertEquals(5, dcsd.getNumDVs());
+ Assert
+ .assertEquals(
+ "{0, 1, 4, 5, 7}{0, 1}{0, 1, 2, 4}{0, 1, 2, 4}{0, 1, 2}{0, 2}{0, 1, 3, 4}{0, 1, 2, 3, 4}{0, 1, 4}{0, 1, 3, 4, 6}{0, 2}{0, 1, 3, 8}{0, 2, 3}{0, 2}{0, 1, 9}{0, 1, 4}",
+ dcsd.getBitVectors());
+ }
+ };
+
+ AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName,
+ Arrays.asList("ds=today", "ds=yesterday"), Arrays.asList("col1"));
+ statChecker.checkStats(aggrStats);
+
+ // Check that we had to build it from the stats
+ Assert.assertEquals(0, store.backdoor().getStatsCache().hbaseHits.getCnt());
+ Assert.assertEquals(1, store.backdoor().getStatsCache().totalGets.getCnt());
+ Assert.assertEquals(1, store.backdoor().getStatsCache().misses.getCnt());
+
+ // Call again, this time it should come from memory. Also, reverse the name
+ // order this time
+ // to assure that we still hit.
+ aggrStats = store.get_aggr_stats_for(dbName, tableName,
+ Arrays.asList("ds=yesterday", "ds=today"), Arrays.asList("col1"));
+ statChecker.checkStats(aggrStats);
+
+ Assert.assertEquals(0, store.backdoor().getStatsCache().hbaseHits.getCnt());
+ Assert.assertEquals(2, store.backdoor().getStatsCache().totalGets.getCnt());
+ Assert.assertEquals(1, store.backdoor().getStatsCache().misses.getCnt());
+ }
+
+}
[3/4] hive git commit: HIVE-12763: Use bit vector to track NDV
(Pengcheng Xiong, reviewed by Laljo John Pullokkaran and Alan Gates)
Posted by px...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/BooleanColumnStatsData.java
----------------------------------------------------------------------
diff --git a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/BooleanColumnStatsData.java b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/BooleanColumnStatsData.java
index 6aa4668..de39d21 100644
--- a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/BooleanColumnStatsData.java
+++ b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/BooleanColumnStatsData.java
@@ -41,6 +41,7 @@ public class BooleanColumnStatsData implements org.apache.thrift.TBase<BooleanCo
private static final org.apache.thrift.protocol.TField NUM_TRUES_FIELD_DESC = new org.apache.thrift.protocol.TField("numTrues", org.apache.thrift.protocol.TType.I64, (short)1);
private static final org.apache.thrift.protocol.TField NUM_FALSES_FIELD_DESC = new org.apache.thrift.protocol.TField("numFalses", org.apache.thrift.protocol.TType.I64, (short)2);
private static final org.apache.thrift.protocol.TField NUM_NULLS_FIELD_DESC = new org.apache.thrift.protocol.TField("numNulls", org.apache.thrift.protocol.TType.I64, (short)3);
+ private static final org.apache.thrift.protocol.TField BIT_VECTORS_FIELD_DESC = new org.apache.thrift.protocol.TField("bitVectors", org.apache.thrift.protocol.TType.STRING, (short)4);
private static final Map<Class<? extends IScheme>, SchemeFactory> schemes = new HashMap<Class<? extends IScheme>, SchemeFactory>();
static {
@@ -51,12 +52,14 @@ public class BooleanColumnStatsData implements org.apache.thrift.TBase<BooleanCo
private long numTrues; // required
private long numFalses; // required
private long numNulls; // required
+ private String bitVectors; // optional
/** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */
public enum _Fields implements org.apache.thrift.TFieldIdEnum {
NUM_TRUES((short)1, "numTrues"),
NUM_FALSES((short)2, "numFalses"),
- NUM_NULLS((short)3, "numNulls");
+ NUM_NULLS((short)3, "numNulls"),
+ BIT_VECTORS((short)4, "bitVectors");
private static final Map<String, _Fields> byName = new HashMap<String, _Fields>();
@@ -77,6 +80,8 @@ public class BooleanColumnStatsData implements org.apache.thrift.TBase<BooleanCo
return NUM_FALSES;
case 3: // NUM_NULLS
return NUM_NULLS;
+ case 4: // BIT_VECTORS
+ return BIT_VECTORS;
default:
return null;
}
@@ -121,6 +126,7 @@ public class BooleanColumnStatsData implements org.apache.thrift.TBase<BooleanCo
private static final int __NUMFALSES_ISSET_ID = 1;
private static final int __NUMNULLS_ISSET_ID = 2;
private byte __isset_bitfield = 0;
+ private static final _Fields optionals[] = {_Fields.BIT_VECTORS};
public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap;
static {
Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class);
@@ -130,6 +136,8 @@ public class BooleanColumnStatsData implements org.apache.thrift.TBase<BooleanCo
new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64)));
tmpMap.put(_Fields.NUM_NULLS, new org.apache.thrift.meta_data.FieldMetaData("numNulls", org.apache.thrift.TFieldRequirementType.REQUIRED,
new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64)));
+ tmpMap.put(_Fields.BIT_VECTORS, new org.apache.thrift.meta_data.FieldMetaData("bitVectors", org.apache.thrift.TFieldRequirementType.OPTIONAL,
+ new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING)));
metaDataMap = Collections.unmodifiableMap(tmpMap);
org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(BooleanColumnStatsData.class, metaDataMap);
}
@@ -159,6 +167,9 @@ public class BooleanColumnStatsData implements org.apache.thrift.TBase<BooleanCo
this.numTrues = other.numTrues;
this.numFalses = other.numFalses;
this.numNulls = other.numNulls;
+ if (other.isSetBitVectors()) {
+ this.bitVectors = other.bitVectors;
+ }
}
public BooleanColumnStatsData deepCopy() {
@@ -173,6 +184,7 @@ public class BooleanColumnStatsData implements org.apache.thrift.TBase<BooleanCo
this.numFalses = 0;
setNumNullsIsSet(false);
this.numNulls = 0;
+ this.bitVectors = null;
}
public long getNumTrues() {
@@ -241,6 +253,29 @@ public class BooleanColumnStatsData implements org.apache.thrift.TBase<BooleanCo
__isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __NUMNULLS_ISSET_ID, value);
}
+ public String getBitVectors() {
+ return this.bitVectors;
+ }
+
+ public void setBitVectors(String bitVectors) {
+ this.bitVectors = bitVectors;
+ }
+
+ public void unsetBitVectors() {
+ this.bitVectors = null;
+ }
+
+ /** Returns true if field bitVectors is set (has been assigned a value) and false otherwise */
+ public boolean isSetBitVectors() {
+ return this.bitVectors != null;
+ }
+
+ public void setBitVectorsIsSet(boolean value) {
+ if (!value) {
+ this.bitVectors = null;
+ }
+ }
+
public void setFieldValue(_Fields field, Object value) {
switch (field) {
case NUM_TRUES:
@@ -267,6 +302,14 @@ public class BooleanColumnStatsData implements org.apache.thrift.TBase<BooleanCo
}
break;
+ case BIT_VECTORS:
+ if (value == null) {
+ unsetBitVectors();
+ } else {
+ setBitVectors((String)value);
+ }
+ break;
+
}
}
@@ -281,6 +324,9 @@ public class BooleanColumnStatsData implements org.apache.thrift.TBase<BooleanCo
case NUM_NULLS:
return getNumNulls();
+ case BIT_VECTORS:
+ return getBitVectors();
+
}
throw new IllegalStateException();
}
@@ -298,6 +344,8 @@ public class BooleanColumnStatsData implements org.apache.thrift.TBase<BooleanCo
return isSetNumFalses();
case NUM_NULLS:
return isSetNumNulls();
+ case BIT_VECTORS:
+ return isSetBitVectors();
}
throw new IllegalStateException();
}
@@ -342,6 +390,15 @@ public class BooleanColumnStatsData implements org.apache.thrift.TBase<BooleanCo
return false;
}
+ boolean this_present_bitVectors = true && this.isSetBitVectors();
+ boolean that_present_bitVectors = true && that.isSetBitVectors();
+ if (this_present_bitVectors || that_present_bitVectors) {
+ if (!(this_present_bitVectors && that_present_bitVectors))
+ return false;
+ if (!this.bitVectors.equals(that.bitVectors))
+ return false;
+ }
+
return true;
}
@@ -364,6 +421,11 @@ public class BooleanColumnStatsData implements org.apache.thrift.TBase<BooleanCo
if (present_numNulls)
list.add(numNulls);
+ boolean present_bitVectors = true && (isSetBitVectors());
+ list.add(present_bitVectors);
+ if (present_bitVectors)
+ list.add(bitVectors);
+
return list.hashCode();
}
@@ -405,6 +467,16 @@ public class BooleanColumnStatsData implements org.apache.thrift.TBase<BooleanCo
return lastComparison;
}
}
+ lastComparison = Boolean.valueOf(isSetBitVectors()).compareTo(other.isSetBitVectors());
+ if (lastComparison != 0) {
+ return lastComparison;
+ }
+ if (isSetBitVectors()) {
+ lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.bitVectors, other.bitVectors);
+ if (lastComparison != 0) {
+ return lastComparison;
+ }
+ }
return 0;
}
@@ -436,6 +508,16 @@ public class BooleanColumnStatsData implements org.apache.thrift.TBase<BooleanCo
sb.append("numNulls:");
sb.append(this.numNulls);
first = false;
+ if (isSetBitVectors()) {
+ if (!first) sb.append(", ");
+ sb.append("bitVectors:");
+ if (this.bitVectors == null) {
+ sb.append("null");
+ } else {
+ sb.append(this.bitVectors);
+ }
+ first = false;
+ }
sb.append(")");
return sb.toString();
}
@@ -517,6 +599,14 @@ public class BooleanColumnStatsData implements org.apache.thrift.TBase<BooleanCo
org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
}
break;
+ case 4: // BIT_VECTORS
+ if (schemeField.type == org.apache.thrift.protocol.TType.STRING) {
+ struct.bitVectors = iprot.readString();
+ struct.setBitVectorsIsSet(true);
+ } else {
+ org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
+ }
+ break;
default:
org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
}
@@ -539,6 +629,13 @@ public class BooleanColumnStatsData implements org.apache.thrift.TBase<BooleanCo
oprot.writeFieldBegin(NUM_NULLS_FIELD_DESC);
oprot.writeI64(struct.numNulls);
oprot.writeFieldEnd();
+ if (struct.bitVectors != null) {
+ if (struct.isSetBitVectors()) {
+ oprot.writeFieldBegin(BIT_VECTORS_FIELD_DESC);
+ oprot.writeString(struct.bitVectors);
+ oprot.writeFieldEnd();
+ }
+ }
oprot.writeFieldStop();
oprot.writeStructEnd();
}
@@ -559,6 +656,14 @@ public class BooleanColumnStatsData implements org.apache.thrift.TBase<BooleanCo
oprot.writeI64(struct.numTrues);
oprot.writeI64(struct.numFalses);
oprot.writeI64(struct.numNulls);
+ BitSet optionals = new BitSet();
+ if (struct.isSetBitVectors()) {
+ optionals.set(0);
+ }
+ oprot.writeBitSet(optionals, 1);
+ if (struct.isSetBitVectors()) {
+ oprot.writeString(struct.bitVectors);
+ }
}
@Override
@@ -570,6 +675,11 @@ public class BooleanColumnStatsData implements org.apache.thrift.TBase<BooleanCo
struct.setNumFalsesIsSet(true);
struct.numNulls = iprot.readI64();
struct.setNumNullsIsSet(true);
+ BitSet incoming = iprot.readBitSet(1);
+ if (incoming.get(0)) {
+ struct.bitVectors = iprot.readString();
+ struct.setBitVectorsIsSet(true);
+ }
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DateColumnStatsData.java
----------------------------------------------------------------------
diff --git a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DateColumnStatsData.java b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DateColumnStatsData.java
index 2ebb811..edc87a1 100644
--- a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DateColumnStatsData.java
+++ b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DateColumnStatsData.java
@@ -42,6 +42,7 @@ public class DateColumnStatsData implements org.apache.thrift.TBase<DateColumnSt
private static final org.apache.thrift.protocol.TField HIGH_VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("highValue", org.apache.thrift.protocol.TType.STRUCT, (short)2);
private static final org.apache.thrift.protocol.TField NUM_NULLS_FIELD_DESC = new org.apache.thrift.protocol.TField("numNulls", org.apache.thrift.protocol.TType.I64, (short)3);
private static final org.apache.thrift.protocol.TField NUM_DVS_FIELD_DESC = new org.apache.thrift.protocol.TField("numDVs", org.apache.thrift.protocol.TType.I64, (short)4);
+ private static final org.apache.thrift.protocol.TField BIT_VECTORS_FIELD_DESC = new org.apache.thrift.protocol.TField("bitVectors", org.apache.thrift.protocol.TType.STRING, (short)5);
private static final Map<Class<? extends IScheme>, SchemeFactory> schemes = new HashMap<Class<? extends IScheme>, SchemeFactory>();
static {
@@ -53,13 +54,15 @@ public class DateColumnStatsData implements org.apache.thrift.TBase<DateColumnSt
private Date highValue; // optional
private long numNulls; // required
private long numDVs; // required
+ private String bitVectors; // optional
/** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */
public enum _Fields implements org.apache.thrift.TFieldIdEnum {
LOW_VALUE((short)1, "lowValue"),
HIGH_VALUE((short)2, "highValue"),
NUM_NULLS((short)3, "numNulls"),
- NUM_DVS((short)4, "numDVs");
+ NUM_DVS((short)4, "numDVs"),
+ BIT_VECTORS((short)5, "bitVectors");
private static final Map<String, _Fields> byName = new HashMap<String, _Fields>();
@@ -82,6 +85,8 @@ public class DateColumnStatsData implements org.apache.thrift.TBase<DateColumnSt
return NUM_NULLS;
case 4: // NUM_DVS
return NUM_DVS;
+ case 5: // BIT_VECTORS
+ return BIT_VECTORS;
default:
return null;
}
@@ -125,7 +130,7 @@ public class DateColumnStatsData implements org.apache.thrift.TBase<DateColumnSt
private static final int __NUMNULLS_ISSET_ID = 0;
private static final int __NUMDVS_ISSET_ID = 1;
private byte __isset_bitfield = 0;
- private static final _Fields optionals[] = {_Fields.LOW_VALUE,_Fields.HIGH_VALUE};
+ private static final _Fields optionals[] = {_Fields.LOW_VALUE,_Fields.HIGH_VALUE,_Fields.BIT_VECTORS};
public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap;
static {
Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class);
@@ -137,6 +142,8 @@ public class DateColumnStatsData implements org.apache.thrift.TBase<DateColumnSt
new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64)));
tmpMap.put(_Fields.NUM_DVS, new org.apache.thrift.meta_data.FieldMetaData("numDVs", org.apache.thrift.TFieldRequirementType.REQUIRED,
new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64)));
+ tmpMap.put(_Fields.BIT_VECTORS, new org.apache.thrift.meta_data.FieldMetaData("bitVectors", org.apache.thrift.TFieldRequirementType.OPTIONAL,
+ new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING)));
metaDataMap = Collections.unmodifiableMap(tmpMap);
org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(DateColumnStatsData.class, metaDataMap);
}
@@ -168,6 +175,9 @@ public class DateColumnStatsData implements org.apache.thrift.TBase<DateColumnSt
}
this.numNulls = other.numNulls;
this.numDVs = other.numDVs;
+ if (other.isSetBitVectors()) {
+ this.bitVectors = other.bitVectors;
+ }
}
public DateColumnStatsData deepCopy() {
@@ -182,6 +192,7 @@ public class DateColumnStatsData implements org.apache.thrift.TBase<DateColumnSt
this.numNulls = 0;
setNumDVsIsSet(false);
this.numDVs = 0;
+ this.bitVectors = null;
}
public Date getLowValue() {
@@ -274,6 +285,29 @@ public class DateColumnStatsData implements org.apache.thrift.TBase<DateColumnSt
__isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __NUMDVS_ISSET_ID, value);
}
+ public String getBitVectors() {
+ return this.bitVectors;
+ }
+
+ public void setBitVectors(String bitVectors) {
+ this.bitVectors = bitVectors;
+ }
+
+ public void unsetBitVectors() {
+ this.bitVectors = null;
+ }
+
+ /** Returns true if field bitVectors is set (has been assigned a value) and false otherwise */
+ public boolean isSetBitVectors() {
+ return this.bitVectors != null;
+ }
+
+ public void setBitVectorsIsSet(boolean value) {
+ if (!value) {
+ this.bitVectors = null;
+ }
+ }
+
public void setFieldValue(_Fields field, Object value) {
switch (field) {
case LOW_VALUE:
@@ -308,6 +342,14 @@ public class DateColumnStatsData implements org.apache.thrift.TBase<DateColumnSt
}
break;
+ case BIT_VECTORS:
+ if (value == null) {
+ unsetBitVectors();
+ } else {
+ setBitVectors((String)value);
+ }
+ break;
+
}
}
@@ -325,6 +367,9 @@ public class DateColumnStatsData implements org.apache.thrift.TBase<DateColumnSt
case NUM_DVS:
return getNumDVs();
+ case BIT_VECTORS:
+ return getBitVectors();
+
}
throw new IllegalStateException();
}
@@ -344,6 +389,8 @@ public class DateColumnStatsData implements org.apache.thrift.TBase<DateColumnSt
return isSetNumNulls();
case NUM_DVS:
return isSetNumDVs();
+ case BIT_VECTORS:
+ return isSetBitVectors();
}
throw new IllegalStateException();
}
@@ -397,6 +444,15 @@ public class DateColumnStatsData implements org.apache.thrift.TBase<DateColumnSt
return false;
}
+ boolean this_present_bitVectors = true && this.isSetBitVectors();
+ boolean that_present_bitVectors = true && that.isSetBitVectors();
+ if (this_present_bitVectors || that_present_bitVectors) {
+ if (!(this_present_bitVectors && that_present_bitVectors))
+ return false;
+ if (!this.bitVectors.equals(that.bitVectors))
+ return false;
+ }
+
return true;
}
@@ -424,6 +480,11 @@ public class DateColumnStatsData implements org.apache.thrift.TBase<DateColumnSt
if (present_numDVs)
list.add(numDVs);
+ boolean present_bitVectors = true && (isSetBitVectors());
+ list.add(present_bitVectors);
+ if (present_bitVectors)
+ list.add(bitVectors);
+
return list.hashCode();
}
@@ -475,6 +536,16 @@ public class DateColumnStatsData implements org.apache.thrift.TBase<DateColumnSt
return lastComparison;
}
}
+ lastComparison = Boolean.valueOf(isSetBitVectors()).compareTo(other.isSetBitVectors());
+ if (lastComparison != 0) {
+ return lastComparison;
+ }
+ if (isSetBitVectors()) {
+ lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.bitVectors, other.bitVectors);
+ if (lastComparison != 0) {
+ return lastComparison;
+ }
+ }
return 0;
}
@@ -522,6 +593,16 @@ public class DateColumnStatsData implements org.apache.thrift.TBase<DateColumnSt
sb.append("numDVs:");
sb.append(this.numDVs);
first = false;
+ if (isSetBitVectors()) {
+ if (!first) sb.append(", ");
+ sb.append("bitVectors:");
+ if (this.bitVectors == null) {
+ sb.append("null");
+ } else {
+ sb.append(this.bitVectors);
+ }
+ first = false;
+ }
sb.append(")");
return sb.toString();
}
@@ -615,6 +696,14 @@ public class DateColumnStatsData implements org.apache.thrift.TBase<DateColumnSt
org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
}
break;
+ case 5: // BIT_VECTORS
+ if (schemeField.type == org.apache.thrift.protocol.TType.STRING) {
+ struct.bitVectors = iprot.readString();
+ struct.setBitVectorsIsSet(true);
+ } else {
+ org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
+ }
+ break;
default:
org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
}
@@ -648,6 +737,13 @@ public class DateColumnStatsData implements org.apache.thrift.TBase<DateColumnSt
oprot.writeFieldBegin(NUM_DVS_FIELD_DESC);
oprot.writeI64(struct.numDVs);
oprot.writeFieldEnd();
+ if (struct.bitVectors != null) {
+ if (struct.isSetBitVectors()) {
+ oprot.writeFieldBegin(BIT_VECTORS_FIELD_DESC);
+ oprot.writeString(struct.bitVectors);
+ oprot.writeFieldEnd();
+ }
+ }
oprot.writeFieldStop();
oprot.writeStructEnd();
}
@@ -674,13 +770,19 @@ public class DateColumnStatsData implements org.apache.thrift.TBase<DateColumnSt
if (struct.isSetHighValue()) {
optionals.set(1);
}
- oprot.writeBitSet(optionals, 2);
+ if (struct.isSetBitVectors()) {
+ optionals.set(2);
+ }
+ oprot.writeBitSet(optionals, 3);
if (struct.isSetLowValue()) {
struct.lowValue.write(oprot);
}
if (struct.isSetHighValue()) {
struct.highValue.write(oprot);
}
+ if (struct.isSetBitVectors()) {
+ oprot.writeString(struct.bitVectors);
+ }
}
@Override
@@ -690,7 +792,7 @@ public class DateColumnStatsData implements org.apache.thrift.TBase<DateColumnSt
struct.setNumNullsIsSet(true);
struct.numDVs = iprot.readI64();
struct.setNumDVsIsSet(true);
- BitSet incoming = iprot.readBitSet(2);
+ BitSet incoming = iprot.readBitSet(3);
if (incoming.get(0)) {
struct.lowValue = new Date();
struct.lowValue.read(iprot);
@@ -701,6 +803,10 @@ public class DateColumnStatsData implements org.apache.thrift.TBase<DateColumnSt
struct.highValue.read(iprot);
struct.setHighValueIsSet(true);
}
+ if (incoming.get(2)) {
+ struct.bitVectors = iprot.readString();
+ struct.setBitVectorsIsSet(true);
+ }
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DecimalColumnStatsData.java
----------------------------------------------------------------------
diff --git a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DecimalColumnStatsData.java b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DecimalColumnStatsData.java
index 720176a..ec363dc 100644
--- a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DecimalColumnStatsData.java
+++ b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DecimalColumnStatsData.java
@@ -42,6 +42,7 @@ public class DecimalColumnStatsData implements org.apache.thrift.TBase<DecimalCo
private static final org.apache.thrift.protocol.TField HIGH_VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("highValue", org.apache.thrift.protocol.TType.STRUCT, (short)2);
private static final org.apache.thrift.protocol.TField NUM_NULLS_FIELD_DESC = new org.apache.thrift.protocol.TField("numNulls", org.apache.thrift.protocol.TType.I64, (short)3);
private static final org.apache.thrift.protocol.TField NUM_DVS_FIELD_DESC = new org.apache.thrift.protocol.TField("numDVs", org.apache.thrift.protocol.TType.I64, (short)4);
+ private static final org.apache.thrift.protocol.TField BIT_VECTORS_FIELD_DESC = new org.apache.thrift.protocol.TField("bitVectors", org.apache.thrift.protocol.TType.STRING, (short)5);
private static final Map<Class<? extends IScheme>, SchemeFactory> schemes = new HashMap<Class<? extends IScheme>, SchemeFactory>();
static {
@@ -53,13 +54,15 @@ public class DecimalColumnStatsData implements org.apache.thrift.TBase<DecimalCo
private Decimal highValue; // optional
private long numNulls; // required
private long numDVs; // required
+ private String bitVectors; // optional
/** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */
public enum _Fields implements org.apache.thrift.TFieldIdEnum {
LOW_VALUE((short)1, "lowValue"),
HIGH_VALUE((short)2, "highValue"),
NUM_NULLS((short)3, "numNulls"),
- NUM_DVS((short)4, "numDVs");
+ NUM_DVS((short)4, "numDVs"),
+ BIT_VECTORS((short)5, "bitVectors");
private static final Map<String, _Fields> byName = new HashMap<String, _Fields>();
@@ -82,6 +85,8 @@ public class DecimalColumnStatsData implements org.apache.thrift.TBase<DecimalCo
return NUM_NULLS;
case 4: // NUM_DVS
return NUM_DVS;
+ case 5: // BIT_VECTORS
+ return BIT_VECTORS;
default:
return null;
}
@@ -125,7 +130,7 @@ public class DecimalColumnStatsData implements org.apache.thrift.TBase<DecimalCo
private static final int __NUMNULLS_ISSET_ID = 0;
private static final int __NUMDVS_ISSET_ID = 1;
private byte __isset_bitfield = 0;
- private static final _Fields optionals[] = {_Fields.LOW_VALUE,_Fields.HIGH_VALUE};
+ private static final _Fields optionals[] = {_Fields.LOW_VALUE,_Fields.HIGH_VALUE,_Fields.BIT_VECTORS};
public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap;
static {
Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class);
@@ -137,6 +142,8 @@ public class DecimalColumnStatsData implements org.apache.thrift.TBase<DecimalCo
new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64)));
tmpMap.put(_Fields.NUM_DVS, new org.apache.thrift.meta_data.FieldMetaData("numDVs", org.apache.thrift.TFieldRequirementType.REQUIRED,
new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64)));
+ tmpMap.put(_Fields.BIT_VECTORS, new org.apache.thrift.meta_data.FieldMetaData("bitVectors", org.apache.thrift.TFieldRequirementType.OPTIONAL,
+ new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING)));
metaDataMap = Collections.unmodifiableMap(tmpMap);
org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(DecimalColumnStatsData.class, metaDataMap);
}
@@ -168,6 +175,9 @@ public class DecimalColumnStatsData implements org.apache.thrift.TBase<DecimalCo
}
this.numNulls = other.numNulls;
this.numDVs = other.numDVs;
+ if (other.isSetBitVectors()) {
+ this.bitVectors = other.bitVectors;
+ }
}
public DecimalColumnStatsData deepCopy() {
@@ -182,6 +192,7 @@ public class DecimalColumnStatsData implements org.apache.thrift.TBase<DecimalCo
this.numNulls = 0;
setNumDVsIsSet(false);
this.numDVs = 0;
+ this.bitVectors = null;
}
public Decimal getLowValue() {
@@ -274,6 +285,29 @@ public class DecimalColumnStatsData implements org.apache.thrift.TBase<DecimalCo
__isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __NUMDVS_ISSET_ID, value);
}
+ public String getBitVectors() {
+ return this.bitVectors;
+ }
+
+ public void setBitVectors(String bitVectors) {
+ this.bitVectors = bitVectors;
+ }
+
+ public void unsetBitVectors() {
+ this.bitVectors = null;
+ }
+
+ /** Returns true if field bitVectors is set (has been assigned a value) and false otherwise */
+ public boolean isSetBitVectors() {
+ return this.bitVectors != null;
+ }
+
+ public void setBitVectorsIsSet(boolean value) {
+ if (!value) {
+ this.bitVectors = null;
+ }
+ }
+
public void setFieldValue(_Fields field, Object value) {
switch (field) {
case LOW_VALUE:
@@ -308,6 +342,14 @@ public class DecimalColumnStatsData implements org.apache.thrift.TBase<DecimalCo
}
break;
+ case BIT_VECTORS:
+ if (value == null) {
+ unsetBitVectors();
+ } else {
+ setBitVectors((String)value);
+ }
+ break;
+
}
}
@@ -325,6 +367,9 @@ public class DecimalColumnStatsData implements org.apache.thrift.TBase<DecimalCo
case NUM_DVS:
return getNumDVs();
+ case BIT_VECTORS:
+ return getBitVectors();
+
}
throw new IllegalStateException();
}
@@ -344,6 +389,8 @@ public class DecimalColumnStatsData implements org.apache.thrift.TBase<DecimalCo
return isSetNumNulls();
case NUM_DVS:
return isSetNumDVs();
+ case BIT_VECTORS:
+ return isSetBitVectors();
}
throw new IllegalStateException();
}
@@ -397,6 +444,15 @@ public class DecimalColumnStatsData implements org.apache.thrift.TBase<DecimalCo
return false;
}
+ boolean this_present_bitVectors = true && this.isSetBitVectors();
+ boolean that_present_bitVectors = true && that.isSetBitVectors();
+ if (this_present_bitVectors || that_present_bitVectors) {
+ if (!(this_present_bitVectors && that_present_bitVectors))
+ return false;
+ if (!this.bitVectors.equals(that.bitVectors))
+ return false;
+ }
+
return true;
}
@@ -424,6 +480,11 @@ public class DecimalColumnStatsData implements org.apache.thrift.TBase<DecimalCo
if (present_numDVs)
list.add(numDVs);
+ boolean present_bitVectors = true && (isSetBitVectors());
+ list.add(present_bitVectors);
+ if (present_bitVectors)
+ list.add(bitVectors);
+
return list.hashCode();
}
@@ -475,6 +536,16 @@ public class DecimalColumnStatsData implements org.apache.thrift.TBase<DecimalCo
return lastComparison;
}
}
+ lastComparison = Boolean.valueOf(isSetBitVectors()).compareTo(other.isSetBitVectors());
+ if (lastComparison != 0) {
+ return lastComparison;
+ }
+ if (isSetBitVectors()) {
+ lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.bitVectors, other.bitVectors);
+ if (lastComparison != 0) {
+ return lastComparison;
+ }
+ }
return 0;
}
@@ -522,6 +593,16 @@ public class DecimalColumnStatsData implements org.apache.thrift.TBase<DecimalCo
sb.append("numDVs:");
sb.append(this.numDVs);
first = false;
+ if (isSetBitVectors()) {
+ if (!first) sb.append(", ");
+ sb.append("bitVectors:");
+ if (this.bitVectors == null) {
+ sb.append("null");
+ } else {
+ sb.append(this.bitVectors);
+ }
+ first = false;
+ }
sb.append(")");
return sb.toString();
}
@@ -615,6 +696,14 @@ public class DecimalColumnStatsData implements org.apache.thrift.TBase<DecimalCo
org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
}
break;
+ case 5: // BIT_VECTORS
+ if (schemeField.type == org.apache.thrift.protocol.TType.STRING) {
+ struct.bitVectors = iprot.readString();
+ struct.setBitVectorsIsSet(true);
+ } else {
+ org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
+ }
+ break;
default:
org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
}
@@ -648,6 +737,13 @@ public class DecimalColumnStatsData implements org.apache.thrift.TBase<DecimalCo
oprot.writeFieldBegin(NUM_DVS_FIELD_DESC);
oprot.writeI64(struct.numDVs);
oprot.writeFieldEnd();
+ if (struct.bitVectors != null) {
+ if (struct.isSetBitVectors()) {
+ oprot.writeFieldBegin(BIT_VECTORS_FIELD_DESC);
+ oprot.writeString(struct.bitVectors);
+ oprot.writeFieldEnd();
+ }
+ }
oprot.writeFieldStop();
oprot.writeStructEnd();
}
@@ -674,13 +770,19 @@ public class DecimalColumnStatsData implements org.apache.thrift.TBase<DecimalCo
if (struct.isSetHighValue()) {
optionals.set(1);
}
- oprot.writeBitSet(optionals, 2);
+ if (struct.isSetBitVectors()) {
+ optionals.set(2);
+ }
+ oprot.writeBitSet(optionals, 3);
if (struct.isSetLowValue()) {
struct.lowValue.write(oprot);
}
if (struct.isSetHighValue()) {
struct.highValue.write(oprot);
}
+ if (struct.isSetBitVectors()) {
+ oprot.writeString(struct.bitVectors);
+ }
}
@Override
@@ -690,7 +792,7 @@ public class DecimalColumnStatsData implements org.apache.thrift.TBase<DecimalCo
struct.setNumNullsIsSet(true);
struct.numDVs = iprot.readI64();
struct.setNumDVsIsSet(true);
- BitSet incoming = iprot.readBitSet(2);
+ BitSet incoming = iprot.readBitSet(3);
if (incoming.get(0)) {
struct.lowValue = new Decimal();
struct.lowValue.read(iprot);
@@ -701,6 +803,10 @@ public class DecimalColumnStatsData implements org.apache.thrift.TBase<DecimalCo
struct.highValue.read(iprot);
struct.setHighValueIsSet(true);
}
+ if (incoming.get(2)) {
+ struct.bitVectors = iprot.readString();
+ struct.setBitVectorsIsSet(true);
+ }
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DoubleColumnStatsData.java
----------------------------------------------------------------------
diff --git a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DoubleColumnStatsData.java b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DoubleColumnStatsData.java
index 5d48b5d..e3340e4 100644
--- a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DoubleColumnStatsData.java
+++ b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DoubleColumnStatsData.java
@@ -42,6 +42,7 @@ public class DoubleColumnStatsData implements org.apache.thrift.TBase<DoubleColu
private static final org.apache.thrift.protocol.TField HIGH_VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("highValue", org.apache.thrift.protocol.TType.DOUBLE, (short)2);
private static final org.apache.thrift.protocol.TField NUM_NULLS_FIELD_DESC = new org.apache.thrift.protocol.TField("numNulls", org.apache.thrift.protocol.TType.I64, (short)3);
private static final org.apache.thrift.protocol.TField NUM_DVS_FIELD_DESC = new org.apache.thrift.protocol.TField("numDVs", org.apache.thrift.protocol.TType.I64, (short)4);
+ private static final org.apache.thrift.protocol.TField BIT_VECTORS_FIELD_DESC = new org.apache.thrift.protocol.TField("bitVectors", org.apache.thrift.protocol.TType.STRING, (short)5);
private static final Map<Class<? extends IScheme>, SchemeFactory> schemes = new HashMap<Class<? extends IScheme>, SchemeFactory>();
static {
@@ -53,13 +54,15 @@ public class DoubleColumnStatsData implements org.apache.thrift.TBase<DoubleColu
private double highValue; // optional
private long numNulls; // required
private long numDVs; // required
+ private String bitVectors; // optional
/** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */
public enum _Fields implements org.apache.thrift.TFieldIdEnum {
LOW_VALUE((short)1, "lowValue"),
HIGH_VALUE((short)2, "highValue"),
NUM_NULLS((short)3, "numNulls"),
- NUM_DVS((short)4, "numDVs");
+ NUM_DVS((short)4, "numDVs"),
+ BIT_VECTORS((short)5, "bitVectors");
private static final Map<String, _Fields> byName = new HashMap<String, _Fields>();
@@ -82,6 +85,8 @@ public class DoubleColumnStatsData implements org.apache.thrift.TBase<DoubleColu
return NUM_NULLS;
case 4: // NUM_DVS
return NUM_DVS;
+ case 5: // BIT_VECTORS
+ return BIT_VECTORS;
default:
return null;
}
@@ -127,7 +132,7 @@ public class DoubleColumnStatsData implements org.apache.thrift.TBase<DoubleColu
private static final int __NUMNULLS_ISSET_ID = 2;
private static final int __NUMDVS_ISSET_ID = 3;
private byte __isset_bitfield = 0;
- private static final _Fields optionals[] = {_Fields.LOW_VALUE,_Fields.HIGH_VALUE};
+ private static final _Fields optionals[] = {_Fields.LOW_VALUE,_Fields.HIGH_VALUE,_Fields.BIT_VECTORS};
public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap;
static {
Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class);
@@ -139,6 +144,8 @@ public class DoubleColumnStatsData implements org.apache.thrift.TBase<DoubleColu
new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64)));
tmpMap.put(_Fields.NUM_DVS, new org.apache.thrift.meta_data.FieldMetaData("numDVs", org.apache.thrift.TFieldRequirementType.REQUIRED,
new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64)));
+ tmpMap.put(_Fields.BIT_VECTORS, new org.apache.thrift.meta_data.FieldMetaData("bitVectors", org.apache.thrift.TFieldRequirementType.OPTIONAL,
+ new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING)));
metaDataMap = Collections.unmodifiableMap(tmpMap);
org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(DoubleColumnStatsData.class, metaDataMap);
}
@@ -166,6 +173,9 @@ public class DoubleColumnStatsData implements org.apache.thrift.TBase<DoubleColu
this.highValue = other.highValue;
this.numNulls = other.numNulls;
this.numDVs = other.numDVs;
+ if (other.isSetBitVectors()) {
+ this.bitVectors = other.bitVectors;
+ }
}
public DoubleColumnStatsData deepCopy() {
@@ -182,6 +192,7 @@ public class DoubleColumnStatsData implements org.apache.thrift.TBase<DoubleColu
this.numNulls = 0;
setNumDVsIsSet(false);
this.numDVs = 0;
+ this.bitVectors = null;
}
public double getLowValue() {
@@ -272,6 +283,29 @@ public class DoubleColumnStatsData implements org.apache.thrift.TBase<DoubleColu
__isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __NUMDVS_ISSET_ID, value);
}
+ public String getBitVectors() {
+ return this.bitVectors;
+ }
+
+ public void setBitVectors(String bitVectors) {
+ this.bitVectors = bitVectors;
+ }
+
+ public void unsetBitVectors() {
+ this.bitVectors = null;
+ }
+
+ /** Returns true if field bitVectors is set (has been assigned a value) and false otherwise */
+ public boolean isSetBitVectors() {
+ return this.bitVectors != null;
+ }
+
+ public void setBitVectorsIsSet(boolean value) {
+ if (!value) {
+ this.bitVectors = null;
+ }
+ }
+
public void setFieldValue(_Fields field, Object value) {
switch (field) {
case LOW_VALUE:
@@ -306,6 +340,14 @@ public class DoubleColumnStatsData implements org.apache.thrift.TBase<DoubleColu
}
break;
+ case BIT_VECTORS:
+ if (value == null) {
+ unsetBitVectors();
+ } else {
+ setBitVectors((String)value);
+ }
+ break;
+
}
}
@@ -323,6 +365,9 @@ public class DoubleColumnStatsData implements org.apache.thrift.TBase<DoubleColu
case NUM_DVS:
return getNumDVs();
+ case BIT_VECTORS:
+ return getBitVectors();
+
}
throw new IllegalStateException();
}
@@ -342,6 +387,8 @@ public class DoubleColumnStatsData implements org.apache.thrift.TBase<DoubleColu
return isSetNumNulls();
case NUM_DVS:
return isSetNumDVs();
+ case BIT_VECTORS:
+ return isSetBitVectors();
}
throw new IllegalStateException();
}
@@ -395,6 +442,15 @@ public class DoubleColumnStatsData implements org.apache.thrift.TBase<DoubleColu
return false;
}
+ boolean this_present_bitVectors = true && this.isSetBitVectors();
+ boolean that_present_bitVectors = true && that.isSetBitVectors();
+ if (this_present_bitVectors || that_present_bitVectors) {
+ if (!(this_present_bitVectors && that_present_bitVectors))
+ return false;
+ if (!this.bitVectors.equals(that.bitVectors))
+ return false;
+ }
+
return true;
}
@@ -422,6 +478,11 @@ public class DoubleColumnStatsData implements org.apache.thrift.TBase<DoubleColu
if (present_numDVs)
list.add(numDVs);
+ boolean present_bitVectors = true && (isSetBitVectors());
+ list.add(present_bitVectors);
+ if (present_bitVectors)
+ list.add(bitVectors);
+
return list.hashCode();
}
@@ -473,6 +534,16 @@ public class DoubleColumnStatsData implements org.apache.thrift.TBase<DoubleColu
return lastComparison;
}
}
+ lastComparison = Boolean.valueOf(isSetBitVectors()).compareTo(other.isSetBitVectors());
+ if (lastComparison != 0) {
+ return lastComparison;
+ }
+ if (isSetBitVectors()) {
+ lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.bitVectors, other.bitVectors);
+ if (lastComparison != 0) {
+ return lastComparison;
+ }
+ }
return 0;
}
@@ -512,6 +583,16 @@ public class DoubleColumnStatsData implements org.apache.thrift.TBase<DoubleColu
sb.append("numDVs:");
sb.append(this.numDVs);
first = false;
+ if (isSetBitVectors()) {
+ if (!first) sb.append(", ");
+ sb.append("bitVectors:");
+ if (this.bitVectors == null) {
+ sb.append("null");
+ } else {
+ sb.append(this.bitVectors);
+ }
+ first = false;
+ }
sb.append(")");
return sb.toString();
}
@@ -597,6 +678,14 @@ public class DoubleColumnStatsData implements org.apache.thrift.TBase<DoubleColu
org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
}
break;
+ case 5: // BIT_VECTORS
+ if (schemeField.type == org.apache.thrift.protocol.TType.STRING) {
+ struct.bitVectors = iprot.readString();
+ struct.setBitVectorsIsSet(true);
+ } else {
+ org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
+ }
+ break;
default:
org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
}
@@ -626,6 +715,13 @@ public class DoubleColumnStatsData implements org.apache.thrift.TBase<DoubleColu
oprot.writeFieldBegin(NUM_DVS_FIELD_DESC);
oprot.writeI64(struct.numDVs);
oprot.writeFieldEnd();
+ if (struct.bitVectors != null) {
+ if (struct.isSetBitVectors()) {
+ oprot.writeFieldBegin(BIT_VECTORS_FIELD_DESC);
+ oprot.writeString(struct.bitVectors);
+ oprot.writeFieldEnd();
+ }
+ }
oprot.writeFieldStop();
oprot.writeStructEnd();
}
@@ -652,13 +748,19 @@ public class DoubleColumnStatsData implements org.apache.thrift.TBase<DoubleColu
if (struct.isSetHighValue()) {
optionals.set(1);
}
- oprot.writeBitSet(optionals, 2);
+ if (struct.isSetBitVectors()) {
+ optionals.set(2);
+ }
+ oprot.writeBitSet(optionals, 3);
if (struct.isSetLowValue()) {
oprot.writeDouble(struct.lowValue);
}
if (struct.isSetHighValue()) {
oprot.writeDouble(struct.highValue);
}
+ if (struct.isSetBitVectors()) {
+ oprot.writeString(struct.bitVectors);
+ }
}
@Override
@@ -668,7 +770,7 @@ public class DoubleColumnStatsData implements org.apache.thrift.TBase<DoubleColu
struct.setNumNullsIsSet(true);
struct.numDVs = iprot.readI64();
struct.setNumDVsIsSet(true);
- BitSet incoming = iprot.readBitSet(2);
+ BitSet incoming = iprot.readBitSet(3);
if (incoming.get(0)) {
struct.lowValue = iprot.readDouble();
struct.setLowValueIsSet(true);
@@ -677,6 +779,10 @@ public class DoubleColumnStatsData implements org.apache.thrift.TBase<DoubleColu
struct.highValue = iprot.readDouble();
struct.setHighValueIsSet(true);
}
+ if (incoming.get(2)) {
+ struct.bitVectors = iprot.readString();
+ struct.setBitVectorsIsSet(true);
+ }
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/LongColumnStatsData.java
----------------------------------------------------------------------
diff --git a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/LongColumnStatsData.java b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/LongColumnStatsData.java
index 2f41c5a..4404706 100644
--- a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/LongColumnStatsData.java
+++ b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/LongColumnStatsData.java
@@ -42,6 +42,7 @@ public class LongColumnStatsData implements org.apache.thrift.TBase<LongColumnSt
private static final org.apache.thrift.protocol.TField HIGH_VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("highValue", org.apache.thrift.protocol.TType.I64, (short)2);
private static final org.apache.thrift.protocol.TField NUM_NULLS_FIELD_DESC = new org.apache.thrift.protocol.TField("numNulls", org.apache.thrift.protocol.TType.I64, (short)3);
private static final org.apache.thrift.protocol.TField NUM_DVS_FIELD_DESC = new org.apache.thrift.protocol.TField("numDVs", org.apache.thrift.protocol.TType.I64, (short)4);
+ private static final org.apache.thrift.protocol.TField BIT_VECTORS_FIELD_DESC = new org.apache.thrift.protocol.TField("bitVectors", org.apache.thrift.protocol.TType.STRING, (short)5);
private static final Map<Class<? extends IScheme>, SchemeFactory> schemes = new HashMap<Class<? extends IScheme>, SchemeFactory>();
static {
@@ -53,13 +54,15 @@ public class LongColumnStatsData implements org.apache.thrift.TBase<LongColumnSt
private long highValue; // optional
private long numNulls; // required
private long numDVs; // required
+ private String bitVectors; // optional
/** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */
public enum _Fields implements org.apache.thrift.TFieldIdEnum {
LOW_VALUE((short)1, "lowValue"),
HIGH_VALUE((short)2, "highValue"),
NUM_NULLS((short)3, "numNulls"),
- NUM_DVS((short)4, "numDVs");
+ NUM_DVS((short)4, "numDVs"),
+ BIT_VECTORS((short)5, "bitVectors");
private static final Map<String, _Fields> byName = new HashMap<String, _Fields>();
@@ -82,6 +85,8 @@ public class LongColumnStatsData implements org.apache.thrift.TBase<LongColumnSt
return NUM_NULLS;
case 4: // NUM_DVS
return NUM_DVS;
+ case 5: // BIT_VECTORS
+ return BIT_VECTORS;
default:
return null;
}
@@ -127,7 +132,7 @@ public class LongColumnStatsData implements org.apache.thrift.TBase<LongColumnSt
private static final int __NUMNULLS_ISSET_ID = 2;
private static final int __NUMDVS_ISSET_ID = 3;
private byte __isset_bitfield = 0;
- private static final _Fields optionals[] = {_Fields.LOW_VALUE,_Fields.HIGH_VALUE};
+ private static final _Fields optionals[] = {_Fields.LOW_VALUE,_Fields.HIGH_VALUE,_Fields.BIT_VECTORS};
public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap;
static {
Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class);
@@ -139,6 +144,8 @@ public class LongColumnStatsData implements org.apache.thrift.TBase<LongColumnSt
new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64)));
tmpMap.put(_Fields.NUM_DVS, new org.apache.thrift.meta_data.FieldMetaData("numDVs", org.apache.thrift.TFieldRequirementType.REQUIRED,
new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64)));
+ tmpMap.put(_Fields.BIT_VECTORS, new org.apache.thrift.meta_data.FieldMetaData("bitVectors", org.apache.thrift.TFieldRequirementType.OPTIONAL,
+ new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING)));
metaDataMap = Collections.unmodifiableMap(tmpMap);
org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(LongColumnStatsData.class, metaDataMap);
}
@@ -166,6 +173,9 @@ public class LongColumnStatsData implements org.apache.thrift.TBase<LongColumnSt
this.highValue = other.highValue;
this.numNulls = other.numNulls;
this.numDVs = other.numDVs;
+ if (other.isSetBitVectors()) {
+ this.bitVectors = other.bitVectors;
+ }
}
public LongColumnStatsData deepCopy() {
@@ -182,6 +192,7 @@ public class LongColumnStatsData implements org.apache.thrift.TBase<LongColumnSt
this.numNulls = 0;
setNumDVsIsSet(false);
this.numDVs = 0;
+ this.bitVectors = null;
}
public long getLowValue() {
@@ -272,6 +283,29 @@ public class LongColumnStatsData implements org.apache.thrift.TBase<LongColumnSt
__isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __NUMDVS_ISSET_ID, value);
}
+ public String getBitVectors() {
+ return this.bitVectors;
+ }
+
+ public void setBitVectors(String bitVectors) {
+ this.bitVectors = bitVectors;
+ }
+
+ public void unsetBitVectors() {
+ this.bitVectors = null;
+ }
+
+ /** Returns true if field bitVectors is set (has been assigned a value) and false otherwise */
+ public boolean isSetBitVectors() {
+ return this.bitVectors != null;
+ }
+
+ public void setBitVectorsIsSet(boolean value) {
+ if (!value) {
+ this.bitVectors = null;
+ }
+ }
+
public void setFieldValue(_Fields field, Object value) {
switch (field) {
case LOW_VALUE:
@@ -306,6 +340,14 @@ public class LongColumnStatsData implements org.apache.thrift.TBase<LongColumnSt
}
break;
+ case BIT_VECTORS:
+ if (value == null) {
+ unsetBitVectors();
+ } else {
+ setBitVectors((String)value);
+ }
+ break;
+
}
}
@@ -323,6 +365,9 @@ public class LongColumnStatsData implements org.apache.thrift.TBase<LongColumnSt
case NUM_DVS:
return getNumDVs();
+ case BIT_VECTORS:
+ return getBitVectors();
+
}
throw new IllegalStateException();
}
@@ -342,6 +387,8 @@ public class LongColumnStatsData implements org.apache.thrift.TBase<LongColumnSt
return isSetNumNulls();
case NUM_DVS:
return isSetNumDVs();
+ case BIT_VECTORS:
+ return isSetBitVectors();
}
throw new IllegalStateException();
}
@@ -395,6 +442,15 @@ public class LongColumnStatsData implements org.apache.thrift.TBase<LongColumnSt
return false;
}
+ boolean this_present_bitVectors = true && this.isSetBitVectors();
+ boolean that_present_bitVectors = true && that.isSetBitVectors();
+ if (this_present_bitVectors || that_present_bitVectors) {
+ if (!(this_present_bitVectors && that_present_bitVectors))
+ return false;
+ if (!this.bitVectors.equals(that.bitVectors))
+ return false;
+ }
+
return true;
}
@@ -422,6 +478,11 @@ public class LongColumnStatsData implements org.apache.thrift.TBase<LongColumnSt
if (present_numDVs)
list.add(numDVs);
+ boolean present_bitVectors = true && (isSetBitVectors());
+ list.add(present_bitVectors);
+ if (present_bitVectors)
+ list.add(bitVectors);
+
return list.hashCode();
}
@@ -473,6 +534,16 @@ public class LongColumnStatsData implements org.apache.thrift.TBase<LongColumnSt
return lastComparison;
}
}
+ lastComparison = Boolean.valueOf(isSetBitVectors()).compareTo(other.isSetBitVectors());
+ if (lastComparison != 0) {
+ return lastComparison;
+ }
+ if (isSetBitVectors()) {
+ lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.bitVectors, other.bitVectors);
+ if (lastComparison != 0) {
+ return lastComparison;
+ }
+ }
return 0;
}
@@ -512,6 +583,16 @@ public class LongColumnStatsData implements org.apache.thrift.TBase<LongColumnSt
sb.append("numDVs:");
sb.append(this.numDVs);
first = false;
+ if (isSetBitVectors()) {
+ if (!first) sb.append(", ");
+ sb.append("bitVectors:");
+ if (this.bitVectors == null) {
+ sb.append("null");
+ } else {
+ sb.append(this.bitVectors);
+ }
+ first = false;
+ }
sb.append(")");
return sb.toString();
}
@@ -597,6 +678,14 @@ public class LongColumnStatsData implements org.apache.thrift.TBase<LongColumnSt
org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
}
break;
+ case 5: // BIT_VECTORS
+ if (schemeField.type == org.apache.thrift.protocol.TType.STRING) {
+ struct.bitVectors = iprot.readString();
+ struct.setBitVectorsIsSet(true);
+ } else {
+ org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
+ }
+ break;
default:
org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
}
@@ -626,6 +715,13 @@ public class LongColumnStatsData implements org.apache.thrift.TBase<LongColumnSt
oprot.writeFieldBegin(NUM_DVS_FIELD_DESC);
oprot.writeI64(struct.numDVs);
oprot.writeFieldEnd();
+ if (struct.bitVectors != null) {
+ if (struct.isSetBitVectors()) {
+ oprot.writeFieldBegin(BIT_VECTORS_FIELD_DESC);
+ oprot.writeString(struct.bitVectors);
+ oprot.writeFieldEnd();
+ }
+ }
oprot.writeFieldStop();
oprot.writeStructEnd();
}
@@ -652,13 +748,19 @@ public class LongColumnStatsData implements org.apache.thrift.TBase<LongColumnSt
if (struct.isSetHighValue()) {
optionals.set(1);
}
- oprot.writeBitSet(optionals, 2);
+ if (struct.isSetBitVectors()) {
+ optionals.set(2);
+ }
+ oprot.writeBitSet(optionals, 3);
if (struct.isSetLowValue()) {
oprot.writeI64(struct.lowValue);
}
if (struct.isSetHighValue()) {
oprot.writeI64(struct.highValue);
}
+ if (struct.isSetBitVectors()) {
+ oprot.writeString(struct.bitVectors);
+ }
}
@Override
@@ -668,7 +770,7 @@ public class LongColumnStatsData implements org.apache.thrift.TBase<LongColumnSt
struct.setNumNullsIsSet(true);
struct.numDVs = iprot.readI64();
struct.setNumDVsIsSet(true);
- BitSet incoming = iprot.readBitSet(2);
+ BitSet incoming = iprot.readBitSet(3);
if (incoming.get(0)) {
struct.lowValue = iprot.readI64();
struct.setLowValueIsSet(true);
@@ -677,6 +779,10 @@ public class LongColumnStatsData implements org.apache.thrift.TBase<LongColumnSt
struct.highValue = iprot.readI64();
struct.setHighValueIsSet(true);
}
+ if (incoming.get(2)) {
+ struct.bitVectors = iprot.readString();
+ struct.setBitVectorsIsSet(true);
+ }
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/StringColumnStatsData.java
----------------------------------------------------------------------
diff --git a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/StringColumnStatsData.java b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/StringColumnStatsData.java
index bd8a922..c9afe87 100644
--- a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/StringColumnStatsData.java
+++ b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/StringColumnStatsData.java
@@ -42,6 +42,7 @@ public class StringColumnStatsData implements org.apache.thrift.TBase<StringColu
private static final org.apache.thrift.protocol.TField AVG_COL_LEN_FIELD_DESC = new org.apache.thrift.protocol.TField("avgColLen", org.apache.thrift.protocol.TType.DOUBLE, (short)2);
private static final org.apache.thrift.protocol.TField NUM_NULLS_FIELD_DESC = new org.apache.thrift.protocol.TField("numNulls", org.apache.thrift.protocol.TType.I64, (short)3);
private static final org.apache.thrift.protocol.TField NUM_DVS_FIELD_DESC = new org.apache.thrift.protocol.TField("numDVs", org.apache.thrift.protocol.TType.I64, (short)4);
+ private static final org.apache.thrift.protocol.TField BIT_VECTORS_FIELD_DESC = new org.apache.thrift.protocol.TField("bitVectors", org.apache.thrift.protocol.TType.STRING, (short)5);
private static final Map<Class<? extends IScheme>, SchemeFactory> schemes = new HashMap<Class<? extends IScheme>, SchemeFactory>();
static {
@@ -53,13 +54,15 @@ public class StringColumnStatsData implements org.apache.thrift.TBase<StringColu
private double avgColLen; // required
private long numNulls; // required
private long numDVs; // required
+ private String bitVectors; // optional
/** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */
public enum _Fields implements org.apache.thrift.TFieldIdEnum {
MAX_COL_LEN((short)1, "maxColLen"),
AVG_COL_LEN((short)2, "avgColLen"),
NUM_NULLS((short)3, "numNulls"),
- NUM_DVS((short)4, "numDVs");
+ NUM_DVS((short)4, "numDVs"),
+ BIT_VECTORS((short)5, "bitVectors");
private static final Map<String, _Fields> byName = new HashMap<String, _Fields>();
@@ -82,6 +85,8 @@ public class StringColumnStatsData implements org.apache.thrift.TBase<StringColu
return NUM_NULLS;
case 4: // NUM_DVS
return NUM_DVS;
+ case 5: // BIT_VECTORS
+ return BIT_VECTORS;
default:
return null;
}
@@ -127,6 +132,7 @@ public class StringColumnStatsData implements org.apache.thrift.TBase<StringColu
private static final int __NUMNULLS_ISSET_ID = 2;
private static final int __NUMDVS_ISSET_ID = 3;
private byte __isset_bitfield = 0;
+ private static final _Fields optionals[] = {_Fields.BIT_VECTORS};
public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap;
static {
Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class);
@@ -138,6 +144,8 @@ public class StringColumnStatsData implements org.apache.thrift.TBase<StringColu
new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64)));
tmpMap.put(_Fields.NUM_DVS, new org.apache.thrift.meta_data.FieldMetaData("numDVs", org.apache.thrift.TFieldRequirementType.REQUIRED,
new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64)));
+ tmpMap.put(_Fields.BIT_VECTORS, new org.apache.thrift.meta_data.FieldMetaData("bitVectors", org.apache.thrift.TFieldRequirementType.OPTIONAL,
+ new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING)));
metaDataMap = Collections.unmodifiableMap(tmpMap);
org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(StringColumnStatsData.class, metaDataMap);
}
@@ -171,6 +179,9 @@ public class StringColumnStatsData implements org.apache.thrift.TBase<StringColu
this.avgColLen = other.avgColLen;
this.numNulls = other.numNulls;
this.numDVs = other.numDVs;
+ if (other.isSetBitVectors()) {
+ this.bitVectors = other.bitVectors;
+ }
}
public StringColumnStatsData deepCopy() {
@@ -187,6 +198,7 @@ public class StringColumnStatsData implements org.apache.thrift.TBase<StringColu
this.numNulls = 0;
setNumDVsIsSet(false);
this.numDVs = 0;
+ this.bitVectors = null;
}
public long getMaxColLen() {
@@ -277,6 +289,29 @@ public class StringColumnStatsData implements org.apache.thrift.TBase<StringColu
__isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __NUMDVS_ISSET_ID, value);
}
+ public String getBitVectors() {
+ return this.bitVectors;
+ }
+
+ public void setBitVectors(String bitVectors) {
+ this.bitVectors = bitVectors;
+ }
+
+ public void unsetBitVectors() {
+ this.bitVectors = null;
+ }
+
+ /** Returns true if field bitVectors is set (has been assigned a value) and false otherwise */
+ public boolean isSetBitVectors() {
+ return this.bitVectors != null;
+ }
+
+ public void setBitVectorsIsSet(boolean value) {
+ if (!value) {
+ this.bitVectors = null;
+ }
+ }
+
public void setFieldValue(_Fields field, Object value) {
switch (field) {
case MAX_COL_LEN:
@@ -311,6 +346,14 @@ public class StringColumnStatsData implements org.apache.thrift.TBase<StringColu
}
break;
+ case BIT_VECTORS:
+ if (value == null) {
+ unsetBitVectors();
+ } else {
+ setBitVectors((String)value);
+ }
+ break;
+
}
}
@@ -328,6 +371,9 @@ public class StringColumnStatsData implements org.apache.thrift.TBase<StringColu
case NUM_DVS:
return getNumDVs();
+ case BIT_VECTORS:
+ return getBitVectors();
+
}
throw new IllegalStateException();
}
@@ -347,6 +393,8 @@ public class StringColumnStatsData implements org.apache.thrift.TBase<StringColu
return isSetNumNulls();
case NUM_DVS:
return isSetNumDVs();
+ case BIT_VECTORS:
+ return isSetBitVectors();
}
throw new IllegalStateException();
}
@@ -400,6 +448,15 @@ public class StringColumnStatsData implements org.apache.thrift.TBase<StringColu
return false;
}
+ boolean this_present_bitVectors = true && this.isSetBitVectors();
+ boolean that_present_bitVectors = true && that.isSetBitVectors();
+ if (this_present_bitVectors || that_present_bitVectors) {
+ if (!(this_present_bitVectors && that_present_bitVectors))
+ return false;
+ if (!this.bitVectors.equals(that.bitVectors))
+ return false;
+ }
+
return true;
}
@@ -427,6 +484,11 @@ public class StringColumnStatsData implements org.apache.thrift.TBase<StringColu
if (present_numDVs)
list.add(numDVs);
+ boolean present_bitVectors = true && (isSetBitVectors());
+ list.add(present_bitVectors);
+ if (present_bitVectors)
+ list.add(bitVectors);
+
return list.hashCode();
}
@@ -478,6 +540,16 @@ public class StringColumnStatsData implements org.apache.thrift.TBase<StringColu
return lastComparison;
}
}
+ lastComparison = Boolean.valueOf(isSetBitVectors()).compareTo(other.isSetBitVectors());
+ if (lastComparison != 0) {
+ return lastComparison;
+ }
+ if (isSetBitVectors()) {
+ lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.bitVectors, other.bitVectors);
+ if (lastComparison != 0) {
+ return lastComparison;
+ }
+ }
return 0;
}
@@ -513,6 +585,16 @@ public class StringColumnStatsData implements org.apache.thrift.TBase<StringColu
sb.append("numDVs:");
sb.append(this.numDVs);
first = false;
+ if (isSetBitVectors()) {
+ if (!first) sb.append(", ");
+ sb.append("bitVectors:");
+ if (this.bitVectors == null) {
+ sb.append("null");
+ } else {
+ sb.append(this.bitVectors);
+ }
+ first = false;
+ }
sb.append(")");
return sb.toString();
}
@@ -606,6 +688,14 @@ public class StringColumnStatsData implements org.apache.thrift.TBase<StringColu
org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
}
break;
+ case 5: // BIT_VECTORS
+ if (schemeField.type == org.apache.thrift.protocol.TType.STRING) {
+ struct.bitVectors = iprot.readString();
+ struct.setBitVectorsIsSet(true);
+ } else {
+ org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
+ }
+ break;
default:
org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
}
@@ -631,6 +721,13 @@ public class StringColumnStatsData implements org.apache.thrift.TBase<StringColu
oprot.writeFieldBegin(NUM_DVS_FIELD_DESC);
oprot.writeI64(struct.numDVs);
oprot.writeFieldEnd();
+ if (struct.bitVectors != null) {
+ if (struct.isSetBitVectors()) {
+ oprot.writeFieldBegin(BIT_VECTORS_FIELD_DESC);
+ oprot.writeString(struct.bitVectors);
+ oprot.writeFieldEnd();
+ }
+ }
oprot.writeFieldStop();
oprot.writeStructEnd();
}
@@ -652,6 +749,14 @@ public class StringColumnStatsData implements org.apache.thrift.TBase<StringColu
oprot.writeDouble(struct.avgColLen);
oprot.writeI64(struct.numNulls);
oprot.writeI64(struct.numDVs);
+ BitSet optionals = new BitSet();
+ if (struct.isSetBitVectors()) {
+ optionals.set(0);
+ }
+ oprot.writeBitSet(optionals, 1);
+ if (struct.isSetBitVectors()) {
+ oprot.writeString(struct.bitVectors);
+ }
}
@Override
@@ -665,6 +770,11 @@ public class StringColumnStatsData implements org.apache.thrift.TBase<StringColu
struct.setNumNullsIsSet(true);
struct.numDVs = iprot.readI64();
struct.setNumDVsIsSet(true);
+ BitSet incoming = iprot.readBitSet(1);
+ if (incoming.get(0)) {
+ struct.bitVectors = iprot.readString();
+ struct.setBitVectorsIsSet(true);
+ }
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/metastore/src/gen/thrift/gen-php/metastore/Types.php
----------------------------------------------------------------------
diff --git a/metastore/src/gen/thrift/gen-php/metastore/Types.php b/metastore/src/gen/thrift/gen-php/metastore/Types.php
index 380e6d0..57d1daf 100644
--- a/metastore/src/gen/thrift/gen-php/metastore/Types.php
+++ b/metastore/src/gen/thrift/gen-php/metastore/Types.php
@@ -5624,6 +5624,10 @@ class BooleanColumnStatsData {
* @var int
*/
public $numNulls = null;
+ /**
+ * @var string
+ */
+ public $bitVectors = null;
public function __construct($vals=null) {
if (!isset(self::$_TSPEC)) {
@@ -5640,6 +5644,10 @@ class BooleanColumnStatsData {
'var' => 'numNulls',
'type' => TType::I64,
),
+ 4 => array(
+ 'var' => 'bitVectors',
+ 'type' => TType::STRING,
+ ),
);
}
if (is_array($vals)) {
@@ -5652,6 +5660,9 @@ class BooleanColumnStatsData {
if (isset($vals['numNulls'])) {
$this->numNulls = $vals['numNulls'];
}
+ if (isset($vals['bitVectors'])) {
+ $this->bitVectors = $vals['bitVectors'];
+ }
}
}
@@ -5695,6 +5706,13 @@ class BooleanColumnStatsData {
$xfer += $input->skip($ftype);
}
break;
+ case 4:
+ if ($ftype == TType::STRING) {
+ $xfer += $input->readString($this->bitVectors);
+ } else {
+ $xfer += $input->skip($ftype);
+ }
+ break;
default:
$xfer += $input->skip($ftype);
break;
@@ -5723,6 +5741,11 @@ class BooleanColumnStatsData {
$xfer += $output->writeI64($this->numNulls);
$xfer += $output->writeFieldEnd();
}
+ if ($this->bitVectors !== null) {
+ $xfer += $output->writeFieldBegin('bitVectors', TType::STRING, 4);
+ $xfer += $output->writeString($this->bitVectors);
+ $xfer += $output->writeFieldEnd();
+ }
$xfer += $output->writeFieldStop();
$xfer += $output->writeStructEnd();
return $xfer;
@@ -5749,6 +5772,10 @@ class DoubleColumnStatsData {
* @var int
*/
public $numDVs = null;
+ /**
+ * @var string
+ */
+ public $bitVectors = null;
public function __construct($vals=null) {
if (!isset(self::$_TSPEC)) {
@@ -5769,6 +5796,10 @@ class DoubleColumnStatsData {
'var' => 'numDVs',
'type' => TType::I64,
),
+ 5 => array(
+ 'var' => 'bitVectors',
+ 'type' => TType::STRING,
+ ),
);
}
if (is_array($vals)) {
@@ -5784,6 +5815,9 @@ class DoubleColumnStatsData {
if (isset($vals['numDVs'])) {
$this->numDVs = $vals['numDVs'];
}
+ if (isset($vals['bitVectors'])) {
+ $this->bitVectors = $vals['bitVectors'];
+ }
}
}
@@ -5834,6 +5868,13 @@ class DoubleColumnStatsData {
$xfer += $input->skip($ftype);
}
break;
+ case 5:
+ if ($ftype == TType::STRING) {
+ $xfer += $input->readString($this->bitVectors);
+ } else {
+ $xfer += $input->skip($ftype);
+ }
+ break;
default:
$xfer += $input->skip($ftype);
break;
@@ -5867,6 +5908,11 @@ class DoubleColumnStatsData {
$xfer += $output->writeI64($this->numDVs);
$xfer += $output->writeFieldEnd();
}
+ if ($this->bitVectors !== null) {
+ $xfer += $output->writeFieldBegin('bitVectors', TType::STRING, 5);
+ $xfer += $output->writeString($this->bitVectors);
+ $xfer += $output->writeFieldEnd();
+ }
$xfer += $output->writeFieldStop();
$xfer += $output->writeStructEnd();
return $xfer;
@@ -5893,6 +5939,10 @@ class LongColumnStatsData {
* @var int
*/
public $numDVs = null;
+ /**
+ * @var string
+ */
+ public $bitVectors = null;
public function __construct($vals=null) {
if (!isset(self::$_TSPEC)) {
@@ -5913,6 +5963,10 @@ class LongColumnStatsData {
'var' => 'numDVs',
'type' => TType::I64,
),
+ 5 => array(
+ 'var' => 'bitVectors',
+ 'type' => TType::STRING,
+ ),
);
}
if (is_array($vals)) {
@@ -5928,6 +5982,9 @@ class LongColumnStatsData {
if (isset($vals['numDVs'])) {
$this->numDVs = $vals['numDVs'];
}
+ if (isset($vals['bitVectors'])) {
+ $this->bitVectors = $vals['bitVectors'];
+ }
}
}
@@ -5978,6 +6035,13 @@ class LongColumnStatsData {
$xfer += $input->skip($ftype);
}
break;
+ case 5:
+ if ($ftype == TType::STRING) {
+ $xfer += $input->readString($this->bitVectors);
+ } else {
+ $xfer += $input->skip($ftype);
+ }
+ break;
default:
$xfer += $input->skip($ftype);
break;
@@ -6011,6 +6075,11 @@ class LongColumnStatsData {
$xfer += $output->writeI64($this->numDVs);
$xfer += $output->writeFieldEnd();
}
+ if ($this->bitVectors !== null) {
+ $xfer += $output->writeFieldBegin('bitVectors', TType::STRING, 5);
+ $xfer += $output->writeString($this->bitVectors);
+ $xfer += $output->writeFieldEnd();
+ }
$xfer += $output->writeFieldStop();
$xfer += $output->writeStructEnd();
return $xfer;
@@ -6037,6 +6106,10 @@ class StringColumnStatsData {
* @var int
*/
public $numDVs = null;
+ /**
+ * @var string
+ */
+ public $bitVectors = null;
public function __construct($vals=null) {
if (!isset(self::$_TSPEC)) {
@@ -6057,6 +6130,10 @@ class StringColumnStatsData {
'var' => 'numDVs',
'type' => TType::I64,
),
+ 5 => array(
+ 'var' => 'bitVectors',
+ 'type' => TType::STRING,
+ ),
);
}
if (is_array($vals)) {
@@ -6072,6 +6149,9 @@ class StringColumnStatsData {
if (isset($vals['numDVs'])) {
$this->numDVs = $vals['numDVs'];
}
+ if (isset($vals['bitVectors'])) {
+ $this->bitVectors = $vals['bitVectors'];
+ }
}
}
@@ -6122,6 +6202,13 @@ class StringColumnStatsData {
$xfer += $input->skip($ftype);
}
break;
+ case 5:
+ if ($ftype == TType::STRING) {
+ $xfer += $input->readString($this->bitVectors);
+ } else {
+ $xfer += $input->skip($ftype);
+ }
+ break;
default:
$xfer += $input->skip($ftype);
break;
@@ -6155,6 +6242,11 @@ class StringColumnStatsData {
$xfer += $output->writeI64($this->numDVs);
$xfer += $output->writeFieldEnd();
}
+ if ($this->bitVectors !== null) {
+ $xfer += $output->writeFieldBegin('bitVectors', TType::STRING, 5);
+ $xfer += $output->writeString($this->bitVectors);
+ $xfer += $output->writeFieldEnd();
+ }
$xfer += $output->writeFieldStop();
$xfer += $output->writeStructEnd();
return $xfer;
@@ -6177,6 +6269,10 @@ class BinaryColumnStatsData {
* @var int
*/
public $numNulls = null;
+ /**
+ * @var string
+ */
+ public $bitVectors = null;
public function __construct($vals=null) {
if (!isset(self::$_TSPEC)) {
@@ -6193,6 +6289,10 @@ class BinaryColumnStatsData {
'var' => 'numNulls',
'type' => TType::I64,
),
+ 4 => array(
+ 'var' => 'bitVectors',
+ 'type' => TType::STRING,
+ ),
);
}
if (is_array($vals)) {
@@ -6205,6 +6305,9 @@ class BinaryColumnStatsData {
if (isset($vals['numNulls'])) {
$this->numNulls = $vals['numNulls'];
}
+ if (isset($vals['bitVectors'])) {
+ $this->bitVectors = $vals['bitVectors'];
+ }
}
}
@@ -6248,6 +6351,13 @@ class BinaryColumnStatsData {
$xfer += $input->skip($ftype);
}
break;
+ case 4:
+ if ($ftype == TType::STRING) {
+ $xfer += $input->readString($this->bitVectors);
+ } else {
+ $xfer += $input->skip($ftype);
+ }
+ break;
default:
$xfer += $input->skip($ftype);
break;
@@ -6276,6 +6386,11 @@ class BinaryColumnStatsData {
$xfer += $output->writeI64($this->numNulls);
$xfer += $output->writeFieldEnd();
}
+ if ($this->bitVectors !== null) {
+ $xfer += $output->writeFieldBegin('bitVectors', TType::STRING, 4);
+ $xfer += $output->writeString($this->bitVectors);
+ $xfer += $output->writeFieldEnd();
+ }
$xfer += $output->writeFieldStop();
$xfer += $output->writeStructEnd();
return $xfer;
@@ -6400,6 +6515,10 @@ class DecimalColumnStatsData {
* @var int
*/
public $numDVs = null;
+ /**
+ * @var string
+ */
+ public $bitVectors = null;
public function __construct($vals=null) {
if (!isset(self::$_TSPEC)) {
@@ -6422,6 +6541,10 @@ class DecimalColumnStatsData {
'var' => 'numDVs',
'type' => TType::I64,
),
+ 5 => array(
+ 'var' => 'bitVectors',
+ 'type' => TType::STRING,
+ ),
);
}
if (is_array($vals)) {
@@ -6437,6 +6560,9 @@ class DecimalColumnStatsData {
if (isset($vals['numDVs'])) {
$this->numDVs = $vals['numDVs'];
}
+ if (isset($vals['bitVectors'])) {
+ $this->bitVectors = $vals['bitVectors'];
+ }
}
}
@@ -6489,6 +6615,13 @@ class DecimalColumnStatsData {
$xfer += $input->skip($ftype);
}
break;
+ case 5:
+ if ($ftype == TType::STRING) {
+ $xfer += $input->readString($this->bitVectors);
+ } else {
+ $xfer += $input->skip($ftype);
+ }
+ break;
default:
$xfer += $input->skip($ftype);
break;
@@ -6528,6 +6661,11 @@ class DecimalColumnStatsData {
$xfer += $output->writeI64($this->numDVs);
$xfer += $output->writeFieldEnd();
}
+ if ($this->bitVectors !== null) {
+ $xfer += $output->writeFieldBegin('bitVectors', TType::STRING, 5);
+ $xfer += $output->writeString($this->bitVectors);
+ $xfer += $output->writeFieldEnd();
+ }
$xfer += $output->writeFieldStop();
$xfer += $output->writeStructEnd();
return $xfer;
@@ -6629,6 +6767,10 @@ class DateColumnStatsData {
* @var int
*/
public $numDVs = null;
+ /**
+ * @var string
+ */
+ public $bitVectors = null;
public function __construct($vals=null) {
if (!isset(self::$_TSPEC)) {
@@ -6651,6 +6793,10 @@ class DateColumnStatsData {
'var' => 'numDVs',
'type' => TType::I64,
),
+ 5 => array(
+ 'var' => 'bitVectors',
+ 'type' => TType::STRING,
+ ),
);
}
if (is_array($vals)) {
@@ -6666,6 +6812,9 @@ class DateColumnStatsData {
if (isset($vals['numDVs'])) {
$this->numDVs = $vals['numDVs'];
}
+ if (isset($vals['bitVectors'])) {
+ $this->bitVectors = $vals['bitVectors'];
+ }
}
}
@@ -6718,6 +6867,13 @@ class DateColumnStatsData {
$xfer += $input->skip($ftype);
}
break;
+ case 5:
+ if ($ftype == TType::STRING) {
+ $xfer += $input->readString($this->bitVectors);
+ } else {
+ $xfer += $input->skip($ftype);
+ }
+ break;
default:
$xfer += $input->skip($ftype);
break;
@@ -6757,6 +6913,11 @@ class DateColumnStatsData {
$xfer += $output->writeI64($this->numDVs);
$xfer += $output->writeFieldEnd();
}
+ if ($this->bitVectors !== null) {
+ $xfer += $output->writeFieldBegin('bitVectors', TType::STRING, 5);
+ $xfer += $output->writeString($this->bitVectors);
+ $xfer += $output->writeFieldEnd();
+ }
$xfer += $output->writeFieldStop();
$xfer += $output->writeStructEnd();
return $xfer;