You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by px...@apache.org on 2016/05/11 22:43:39 UTC
hive git commit: HIVE-13565: Auto-gather column stats thrift change
(Pengcheng Xiong, reviewed by Ashutosh Chauhan)
Repository: hive
Updated Branches:
refs/heads/master 6187e2a6b -> b9e4fe856
HIVE-13565: Auto-gather column stats thrift change (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b9e4fe85
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b9e4fe85
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b9e4fe85
Branch: refs/heads/master
Commit: b9e4fe856fcf3bb4339c8efebab1138c9dc1e732
Parents: 6187e2a
Author: Pengcheng Xiong <px...@apache.org>
Authored: Wed May 11 15:43:15 2016 -0700
Committer: Pengcheng Xiong <px...@apache.org>
Committed: Wed May 11 15:43:15 2016 -0700
----------------------------------------------------------------------
.../metastore/TestHiveMetaStoreStatsMerge.java | 199 +++++++++++++++++++
metastore/if/hive_metastore.thrift | 3 +-
.../gen/thrift/gen-cpp/hive_metastore_types.cpp | 25 +++
.../gen/thrift/gen-cpp/hive_metastore_types.h | 15 +-
.../api/SetPartitionsStatsRequest.java | 109 +++++++++-
.../src/gen/thrift/gen-php/metastore/Types.php | 23 +++
.../gen/thrift/gen-py/hive_metastore/ttypes.py | 15 +-
.../gen/thrift/gen-rb/hive_metastore_types.rb | 4 +-
.../hadoop/hive/metastore/HiveMetaStore.java | 62 +++++-
.../hive/metastore/HiveMetaStoreClient.java | 4 +
.../hadoop/hive/metastore/MetaStoreUtils.java | 37 ++++
.../stats/merge/BinaryColumnStatsMerger.java | 35 ++++
.../stats/merge/BooleanColumnStatsMerger.java | 35 ++++
.../hbase/stats/merge/ColumnStatsMerger.java | 34 ++++
.../stats/merge/ColumnStatsMergerFactory.java | 138 +++++++++++++
.../stats/merge/DecimalColumnStatsMerger.java | 55 +++++
.../stats/merge/DoubleColumnStatsMerger.java | 48 +++++
.../stats/merge/LongColumnStatsMerger.java | 48 +++++
.../stats/merge/StringColumnStatsMerger.java | 49 +++++
.../hadoop/hive/ql/exec/ColumnStatsTask.java | 26 +--
.../hive/ql/exec/ColumnStatsUpdateTask.java | 32 +--
.../apache/hadoop/hive/ql/metadata/Hive.java | 18 --
.../ql/metadata/SessionHiveMetaStoreClient.java | 18 +-
.../hadoop/hive/ql/plan/ColumnStatsDesc.java | 22 +-
24 files changed, 981 insertions(+), 73 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStoreStatsMerge.java
----------------------------------------------------------------------
diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStoreStatsMerge.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStoreStatsMerge.java
new file mode 100644
index 0000000..d6df32b
--- /dev/null
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStoreStatsMerge.java
@@ -0,0 +1,199 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.metastore;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import junit.framework.TestCase;
+
+import org.apache.hadoop.hive.cli.CliSessionState;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
+import org.apache.hadoop.hive.metastore.api.Database;
+import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.Partition;
+import org.apache.hadoop.hive.metastore.api.SerDeInfo;
+import org.apache.hadoop.hive.metastore.api.SetPartitionsStatsRequest;
+import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
+import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.metastore.events.AddPartitionEvent;
+import org.apache.hadoop.hive.metastore.events.AlterTableEvent;
+import org.apache.hadoop.hive.metastore.events.CreateDatabaseEvent;
+import org.apache.hadoop.hive.metastore.events.CreateTableEvent;
+import org.apache.hadoop.hive.metastore.events.DropDatabaseEvent;
+import org.apache.hadoop.hive.metastore.events.DropPartitionEvent;
+import org.apache.hadoop.hive.metastore.events.DropTableEvent;
+import org.apache.hadoop.hive.metastore.events.ListenerEvent;
+import org.apache.hadoop.hive.ql.io.HiveInputFormat;
+import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
+import org.apache.hadoop.hive.ql.session.SessionState;
+import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
+import org.apache.hadoop.hive.shims.ShimLoader;
+
+/**
+ * Verifies that table-level column statistics sent with needMerge set are merged with
+ * the statistics already stored through {@link org.apache.hadoop.hive.metastore.HiveMetaStore}.
+ */
+public class TestHiveMetaStoreStatsMerge extends TestCase {
+
+  private HiveConf hiveConf;
+  private HiveMetaStoreClient msc;
+  private final Database db = new Database();
+  private Table table = new Table();
+
+  private static final String dbName = "hive3252";
+  private static final String tblName = "tmptbl";
+
+  /** Starts a metastore on a free port, connects a client and fills in the db/table objects. */
+  @Override
+  protected void setUp() throws Exception {
+    super.setUp();
+
+    System.setProperty("hive.metastore.event.listeners",
+        DummyListener.class.getName());
+
+    int port = MetaStoreUtils.findFreePort();
+    MetaStoreUtils.startMetaStore(port, ShimLoader.getHadoopThriftAuthBridge());
+
+    hiveConf = new HiveConf(this.getClass());
+    hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://localhost:" + port);
+    hiveConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3);
+    hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, "");
+    hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, "");
+    hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false");
+    SessionState.start(new CliSessionState(hiveConf));
+    msc = new HiveMetaStoreClient(hiveConf);
+
+    msc.dropDatabase(dbName, true, true);
+
+    db.setName(dbName);
+
+    Map<String, String> tableParams = new HashMap<String, String>();
+    tableParams.put("a", "string");
+
+    List<FieldSchema> cols = new ArrayList<FieldSchema>();
+    cols.add(new FieldSchema("a", "string", ""));
+    StorageDescriptor sd = new StorageDescriptor();
+    sd.setCols(cols);
+    sd.setCompressed(false);
+    sd.setParameters(tableParams);
+    sd.setSerdeInfo(new SerDeInfo());
+    sd.getSerdeInfo().setName(tblName);
+    sd.getSerdeInfo().setParameters(new HashMap<String, String>());
+    sd.getSerdeInfo().getParameters().put(serdeConstants.SERIALIZATION_FORMAT, "1");
+    sd.getSerdeInfo().setSerializationLib(LazySimpleSerDe.class.getName());
+    sd.setInputFormat(HiveInputFormat.class.getName());
+    sd.setOutputFormat(HiveOutputFormat.class.getName());
+
+    table.setDbName(dbName);
+    table.setTableName(tblName);
+    table.setParameters(tableParams);
+    table.setSd(sd);
+
+    DummyListener.notifyList.clear();
+  }
+
+  /** Closes the client created in setUp so its metastore connection is not leaked. */
+  @Override
+  protected void tearDown() throws Exception {
+    msc.close();
+    super.tearDown();
+  }
+
+  /**
+   * Stores string column stats for column "a", then sends a second batch with needMerge
+   * enabled and checks the merged values returned by the metastore.
+   */
+  public void testStatsMerge() throws Exception {
+    int listSize = 0;
+    List<ListenerEvent> notifyList = DummyListener.notifyList;
+    assertEquals(listSize, notifyList.size());
+    msc.createDatabase(db);
+    listSize++;
+    assertEquals(listSize, notifyList.size());
+    CreateDatabaseEvent dbEvent = (CreateDatabaseEvent)(notifyList.get(listSize - 1));
+    assertTrue(dbEvent.getStatus());
+
+    msc.createTable(table);
+    listSize++;
+    assertEquals(listSize, notifyList.size());
+    CreateTableEvent tblEvent = (CreateTableEvent)(notifyList.get(listSize - 1));
+    assertTrue(tblEvent.getStatus());
+
+    table = msc.getTable(dbName, tblName);
+
+    // First batch: needMerge is not set on this request.
+    StringColumnStatsData scsd = new StringColumnStatsData();
+    scsd.setAvgColLen(10);
+    scsd.setMaxColLen(20);
+    scsd.setNumNulls(30);
+    scsd.setNumDVs(123);
+    scsd.setBitVectors("{0, 4, 5, 7}{0, 1}{0, 1, 2}{0, 1, 4}{0}{0, 2}{0, 3}{0, 2, 3, 4}{0, 1, 4}{0, 1}{0}{0, 1, 3, 8}{0, 2}{0, 2}{0, 9}{0, 1, 4}");
+    SetPartitionsStatsRequest request = new SetPartitionsStatsRequest(tableStats(scsd));
+    msc.setPartitionColumnStatistics(request);
+
+    List<String> colNames = new ArrayList<>();
+    colNames.add("a");
+
+    StringColumnStatsData getScsd = msc.getTableColumnStatistics(dbName, tblName, colNames).get(0)
+        .getStatsData().getStringStats();
+    assertEquals(123, getScsd.getNumDVs());
+
+    // Second batch: built from fresh objects so the request carries exactly these stats.
+    scsd = new StringColumnStatsData();
+    scsd.setAvgColLen(20);
+    scsd.setMaxColLen(5);
+    scsd.setNumNulls(70);
+    scsd.setNumDVs(456);
+    scsd.setBitVectors("{0, 1}{0, 1}{1, 2, 4}{0, 1, 2}{0, 1, 2}{0, 2}{0, 1, 3, 4}{0, 1}{0, 1}{3, 4, 6}{2}{0, 1}{0, 3}{0}{0, 1}{0, 1, 4}");
+    request = new SetPartitionsStatsRequest(tableStats(scsd));
+    request.setNeedMerge(true);
+    msc.setPartitionColumnStatistics(request);
+
+    getScsd = msc.getTableColumnStatistics(dbName, tblName, colNames).get(0)
+        .getStatsData().getStringStats();
+    assertEquals(20.0, getScsd.getAvgColLen());
+    assertEquals(20, getScsd.getMaxColLen());
+    assertEquals(100, getScsd.getNumNulls());
+    // since metastore is ObjectStore, we use the max function to merge.
+    assertEquals(456, getScsd.getNumDVs());
+  }
+
+  /** Wraps the given string stats for column "a" into a one-element table-level stats list. */
+  private static List<ColumnStatistics> tableStats(StringColumnStatsData stringStats) {
+    ColumnStatisticsData data = new ColumnStatisticsData();
+    data.setStringStats(stringStats);
+    ColumnStatisticsObj obj = new ColumnStatisticsObj("a", "string", data);
+    ColumnStatistics cs = new ColumnStatistics();
+    cs.setStatsDesc(new ColumnStatisticsDesc(true, dbName, tblName));
+    cs.addToStatsObj(obj);
+    List<ColumnStatistics> colStats = new ArrayList<>();
+    colStats.add(cs);
+    return colStats;
+  }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/metastore/if/hive_metastore.thrift
----------------------------------------------------------------------
diff --git a/metastore/if/hive_metastore.thrift b/metastore/if/hive_metastore.thrift
index 84e9b6d..f8e56c7 100755
--- a/metastore/if/hive_metastore.thrift
+++ b/metastore/if/hive_metastore.thrift
@@ -449,7 +449,8 @@ struct AggrStats {
}
struct SetPartitionsStatsRequest {
-1: required list<ColumnStatistics> colStats
+1: required list<ColumnStatistics> colStats,
+2: optional bool needMerge // when true, the new stats are merged with the existing stats before being stored
}
// schema of the table/query results etc.
http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp
----------------------------------------------------------------------
diff --git a/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp b/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp
index ad5da3e..cd8c552 100644
--- a/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp
+++ b/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp
@@ -8165,6 +8165,11 @@ void SetPartitionsStatsRequest::__set_colStats(const std::vector<ColumnStatistic
this->colStats = val;
}
+void SetPartitionsStatsRequest::__set_needMerge(const bool val) {
+ this->needMerge = val;
+__isset.needMerge = true;
+}
+
uint32_t SetPartitionsStatsRequest::read(::apache::thrift::protocol::TProtocol* iprot) {
apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
@@ -8207,6 +8212,14 @@ uint32_t SetPartitionsStatsRequest::read(::apache::thrift::protocol::TProtocol*
xfer += iprot->skip(ftype);
}
break;
+ case 2:
+ if (ftype == ::apache::thrift::protocol::T_BOOL) {
+ xfer += iprot->readBool(this->needMerge);
+ this->__isset.needMerge = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
default:
xfer += iprot->skip(ftype);
break;
@@ -8238,6 +8251,11 @@ uint32_t SetPartitionsStatsRequest::write(::apache::thrift::protocol::TProtocol*
}
xfer += oprot->writeFieldEnd();
+ if (this->__isset.needMerge) {
+ xfer += oprot->writeFieldBegin("needMerge", ::apache::thrift::protocol::T_BOOL, 2);
+ xfer += oprot->writeBool(this->needMerge);
+ xfer += oprot->writeFieldEnd();
+ }
xfer += oprot->writeFieldStop();
xfer += oprot->writeStructEnd();
return xfer;
@@ -8246,19 +8264,26 @@ uint32_t SetPartitionsStatsRequest::write(::apache::thrift::protocol::TProtocol*
void swap(SetPartitionsStatsRequest &a, SetPartitionsStatsRequest &b) {
using ::std::swap;
swap(a.colStats, b.colStats);
+ swap(a.needMerge, b.needMerge);
+ swap(a.__isset, b.__isset);
}
SetPartitionsStatsRequest::SetPartitionsStatsRequest(const SetPartitionsStatsRequest& other329) {
colStats = other329.colStats;
+ needMerge = other329.needMerge;
+ __isset = other329.__isset;
}
SetPartitionsStatsRequest& SetPartitionsStatsRequest::operator=(const SetPartitionsStatsRequest& other330) {
colStats = other330.colStats;
+ needMerge = other330.needMerge;
+ __isset = other330.__isset;
return *this;
}
void SetPartitionsStatsRequest::printTo(std::ostream& out) const {
using ::apache::thrift::to_string;
out << "SetPartitionsStatsRequest(";
out << "colStats=" << to_string(colStats);
+ out << ", " << "needMerge="; (__isset.needMerge ? (out << to_string(needMerge)) : (out << "<null>"));
out << ")";
}
http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h
----------------------------------------------------------------------
diff --git a/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h b/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h
index b5c4f14..883f266 100644
--- a/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h
+++ b/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h
@@ -3464,24 +3464,37 @@ inline std::ostream& operator<<(std::ostream& out, const AggrStats& obj)
return out;
}
+typedef struct _SetPartitionsStatsRequest__isset {
+ _SetPartitionsStatsRequest__isset() : needMerge(false) {}
+ bool needMerge :1;
+} _SetPartitionsStatsRequest__isset;
class SetPartitionsStatsRequest {
public:
SetPartitionsStatsRequest(const SetPartitionsStatsRequest&);
SetPartitionsStatsRequest& operator=(const SetPartitionsStatsRequest&);
- SetPartitionsStatsRequest() {
+ SetPartitionsStatsRequest() : needMerge(0) {
}
virtual ~SetPartitionsStatsRequest() throw();
std::vector<ColumnStatistics> colStats;
+ bool needMerge;
+
+ _SetPartitionsStatsRequest__isset __isset;
void __set_colStats(const std::vector<ColumnStatistics> & val);
+ void __set_needMerge(const bool val);
+
bool operator == (const SetPartitionsStatsRequest & rhs) const
{
if (!(colStats == rhs.colStats))
return false;
+ if (__isset.needMerge != rhs.__isset.needMerge)
+ return false;
+ else if (__isset.needMerge && !(needMerge == rhs.needMerge))
+ return false;
return true;
}
bool operator != (const SetPartitionsStatsRequest &rhs) const {
http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/SetPartitionsStatsRequest.java
----------------------------------------------------------------------
diff --git a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/SetPartitionsStatsRequest.java b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/SetPartitionsStatsRequest.java
index 6e334f6..c8088b4 100644
--- a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/SetPartitionsStatsRequest.java
+++ b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/SetPartitionsStatsRequest.java
@@ -39,6 +39,7 @@ public class SetPartitionsStatsRequest implements org.apache.thrift.TBase<SetPar
private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("SetPartitionsStatsRequest");
private static final org.apache.thrift.protocol.TField COL_STATS_FIELD_DESC = new org.apache.thrift.protocol.TField("colStats", org.apache.thrift.protocol.TType.LIST, (short)1);
+ private static final org.apache.thrift.protocol.TField NEED_MERGE_FIELD_DESC = new org.apache.thrift.protocol.TField("needMerge", org.apache.thrift.protocol.TType.BOOL, (short)2);
private static final Map<Class<? extends IScheme>, SchemeFactory> schemes = new HashMap<Class<? extends IScheme>, SchemeFactory>();
static {
@@ -47,10 +48,12 @@ public class SetPartitionsStatsRequest implements org.apache.thrift.TBase<SetPar
}
private List<ColumnStatistics> colStats; // required
+ private boolean needMerge; // optional
/** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */
public enum _Fields implements org.apache.thrift.TFieldIdEnum {
- COL_STATS((short)1, "colStats");
+ COL_STATS((short)1, "colStats"),
+ NEED_MERGE((short)2, "needMerge");
private static final Map<String, _Fields> byName = new HashMap<String, _Fields>();
@@ -67,6 +70,8 @@ public class SetPartitionsStatsRequest implements org.apache.thrift.TBase<SetPar
switch(fieldId) {
case 1: // COL_STATS
return COL_STATS;
+ case 2: // NEED_MERGE
+ return NEED_MERGE;
default:
return null;
}
@@ -107,12 +112,17 @@ public class SetPartitionsStatsRequest implements org.apache.thrift.TBase<SetPar
}
// isset id assignments
+ private static final int __NEEDMERGE_ISSET_ID = 0;
+ private byte __isset_bitfield = 0;
+ private static final _Fields optionals[] = {_Fields.NEED_MERGE};
public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap;
static {
Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class);
tmpMap.put(_Fields.COL_STATS, new org.apache.thrift.meta_data.FieldMetaData("colStats", org.apache.thrift.TFieldRequirementType.REQUIRED,
new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST,
new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, ColumnStatistics.class))));
+ tmpMap.put(_Fields.NEED_MERGE, new org.apache.thrift.meta_data.FieldMetaData("needMerge", org.apache.thrift.TFieldRequirementType.OPTIONAL,
+ new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.BOOL)));
metaDataMap = Collections.unmodifiableMap(tmpMap);
org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(SetPartitionsStatsRequest.class, metaDataMap);
}
@@ -131,6 +141,7 @@ public class SetPartitionsStatsRequest implements org.apache.thrift.TBase<SetPar
* Performs a deep copy on <i>other</i>.
*/
public SetPartitionsStatsRequest(SetPartitionsStatsRequest other) {
+ __isset_bitfield = other.__isset_bitfield;
if (other.isSetColStats()) {
List<ColumnStatistics> __this__colStats = new ArrayList<ColumnStatistics>(other.colStats.size());
for (ColumnStatistics other_element : other.colStats) {
@@ -138,6 +149,7 @@ public class SetPartitionsStatsRequest implements org.apache.thrift.TBase<SetPar
}
this.colStats = __this__colStats;
}
+ this.needMerge = other.needMerge;
}
public SetPartitionsStatsRequest deepCopy() {
@@ -147,6 +159,8 @@ public class SetPartitionsStatsRequest implements org.apache.thrift.TBase<SetPar
@Override
public void clear() {
this.colStats = null;
+ setNeedMergeIsSet(false);
+ this.needMerge = false;
}
public int getColStatsSize() {
@@ -187,6 +201,28 @@ public class SetPartitionsStatsRequest implements org.apache.thrift.TBase<SetPar
}
}
+ public boolean isNeedMerge() {
+ return this.needMerge;
+ }
+
+ public void setNeedMerge(boolean needMerge) {
+ this.needMerge = needMerge;
+ setNeedMergeIsSet(true);
+ }
+
+ public void unsetNeedMerge() {
+ __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __NEEDMERGE_ISSET_ID);
+ }
+
+ /** Returns true if field needMerge is set (has been assigned a value) and false otherwise */
+ public boolean isSetNeedMerge() {
+ return EncodingUtils.testBit(__isset_bitfield, __NEEDMERGE_ISSET_ID);
+ }
+
+ public void setNeedMergeIsSet(boolean value) {
+ __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __NEEDMERGE_ISSET_ID, value);
+ }
+
public void setFieldValue(_Fields field, Object value) {
switch (field) {
case COL_STATS:
@@ -197,6 +233,14 @@ public class SetPartitionsStatsRequest implements org.apache.thrift.TBase<SetPar
}
break;
+ case NEED_MERGE:
+ if (value == null) {
+ unsetNeedMerge();
+ } else {
+ setNeedMerge((Boolean)value);
+ }
+ break;
+
}
}
@@ -205,6 +249,9 @@ public class SetPartitionsStatsRequest implements org.apache.thrift.TBase<SetPar
case COL_STATS:
return getColStats();
+ case NEED_MERGE:
+ return isNeedMerge();
+
}
throw new IllegalStateException();
}
@@ -218,6 +265,8 @@ public class SetPartitionsStatsRequest implements org.apache.thrift.TBase<SetPar
switch (field) {
case COL_STATS:
return isSetColStats();
+ case NEED_MERGE:
+ return isSetNeedMerge();
}
throw new IllegalStateException();
}
@@ -244,6 +293,15 @@ public class SetPartitionsStatsRequest implements org.apache.thrift.TBase<SetPar
return false;
}
+ boolean this_present_needMerge = true && this.isSetNeedMerge();
+ boolean that_present_needMerge = true && that.isSetNeedMerge();
+ if (this_present_needMerge || that_present_needMerge) {
+ if (!(this_present_needMerge && that_present_needMerge))
+ return false;
+ if (this.needMerge != that.needMerge)
+ return false;
+ }
+
return true;
}
@@ -256,6 +314,11 @@ public class SetPartitionsStatsRequest implements org.apache.thrift.TBase<SetPar
if (present_colStats)
list.add(colStats);
+ boolean present_needMerge = true && (isSetNeedMerge());
+ list.add(present_needMerge);
+ if (present_needMerge)
+ list.add(needMerge);
+
return list.hashCode();
}
@@ -277,6 +340,16 @@ public class SetPartitionsStatsRequest implements org.apache.thrift.TBase<SetPar
return lastComparison;
}
}
+ lastComparison = Boolean.valueOf(isSetNeedMerge()).compareTo(other.isSetNeedMerge());
+ if (lastComparison != 0) {
+ return lastComparison;
+ }
+ if (isSetNeedMerge()) {
+ lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.needMerge, other.needMerge);
+ if (lastComparison != 0) {
+ return lastComparison;
+ }
+ }
return 0;
}
@@ -304,6 +377,12 @@ public class SetPartitionsStatsRequest implements org.apache.thrift.TBase<SetPar
sb.append(this.colStats);
}
first = false;
+ if (isSetNeedMerge()) {
+ if (!first) sb.append(", ");
+ sb.append("needMerge:");
+ sb.append(this.needMerge);
+ first = false;
+ }
sb.append(")");
return sb.toString();
}
@@ -327,6 +406,8 @@ public class SetPartitionsStatsRequest implements org.apache.thrift.TBase<SetPar
private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException {
try {
+ // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor.
+ __isset_bitfield = 0;
read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in)));
} catch (org.apache.thrift.TException te) {
throw new java.io.IOException(te);
@@ -370,6 +451,14 @@ public class SetPartitionsStatsRequest implements org.apache.thrift.TBase<SetPar
org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
}
break;
+ case 2: // NEED_MERGE
+ if (schemeField.type == org.apache.thrift.protocol.TType.BOOL) {
+ struct.needMerge = iprot.readBool();
+ struct.setNeedMergeIsSet(true);
+ } else {
+ org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
+ }
+ break;
default:
org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
}
@@ -395,6 +484,11 @@ public class SetPartitionsStatsRequest implements org.apache.thrift.TBase<SetPar
}
oprot.writeFieldEnd();
}
+ if (struct.isSetNeedMerge()) {
+ oprot.writeFieldBegin(NEED_MERGE_FIELD_DESC);
+ oprot.writeBool(struct.needMerge);
+ oprot.writeFieldEnd();
+ }
oprot.writeFieldStop();
oprot.writeStructEnd();
}
@@ -419,6 +513,14 @@ public class SetPartitionsStatsRequest implements org.apache.thrift.TBase<SetPar
_iter290.write(oprot);
}
}
+ BitSet optionals = new BitSet();
+ if (struct.isSetNeedMerge()) {
+ optionals.set(0);
+ }
+ oprot.writeBitSet(optionals, 1);
+ if (struct.isSetNeedMerge()) {
+ oprot.writeBool(struct.needMerge);
+ }
}
@Override
@@ -436,6 +538,11 @@ public class SetPartitionsStatsRequest implements org.apache.thrift.TBase<SetPar
}
}
struct.setColStatsIsSet(true);
+ BitSet incoming = iprot.readBitSet(1);
+ if (incoming.get(0)) {
+ struct.needMerge = iprot.readBool();
+ struct.setNeedMergeIsSet(true);
+ }
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/metastore/src/gen/thrift/gen-php/metastore/Types.php
----------------------------------------------------------------------
diff --git a/metastore/src/gen/thrift/gen-php/metastore/Types.php b/metastore/src/gen/thrift/gen-php/metastore/Types.php
index f67e61f..189894d 100644
--- a/metastore/src/gen/thrift/gen-php/metastore/Types.php
+++ b/metastore/src/gen/thrift/gen-php/metastore/Types.php
@@ -8340,6 +8340,10 @@ class SetPartitionsStatsRequest {
* @var \metastore\ColumnStatistics[]
*/
public $colStats = null;
+ /**
+ * @var bool
+ */
+ public $needMerge = null;
public function __construct($vals=null) {
if (!isset(self::$_TSPEC)) {
@@ -8353,12 +8357,19 @@ class SetPartitionsStatsRequest {
'class' => '\metastore\ColumnStatistics',
),
),
+ 2 => array(
+ 'var' => 'needMerge',
+ 'type' => TType::BOOL,
+ ),
);
}
if (is_array($vals)) {
if (isset($vals['colStats'])) {
$this->colStats = $vals['colStats'];
}
+ if (isset($vals['needMerge'])) {
+ $this->needMerge = $vals['needMerge'];
+ }
}
}
@@ -8399,6 +8410,13 @@ class SetPartitionsStatsRequest {
$xfer += $input->skip($ftype);
}
break;
+ case 2:
+ if ($ftype == TType::BOOL) {
+ $xfer += $input->readBool($this->needMerge);
+ } else {
+ $xfer += $input->skip($ftype);
+ }
+ break;
default:
$xfer += $input->skip($ftype);
break;
@@ -8429,6 +8447,11 @@ class SetPartitionsStatsRequest {
}
$xfer += $output->writeFieldEnd();
}
+ if ($this->needMerge !== null) {
+ $xfer += $output->writeFieldBegin('needMerge', TType::BOOL, 2);
+ $xfer += $output->writeBool($this->needMerge);
+ $xfer += $output->writeFieldEnd();
+ }
$xfer += $output->writeFieldStop();
$xfer += $output->writeStructEnd();
return $xfer;
http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py
----------------------------------------------------------------------
diff --git a/metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py b/metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py
index b47bb59..6366a81 100644
--- a/metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py
+++ b/metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py
@@ -5674,15 +5674,18 @@ class SetPartitionsStatsRequest:
"""
Attributes:
- colStats
+ - needMerge
"""
thrift_spec = (
None, # 0
(1, TType.LIST, 'colStats', (TType.STRUCT,(ColumnStatistics, ColumnStatistics.thrift_spec)), None, ), # 1
+ (2, TType.BOOL, 'needMerge', None, None, ), # 2
)
- def __init__(self, colStats=None,):
+ def __init__(self, colStats=None, needMerge=None,):
self.colStats = colStats
+ self.needMerge = needMerge
def read(self, iprot):
if iprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None and fastbinary is not None:
@@ -5704,6 +5707,11 @@ class SetPartitionsStatsRequest:
iprot.readListEnd()
else:
iprot.skip(ftype)
+ elif fid == 2:
+ if ftype == TType.BOOL:
+ self.needMerge = iprot.readBool()
+ else:
+ iprot.skip(ftype)
else:
iprot.skip(ftype)
iprot.readFieldEnd()
@@ -5721,6 +5729,10 @@ class SetPartitionsStatsRequest:
iter259.write(oprot)
oprot.writeListEnd()
oprot.writeFieldEnd()
+ if self.needMerge is not None:
+ oprot.writeFieldBegin('needMerge', TType.BOOL, 2)
+ oprot.writeBool(self.needMerge)
+ oprot.writeFieldEnd()
oprot.writeFieldStop()
oprot.writeStructEnd()
@@ -5733,6 +5745,7 @@ class SetPartitionsStatsRequest:
def __hash__(self):
value = 17
value = (value * 31) ^ hash(self.colStats)
+ value = (value * 31) ^ hash(self.needMerge)
return value
def __repr__(self):
http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb
----------------------------------------------------------------------
diff --git a/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb b/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb
index 2aa92d8..e8d60d7 100644
--- a/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb
+++ b/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb
@@ -1262,9 +1262,11 @@ end
class SetPartitionsStatsRequest
include ::Thrift::Struct, ::Thrift::Struct_Union
COLSTATS = 1
+ NEEDMERGE = 2
FIELDS = {
- COLSTATS => {:type => ::Thrift::Types::LIST, :name => 'colStats', :element => {:type => ::Thrift::Types::STRUCT, :class => ::ColumnStatistics}}
+ COLSTATS => {:type => ::Thrift::Types::LIST, :name => 'colStats', :element => {:type => ::Thrift::Types::STRUCT, :class => ::ColumnStatistics}},
+ NEEDMERGE => {:type => ::Thrift::Types::BOOL, :name => 'needMerge', :optional => true}
}
def struct_fields; FIELDS; end
http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
index 4b92b2a..94dd72e 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
@@ -4421,7 +4421,9 @@ public class HiveMetaStore extends ThriftHiveMetastore {
try {
statsObj = getMS().getTableColumnStatistics(
dbName, tableName, Lists.newArrayList(colName));
- assert statsObj.getStatsObjSize() <= 1;
+ if (statsObj != null) {
+ assert statsObj.getStatsObjSize() <= 1;
+ }
return statsObj;
} finally {
endFunction("get_column_statistics_by_table: ", statsObj != null, null, tableName);
@@ -6009,8 +6011,62 @@ public class HiveMetaStore extends ThriftHiveMetastore {
throws NoSuchObjectException, InvalidObjectException, MetaException, InvalidInputException,
TException {
boolean ret = true;
- for (ColumnStatistics colStats : request.getColStats()) {
- ret = ret && update_partition_column_statistics(colStats);
+ List<ColumnStatistics> csNews = request.getColStats();
+ if (csNews == null || csNews.isEmpty()) {
+ return ret;
+ }
+ // figure out if it is table level or partition level
+ ColumnStatistics firstColStats = csNews.get(0);
+ ColumnStatisticsDesc statsDesc = firstColStats.getStatsDesc();
+ String dbName = statsDesc.getDbName();
+ String tableName = statsDesc.getTableName();
+ List<String> colNames = new ArrayList<>();
+ for (ColumnStatisticsObj obj : firstColStats.getStatsObj()) {
+ colNames.add(obj.getColName());
+ }
+ if (statsDesc.isIsTblLevel()) {
+ // there should be only one ColumnStatistics
+ if (request.getColStatsSize() != 1) {
+ throw new MetaException(
+ "Expecting only 1 ColumnStatistics for table's column stats, but find "
+ + request.getColStatsSize());
+ } else {
+ if (request.isSetNeedMerge() && request.isNeedMerge()) {
+ // one single call to get all column stats
+ ColumnStatistics csOld = getMS().getTableColumnStatistics(dbName, tableName, colNames);
+ if (csOld != null && csOld.getStatsObjSize() != 0) {
+ MetaStoreUtils.mergeColStats(firstColStats, csOld);
+ }
+ }
+ return update_table_column_statistics(firstColStats);
+ }
+ } else {
+ // partition level column stats merging
+ List<String> partitionNames = new ArrayList<>();
+ for (ColumnStatistics csNew : csNews) {
+ partitionNames.add(csNew.getStatsDesc().getPartName());
+ }
+ Map<String, ColumnStatistics> map = new HashMap<>();
+ if (request.isSetNeedMerge() && request.isNeedMerge()) {
+ // a single call to get all column stats for all partitions
+ List<ColumnStatistics> csOlds = getMS().getPartitionColumnStatistics(dbName, tableName,
+ partitionNames, colNames);
+ if (csNews.size() != csOlds.size()) {
+ // some of the partitions miss stats.
+ LOG.debug("Some of the partitions miss stats.");
+ }
+ for (ColumnStatistics csOld : csOlds) {
+ map.put(csOld.getStatsDesc().getPartName(), csOld);
+ }
+ }
+ for (int index = 0; index < csNews.size(); index++) {
+ ColumnStatistics csNew = csNews.get(index);
+ ColumnStatistics csOld = map.get(csNew.getStatsDesc().getPartName());
+ if (csOld != null && csOld.getStatsObjSize() != 0) {
+ MetaStoreUtils.mergeColStats(csNew, csOld);
+ }
+ ret = ret && update_partition_column_statistics(csNew);
+ }
}
return ret;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
index 682796d..2e83ee0 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
@@ -1593,6 +1593,8 @@ public class HiveMetaStoreClient implements IMetaStoreClient {
/** {@inheritDoc} */
@Override
+ @Deprecated
+ //use setPartitionColumnStatistics instead
public boolean updateTableColumnStatistics(ColumnStatistics statsObj)
throws NoSuchObjectException, InvalidObjectException, MetaException, TException,
InvalidInputException{
@@ -1601,6 +1603,8 @@ public class HiveMetaStoreClient implements IMetaStoreClient {
/** {@inheritDoc} */
@Override
+ @Deprecated
+ //use setPartitionColumnStatistics instead
public boolean updatePartitionColumnStatistics(ColumnStatistics statsObj)
throws NoSuchObjectException, InvalidObjectException, MetaException, TException,
InvalidInputException{
http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
index e01fe45..6bc882a 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
@@ -55,9 +55,12 @@ import org.apache.hadoop.hive.common.JavaUtils;
import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStore.HMSHandler;
+import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.InvalidObjectException;
import org.apache.hadoop.hive.metastore.api.InvalidOperationException;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.Partition;
@@ -65,6 +68,8 @@ import org.apache.hadoop.hive.metastore.api.SerDeInfo;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
+import org.apache.hadoop.hive.metastore.hbase.stats.merge.ColumnStatsMerger;
+import org.apache.hadoop.hive.metastore.hbase.stats.merge.ColumnStatsMergerFactory;
import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.Deserializer;
@@ -1811,4 +1816,36 @@ public class MetaStoreUtils {
return ret;
}
+  /**
+   * Merges the column statistics in {@code csOld} into {@code csNew}.
+   *
+   * Columns are matched by name. A column of {@code csNew} that also exists in
+   * {@code csOld} is merged through the {@link ColumnStatsMerger} chosen by
+   * {@link ColumnStatsMergerFactory}; a column with no old counterpart is kept
+   * unchanged. Columns that only exist in {@code csOld} are not carried over.
+   *
+   * @param csNew the new statistics; its stats objects are updated and re-set on it
+   * @param csOld the old statistics, only read
+   * @throws InvalidObjectException declared for callers; not thrown directly here
+   */
+  public static void mergeColStats(ColumnStatistics csNew, ColumnStatistics csOld)
+      throws InvalidObjectException {
+    final int newSize = csNew.getStatsObj().size();
+    final int oldSize = csOld.getStatsObjSize();
+    if (newSize != oldSize) {
+      // Some of the columns' stats are missing
+      // This implies partition schema has changed. We will merge columns
+      // present in both, overwrite stats for columns absent in metastore and
+      // leave alone columns stats missing from stats task. This last case may
+      // leave stats in stale state. This will be addressed later.
+      LOG.debug("New ColumnStats size is " + newSize
+          + ". But old ColumnStats size is " + oldSize);
+    }
+    // Index the old per-column stats by column name so that mergeable columns
+    // can be looked up directly.
+    Map<String, ColumnStatisticsObj> oldByColName = new HashMap<>();
+    for (ColumnStatisticsObj oldObj : csOld.getStatsObj()) {
+      oldByColName.put(oldObj.getColName(), oldObj);
+    }
+    List<ColumnStatisticsObj> merged = new ArrayList<>();
+    for (ColumnStatisticsObj newObj : csNew.getStatsObj()) {
+      ColumnStatisticsObj oldObj = oldByColName.get(newObj.getColName());
+      if (oldObj != null) {
+        // Both sides have stats for this column, so they can be merged.
+        ColumnStatsMerger merger = ColumnStatsMergerFactory.getColumnStatsMerger(newObj, oldObj);
+        merger.merge(newObj, oldObj);
+      }
+      merged.add(newObj);
+    }
+    csNew.setStatsObj(merged);
+  }
}
http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/BinaryColumnStatsMerger.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/BinaryColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/BinaryColumnStatsMerger.java
new file mode 100644
index 0000000..af0669e
--- /dev/null
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/BinaryColumnStatsMerger.java
@@ -0,0 +1,35 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.hive.metastore.hbase.stats.merge;
+
+import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
+
+public class BinaryColumnStatsMerger extends ColumnStatsMerger {
+
+ @Override
+ public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
+ BinaryColumnStatsData aggregateData = aggregateColStats.getStatsData().getBinaryStats();
+ BinaryColumnStatsData newData = newColStats.getStatsData().getBinaryStats();
+ aggregateData.setMaxColLen(Math.max(aggregateData.getMaxColLen(), newData.getMaxColLen()));
+ aggregateData.setAvgColLen(Math.max(aggregateData.getAvgColLen(), newData.getAvgColLen()));
+ aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());
+ }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/BooleanColumnStatsMerger.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/BooleanColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/BooleanColumnStatsMerger.java
new file mode 100644
index 0000000..33ff6a1
--- /dev/null
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/BooleanColumnStatsMerger.java
@@ -0,0 +1,35 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.hive.metastore.hbase.stats.merge;
+
+import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
+
+public class BooleanColumnStatsMerger extends ColumnStatsMerger {
+
+ @Override
+ public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
+ BooleanColumnStatsData aggregateData = aggregateColStats.getStatsData().getBooleanStats();
+ BooleanColumnStatsData newData = newColStats.getStatsData().getBooleanStats();
+ aggregateData.setNumTrues(aggregateData.getNumTrues() + newData.getNumTrues());
+ aggregateData.setNumFalses(aggregateData.getNumFalses() + newData.getNumFalses());
+ aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());
+ }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMerger.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMerger.java
new file mode 100644
index 0000000..33c7e3e
--- /dev/null
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMerger.java
@@ -0,0 +1,34 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.hive.metastore.hbase.stats.merge;
+
+import org.apache.hadoop.hive.metastore.NumDistinctValueEstimator;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Base class for the per-type column statistics mergers. A merger folds one
+ * {@link ColumnStatisticsObj} into another of the same statistics type.
+ */
+public abstract class ColumnStatsMerger {
+  // One logger shared by every merger instance instead of one field per object.
+  protected static final Logger LOG = LoggerFactory.getLogger(ColumnStatsMerger.class);
+
+  // Estimator used to merge distinct-value counts through bit vectors. It stays
+  // null unless ColumnStatsMergerFactory assigns one.
+  NumDistinctValueEstimator ndvEstimator = null;
+
+  /**
+   * Merges {@code newColStats} into {@code aggregateColStats}.
+   *
+   * @param aggregateColStats the statistics object that receives the merged result
+   * @param newColStats the statistics object to fold into the aggregate
+   */
+  public abstract void merge(ColumnStatisticsObj aggregateColStats,
+      ColumnStatisticsObj newColStats);
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMergerFactory.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMergerFactory.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMergerFactory.java
new file mode 100644
index 0000000..da6cd46
--- /dev/null
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMergerFactory.java
@@ -0,0 +1,138 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.hive.metastore.hbase.stats.merge;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.hive.metastore.NumDistinctValueEstimator;
+import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData._Fields;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
+import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
+
+/**
+ * Static factory that picks the {@link ColumnStatsMerger} matching the kind of
+ * statistics held by a pair of {@link ColumnStatisticsObj}s, and that creates
+ * empty {@link ColumnStatisticsObj}s of a requested kind.
+ */
+public class ColumnStatsMergerFactory {
+
+  private ColumnStatsMergerFactory() {
+  }
+
+  // we depend on the toString() method for javolution.util.FastCollection.
+  private static int countNumBitVectors(String s) {
+    return s == null ? 0 : StringUtils.countMatches(s, "{");
+  }
+
+  // Returns the number of bit vectors when both serialized forms contain the
+  // same number of them, otherwise 0 so that no estimator is attached.
+  private static int commonNumBitVectors(String bitVectorsNew, String bitVectorsOld) {
+    int nbvNew = countNumBitVectors(bitVectorsNew);
+    int nbvOld = countNumBitVectors(bitVectorsOld);
+    return nbvNew == nbvOld ? nbvNew : 0;
+  }
+
+  /**
+   * Returns a merger for the statistics type shared by the two objects. For
+   * long, double, string and decimal statistics the merger also gets an
+   * {@link NumDistinctValueEstimator} when both sides carry the same, non-zero
+   * number of bit vectors.
+   *
+   * @param statsObjNew the new column statistics
+   * @param statsObjOld the old column statistics
+   * @return a merger for the common statistics type
+   * @throws IllegalArgumentException if the two objects do not hold the same
+   *         kind of statistics, or if no kind is set
+   * @throws RuntimeException if the statistics kind is not a known one
+   */
+  public static ColumnStatsMerger getColumnStatsMerger(ColumnStatisticsObj statsObjNew,
+      ColumnStatisticsObj statsObjOld) {
+    ColumnStatisticsData dataNew = statsObjNew.getStatsData();
+    ColumnStatisticsData dataOld = statsObjOld.getStatsData();
+    _Fields typeNew = dataNew.getSetField();
+    _Fields typeOld = dataOld.getSetField();
+    // make sure that they have the same type. Switching on a null type would
+    // fail with a bare NullPointerException, so reject the mismatch explicitly.
+    if (typeNew == null || typeNew != typeOld) {
+      throw new IllegalArgumentException("Cannot merge column stats for column "
+          + statsObjNew.getColName() + ": new stats type is " + typeNew
+          + " but old stats type is " + typeOld);
+    }
+    ColumnStatsMerger agg;
+    int numBitVectors = 0;
+    switch (typeNew) {
+    case BOOLEAN_STATS:
+      agg = new BooleanColumnStatsMerger();
+      break;
+    case LONG_STATS:
+      agg = new LongColumnStatsMerger();
+      numBitVectors = commonNumBitVectors(dataNew.getLongStats().getBitVectors(),
+          dataOld.getLongStats().getBitVectors());
+      break;
+    case DOUBLE_STATS:
+      agg = new DoubleColumnStatsMerger();
+      numBitVectors = commonNumBitVectors(dataNew.getDoubleStats().getBitVectors(),
+          dataOld.getDoubleStats().getBitVectors());
+      break;
+    case STRING_STATS:
+      agg = new StringColumnStatsMerger();
+      numBitVectors = commonNumBitVectors(dataNew.getStringStats().getBitVectors(),
+          dataOld.getStringStats().getBitVectors());
+      break;
+    case BINARY_STATS:
+      agg = new BinaryColumnStatsMerger();
+      break;
+    case DECIMAL_STATS:
+      agg = new DecimalColumnStatsMerger();
+      numBitVectors = commonNumBitVectors(dataNew.getDecimalStats().getBitVectors(),
+          dataOld.getDecimalStats().getBitVectors());
+      break;
+    default:
+      throw new RuntimeException("Woh, bad. Unknown stats type " + typeNew.toString());
+    }
+    if (numBitVectors > 0) {
+      agg.ndvEstimator = new NumDistinctValueEstimator(numBitVectors);
+    }
+    return agg;
+  }
+
+  /**
+   * Creates a {@link ColumnStatisticsObj} with the given column name and type
+   * whose statistics data holds a new, empty stats object of the requested kind.
+   *
+   * @param colName the column name to set
+   * @param colType the column type to set
+   * @param type the kind of statistics to create
+   * @return the newly created statistics object
+   * @throws RuntimeException if {@code type} is not a known statistics kind
+   */
+  public static ColumnStatisticsObj newColumnStaticsObj(String colName, String colType, _Fields type) {
+    ColumnStatisticsObj cso = new ColumnStatisticsObj();
+    ColumnStatisticsData csd = new ColumnStatisticsData();
+    cso.setColName(colName);
+    cso.setColType(colType);
+    switch (type) {
+    case BOOLEAN_STATS:
+      csd.setBooleanStats(new BooleanColumnStatsData());
+      break;
+
+    case LONG_STATS:
+      csd.setLongStats(new LongColumnStatsData());
+      break;
+
+    case DOUBLE_STATS:
+      csd.setDoubleStats(new DoubleColumnStatsData());
+      break;
+
+    case STRING_STATS:
+      csd.setStringStats(new StringColumnStatsData());
+      break;
+
+    case BINARY_STATS:
+      csd.setBinaryStats(new BinaryColumnStatsData());
+      break;
+
+    case DECIMAL_STATS:
+      csd.setDecimalStats(new DecimalColumnStatsData());
+      break;
+
+    default:
+      throw new RuntimeException("Woh, bad. Unknown stats type!");
+    }
+
+    cso.setStatsData(csd);
+    return cso;
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DecimalColumnStatsMerger.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DecimalColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DecimalColumnStatsMerger.java
new file mode 100644
index 0000000..c13add9
--- /dev/null
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DecimalColumnStatsMerger.java
@@ -0,0 +1,55 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.hive.metastore.hbase.stats.merge;
+
+import org.apache.hadoop.hive.metastore.NumDistinctValueEstimator;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
+import org.apache.hadoop.hive.metastore.api.Decimal;
+import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData;
+
+/**
+ * Merger for decimal column statistics. The aggregate object is updated in
+ * place; the other object is only read.
+ */
+public class DecimalColumnStatsMerger extends ColumnStatsMerger {
+  /**
+   * Folds {@code newColStats} into {@code aggregateColStats}: the low value
+   * becomes the smaller of the two, the high value the larger, and the null
+   * counts are added. The distinct-value count is merged through the bit
+   * vectors when an estimator is available and the new stats carry a non-empty
+   * bit vector string; otherwise the larger of the two counts is kept.
+   */
+  @Override
+  public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
+    DecimalColumnStatsData aggregateData = aggregateColStats.getStatsData().getDecimalStats();
+    DecimalColumnStatsData newData = newColStats.getStatsData().getDecimalStats();
+    // The merged range must cover both inputs: smallest low, largest high.
+    aggregateData.setLowValue(lower(aggregateData.getLowValue(), newData.getLowValue()));
+    aggregateData.setHighValue(higher(aggregateData.getHighValue(), newData.getHighValue()));
+    aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());
+    if (ndvEstimator == null || !newData.isSetBitVectors() || newData.getBitVectors().length() == 0) {
+      aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs()));
+    } else {
+      ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(aggregateData.getBitVectors(),
+          ndvEstimator.getnumBitVectors()));
+      ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(newData.getBitVectors(),
+          ndvEstimator.getnumBitVectors()));
+      long ndv = ndvEstimator.estimateNumDistinctValues();
+      LOG.debug("Use bitvector to merge column " + aggregateColStats.getColName() + "'s ndvs of "
+          + aggregateData.getNumDVs() + " and " + newData.getNumDVs() + " to be " + ndv);
+      aggregateData.setNumDVs(ndv);
+      aggregateData.setBitVectors(ndvEstimator.serialize().toString());
+    }
+  }
+
+  // Returns the smaller of the two bounds; a missing (null) bound yields the other one.
+  // NOTE(review): ordering relies on Decimal.compareTo - confirm it compares numerically.
+  private static Decimal lower(Decimal current, Decimal candidate) {
+    if (current == null) {
+      return candidate;
+    }
+    if (candidate == null) {
+      return current;
+    }
+    return current.compareTo(candidate) <= 0 ? current : candidate;
+  }
+
+  // Returns the larger of the two bounds; a missing (null) bound yields the other one.
+  private static Decimal higher(Decimal current, Decimal candidate) {
+    if (current == null) {
+      return candidate;
+    }
+    if (candidate == null) {
+      return current;
+    }
+    return current.compareTo(candidate) > 0 ? current : candidate;
+  }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DoubleColumnStatsMerger.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DoubleColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DoubleColumnStatsMerger.java
new file mode 100644
index 0000000..fbdba24
--- /dev/null
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DoubleColumnStatsMerger.java
@@ -0,0 +1,48 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.hive.metastore.hbase.stats.merge;
+
+import org.apache.hadoop.hive.metastore.NumDistinctValueEstimator;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
+import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;
+
+/**
+ * Merger for double column statistics. The aggregate object is updated in
+ * place; the other object is only read.
+ */
+public class DoubleColumnStatsMerger extends ColumnStatsMerger {
+  /**
+   * Folds {@code newColStats} into {@code aggregateColStats}: the low value
+   * becomes the minimum, the high value the maximum, and the null counts are
+   * added. The distinct-value count is merged through the bit vectors when an
+   * estimator is available and the new stats carry a non-empty bit vector
+   * string; otherwise the larger of the two counts is kept.
+   */
+  @Override
+  public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
+    DoubleColumnStatsData target = aggregateColStats.getStatsData().getDoubleStats();
+    DoubleColumnStatsData incoming = newColStats.getStatsData().getDoubleStats();
+
+    double low = Math.min(target.getLowValue(), incoming.getLowValue());
+    target.setLowValue(low);
+    double high = Math.max(target.getHighValue(), incoming.getHighValue());
+    target.setHighValue(high);
+    long nulls = target.getNumNulls() + incoming.getNumNulls();
+    target.setNumNulls(nulls);
+
+    boolean canUseBitVectors = ndvEstimator != null && incoming.isSetBitVectors()
+        && incoming.getBitVectors().length() != 0;
+    if (!canUseBitVectors) {
+      // No estimator or no bit vectors on the new side: keep the larger count.
+      target.setNumDVs(Math.max(target.getNumDVs(), incoming.getNumDVs()));
+      return;
+    }
+
+    NumDistinctValueEstimator targetEstimator =
+        new NumDistinctValueEstimator(target.getBitVectors(), ndvEstimator.getnumBitVectors());
+    ndvEstimator.mergeEstimators(targetEstimator);
+    NumDistinctValueEstimator incomingEstimator =
+        new NumDistinctValueEstimator(incoming.getBitVectors(), ndvEstimator.getnumBitVectors());
+    ndvEstimator.mergeEstimators(incomingEstimator);
+
+    long ndv = ndvEstimator.estimateNumDistinctValues();
+    LOG.debug("Use bitvector to merge column " + aggregateColStats.getColName() + "'s ndvs of "
+        + target.getNumDVs() + " and " + incoming.getNumDVs() + " to be " + ndv);
+    target.setNumDVs(ndv);
+    target.setBitVectors(ndvEstimator.serialize().toString());
+  }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/LongColumnStatsMerger.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/LongColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/LongColumnStatsMerger.java
new file mode 100644
index 0000000..ac65590
--- /dev/null
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/LongColumnStatsMerger.java
@@ -0,0 +1,48 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.hive.metastore.hbase.stats.merge;
+
+import org.apache.hadoop.hive.metastore.NumDistinctValueEstimator;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
+import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
+
+/**
+ * Merger for long column statistics. The aggregate object is updated in
+ * place; the other object is only read.
+ */
+public class LongColumnStatsMerger extends ColumnStatsMerger {
+  /**
+   * Folds {@code newColStats} into {@code aggregateColStats}: the low value
+   * becomes the minimum, the high value the maximum, and the null counts are
+   * added. The distinct-value count is merged through the bit vectors when an
+   * estimator is available and the new stats carry a non-empty bit vector
+   * string; otherwise the larger of the two counts is kept.
+   */
+  @Override
+  public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
+    LongColumnStatsData target = aggregateColStats.getStatsData().getLongStats();
+    LongColumnStatsData incoming = newColStats.getStatsData().getLongStats();
+
+    long low = Math.min(target.getLowValue(), incoming.getLowValue());
+    target.setLowValue(low);
+    long high = Math.max(target.getHighValue(), incoming.getHighValue());
+    target.setHighValue(high);
+    long nulls = target.getNumNulls() + incoming.getNumNulls();
+    target.setNumNulls(nulls);
+
+    boolean canUseBitVectors = ndvEstimator != null && incoming.isSetBitVectors()
+        && incoming.getBitVectors().length() != 0;
+    if (!canUseBitVectors) {
+      // No estimator or no bit vectors on the new side: keep the larger count.
+      target.setNumDVs(Math.max(target.getNumDVs(), incoming.getNumDVs()));
+      return;
+    }
+
+    NumDistinctValueEstimator targetEstimator =
+        new NumDistinctValueEstimator(target.getBitVectors(), ndvEstimator.getnumBitVectors());
+    ndvEstimator.mergeEstimators(targetEstimator);
+    NumDistinctValueEstimator incomingEstimator =
+        new NumDistinctValueEstimator(incoming.getBitVectors(), ndvEstimator.getnumBitVectors());
+    ndvEstimator.mergeEstimators(incomingEstimator);
+
+    long ndv = ndvEstimator.estimateNumDistinctValues();
+    LOG.debug("Use bitvector to merge column " + aggregateColStats.getColName() + "'s ndvs of "
+        + target.getNumDVs() + " and " + incoming.getNumDVs() + " to be " + ndv);
+    target.setNumDVs(ndv);
+    target.setBitVectors(ndvEstimator.serialize().toString());
+  }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/StringColumnStatsMerger.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/StringColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/StringColumnStatsMerger.java
new file mode 100644
index 0000000..4158747
--- /dev/null
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/StringColumnStatsMerger.java
@@ -0,0 +1,49 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.hive.metastore.hbase.stats.merge;
+
+import org.apache.hadoop.hive.metastore.NumDistinctValueEstimator;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
+import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
+import org.apache.parquet.Log;
+
+/**
+ * Merger for string column statistics. The aggregate object is updated in
+ * place; the other object is only read.
+ */
+public class StringColumnStatsMerger extends ColumnStatsMerger {
+  /**
+   * Folds {@code newColStats} into {@code aggregateColStats}: the maximum and
+   * average column lengths each become the larger of the two values, and the
+   * null counts are added. The distinct-value count is merged through the bit
+   * vectors when an estimator is available and the new stats carry a non-empty
+   * bit vector string; otherwise the larger of the two counts is kept.
+   */
+  @Override
+  public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
+    StringColumnStatsData target = aggregateColStats.getStatsData().getStringStats();
+    StringColumnStatsData incoming = newColStats.getStatsData().getStringStats();
+
+    long maxLen = Math.max(target.getMaxColLen(), incoming.getMaxColLen());
+    target.setMaxColLen(maxLen);
+    double avgLen = Math.max(target.getAvgColLen(), incoming.getAvgColLen());
+    target.setAvgColLen(avgLen);
+    long nulls = target.getNumNulls() + incoming.getNumNulls();
+    target.setNumNulls(nulls);
+
+    boolean canUseBitVectors = ndvEstimator != null && incoming.isSetBitVectors()
+        && incoming.getBitVectors().length() != 0;
+    if (!canUseBitVectors) {
+      // No estimator or no bit vectors on the new side: keep the larger count.
+      target.setNumDVs(Math.max(target.getNumDVs(), incoming.getNumDVs()));
+      return;
+    }
+
+    NumDistinctValueEstimator targetEstimator =
+        new NumDistinctValueEstimator(target.getBitVectors(), ndvEstimator.getnumBitVectors());
+    ndvEstimator.mergeEstimators(targetEstimator);
+    NumDistinctValueEstimator incomingEstimator =
+        new NumDistinctValueEstimator(incoming.getBitVectors(), ndvEstimator.getnumBitVectors());
+    ndvEstimator.mergeEstimators(incomingEstimator);
+
+    long ndv = ndvEstimator.estimateNumDistinctValues();
+    LOG.debug("Use bitvector to merge column " + aggregateColStats.getColName() + "'s ndvs of "
+        + target.getNumDVs() + " and " + incoming.getNumDVs() + " to be " + ndv);
+    target.setNumDVs(ndv);
+    target.setBitVectors(ndvEstimator.serialize().toString());
+  }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
index 05dfa3b..0f0df11 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
@@ -392,22 +392,16 @@ public class ColumnStatsTask extends Task<ColumnStatsWork> implements Serializab
return statsDesc;
}
- private int persistPartitionStats(Hive db) throws HiveException, MetaException, IOException {
-
- // Fetch result of the analyze table partition (p1=c1).. compute statistics for columns ..
+ private int persistColumnStats(Hive db) throws HiveException, MetaException, IOException {
// Construct a column statistics object from the result
List<ColumnStatistics> colStats = constructColumnStatsFromPackedRows(db);
// Persist the column statistics object to the metastore
- db.setPartitionColumnStatistics(new SetPartitionsStatsRequest(colStats));
- return 0;
- }
-
- private int persistTableStats(Hive db) throws HiveException, MetaException, IOException {
- // Fetch result of the analyze table .. compute statistics for columns ..
- // Construct a column statistics object from the result
- ColumnStatistics colStats = constructColumnStatsFromPackedRows(db).get(0);
- // Persist the column statistics object to the metastore
- db.updateTableColumnStatistics(colStats);
+ // Note, this function is shared for both table and partition column stats.
+ SetPartitionsStatsRequest request = new SetPartitionsStatsRequest(colStats);
+ // needMerge is switched on only when the work's column stats descriptor reports a positive
+ // numBitVector; otherwise the request keeps its default needMerge value.
+ // NOTE(review): presumably needMerge makes the metastore merge with stored stats - confirm.
+ if (work.getColStats() != null && work.getColStats().getNumBitVector() > 0) {
+ request.setNeedMerge(true);
+ }
+ db.setPartitionColumnStatistics(request);
return 0;
}
@@ -415,11 +409,7 @@ public class ColumnStatsTask extends Task<ColumnStatsWork> implements Serializab
public int execute(DriverContext driverContext) {
try {
Hive db = getHive();
- if (work.getColStats().isTblLevel()) {
- return persistTableStats(db);
- } else {
- return persistPartitionStats(db);
- }
+ return persistColumnStats(db);
} catch (Exception e) {
LOG.error("Failed to run column stats task", e);
}
http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsUpdateTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsUpdateTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsUpdateTask.java
index 9a6e5c9..d6852dc 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsUpdateTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsUpdateTask.java
@@ -21,6 +21,7 @@ package org.apache.hadoop.hive.ql.exec;
import java.io.IOException;
import java.math.BigDecimal;
import java.nio.ByteBuffer;
+import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
@@ -40,6 +41,7 @@ import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData;
import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;
import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.metastore.api.SetPartitionsStatsRequest;
import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
import org.apache.hadoop.hive.ql.CompilationOpContext;
import org.apache.hadoop.hive.ql.DriverContext;
@@ -91,9 +93,9 @@ public class ColumnStatsUpdateTask extends Task<ColumnStatsUpdateWork> {
statsObj.setColName(colName.get(0));
statsObj.setColType(colType.get(0));
-
+
ColumnStatisticsData statsData = new ColumnStatisticsData();
-
+
String columnType = colType.get(0);
if (columnType.equalsIgnoreCase("long")) {
@@ -287,21 +289,11 @@ public class ColumnStatsUpdateTask extends Task<ColumnStatsUpdateWork> {
return statsDesc;
}
- private int persistTableStats(Hive db) throws HiveException, MetaException,
- IOException {
- // Construct a column statistics object from user input
- ColumnStatistics colStats = constructColumnStatsFromInput();
- // Persist the column statistics object to the metastore
- db.updateTableColumnStatistics(colStats);
- return 0;
- }
-
- private int persistPartitionStats(Hive db) throws HiveException, MetaException,
- IOException {
- // Construct a column statistics object from user input
- ColumnStatistics colStats = constructColumnStatsFromInput();
- // Persist the column statistics object to the metastore
- db.updatePartitionColumnStatistics(colStats);
+ // Wraps the single ColumnStatistics object built by constructColumnStatsFromInput() in a
+ // one-element list and sends it through db.setPartitionColumnStatistics(). This method does
+ // not call setNeedMerge on the request. Always returns 0; failures surface as exceptions.
+ private int persistColumnStats(Hive db) throws HiveException, MetaException, IOException {
+ List<ColumnStatistics> colStats = new ArrayList<>();
+ colStats.add(constructColumnStatsFromInput());
+ SetPartitionsStatsRequest request = new SetPartitionsStatsRequest(colStats);
+ db.setPartitionColumnStatistics(request);
return 0;
}
@@ -309,11 +301,7 @@ public class ColumnStatsUpdateTask extends Task<ColumnStatsUpdateWork> {
public int execute(DriverContext driverContext) {
try {
Hive db = getHive();
- if (work.getColStats().isTblLevel()) {
- return persistTableStats(db);
- } else {
- return persistPartitionStats(db);
- }
+ return persistColumnStats(db);
} catch (Exception e) {
LOG.info("Failed to persist stats in metastore", e);
}
http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
index 26f61c5..6af48ec 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
@@ -3312,24 +3312,6 @@ private void constructOneLBLocationMap(FileStatus fSta,
return indexes;
}
- public boolean updateTableColumnStatistics(ColumnStatistics statsObj) throws HiveException {
- try {
- return getMSC().updateTableColumnStatistics(statsObj);
- } catch (Exception e) {
- LOG.debug(StringUtils.stringifyException(e));
- throw new HiveException(e);
- }
- }
-
- public boolean updatePartitionColumnStatistics(ColumnStatistics statsObj) throws HiveException {
- try {
- return getMSC().updatePartitionColumnStatistics(statsObj);
- } catch (Exception e) {
- LOG.debug(StringUtils.stringifyException(e));
- throw new HiveException(e);
- }
- }
-
public boolean setPartitionColumnStatistics(SetPartitionsStatsRequest request) throws HiveException {
try {
return getMSC().setPartitionColumnStatistics(request);
http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java
index db2b674..8eb011e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java
@@ -39,6 +39,7 @@ import org.apache.hadoop.hive.metastore.MetaStoreUtils;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
@@ -50,6 +51,7 @@ import org.apache.hadoop.hive.metastore.api.InvalidOperationException;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
import org.apache.hadoop.hive.metastore.api.PrincipalPrivilegeSet;
+import org.apache.hadoop.hive.metastore.api.SetPartitionsStatsRequest;
import org.apache.hadoop.hive.metastore.api.TableMeta;
import org.apache.hadoop.hive.metastore.api.UnknownDBException;
import org.apache.hadoop.hive.metastore.api.UnknownTableException;
@@ -325,15 +327,19 @@ public class SessionHiveMetaStoreClient extends HiveMetaStoreClient implements I
/** {@inheritDoc} */
@Override
- public boolean updateTableColumnStatistics(ColumnStatistics statsObj)
+ public boolean setPartitionColumnStatistics(SetPartitionsStatsRequest request)
throws NoSuchObjectException, InvalidObjectException, MetaException, TException,
InvalidInputException {
- String dbName = statsObj.getStatsDesc().getDbName().toLowerCase();
- String tableName = statsObj.getStatsDesc().getTableName().toLowerCase();
- if (getTempTable(dbName, tableName) != null) {
- return updateTempTableColumnStats(dbName, tableName, statsObj);
+ // Only a request carrying exactly one ColumnStatistics entry is checked against the
+ // session's temp tables; a match is handled by updateTempTableColumnStats. Every other
+ // request is forwarded unchanged to the superclass.
+ // NOTE(review): a multi-entry request naming a temp table is not intercepted - confirm
+ // that callers never send one.
+ if (request.getColStatsSize() == 1) {
+ ColumnStatistics colStats = request.getColStatsIterator().next();
+ ColumnStatisticsDesc desc = colStats.getStatsDesc();
+ String dbName = desc.getDbName().toLowerCase();
+ String tableName = desc.getTableName().toLowerCase();
+ if (getTempTable(dbName, tableName) != null) {
+ return updateTempTableColumnStats(dbName, tableName, colStats);
+ }
}
- return super.updateTableColumnStatistics(statsObj);
+ return super.setPartitionColumnStatistics(request);
}
/** {@inheritDoc} */
http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsDesc.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsDesc.java
index c000db2..97f323f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsDesc.java
@@ -29,6 +29,7 @@ public class ColumnStatsDesc extends DDLDesc implements Serializable, Cloneable
private static final long serialVersionUID = 1L;
private boolean isTblLevel;
+ private int numBitVector;
private String tableName;
private List<String> colName;
private List<String> colType;
@@ -36,12 +37,22 @@ public class ColumnStatsDesc extends DDLDesc implements Serializable, Cloneable
public ColumnStatsDesc() {
}
+  /**
+   * Creates a descriptor whose {@code numBitVector} is 0; all other fields are taken from the
+   * arguments as given.
+   */
+  public ColumnStatsDesc(String tableName, List<String> colName, List<String> colType,
+      boolean isTblLevel) {
+    // Delegate to the five-argument constructor so the field assignments live in one place.
+    this(tableName, colName, colType, isTblLevel, 0);
+  }
+
+ /**
+ * Creates a descriptor from the given values; each argument is stored as-is in the field of
+ * the same name (the lists are not copied).
+ */
public ColumnStatsDesc(String tableName, List<String> colName,
- List<String> colType, boolean isTblLevel) {
+ List<String> colType, boolean isTblLevel, int numBitVector) {
this.tableName = tableName;
this.colName = colName;
this.colType = colType;
this.isTblLevel = isTblLevel;
+ this.numBitVector = numBitVector;
}
@Explain(displayName = "Table")
@@ -79,4 +90,13 @@ public class ColumnStatsDesc extends DDLDesc implements Serializable, Cloneable
public void setColType(List<String> colType) {
this.colType = colType;
}
+
+ /** Returns the {@code numBitVector} value held by this descriptor. */
+ public int getNumBitVector() {
+ return numBitVector;
+ }
+
+ /** Replaces the {@code numBitVector} value held by this descriptor; no validation is done. */
+ public void setNumBitVector(int numBitVector) {
+ this.numBitVector = numBitVector;
+ }
+
}