You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by cw...@apache.org on 2012/11/07 05:55:04 UTC
svn commit: r1406465 [12/15] - in /hive/trunk:
common/src/java/org/apache/hadoop/hive/conf/ conf/ data/files/
metastore/if/ metastore/src/gen/thrift/gen-cpp/
metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/
metastore/src/gen/...
Modified: hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
URL: http://svn.apache.org/viewvc/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java?rev=1406465&r1=1406464&r2=1406465&view=diff
==============================================================================
--- hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java (original)
+++ hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java Wed Nov 7 04:55:00 2012
@@ -56,14 +56,23 @@ import org.apache.hadoop.hive.common.cla
import org.apache.hadoop.hive.common.classification.InterfaceStability;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
+import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.api.Database;
+import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.HiveObjectPrivilege;
import org.apache.hadoop.hive.metastore.api.HiveObjectRef;
import org.apache.hadoop.hive.metastore.api.HiveObjectType;
import org.apache.hadoop.hive.metastore.api.Index;
+import org.apache.hadoop.hive.metastore.api.InvalidInputException;
import org.apache.hadoop.hive.metastore.api.InvalidObjectException;
import org.apache.hadoop.hive.metastore.api.InvalidPartitionException;
+import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
import org.apache.hadoop.hive.metastore.api.Order;
@@ -77,6 +86,7 @@ import org.apache.hadoop.hive.metastore.
import org.apache.hadoop.hive.metastore.api.SerDeInfo;
import org.apache.hadoop.hive.metastore.api.SkewedInfo;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
+import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.metastore.api.Type;
import org.apache.hadoop.hive.metastore.api.UnknownDBException;
@@ -91,6 +101,7 @@ import org.apache.hadoop.hive.metastore.
import org.apache.hadoop.hive.metastore.model.MOrder;
import org.apache.hadoop.hive.metastore.model.MPartition;
import org.apache.hadoop.hive.metastore.model.MPartitionColumnPrivilege;
+import org.apache.hadoop.hive.metastore.model.MPartitionColumnStatistics;
import org.apache.hadoop.hive.metastore.model.MPartitionEvent;
import org.apache.hadoop.hive.metastore.model.MPartitionPrivilege;
import org.apache.hadoop.hive.metastore.model.MRole;
@@ -100,6 +111,7 @@ import org.apache.hadoop.hive.metastore.
import org.apache.hadoop.hive.metastore.model.MStringList;
import org.apache.hadoop.hive.metastore.model.MTable;
import org.apache.hadoop.hive.metastore.model.MTableColumnPrivilege;
+import org.apache.hadoop.hive.metastore.model.MTableColumnStatistics;
import org.apache.hadoop.hive.metastore.model.MTablePrivilege;
import org.apache.hadoop.hive.metastore.model.MType;
import org.apache.hadoop.hive.metastore.parser.ExpressionTree.ANTLRNoCaseStringStream;
@@ -673,7 +685,8 @@ public class ObjectStore implements RawS
}
}
- public boolean dropTable(String dbName, String tableName) throws MetaException {
+ public boolean dropTable(String dbName, String tableName) throws MetaException,
+ NoSuchObjectException, InvalidObjectException, InvalidInputException {
boolean success = false;
try {
openTransaction();
@@ -701,6 +714,13 @@ public class ObjectStore implements RawS
if (partColGrants != null && partColGrants.size() > 0) {
pm.deletePersistentAll(partColGrants);
}
+ // delete column statistics if present
+ try {
+ deleteTableColumnStatistics(dbName, tableName, null);
+ } catch (NoSuchObjectException e) {
+ LOG.info("Found no table level column statistics associated with db " + dbName +
+ " table " + tableName + " record to delete");
+ }
preDropStorageDescriptor(tbl.getSd());
// then remove the table
@@ -1267,7 +1287,8 @@ public class ObjectStore implements RawS
@Override
public boolean dropPartition(String dbName, String tableName,
- List<String> part_vals) throws MetaException {
+ List<String> part_vals) throws MetaException, NoSuchObjectException, InvalidObjectException,
+ InvalidInputException {
boolean success = false;
try {
openTransaction();
@@ -1287,8 +1308,13 @@ public class ObjectStore implements RawS
* drop the storage descriptor cleanly, etc.)
* @param part - the MPartition to drop
* @return whether the transaction committed successfully
+ * @throws InvalidInputException
+ * @throws InvalidObjectException
+ * @throws MetaException
+ * @throws NoSuchObjectException
*/
- private boolean dropPartitionCommon(MPartition part) {
+ private boolean dropPartitionCommon(MPartition part) throws NoSuchObjectException, MetaException,
+ InvalidObjectException, InvalidInputException {
boolean success = false;
try {
openTransaction();
@@ -1316,6 +1342,17 @@ public class ObjectStore implements RawS
if (partColumnGrants != null && partColumnGrants.size() > 0) {
pm.deletePersistentAll(partColumnGrants);
}
+
+ String dbName = part.getTable().getDatabase().getName();
+ String tableName = part.getTable().getTableName();
+
+ // delete partition level column stats if it exists
+ try {
+ deletePartitionColumnStatistics(dbName, tableName, partName, part.getValues(), null);
+ } catch (NoSuchObjectException e) {
+ LOG.info("No column statistics records found to delete");
+ }
+
preDropStorageDescriptor(part.getSd());
pm.deletePersistent(part);
}
@@ -4446,6 +4483,743 @@ public class ObjectStore implements RawS
}
}
+ // Methods to persist, maintain and retrieve Column Statistics
+ /**
+  * Builds the persistent table-level statistics object for a single column from the
+  * descriptor/object pair supplied by the caller.
+  *
+  * @param statsDesc supplies the database name, table name and last-analyzed value
+  * @param statsObj supplies the column name, column type and the typed statistics
+  * @return a newly created MTableColumnStatistics; this method does not persist it
+  * @throws InvalidObjectException if either argument is null
+  * @throws NoSuchObjectException if getMTable returns null for the described table
+  * @throws MetaException declared by the signature; presumably raised by the table lookup
+  */
+ private MTableColumnStatistics convertToMTableColumnStatistics(ColumnStatisticsDesc statsDesc,
+   ColumnStatisticsObj statsObj) throws NoSuchObjectException,
+   MetaException, InvalidObjectException
+ {
+   if (statsDesc == null || statsObj == null) {
+     throw new InvalidObjectException("Invalid column stats object");
+   }
+
+   String db = statsDesc.getDbName();
+   String tbl = statsDesc.getTableName();
+   MTable owner = getMTable(db, tbl);
+   if (owner == null) {
+     throw new NoSuchObjectException("Table " + tbl +
+       " for which stats is gathered doesn't exist.");
+   }
+
+   MTableColumnStatistics result = new MTableColumnStatistics();
+   result.setTable(owner);
+   result.setDbName(db);
+   result.setTableName(tbl);
+   result.setLastAnalyzed(statsDesc.getLastAnalyzed());
+   result.setColName(statsObj.getColName());
+   result.setColType(statsObj.getColType());
+
+   // Copy whichever typed statistics member is set; if none is set nothing is copied.
+   ColumnStatisticsData data = statsObj.getStatsData();
+   if (data.isSetBooleanStats()) {
+     BooleanColumnStatsData bools = data.getBooleanStats();
+     result.setBooleanStats(bools.getNumTrues(), bools.getNumFalses(), bools.getNumNulls());
+   } else if (data.isSetLongStats()) {
+     LongColumnStatsData longs = data.getLongStats();
+     result.setLongStats(longs.getNumNulls(), longs.getNumDVs(),
+       longs.getLowValue(), longs.getHighValue());
+   } else if (data.isSetDoubleStats()) {
+     DoubleColumnStatsData doubles = data.getDoubleStats();
+     result.setDoubleStats(doubles.getNumNulls(), doubles.getNumDVs(),
+       doubles.getLowValue(), doubles.getHighValue());
+   } else if (data.isSetStringStats()) {
+     StringColumnStatsData strings = data.getStringStats();
+     result.setStringStats(strings.getNumNulls(), strings.getNumDVs(),
+       strings.getMaxColLen(), strings.getAvgColLen());
+   } else if (data.isSetBinaryStats()) {
+     BinaryColumnStatsData binaries = data.getBinaryStats();
+     result.setBinaryStats(binaries.getNumNulls(), binaries.getMaxColLen(),
+       binaries.getAvgColLen());
+   }
+   return result;
+ }
+
+ /**
+  * Converts a persisted table-level statistics row into a ColumnStatisticsObj.
+  * The typed statistics member is selected from the stored column type, compared
+  * case-insensitively; for a type not listed below no typed member is set.
+  *
+  * @param mStatsObj the persisted statistics row to convert
+  * @return the column name, column type and statistics data of the row
+  */
+ private ColumnStatisticsObj getTableColumnStatisticsObj(MTableColumnStatistics mStatsObj) {
+   ColumnStatisticsObj result = new ColumnStatisticsObj();
+   result.setColType(mStatsObj.getColType());
+   result.setColName(mStatsObj.getColName());
+
+   String type = mStatsObj.getColType();
+   ColumnStatisticsData data = new ColumnStatisticsData();
+
+   if (type.equalsIgnoreCase("boolean")) {
+     BooleanColumnStatsData bools = new BooleanColumnStatsData();
+     bools.setNumTrues(mStatsObj.getNumTrues());
+     bools.setNumFalses(mStatsObj.getNumFalses());
+     bools.setNumNulls(mStatsObj.getNumNulls());
+     data.setBooleanStats(bools);
+   } else if (type.equalsIgnoreCase("string")) {
+     StringColumnStatsData strings = new StringColumnStatsData();
+     strings.setNumDVs(mStatsObj.getNumDVs());
+     strings.setNumNulls(mStatsObj.getNumNulls());
+     strings.setMaxColLen(mStatsObj.getMaxColLen());
+     strings.setAvgColLen(mStatsObj.getAvgColLen());
+     data.setStringStats(strings);
+   } else if (type.equalsIgnoreCase("binary")) {
+     BinaryColumnStatsData binaries = new BinaryColumnStatsData();
+     binaries.setNumNulls(mStatsObj.getNumNulls());
+     binaries.setMaxColLen(mStatsObj.getMaxColLen());
+     binaries.setAvgColLen(mStatsObj.getAvgColLen());
+     data.setBinaryStats(binaries);
+   } else if (type.equalsIgnoreCase("tinyint") || type.equalsIgnoreCase("smallint") ||
+       type.equalsIgnoreCase("int") || type.equalsIgnoreCase("bigint") ||
+       type.equalsIgnoreCase("timestamp")) {
+     // Integer-like types and timestamp are reported through the long statistics member.
+     LongColumnStatsData longs = new LongColumnStatsData();
+     longs.setNumDVs(mStatsObj.getNumDVs());
+     longs.setNumNulls(mStatsObj.getNumNulls());
+     longs.setLowValue(mStatsObj.getLowValueAsLong());
+     longs.setHighValue(mStatsObj.getHighValueAsLong());
+     data.setLongStats(longs);
+   } else if (type.equalsIgnoreCase("float") || type.equalsIgnoreCase("double")) {
+     DoubleColumnStatsData doubles = new DoubleColumnStatsData();
+     doubles.setNumDVs(mStatsObj.getNumDVs());
+     doubles.setNumNulls(mStatsObj.getNumNulls());
+     doubles.setLowValue(mStatsObj.getLowValueAsDouble());
+     doubles.setHighValue(mStatsObj.getHighValueAsDouble());
+     data.setDoubleStats(doubles);
+   }
+
+   result.setStatsData(data);
+   return result;
+ }
+
+ /**
+  * Creates the descriptor for a persisted table-level statistics row, marked as
+  * table level and carrying the row's database name, table name and last-analyzed value.
+  *
+  * @param mStatsObj the persisted statistics row
+  * @return a new ColumnStatisticsDesc describing that row
+  */
+ private ColumnStatisticsDesc getTableColumnStatisticsDesc(MTableColumnStatistics mStatsObj) {
+   ColumnStatisticsDesc desc = new ColumnStatisticsDesc();
+   desc.setIsTblLevel(true);
+   desc.setLastAnalyzed(mStatsObj.getLastAnalyzed());
+   desc.setTableName(mStatsObj.getTableName());
+   desc.setDbName(mStatsObj.getDbName());
+   return desc;
+ }
+
+ /**
+  * Wraps one persisted table-level statistics row into a ColumnStatistics holding its
+  * descriptor and a single-element list of column statistics.
+  *
+  * @param mStatsObj the persisted row, may be null
+  * @return the converted statistics, or null when mStatsObj is null
+  * @throws MetaException declared by the signature; nothing in this body throws it directly
+  */
+ private ColumnStatistics convertToTableColumnStatistics(MTableColumnStatistics mStatsObj)
+   throws MetaException
+ {
+   if (mStatsObj == null) {
+     return null;
+   }
+
+   ColumnStatisticsDesc desc = getTableColumnStatisticsDesc(mStatsObj);
+   List<ColumnStatisticsObj> columns = new ArrayList<ColumnStatisticsObj>();
+   columns.add(getTableColumnStatisticsObj(mStatsObj));
+
+   ColumnStatistics result = new ColumnStatistics();
+   result.setStatsObj(columns);
+   result.setStatsDesc(desc);
+   return result;
+ }
+
+ /**
+  * Builds the persistent partition-level statistics object for a single column from the
+  * descriptor/object pair supplied by the caller.
+  *
+  * Null input is rejected with InvalidObjectException, matching
+  * convertToMTableColumnStatistics. Returning null instead would only defer the failure:
+  * updatePartitionColumnStatistics hands the result straight to
+  * writeMPartitionColumnStatistics, which dereferences it.
+  *
+  * @param statsDesc supplies database, table and partition names and the last-analyzed value
+  * @param statsObj supplies the column name, column type and the typed statistics
+  * @param partVal partition values used to look up the partition
+  * @return a newly created MPartitionColumnStatistics; this method does not persist it
+  * @throws InvalidObjectException if any argument is null
+  * @throws NoSuchObjectException if getMPartition returns null for the given values
+  * @throws MetaException declared by the signature; presumably raised by the partition lookup
+  */
+ private MPartitionColumnStatistics convertToMPartitionColumnStatistics(ColumnStatisticsDesc
+   statsDesc, ColumnStatisticsObj statsObj, List<String> partVal)
+   throws MetaException, NoSuchObjectException, InvalidObjectException
+ {
+   if (statsDesc == null || statsObj == null || partVal == null) {
+     throw new InvalidObjectException("Invalid column stats object");
+   }
+
+   MPartition partition = getMPartition(statsDesc.getDbName(), statsDesc.getTableName(), partVal);
+
+   if (partition == null) {
+     throw new NoSuchObjectException("Partition for which stats is gathered doesn't exist.");
+   }
+
+   MPartitionColumnStatistics mColStats = new MPartitionColumnStatistics();
+   mColStats.setPartition(partition);
+   mColStats.setDbName(statsDesc.getDbName());
+   mColStats.setTableName(statsDesc.getTableName());
+   mColStats.setPartitionName(statsDesc.getPartName());
+   mColStats.setLastAnalyzed(statsDesc.getLastAnalyzed());
+   mColStats.setColName(statsObj.getColName());
+   mColStats.setColType(statsObj.getColType());
+
+   // Copy whichever typed statistics member is set; if none is set nothing is copied.
+   ColumnStatisticsData statsData = statsObj.getStatsData();
+   if (statsData.isSetBooleanStats()) {
+     BooleanColumnStatsData boolStats = statsData.getBooleanStats();
+     mColStats.setBooleanStats(boolStats.getNumTrues(), boolStats.getNumFalses(),
+       boolStats.getNumNulls());
+   } else if (statsData.isSetLongStats()) {
+     LongColumnStatsData longStats = statsData.getLongStats();
+     mColStats.setLongStats(longStats.getNumNulls(), longStats.getNumDVs(),
+       longStats.getLowValue(), longStats.getHighValue());
+   } else if (statsData.isSetDoubleStats()) {
+     DoubleColumnStatsData doubleStats = statsData.getDoubleStats();
+     mColStats.setDoubleStats(doubleStats.getNumNulls(), doubleStats.getNumDVs(),
+       doubleStats.getLowValue(), doubleStats.getHighValue());
+   } else if (statsData.isSetStringStats()) {
+     StringColumnStatsData stringStats = statsData.getStringStats();
+     mColStats.setStringStats(stringStats.getNumNulls(), stringStats.getNumDVs(),
+       stringStats.getMaxColLen(), stringStats.getAvgColLen());
+   } else if (statsData.isSetBinaryStats()) {
+     BinaryColumnStatsData binaryStats = statsData.getBinaryStats();
+     mColStats.setBinaryStats(binaryStats.getNumNulls(), binaryStats.getMaxColLen(),
+       binaryStats.getAvgColLen());
+   }
+   return mColStats;
+ }
+
+ /**
+  * Stores table-level statistics for one column. When a statistics row already exists for
+  * the same database, table and column its values are overwritten in place; otherwise the
+  * given object is made persistent.
+  *
+  * @param mStatsObj the statistics to store; its db, table and column names select the row
+  * @throws NoSuchObjectException if the table is not found, or the column is not among the
+  *         columns of the table's storage descriptor
+  * @throws MetaException declared by the signature; presumably raised by the lookups
+  * @throws InvalidObjectException declared by the signature; not thrown directly here
+  * @throws InvalidInputException propagated from getMTableColumnStatistics
+  */
+ private void writeMTableColumnStatistics(MTableColumnStatistics mStatsObj)
+   throws NoSuchObjectException, MetaException, InvalidObjectException, InvalidInputException
+ {
+   String dbName = mStatsObj.getDbName();
+   String tableName = mStatsObj.getTableName();
+   String colName = mStatsObj.getColName();
+
+   LOG.info("Updating table level column statistics for db=" + dbName + " tableName=" + tableName
+     + " colName=" + colName);
+
+   MTable target = getMTable(dbName, tableName);
+   if (target == null) {
+     throw new NoSuchObjectException("Table " + tableName +
+       " for which stats gathering is requested doesn't exist.");
+   }
+
+   // The column has to appear in the table's column descriptor.
+   boolean columnKnown = false;
+   for (MFieldSchema field : target.getSd().getCD().getCols()) {
+     if (field.getName().equals(colName.trim())) {
+       columnKnown = true;
+       break;
+     }
+   }
+   if (!columnKnown) {
+     throw new NoSuchObjectException("Column " + colName +
+       " for which stats gathering is requested doesn't exist.");
+   }
+
+   MTableColumnStatistics existing = getMTableColumnStatistics(dbName, tableName, colName);
+   if (existing == null) {
+     pm.makePersistent(mStatsObj);
+     return;
+   }
+
+   // Overwrite the stored values with the freshly gathered ones.
+   existing.setLastAnalyzed(mStatsObj.getLastAnalyzed());
+   existing.setNumNulls(mStatsObj.getNumNulls());
+   existing.setNumDVs(mStatsObj.getNumDVs());
+   existing.setNumTrues(mStatsObj.getNumTrues());
+   existing.setNumFalses(mStatsObj.getNumFalses());
+   existing.setLowValue(mStatsObj.getLowValue());
+   existing.setHighValue(mStatsObj.getHighValue());
+   existing.setMaxColLen(mStatsObj.getMaxColLen());
+   existing.setAvgColLen(mStatsObj.getAvgColLen());
+ }
+
+ /**
+  * Converts a persisted partition-level statistics row into a ColumnStatisticsObj.
+  * The typed statistics member is selected from the stored column type, compared
+  * case-insensitively; for a type not listed below no typed member is set.
+  *
+  * @param mStatsObj the persisted statistics row to convert
+  * @return the column name, column type and statistics data of the row
+  */
+ private ColumnStatisticsObj getPartitionColumnStatisticsObj(MPartitionColumnStatistics mStatsObj)
+ {
+   ColumnStatisticsObj result = new ColumnStatisticsObj();
+   result.setColType(mStatsObj.getColType());
+   result.setColName(mStatsObj.getColName());
+
+   String type = mStatsObj.getColType();
+   ColumnStatisticsData data = new ColumnStatisticsData();
+
+   if (type.equalsIgnoreCase("boolean")) {
+     BooleanColumnStatsData bools = new BooleanColumnStatsData();
+     bools.setNumTrues(mStatsObj.getNumTrues());
+     bools.setNumFalses(mStatsObj.getNumFalses());
+     bools.setNumNulls(mStatsObj.getNumNulls());
+     data.setBooleanStats(bools);
+   } else if (type.equalsIgnoreCase("string")) {
+     StringColumnStatsData strings = new StringColumnStatsData();
+     strings.setNumDVs(mStatsObj.getNumDVs());
+     strings.setNumNulls(mStatsObj.getNumNulls());
+     strings.setMaxColLen(mStatsObj.getMaxColLen());
+     strings.setAvgColLen(mStatsObj.getAvgColLen());
+     data.setStringStats(strings);
+   } else if (type.equalsIgnoreCase("binary")) {
+     BinaryColumnStatsData binaries = new BinaryColumnStatsData();
+     binaries.setNumNulls(mStatsObj.getNumNulls());
+     binaries.setMaxColLen(mStatsObj.getMaxColLen());
+     binaries.setAvgColLen(mStatsObj.getAvgColLen());
+     data.setBinaryStats(binaries);
+   } else if (type.equalsIgnoreCase("bigint") || type.equalsIgnoreCase("int") ||
+       type.equalsIgnoreCase("smallint") || type.equalsIgnoreCase("tinyint") ||
+       type.equalsIgnoreCase("timestamp")) {
+     // Integer-like types and timestamp are reported through the long statistics member.
+     LongColumnStatsData longs = new LongColumnStatsData();
+     longs.setNumDVs(mStatsObj.getNumDVs());
+     longs.setNumNulls(mStatsObj.getNumNulls());
+     longs.setLowValue(mStatsObj.getLowValueAsLong());
+     longs.setHighValue(mStatsObj.getHighValueAsLong());
+     data.setLongStats(longs);
+   } else if (type.equalsIgnoreCase("float") || type.equalsIgnoreCase("double")) {
+     DoubleColumnStatsData doubles = new DoubleColumnStatsData();
+     doubles.setNumDVs(mStatsObj.getNumDVs());
+     doubles.setNumNulls(mStatsObj.getNumNulls());
+     doubles.setLowValue(mStatsObj.getLowValueAsDouble());
+     doubles.setHighValue(mStatsObj.getHighValueAsDouble());
+     data.setDoubleStats(doubles);
+   }
+
+   result.setStatsData(data);
+   return result;
+ }
+
+ /**
+  * Creates the descriptor for a persisted partition-level statistics row, marked as not
+  * table level and carrying the row's database, table and partition names and its
+  * last-analyzed value.
+  *
+  * @param mStatsObj the persisted statistics row
+  * @return a new ColumnStatisticsDesc describing that row
+  */
+ private ColumnStatisticsDesc getPartitionColumnStatisticsDesc(
+   MPartitionColumnStatistics mStatsObj) {
+   ColumnStatisticsDesc desc = new ColumnStatisticsDesc();
+   desc.setIsTblLevel(false);
+   desc.setLastAnalyzed(mStatsObj.getLastAnalyzed());
+   desc.setPartName(mStatsObj.getPartitionName());
+   desc.setTableName(mStatsObj.getTableName());
+   desc.setDbName(mStatsObj.getDbName());
+   return desc;
+ }
+
+ /**
+  * Stores partition-level statistics for one column. When a statistics row already exists
+  * for the same database, table, partition and column its values are overwritten in place;
+  * otherwise the given object is made persistent.
+  *
+  * @param mStatsObj the statistics to store; its names select the target row
+  * @param partVal partition values used to look up the partition
+  * @throws NoSuchObjectException if the table or partition is not found, or the column is
+  *         not among the columns of the partition's storage descriptor
+  * @throws MetaException declared by the signature; presumably raised by the lookups
+  * @throws InvalidObjectException declared by the signature; not thrown directly here
+  * @throws InvalidInputException propagated from getMPartitionColumnStatistics
+  */
+ private void writeMPartitionColumnStatistics(MPartitionColumnStatistics mStatsObj,
+   List<String> partVal) throws NoSuchObjectException, MetaException, InvalidObjectException,
+   InvalidInputException
+ {
+   String dbName = mStatsObj.getDbName();
+   String tableName = mStatsObj.getTableName();
+   String partName = mStatsObj.getPartitionName();
+   String colName = mStatsObj.getColName();
+
+   LOG.info("Updating partition level column statistics for db=" + dbName + " tableName=" +
+     tableName + " partName=" + partName + " colName=" + colName);
+
+   if (getMTable(dbName, tableName) == null) {
+     throw new NoSuchObjectException("Table " + tableName +
+       " for which stats gathering is requested doesn't exist.");
+   }
+
+   MPartition target = getMPartition(dbName, tableName, partVal);
+   if (target == null) {
+     throw new NoSuchObjectException("Partition " + partName +
+       " for which stats gathering is requested doesn't exist");
+   }
+
+   // The column has to appear in the partition's column descriptor.
+   boolean columnKnown = false;
+   for (MFieldSchema field : target.getSd().getCD().getCols()) {
+     if (field.getName().equals(colName.trim())) {
+       columnKnown = true;
+       break;
+     }
+   }
+   if (!columnKnown) {
+     throw new NoSuchObjectException("Column " + colName +
+       " for which stats gathering is requested doesn't exist.");
+   }
+
+   MPartitionColumnStatistics existing = getMPartitionColumnStatistics(dbName, tableName,
+     partName, partVal, colName);
+   if (existing == null) {
+     pm.makePersistent(mStatsObj);
+     return;
+   }
+
+   // Overwrite the stored values with the freshly gathered ones.
+   existing.setLastAnalyzed(mStatsObj.getLastAnalyzed());
+   existing.setNumNulls(mStatsObj.getNumNulls());
+   existing.setNumDVs(mStatsObj.getNumDVs());
+   existing.setNumTrues(mStatsObj.getNumTrues());
+   existing.setNumFalses(mStatsObj.getNumFalses());
+   existing.setLowValue(mStatsObj.getLowValue());
+   existing.setHighValue(mStatsObj.getHighValue());
+   existing.setMaxColLen(mStatsObj.getMaxColLen());
+   existing.setAvgColLen(mStatsObj.getAvgColLen());
+ }
+
+ /**
+  * Writes every column entry of the given statistics as table-level statistics inside a
+  * single transaction, which is rolled back unless the commit succeeds.
+  *
+  * @param colStats descriptor plus the list of per-column statistics to store
+  * @return the result of commitTransaction
+  * @throws NoSuchObjectException propagated from the conversion or write step
+  * @throws MetaException propagated from the conversion or write step
+  * @throws InvalidObjectException propagated from the conversion or write step
+  * @throws InvalidInputException propagated from the write step
+  */
+ public boolean updateTableColumnStatistics(ColumnStatistics colStats)
+   throws NoSuchObjectException, MetaException, InvalidObjectException, InvalidInputException
+ {
+   boolean committed = false;
+
+   try {
+     openTransaction();
+     ColumnStatisticsDesc desc = colStats.getStatsDesc();
+
+     for (ColumnStatisticsObj columnStats : colStats.getStatsObj()) {
+       writeMTableColumnStatistics(convertToMTableColumnStatistics(desc, columnStats));
+     }
+     committed = commitTransaction();
+   } finally {
+     if (!committed) {
+       rollbackTransaction();
+     }
+   }
+   return committed;
+ }
+
+ /**
+  * Writes every column entry of the given statistics as partition-level statistics inside
+  * a single transaction, which is rolled back unless the commit succeeds.
+  *
+  * @param colStats descriptor plus the list of per-column statistics to store
+  * @param partVals partition values identifying the partition
+  * @return the result of commitTransaction
+  * @throws NoSuchObjectException propagated from the conversion or write step
+  * @throws MetaException propagated from the conversion or write step
+  * @throws InvalidObjectException propagated from the write step
+  * @throws InvalidInputException propagated from the write step
+  */
+ public boolean updatePartitionColumnStatistics(ColumnStatistics colStats, List<String> partVals)
+   throws NoSuchObjectException, MetaException, InvalidObjectException, InvalidInputException
+ {
+   boolean committed = false;
+
+   try {
+     openTransaction();
+     ColumnStatisticsDesc desc = colStats.getStatsDesc();
+
+     for (ColumnStatisticsObj columnStats : colStats.getStatsObj()) {
+       MPartitionColumnStatistics converted =
+         convertToMPartitionColumnStatistics(desc, columnStats, partVals);
+       writeMPartitionColumnStatistics(converted, partVals);
+     }
+     committed = commitTransaction();
+   } finally {
+     if (!committed) {
+       rollbackTransaction();
+     }
+   }
+   return committed;
+ }
+
+ /**
+  * Looks up the persisted table-level statistics row for one column.
+  *
+  * A null dbName is replaced by MetaStoreUtils.DEFAULT_DATABASE_NAME. Names are trimmed
+  * before they are used for the table lookup and the query.
+  *
+  * Exceptions raised inside the transaction now reach the caller. Previously a
+  * {@code return null} inside the finally block discarded them, so a missing table or
+  * column was reported as "no statistics" instead of the NoSuchObjectException built here.
+  *
+  * @param dbName database name, or null for the default database
+  * @param tableName table name, must not be null
+  * @param colName column name, must not be null
+  * @return the matching row; null if the query matches nothing or the commit does not succeed
+  * @throws InvalidInputException if tableName or colName is null
+  * @throws NoSuchObjectException if the table is not found or does not contain the column
+  * @throws MetaException declared by the signature; presumably raised by the table lookup
+  */
+ private MTableColumnStatistics getMTableColumnStatistics(String dbName, String tableName,
+   String colName) throws NoSuchObjectException, InvalidInputException, MetaException
+ {
+   boolean committed = false;
+
+   if (dbName == null) {
+     dbName = MetaStoreUtils.DEFAULT_DATABASE_NAME;
+   }
+
+   if (tableName == null || colName == null) {
+     throw new InvalidInputException("TableName/ColName passed to get_table_column_statistics " +
+       "is null");
+   }
+
+   try {
+     openTransaction();
+     MTable mTable = getMTable(dbName.trim(), tableName.trim());
+
+     if (mTable == null) {
+       throw new NoSuchObjectException("Table " + tableName +
+         " for which stats is requested doesn't exist.");
+     }
+
+     // The column has to appear in the table's column descriptor.
+     boolean foundCol = false;
+     List<MFieldSchema> colList = mTable.getSd().getCD().getCols();
+
+     for (MFieldSchema mCol : colList) {
+       if (mCol.getName().equals(colName.trim())) {
+         foundCol = true;
+         break;
+       }
+     }
+
+     if (!foundCol) {
+       throw new NoSuchObjectException("Column " + colName +
+         " for which stats is requested doesn't exist.");
+     }
+
+     Query query = pm.newQuery(MTableColumnStatistics.class);
+     query.setFilter("table.tableName == t1 && " +
+       "dbName == t2 && " + "colName == t3");
+     query
+       .declareParameters("java.lang.String t1, java.lang.String t2, java.lang.String t3");
+     query.setUnique(true);
+
+     MTableColumnStatistics mStatsObj = (MTableColumnStatistics) query.execute(tableName.trim(),
+       dbName.trim(), colName.trim());
+     pm.retrieve(mStatsObj);
+     committed = commitTransaction();
+     // As before, an unsuccessful commit yields no result.
+     return committed ? mStatsObj : null;
+   } finally {
+     if (!committed) {
+       // Roll back only. Returning from finally would swallow any in-flight exception.
+       rollbackTransaction();
+     }
+   }
+ }
+
+ /**
+  * Returns the table-level statistics stored for one column.
+  *
+  * @param dbName database name
+  * @param tableName table name
+  * @param colName column name
+  * @return the stored statistics converted to a ColumnStatistics
+  * @throws NoSuchObjectException if the lookup yields no statistics row
+  * @throws MetaException propagated from the lookup or the conversion
+  * @throws InvalidInputException propagated from the lookup
+  */
+ public ColumnStatistics getTableColumnStatistics(String dbName, String tableName, String colName)
+   throws MetaException, NoSuchObjectException, InvalidInputException
+ {
+   MTableColumnStatistics stored = getMTableColumnStatistics(dbName, tableName, colName);
+
+   if (stored != null) {
+     return convertToTableColumnStatistics(stored);
+   }
+
+   throw new NoSuchObjectException("Statistics for dbName=" + dbName + " tableName=" + tableName
+     + " columnName=" + colName + " doesn't exist.");
+ }
+
+ /**
+  * Returns the partition-level statistics stored for one column.
+  *
+  * @param dbName database name
+  * @param tableName table name
+  * @param partName partition name
+  * @param partVal partition values
+  * @param colName column name
+  * @return the stored statistics converted to a ColumnStatistics
+  * @throws NoSuchObjectException if the lookup yields no statistics row, or propagated from it
+  * @throws MetaException propagated from the lookup
+  * @throws InvalidInputException propagated from the lookup
+  */
+ public ColumnStatistics getPartitionColumnStatistics(String dbName, String tableName,
+   String partName, List<String> partVal, String colName)
+   throws MetaException, NoSuchObjectException, InvalidInputException
+ {
+   MPartitionColumnStatistics stored =
+     getMPartitionColumnStatistics(dbName, tableName, partName, partVal, colName);
+
+   if (stored != null) {
+     return convertToPartColumnStatistics(stored);
+   }
+
+   throw new NoSuchObjectException("Statistics for dbName=" + dbName + " tableName=" + tableName
+     + " partName= " + partName + " columnName=" + colName + " doesn't exist.");
+ }
+
+ /**
+  * Wraps one persisted partition-level statistics row into a ColumnStatistics holding its
+  * descriptor and a single-element list of column statistics.
+  *
+  * @param mStatsObj the persisted row, may be null
+  * @return the converted statistics, or null when mStatsObj is null
+  */
+ private ColumnStatistics convertToPartColumnStatistics(MPartitionColumnStatistics mStatsObj)
+ {
+   if (mStatsObj == null) {
+     return null;
+   }
+
+   ColumnStatisticsDesc desc = getPartitionColumnStatisticsDesc(mStatsObj);
+   List<ColumnStatisticsObj> columns = new ArrayList<ColumnStatisticsObj>();
+   columns.add(getPartitionColumnStatisticsObj(mStatsObj));
+
+   ColumnStatistics result = new ColumnStatistics();
+   result.setStatsObj(columns);
+   result.setStatsDesc(desc);
+   return result;
+ }
+
+ /**
+  * Looks up the persisted partition-level statistics row for one column.
+  *
+  * A null dbName is replaced by MetaStoreUtils.DEFAULT_DATABASE_NAME. The partition is
+  * located through partVal, while the statistics query filters on the trimmed partName.
+  * partName is therefore validated together with the other arguments; previously a null
+  * partName passed validation and failed later with a NullPointerException on trim().
+  *
+  * @param dbName database name, or null for the default database
+  * @param tableName table name, must not be null
+  * @param partName partition name used in the query filter, must not be null
+  * @param partVal partition values used to look up the partition, must not be null
+  * @param colName column name, must not be null
+  * @return the matching row, or null if the query matches nothing
+  * @throws InvalidInputException if tableName, partName, partVal or colName is null
+  * @throws NoSuchObjectException if the table or partition is not found, or the partition
+  *         does not contain the column
+  * @throws MetaException declared by the signature; presumably raised by the lookups
+  */
+ private MPartitionColumnStatistics getMPartitionColumnStatistics(String dbName, String tableName,
+   String partName, List<String> partVal, String colName) throws NoSuchObjectException,
+   InvalidInputException, MetaException
+ {
+   boolean committed = false;
+   MPartitionColumnStatistics mStatsObj = null;
+
+   if (dbName == null) {
+     dbName = MetaStoreUtils.DEFAULT_DATABASE_NAME;
+   }
+
+   if (tableName == null || partName == null || partVal == null || colName == null) {
+     throw new InvalidInputException("TableName/PartName/ColName passed to " +
+       " get_partition_column_statistics is null");
+   }
+
+   try {
+     openTransaction();
+     MTable mTable = getMTable(dbName.trim(), tableName.trim());
+
+     if (mTable == null) {
+       throw new NoSuchObjectException("Table " + tableName +
+         " for which stats is requested doesn't exist.");
+     }
+
+     MPartition mPartition =
+       getMPartition(dbName, tableName, partVal);
+
+     if (mPartition == null) {
+       throw new
+         NoSuchObjectException("Partition " + partName +
+           " for which stats is requested doesn't exist");
+     }
+
+     // The column has to appear in the partition's column descriptor.
+     boolean foundCol = false;
+     List<MFieldSchema> colList = mPartition.getSd().getCD().getCols();
+
+     for (MFieldSchema mCol : colList) {
+       if (mCol.getName().equals(colName.trim())) {
+         foundCol = true;
+         break;
+       }
+     }
+
+     if (!foundCol) {
+       throw new NoSuchObjectException("Column " + colName +
+         " for which stats is requested doesn't exist.");
+     }
+
+     Query query = pm.newQuery(MPartitionColumnStatistics.class);
+     query.setFilter("partition.partitionName == t1 && " +
+       "dbName == t2 && " + "tableName == t3 && " + "colName == t4");
+     query
+       .declareParameters("java.lang.String t1, java.lang.String t2, " +
+         "java.lang.String t3, java.lang.String t4");
+     query.setUnique(true);
+
+     mStatsObj = (MPartitionColumnStatistics) query.executeWithArray(partName.trim(),
+       dbName.trim(), tableName.trim(),
+       colName.trim());
+     pm.retrieve(mStatsObj);
+     committed = commitTransaction();
+     return mStatsObj;
+
+   } finally {
+     if (!committed) {
+       rollbackTransaction();
+     }
+   }
+ }
+
+ /**
+  * Deletes partition-level column statistics: the row for a single column when colName is
+  * given, or every statistics row of the partition when colName is null.
+  *
+  * A null dbName is replaced by MetaStoreUtils.DEFAULT_DATABASE_NAME. The partition is
+  * located through partVals, while the statistics query filters on the trimmed partName,
+  * so partName is validated up front instead of failing later on trim().
+  *
+  * The transaction is rolled back in the finally block whenever the commit did not succeed.
+  * This also covers the NoSuchObjectException paths, so no separate catch is needed; the
+  * previous catch block only caused rollbackTransaction to be called twice.
+  *
+  * @param dbName database name, or null for the default database
+  * @param tableName table name, must not be null
+  * @param partName partition name used in the query filter, must not be null
+  * @param partVals partition values used to look up the partition
+  * @param colName column whose statistics are deleted, or null for all columns
+  * @return the result of commitTransaction
+  * @throws InvalidInputException if tableName or partName is null
+  * @throws NoSuchObjectException if the table or partition is not found, or no statistics
+  *         row matches
+  * @throws MetaException declared by the signature; presumably raised by the lookups
+  * @throws InvalidObjectException declared by the signature; not thrown directly here
+  */
+ public boolean deletePartitionColumnStatistics(String dbName, String tableName,
+   String partName, List<String> partVals,String colName)
+   throws NoSuchObjectException, MetaException, InvalidObjectException, InvalidInputException
+ {
+   boolean ret = false;
+
+   if (dbName == null) {
+     dbName = MetaStoreUtils.DEFAULT_DATABASE_NAME;
+   }
+
+   if (tableName == null) {
+     throw new InvalidInputException("Table name is null.");
+   }
+
+   if (partName == null) {
+     throw new InvalidInputException("Partition name is null.");
+   }
+
+   try {
+     openTransaction();
+     MTable mTable = getMTable(dbName, tableName);
+     MPartitionColumnStatistics mStatsObj;
+     List<MPartitionColumnStatistics> mStatsObjColl;
+
+     if (mTable == null) {
+       throw new
+         NoSuchObjectException("Table " + tableName +
+           " for which stats deletion is requested doesn't exist");
+     }
+
+     MPartition mPartition =
+       getMPartition(dbName, tableName, partVals);
+
+     if (mPartition == null) {
+       throw new
+         NoSuchObjectException("Partition " + partName +
+           " for which stats deletion is requested doesn't exist");
+     }
+
+     Query query = pm.newQuery(MPartitionColumnStatistics.class);
+     String filter;
+     String parameters;
+
+     if (colName != null) {
+       filter = "partition.partitionName == t1 && dbName == t2 && tableName == t3 && " +
+         "colName == t4";
+       parameters = "java.lang.String t1, java.lang.String t2, " +
+         "java.lang.String t3, java.lang.String t4";
+     } else {
+       filter = "partition.partitionName == t1 && dbName == t2 && tableName == t3";
+       parameters = "java.lang.String t1, java.lang.String t2, java.lang.String t3";
+     }
+
+     query.setFilter(filter);
+     query
+       .declareParameters(parameters);
+
+     if (colName != null) {
+       // Single-column delete: at most one row is expected.
+       query.setUnique(true);
+       mStatsObj = (MPartitionColumnStatistics)query.executeWithArray(partName.trim(),
+         dbName.trim(), tableName.trim(), colName.trim());
+       pm.retrieve(mStatsObj);
+
+       if (mStatsObj != null) {
+         pm.deletePersistent(mStatsObj);
+       } else {
+         throw new NoSuchObjectException("Column stats doesn't exist for db=" +dbName + " table="
+           + tableName + " partition=" + partName + " col=" + colName);
+       }
+     } else {
+       // Whole-partition delete: remove every statistics row of the partition.
+       mStatsObjColl= (List<MPartitionColumnStatistics>)query.execute(partName.trim(),
+         dbName.trim(), tableName.trim());
+       pm.retrieveAll(mStatsObjColl);
+
+       if (mStatsObjColl != null) {
+         pm.deletePersistentAll(mStatsObjColl);
+       } else {
+         throw new NoSuchObjectException("Column stats doesn't exist for db=" + dbName +
+           " table=" + tableName + " partition=" + partName);
+       }
+     }
+     ret = commitTransaction();
+   } finally {
+     if (!ret) {
+       rollbackTransaction();
+     }
+   }
+   return ret;
+ }
+
+ /**
+  * Deletes table-level column statistics: the row for a single column when colName is
+  * given, or every statistics row of the table when colName is null.
+  *
+  * A null dbName is replaced by MetaStoreUtils.DEFAULT_DATABASE_NAME.
+  *
+  * The transaction is rolled back in the finally block whenever the commit did not succeed.
+  * This also covers the NoSuchObjectException paths, so no separate catch is needed; the
+  * previous catch block only caused rollbackTransaction to be called twice.
+  *
+  * @param dbName database name, or null for the default database
+  * @param tableName table name, must not be null
+  * @param colName column whose statistics are deleted, or null for all columns
+  * @return the result of commitTransaction
+  * @throws InvalidInputException if tableName is null
+  * @throws NoSuchObjectException if the table is not found or no statistics row matches
+  * @throws MetaException declared by the signature; presumably raised by the table lookup
+  * @throws InvalidObjectException declared by the signature; not thrown directly here
+  */
+ public boolean deleteTableColumnStatistics(String dbName, String tableName, String colName)
+   throws NoSuchObjectException, MetaException, InvalidObjectException, InvalidInputException
+ {
+   boolean ret = false;
+
+   if (dbName == null) {
+     dbName = MetaStoreUtils.DEFAULT_DATABASE_NAME;
+   }
+
+   if (tableName == null) {
+     throw new InvalidInputException("Table name is null.");
+   }
+
+   try {
+     openTransaction();
+     MTable mTable = getMTable(dbName, tableName);
+     MTableColumnStatistics mStatsObj;
+     List<MTableColumnStatistics> mStatsObjColl;
+
+     if (mTable == null) {
+       throw new
+         NoSuchObjectException("Table " + tableName +
+           " for which stats deletion is requested doesn't exist");
+     }
+
+     Query query = pm.newQuery(MTableColumnStatistics.class);
+     String filter;
+     String parameters;
+
+     if (colName != null) {
+       filter = "table.tableName == t1 && dbName == t2 && colName == t3";
+       parameters = "java.lang.String t1, java.lang.String t2, java.lang.String t3";
+     } else {
+       filter = "table.tableName == t1 && dbName == t2";
+       parameters = "java.lang.String t1, java.lang.String t2";
+     }
+
+     query.setFilter(filter);
+     query
+       .declareParameters(parameters);
+
+     if (colName != null) {
+       // Single-column delete: at most one row is expected.
+       query.setUnique(true);
+       mStatsObj = (MTableColumnStatistics)query.execute(tableName.trim(),
+         dbName.trim(), colName.trim());
+       pm.retrieve(mStatsObj);
+
+       if (mStatsObj != null) {
+         pm.deletePersistent(mStatsObj);
+       } else {
+         throw new NoSuchObjectException("Column stats doesn't exist for db=" +dbName + " table="
+           + tableName + " col=" + colName);
+       }
+     } else {
+       // Whole-table delete: remove every statistics row of the table.
+       mStatsObjColl= (List<MTableColumnStatistics>)query.execute(tableName.trim(), dbName.trim());
+       pm.retrieveAll(mStatsObjColl);
+
+       if (mStatsObjColl != null) {
+         pm.deletePersistentAll(mStatsObjColl);
+       } else {
+         throw new NoSuchObjectException("Column stats doesn't exist for db=" + dbName +
+           " table=" + tableName);
+       }
+     }
+     ret = commitTransaction();
+   } finally {
+     if (!ret) {
+       rollbackTransaction();
+     }
+   }
+   return ret;
+ }
+
@Override
public long cleanupEvents() {
boolean commited = false;
@@ -4468,4 +5242,5 @@ public class ObjectStore implements RawS
}
return delCnt;
}
+
}
Modified: hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java
URL: http://svn.apache.org/viewvc/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java?rev=1406465&r1=1406464&r2=1406465&view=diff
==============================================================================
--- hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java (original)
+++ hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java Wed Nov 7 04:55:00 2012
@@ -22,8 +22,10 @@ import java.util.List;
import java.util.Map;
import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.metastore.api.Index;
+import org.apache.hadoop.hive.metastore.api.InvalidInputException;
import org.apache.hadoop.hive.metastore.api.InvalidObjectException;
import org.apache.hadoop.hive.metastore.api.InvalidPartitionException;
import org.apache.hadoop.hive.metastore.api.MetaException;
@@ -97,7 +99,7 @@ public interface RawStore extends Config
MetaException;
public abstract boolean dropTable(String dbName, String tableName)
- throws MetaException;
+ throws MetaException, NoSuchObjectException, InvalidObjectException, InvalidInputException;
public abstract Table getTable(String dbName, String tableName)
throws MetaException;
@@ -109,7 +111,8 @@ public interface RawStore extends Config
List<String> part_vals) throws MetaException, NoSuchObjectException;
public abstract boolean dropPartition(String dbName, String tableName,
- List<String> part_vals) throws MetaException;
+ List<String> part_vals) throws MetaException, NoSuchObjectException, InvalidObjectException,
+ InvalidInputException;
public abstract List<Partition> getPartitions(String dbName,
String tableName, int max) throws MetaException;
@@ -306,5 +309,116 @@ public interface RawStore extends Config
List<String> part_vals, short max_parts, String userName, List<String> groupNames)
throws MetaException, InvalidObjectException, NoSuchObjectException;
+ /**
+  * Persists the given column statistics object to the metastore.
+  *
+  * @param colStats column statistics object to persist
+  * @return Boolean indicating the outcome of the operation
+  * @throws NoSuchObjectException
+  * @throws MetaException
+  * @throws InvalidObjectException
+  * @throws InvalidInputException
+  */
+ public abstract boolean updateTableColumnStatistics(ColumnStatistics colStats)
+ throws NoSuchObjectException, MetaException, InvalidObjectException, InvalidInputException;
+
+ /**
+  * Persists the given column statistics object to the metastore.
+  *
+  * @param statsObj column statistics object to persist
+  * @param partVals list of partVals
+  * @return Boolean indicating the outcome of the operation
+  * @throws NoSuchObjectException
+  * @throws MetaException
+  * @throws InvalidObjectException
+  * @throws InvalidInputException
+  */
+ public abstract boolean updatePartitionColumnStatistics(ColumnStatistics statsObj,
+ List<String> partVals)
+ throws NoSuchObjectException, MetaException, InvalidObjectException, InvalidInputException;
+
+ /**
+  * Returns the relevant column statistics for a given column in a given table in a given database
+  * if such statistics exist.
+  *
+  * @param dbName The name of the database, defaults to current database
+  * @param tableName The name of the table
+  * @param colName The name of the column for which statistics is requested
+  * @return Relevant column statistics for the column for the given table
+  * @throws NoSuchObjectException
+  * @throws MetaException
+  * @throws InvalidInputException
+  * @throws InvalidObjectException
+  *
+  */
+ public abstract ColumnStatistics getTableColumnStatistics(String dbName, String tableName,
+ String colName) throws MetaException, NoSuchObjectException, InvalidInputException,
+ InvalidObjectException;
+
+ /**
+  * Returns the relevant column statistics for a given column in a given partition in a given
+  * table in a given database if such statistics exist.
+  *
+  * @param dbName The name of the database, defaults to current database
+  * @param tableName The name of the table
+  * @param partName The name of the partition
+  * @param partVals List of partVals for the partition
+  * @param colName The name of the column for which statistics is requested
+  * @return Relevant column statistics for the column for the given partition in a given table
+  * @throws NoSuchObjectException
+  * @throws MetaException
+  * @throws InvalidInputException
+  * @throws InvalidObjectException
+  *
+  */
+
+ public abstract ColumnStatistics getPartitionColumnStatistics(String dbName, String tableName,
+ String partName, List<String> partVals, String colName)
+ throws MetaException, NoSuchObjectException, InvalidInputException, InvalidObjectException;
+
+ /**
+  * Deletes column statistics if present associated with a given db, table, partition and col. If
+  * null is passed instead of a colName, stats when present for all columns associated
+  * with a given db, table and partition are deleted.
+  *
+  * @param dbName name of the database
+  * @param tableName name of the table
+  * @param partName name of the partition
+  * @param partVals list of partVals for the partition
+  * @param colName name of the column whose stats are deleted, or null to delete the stats of
+  *          all columns of the partition
+  * @return Boolean indicating the outcome of the operation
+  * @throws NoSuchObjectException
+  * @throws MetaException
+  * @throws InvalidObjectException
+  * @throws InvalidInputException
+  */
+
+ public abstract boolean deletePartitionColumnStatistics(String dbName, String tableName,
+ String partName, List<String> partVals, String colName)
+ throws NoSuchObjectException, MetaException, InvalidObjectException, InvalidInputException;
+
+ /**
+  * Deletes column statistics if present associated with a given db, table and col. If
+  * null is passed instead of a colName, stats when present for all columns associated
+  * with a given db and table are deleted.
+  *
+  * @param dbName name of the database
+  * @param tableName name of the table
+  * @param colName name of the column whose stats are deleted, or null to delete the stats of
+  *          all columns of the table
+  * @return Boolean indicating the outcome of the operation
+  * @throws NoSuchObjectException
+  * @throws MetaException
+  * @throws InvalidObjectException
+  * @throws InvalidInputException
+  */
+
+ public abstract boolean deleteTableColumnStatistics(String dbName, String tableName,
+ String colName)
+ throws NoSuchObjectException, MetaException, InvalidObjectException, InvalidInputException;
+
public abstract long cleanupEvents();
+
+
+
}
Modified: hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java
URL: http://svn.apache.org/viewvc/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java?rev=1406465&r1=1406464&r2=1406465&view=diff
==============================================================================
--- hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java (original)
+++ hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java Wed Nov 7 04:55:00 2012
@@ -376,6 +376,38 @@ public class Warehouse {
}
}
+ /**
+  * Builds an ordered partition spec from a partition name by walking the name as a path.
+  * Each path component that matches {@code pat} contributes one entry: group 1 is the key
+  * and group 2 is the value. The matched text is stored as-is; no unescaping is applied
+  * in this method. Components that do not match are ignored.
+  *
+  * @param name partition name; must be neither null nor empty
+  * @return map of partition keys to values, ordered from the outermost path component to
+  *         the leaf
+  * @throws MetaException if {@code name} is null or empty
+  */
+ public static Map<String, String> makeEscSpecFromName(String name) throws MetaException {
+   if (name == null || name.isEmpty()) {
+     throw new MetaException("Partition name is invalid. " + name);
+   }
+
+   // Collect key/value pairs while climbing from the leaf component towards the root.
+   List<String[]> leafToRoot = new ArrayList<String[]>();
+   Path cursor = new Path(name);
+   do {
+     Matcher keyValue = pat.matcher(cursor.getName());
+     if (keyValue.matches()) {
+       leafToRoot.add(new String[] {keyValue.group(1), keyValue.group(2)});
+     }
+     cursor = cursor.getParent();
+   } while (cursor != null && !cursor.getName().isEmpty());
+
+   // The pairs were gathered leaf-first, so insert them back-to-front to restore path order.
+   Map<String, String> escapedSpec = new LinkedHashMap<String, String>();
+   for (int idx = leafToRoot.size() - 1; idx >= 0; idx--) {
+     String[] pair = leafToRoot.get(idx);
+     escapedSpec.put(pair[0], pair[1]);
+   }
+
+   return escapedSpec;
+ }
+
public Path getPartitionPath(Database db, String tableName,
LinkedHashMap<String, String> pm) throws MetaException {
return new Path(getTablePath(db, tableName), makePartPath(pm));
Added: hive/trunk/metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java
URL: http://svn.apache.org/viewvc/hive/trunk/metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java?rev=1406465&view=auto
==============================================================================
--- hive/trunk/metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java (added)
+++ hive/trunk/metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java Wed Nov 7 04:55:00 2012
@@ -0,0 +1,232 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ *
+ */
+package org.apache.hadoop.hive.metastore.model;
+
+import java.nio.ByteBuffer;
+
+
+/**
+ *
+ * MPartitionColumnStatistics - Represents Hive's partiton level Column Statistics Description.
+ * The fields in this class with the exception of partition are persisted in the metastore.
+ * In case of partition, part_id is persisted in its place.
+ *
+ */
+public class MPartitionColumnStatistics {
+
+ private MPartition partition;
+
+ private String dbName;
+ private String tableName;
+ private String partitionName;
+ private String colName;
+ private String colType;
+
+ private byte[] lowValue;
+ private byte[] highValue;
+ private long numNulls;
+ private long numDVs;
+ private double avgColLen;
+ private long maxColLen;
+ private long numTrues;
+ private long numFalses;
+ private long lastAnalyzed;
+
+ public MPartitionColumnStatistics() {}
+
+ public String getTableName() {
+ return tableName;
+ }
+
+ public void setTableName(String tableName) {
+ this.tableName = tableName;
+ }
+
+ public String getColName() {
+ return colName;
+ }
+
+ public void setColName(String colName) {
+ this.colName = colName;
+ }
+
+ public byte[] getLowValue() {
+ return lowValue;
+ }
+
+ public long getLowValueAsLong() {
+ ByteBuffer byteBuf = ByteBuffer.wrap(lowValue);
+ return byteBuf.getLong();
+ }
+
+ public double getLowValueAsDouble() {
+ ByteBuffer byteBuf = ByteBuffer.wrap(lowValue);
+ return byteBuf.getDouble();
+ }
+
+ public byte[] getHighValue() {
+ return highValue;
+ }
+
+ public long getHighValueAsLong() {
+ ByteBuffer byteBuf = ByteBuffer.wrap(highValue);
+ return byteBuf.getLong();
+ }
+
+ public double getHighValueAsDouble() {
+ ByteBuffer byteBuf = ByteBuffer.wrap(highValue);
+ return byteBuf.getDouble();
+ }
+
+ public void setHighValue(byte[] b) {
+ this.highValue = b;
+ }
+
+ public void setLowValue(byte[] b) {
+ this.lowValue = b;
+ }
+
+ public long getNumNulls() {
+ return numNulls;
+ }
+
+ public void setNumNulls(long numNulls) {
+ this.numNulls = numNulls;
+ }
+
+ public long getNumDVs() {
+ return numDVs;
+ }
+
+ public void setNumDVs(long numDVs) {
+ this.numDVs = numDVs;
+ }
+
+ public double getAvgColLen() {
+ return avgColLen;
+ }
+
+ public void setAvgColLen(double avgColLen) {
+ this.avgColLen = avgColLen;
+ }
+
+ public long getMaxColLen() {
+ return maxColLen;
+ }
+
+ public void setMaxColLen(long maxColLen) {
+ this.maxColLen = maxColLen;
+ }
+
+ public long getNumTrues() {
+ return numTrues;
+ }
+
+ public void setNumTrues(long numTrues) {
+ this.numTrues = numTrues;
+ }
+
+ public long getNumFalses() {
+ return numFalses;
+ }
+
+ public void setNumFalses(long numFalses) {
+ this.numFalses = numFalses;
+ }
+
+ public long getLastAnalyzed() {
+ return lastAnalyzed;
+ }
+
+ public void setLastAnalyzed(long lastAnalyzed) {
+ this.lastAnalyzed = lastAnalyzed;
+ }
+
+ public String getDbName() {
+ return dbName;
+ }
+
+ public void setDbName(String dbName) {
+ this.dbName = dbName;
+ }
+
+ public MPartition getPartition() {
+ return partition;
+ }
+
+ public void setPartition(MPartition partition) {
+ this.partition = partition;
+ }
+
+ public String getPartitionName() {
+ return partitionName;
+ }
+
+ public void setPartitionName(String partitionName) {
+ this.partitionName = partitionName;
+ }
+
+ public String getColType() {
+ return colType;
+ }
+
+ public void setColType(String colType) {
+ this.colType = colType;
+ }
+
+ public void setBooleanStats(long numTrues, long numFalses, long numNulls) {
+ this.numTrues = numTrues;
+ this.numFalses = numFalses;
+ this.numNulls = numNulls;
+ }
+
+ public void setLongStats(long numNulls, long numNDVs, long lowValue, long highValue) {
+ this.numNulls = numNulls;
+ this.numDVs = numNDVs;
+ byte[] bytes = ByteBuffer.allocate(Long.SIZE/8).putLong(lowValue).array();
+ this.lowValue = bytes;
+ bytes = ByteBuffer.allocate(Long.SIZE/8).putLong(highValue).array();
+ this.highValue = bytes;
+ }
+
+ public void setDoubleStats(long numNulls, long numNDVs, double lowValue, double highValue) {
+ this.numNulls = numNulls;
+ this.numDVs = numNDVs;
+ byte[] bytes = ByteBuffer.allocate(Double.SIZE/8).putDouble(lowValue).array();
+ this.lowValue = bytes;
+ bytes = ByteBuffer.allocate(Double.SIZE/8).putDouble(highValue).array();
+ this.highValue = bytes;
+ }
+
+ public void setStringStats(long numNulls, long numNDVs, long maxColLen, double avgColLen) {
+ this.numNulls = numNulls;
+ this.numDVs = numNDVs;
+ this.maxColLen = maxColLen;
+ this.avgColLen = avgColLen;
+ }
+
+ public void setBinaryStats(long numNulls, long maxColLen, double avgColLen) {
+ this.numNulls = numNulls;
+ this.maxColLen = maxColLen;
+ this.avgColLen = avgColLen;
+ }
+}
Propchange: hive/trunk/metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: hive/trunk/metastore/src/model/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java
URL: http://svn.apache.org/viewvc/hive/trunk/metastore/src/model/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java?rev=1406465&view=auto
==============================================================================
--- hive/trunk/metastore/src/model/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java (added)
+++ hive/trunk/metastore/src/model/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java Wed Nov 7 04:55:00 2012
@@ -0,0 +1,223 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ *
+ */
+package org.apache.hadoop.hive.metastore.model;
+
+import java.nio.ByteBuffer;
+
+
+/**
+ *
+ * MTableColumnStatistics - Represents Hive's Column Statistics Description. The fields in this
+ * class with the exception of table are persisted in the metastore. In case of table, tbl_id is
+ * persisted in its place.
+ *
+ */
+public class MTableColumnStatistics {
+
+ private MTable table;
+ private String dbName;
+ private String tableName;
+ private String colName;
+ private String colType;
+
+ private byte[] lowValue;
+ private byte[] highValue;
+ private long numNulls;
+ private long numDVs;
+ private double avgColLen;
+ private long maxColLen;
+ private long numTrues;
+ private long numFalses;
+ private long lastAnalyzed;
+
+ public MTableColumnStatistics() {}
+
+ public MTable getTable() {
+ return table;
+ }
+
+ public void setTable(MTable table) {
+ this.table = table;
+ }
+
+ public String getTableName() {
+ return tableName;
+ }
+
+ public void setTableName(String tableName) {
+ this.tableName = tableName;
+ }
+
+ public String getColName() {
+ return colName;
+ }
+
+ public void setColName(String colName) {
+ this.colName = colName;
+ }
+
+ public String getColType() {
+ return colType;
+ }
+
+ public void setColType(String colType) {
+ this.colType = colType;
+ }
+
+ public byte[] getLowValue() {
+ return lowValue;
+ }
+
+ public long getLowValueAsLong() {
+ ByteBuffer byteBuf = ByteBuffer.wrap(lowValue);
+ return byteBuf.getLong();
+ }
+
+ public double getLowValueAsDouble() {
+ ByteBuffer byteBuf = ByteBuffer.wrap(lowValue);
+ return byteBuf.getDouble();
+ }
+
+ public byte[] getHighValue() {
+ return highValue;
+ }
+
+ public long getHighValueAsLong() {
+ ByteBuffer byteBuf = ByteBuffer.wrap(highValue);
+ return byteBuf.getLong();
+ }
+
+ public double getHighValueAsDouble() {
+ ByteBuffer byteBuf = ByteBuffer.wrap(highValue);
+ return byteBuf.getDouble();
+ }
+
+ public void setHighValue(byte[] b) {
+ this.highValue = b;
+ }
+
+ public void setLowValue(byte[] b) {
+ this.lowValue = b;
+ }
+
+ public long getNumNulls() {
+ return numNulls;
+ }
+
+
+ public void setNumNulls(long numNulls) {
+ this.numNulls = numNulls;
+ }
+
+ public long getNumDVs() {
+ return numDVs;
+ }
+
+ public void setNumDVs(long numDVs) {
+ this.numDVs = numDVs;
+ }
+
+ public double getAvgColLen() {
+ return avgColLen;
+ }
+
+ public void setAvgColLen(double avgColLen) {
+ this.avgColLen = avgColLen;
+ }
+
+ public long getMaxColLen() {
+ return maxColLen;
+ }
+
+ public void setMaxColLen(long maxColLen) {
+ this.maxColLen = maxColLen;
+ }
+
+ public long getNumTrues() {
+ return numTrues;
+ }
+
+ public void setNumTrues(long numTrues) {
+ this.numTrues = numTrues;
+ }
+
+ public long getNumFalses() {
+ return numFalses;
+ }
+
+ public void setNumFalses(long numFalses) {
+ this.numFalses = numFalses;
+ }
+
+ public long getLastAnalyzed() {
+ return lastAnalyzed;
+ }
+
+ public void setLastAnalyzed(long lastAnalyzed) {
+ this.lastAnalyzed = lastAnalyzed;
+ }
+
+ public String getDbName() {
+ return dbName;
+ }
+
+ public void setDbName(String dbName) {
+ this.dbName = dbName;
+ }
+
+ public void setBooleanStats(long numTrues, long numFalses, long numNulls) {
+ this.numTrues = numTrues;
+ this.numFalses = numFalses;
+ this.numNulls = numNulls;
+ }
+
+ public void setLongStats(long numNulls, long numNDVs, long lowValue, long highValue) {
+ this.numNulls = numNulls;
+ this.numDVs = numNDVs;
+ byte[] bytes = ByteBuffer.allocate(Long.SIZE/8).putLong(lowValue).array();
+ this.lowValue = bytes;
+ bytes = ByteBuffer.allocate(Long.SIZE/8).putLong(highValue).array();
+ this.highValue = bytes;
+ }
+
+ public void setDoubleStats(long numNulls, long numNDVs, double lowValue, double highValue) {
+ this.numNulls = numNulls;
+ this.numDVs = numNDVs;
+ byte[] bytes = ByteBuffer.allocate(Double.SIZE/8).putDouble(lowValue).array();
+ this.lowValue = bytes;
+ bytes = ByteBuffer.allocate(Double.SIZE/8).putDouble(highValue).array();
+ this.highValue = bytes;
+ }
+
+ public void setStringStats(long numNulls, long numNDVs, long maxColLen, double avgColLen) {
+ this.numNulls = numNulls;
+ this.numDVs = numNDVs;
+ this.maxColLen = maxColLen;
+ this.avgColLen = avgColLen;
+ }
+
+ public void setBinaryStats(long numNulls, long maxColLen, double avgColLen) {
+ this.numNulls = numNulls;
+ this.maxColLen = maxColLen;
+ this.avgColLen = avgColLen;
+ }
+}
Propchange: hive/trunk/metastore/src/model/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: hive/trunk/metastore/src/model/package.jdo
URL: http://svn.apache.org/viewvc/hive/trunk/metastore/src/model/package.jdo?rev=1406465&r1=1406464&r2=1406465&view=diff
==============================================================================
--- hive/trunk/metastore/src/model/package.jdo (original)
+++ hive/trunk/metastore/src/model/package.jdo Wed Nov 7 04:55:00 2012
@@ -751,5 +751,104 @@
</field>
</class>
+
+ <!-- Maps MTableColumnStatistics to the TAB_COL_STATS table. Rows use datastore identity in
+      column CS_ID; the "table" field is stored in column TBL_ID. Only the name/type columns,
+      NUM_NULLS and LAST_ANALYZED are declared non-nullable. -->
+ <class name="MTableColumnStatistics" table="TAB_COL_STATS" identity-type="datastore" detachable="true">
+ <datastore-identity>
+ <column name="CS_ID"/>
+ </datastore-identity>
+
+ <field name ="dbName">
+ <column name="DB_NAME" length="128" jdbc-type="VARCHAR" allows-null="false"/>
+ </field>
+ <field name="tableName">
+ <column name="TABLE_NAME" length="128" jdbc-type="VARCHAR" allows-null="false"/>
+ </field>
+ <field name="table">
+ <column name="TBL_ID"/>
+ </field>
+ <field name="colName">
+ <column name="COLUMN_NAME" length="128" jdbc-type="VARCHAR" allows-null="false"/>
+ </field>
+ <field name="colType">
+ <column name="COLUMN_TYPE" length="128" jdbc-type="VARCHAR" allows-null="false"/>
+ </field>
+ <field name="lowValue">
+ <column name="LOW_VALUE" jdbc-type="LONGVARBINARY" allows-null="true"/>
+ </field>
+ <field name="highValue">
+ <column name="HIGH_VALUE" jdbc-type="LONGVARBINARY" allows-null="true"/>
+ </field>
+ <field name="numNulls">
+ <column name="NUM_NULLS" jdbc-type="BIGINT" allows-null="false"/>
+ </field>
+ <field name="numDVs">
+ <column name="NUM_DISTINCTS" jdbc-type="BIGINT" allows-null="true"/>
+ </field>
+ <field name="avgColLen">
+ <column name="AVG_COL_LEN" jdbc-type="DOUBLE" allows-null="true"/>
+ </field>
+ <field name="maxColLen">
+ <column name="MAX_COL_LEN" jdbc-type="BIGINT" allows-null="true"/>
+ </field>
+ <field name="numTrues">
+ <column name="NUM_TRUES" jdbc-type="BIGINT" allows-null="true"/>
+ </field>
+ <field name="numFalses">
+ <column name="NUM_FALSES" jdbc-type="BIGINT" allows-null="true"/>
+ </field>
+ <field name="lastAnalyzed">
+ <column name="LAST_ANALYZED" jdbc-type="BIGINT" allows-null="false"/>
+ </field>
+ </class>
+
+ <!-- Maps MPartitionColumnStatistics to the PART_COL_STATS table. Rows use datastore identity
+      in column CS_ID; the "partition" field is stored in column PART_ID. Only the name/type
+      columns, NUM_NULLS and LAST_ANALYZED are declared non-nullable. -->
+ <class name="MPartitionColumnStatistics" table="PART_COL_STATS" identity-type="datastore" detachable="true">
+ <datastore-identity>
+ <column name="CS_ID"/>
+ </datastore-identity>
+
+ <field name ="dbName">
+ <column name="DB_NAME" length="128" jdbc-type="VARCHAR" allows-null="false"/>
+ </field>
+ <field name="tableName">
+ <column name="TABLE_NAME" length="128" jdbc-type="VARCHAR" allows-null="false"/>
+ </field>
+ <field name="partition">
+ <column name="PART_ID"/>
+ </field>
+ <field name="colName">
+ <column name="COLUMN_NAME" length="128" jdbc-type="VARCHAR" allows-null="false"/>
+ </field>
+ <field name="colType">
+ <column name="COLUMN_TYPE" length="128" jdbc-type="VARCHAR" allows-null="false"/>
+ </field>
+ <field name="lowValue">
+ <column name="LOW_VALUE" jdbc-type="LONGVARBINARY" allows-null="true"/>
+ </field>
+ <field name="highValue">
+ <column name="HIGH_VALUE" jdbc-type="LONGVARBINARY" allows-null="true"/>
+ </field>
+ <field name="numNulls">
+ <column name="NUM_NULLS" jdbc-type="BIGINT" allows-null="false"/>
+ </field>
+ <field name="numDVs">
+ <column name="NUM_DISTINCTS" jdbc-type="BIGINT" allows-null="true"/>
+ </field>
+ <field name="avgColLen">
+ <column name="AVG_COL_LEN" jdbc-type="DOUBLE" allows-null="true"/>
+ </field>
+ <field name="maxColLen">
+ <column name="MAX_COL_LEN" jdbc-type="BIGINT" allows-null="true"/>
+ </field>
+ <field name="numTrues">
+ <column name="NUM_TRUES" jdbc-type="BIGINT" allows-null="true"/>
+ </field>
+ <field name="numFalses">
+ <column name="NUM_FALSES" jdbc-type="BIGINT" allows-null="true"/>
+ </field>
+ <field name="lastAnalyzed">
+ <column name="LAST_ANALYZED" jdbc-type="BIGINT" allows-null="false"/>
+ </field>
+ </class>
+
</package>
</jdo>
Modified: hive/trunk/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java
URL: http://svn.apache.org/viewvc/hive/trunk/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java?rev=1406465&r1=1406464&r2=1406465&view=diff
==============================================================================
--- hive/trunk/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java (original)
+++ hive/trunk/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java Wed Nov 7 04:55:00 2012
@@ -25,8 +25,10 @@ import junit.framework.Assert;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.metastore.api.Index;
+import org.apache.hadoop.hive.metastore.api.InvalidInputException;
import org.apache.hadoop.hive.metastore.api.InvalidObjectException;
import org.apache.hadoop.hive.metastore.api.InvalidPartitionException;
import org.apache.hadoop.hive.metastore.api.MetaException;
@@ -512,4 +514,46 @@ public class DummyRawStoreForJdoConnecti
return 0;
}
+ /** Dummy implementation: ignores its arguments and always returns null. */
+ @Override
+ public ColumnStatistics getTableColumnStatistics(String dbName, String tableName, String colName)
+ throws MetaException, NoSuchObjectException {
+ return null;
+ }
+
+
+ /** Dummy implementation: ignores its arguments, deletes nothing and always returns false. */
+ @Override
+ public boolean deleteTableColumnStatistics(String dbName, String tableName,
+ String colName)
+ throws NoSuchObjectException, MetaException, InvalidObjectException {
+ return false;
+ }
+
+
+ /** Dummy implementation: ignores its arguments, deletes nothing and always returns false. */
+ @Override
+ public boolean deletePartitionColumnStatistics(String dbName, String tableName,
+     String partName, List<String> partVals, String colName)
+     throws NoSuchObjectException, MetaException, InvalidObjectException,
+     InvalidInputException {
+   return false;
+ }
+
+ /** Dummy implementation: ignores its arguments and always returns null. */
+ @Override
+ public ColumnStatistics getPartitionColumnStatistics(String dbName, String tableName,
+ String partName, List<String> partVal, String colName) throws MetaException,
+ NoSuchObjectException, InvalidInputException, InvalidObjectException {
+ return null;
+ }
+
+ /** Dummy implementation: ignores its argument, persists nothing and always returns false. */
+ @Override
+ public boolean updateTableColumnStatistics(ColumnStatistics statsObj)
+ throws NoSuchObjectException, MetaException, InvalidObjectException {
+ return false;
+ }
+
+ /** Dummy implementation: ignores its arguments, persists nothing and always returns false. */
+ @Override
+ public boolean updatePartitionColumnStatistics(ColumnStatistics statsObj,List<String> partVals)
+     throws NoSuchObjectException, MetaException, InvalidObjectException {
+   return false;
+ }
}
+
+
Modified: hive/trunk/metastore/src/test/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
URL: http://svn.apache.org/viewvc/hive/trunk/metastore/src/test/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java?rev=1406465&r1=1406464&r2=1406465&view=diff
==============================================================================
--- hive/trunk/metastore/src/test/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java (original)
+++ hive/trunk/metastore/src/test/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java Wed Nov 7 04:55:00 2012
@@ -37,8 +37,13 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
+import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.api.ConfigValSecurityException;
import org.apache.hadoop.hive.metastore.api.Database;
+import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.InvalidObjectException;
import org.apache.hadoop.hive.metastore.api.InvalidOperationException;
@@ -49,6 +54,7 @@ import org.apache.hadoop.hive.metastore.
import org.apache.hadoop.hive.metastore.api.SerDeInfo;
import org.apache.hadoop.hive.metastore.api.SkewedInfo;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
+import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.metastore.api.Type;
import org.apache.hadoop.hive.metastore.api.UnknownDBException;
@@ -1228,6 +1234,187 @@ public abstract class TestHiveMetaStore
}
}
+ /**
+ * Round-trips column statistics through the metastore client at both table level and
+ * partition level: writes stats for a double column ("income") and a string column ("name"),
+ * reads them back, compares them field by field, and then exercises deletion.
+ *
+ * All assertions use the (expected, actual) argument order so failure messages read correctly.
+ */
+ public void testColumnStatistics() throws Throwable {
+
+ String dbName = "columnstatstestdb";
+ String tblName = "tbl";
+ String typeName = "Person";
+ String tblOwner = "testowner";
+ int lastAccessed = 6796;
+
+ try {
+ cleanUp(dbName, tblName, typeName);
+ Database db = new Database();
+ db.setName(dbName);
+ client.createDatabase(db);
+ createTableForTestFilter(dbName,tblName, tblOwner, lastAccessed, true);
+
+ // Create a ColumnStatistics Obj
+ String[] colName = new String[]{"income", "name"};
+ double lowValue = 50000.21;
+ double highValue = 1200000.4525;
+ long numNulls = 3;
+ long numDVs = 22;
+ double avgColLen = 50.30;
+ long maxColLen = 102;
+ String[] colType = new String[] {"double", "string"};
+ boolean isTblLevel = true;
+ String partName = null;
+ List<ColumnStatisticsObj> statsObjs = new ArrayList<ColumnStatisticsObj>();
+
+ ColumnStatisticsDesc statsDesc = new ColumnStatisticsDesc();
+ statsDesc.setDbName(dbName);
+ statsDesc.setTableName(tblName);
+ statsDesc.setIsTblLevel(isTblLevel);
+ statsDesc.setPartName(partName);
+
+ // stats for the double column
+ ColumnStatisticsObj statsObj = new ColumnStatisticsObj();
+ statsObj.setColName(colName[0]);
+ statsObj.setColType(colType[0]);
+
+ ColumnStatisticsData statsData = new ColumnStatisticsData();
+ DoubleColumnStatsData numericStats = new DoubleColumnStatsData();
+ statsData.setDoubleStats(numericStats);
+
+ statsData.getDoubleStats().setHighValue(highValue);
+ statsData.getDoubleStats().setLowValue(lowValue);
+ statsData.getDoubleStats().setNumDVs(numDVs);
+ statsData.getDoubleStats().setNumNulls(numNulls);
+
+ statsObj.setStatsData(statsData);
+ statsObjs.add(statsObj);
+
+ // stats for the string column
+ statsObj = new ColumnStatisticsObj();
+ statsObj.setColName(colName[1]);
+ statsObj.setColType(colType[1]);
+
+ statsData = new ColumnStatisticsData();
+ StringColumnStatsData stringStats = new StringColumnStatsData();
+ statsData.setStringStats(stringStats);
+ statsData.getStringStats().setAvgColLen(avgColLen);
+ statsData.getStringStats().setMaxColLen(maxColLen);
+ statsData.getStringStats().setNumDVs(numDVs);
+ statsData.getStringStats().setNumNulls(numNulls);
+
+ statsObj.setStatsData(statsData);
+ statsObjs.add(statsObj);
+
+ ColumnStatistics colStats = new ColumnStatistics();
+ colStats.setStatsDesc(statsDesc);
+ colStats.setStatsObj(statsObjs);
+
+ // write stats objs persistently
+ client.updateTableColumnStatistics(colStats);
+
+ // retrieve the stats obj that was just written
+ ColumnStatistics colStats2 = client.getTableColumnStatistics(dbName, tblName, colName[0]);
+
+ // compare stats obj to ensure what we get is what we wrote
+ assertNotNull(colStats2);
+ assertEquals(dbName, colStats2.getStatsDesc().getDbName());
+ assertEquals(tblName, colStats2.getStatsDesc().getTableName());
+ assertEquals(colName[0], colStats2.getStatsObj().get(0).getColName());
+ assertEquals(lowValue,
+ colStats2.getStatsObj().get(0).getStatsData().getDoubleStats().getLowValue());
+ assertEquals(highValue,
+ colStats2.getStatsObj().get(0).getStatsData().getDoubleStats().getHighValue());
+ assertEquals(numNulls,
+ colStats2.getStatsObj().get(0).getStatsData().getDoubleStats().getNumNulls());
+ assertEquals(numDVs,
+ colStats2.getStatsObj().get(0).getStatsData().getDoubleStats().getNumDVs());
+ assertEquals(isTblLevel, colStats2.getStatsDesc().isIsTblLevel());
+
+ // test delete column stats; if no col name is passed all column stats associated with the
+ // table is deleted
+ boolean status = client.deleteTableColumnStatistics(dbName, tblName, null);
+ assertTrue(status);
+ // try to query stats for a column for which stats doesn't exist
+ // NOTE(review): assertTrue(true) is a no-op, so this check passes whether or not
+ // NoSuchObjectException is thrown. If the metastore is guaranteed to throw for missing
+ // stats, replace it with fail(...) so a surviving stats row is detected - confirm first.
+ try {
+ colStats2 = client.getTableColumnStatistics(dbName, tblName, colName[1]);
+ assertTrue(true);
+ } catch (NoSuchObjectException e) {
+ System.out.println("Statistics for column=" + colName[1] + " not found");
+ }
+
+ colStats.setStatsDesc(statsDesc);
+ colStats.setStatsObj(statsObjs);
+
+ // update table level column stats
+ client.updateTableColumnStatistics(colStats);
+
+ // query column stats for column whose stats were updated in the previous call
+ colStats2 = client.getTableColumnStatistics(dbName, tblName, colName[0]);
+
+ // partition level column statistics test
+ // create a table with multiple partitions
+ cleanUp(dbName, tblName, typeName);
+
+ List<List<String>> values = new ArrayList<List<String>>();
+ values.add(makeVals("2008-07-01 14:13:12", "14"));
+ values.add(makeVals("2008-07-01 14:13:12", "15"));
+ values.add(makeVals("2008-07-02 14:13:12", "15"));
+ values.add(makeVals("2008-07-03 14:13:12", "151"));
+
+ createMultiPartitionTableSchema(dbName, tblName, typeName, values);
+
+ List<String> partitions = client.listPartitionNames(dbName, tblName, (short)-1);
+
+ partName = partitions.get(0);
+ isTblLevel = false;
+
+ // create a new columnstatistics desc to represent partition level column stats
+ statsDesc = new ColumnStatisticsDesc();
+ statsDesc.setDbName(dbName);
+ statsDesc.setTableName(tblName);
+ statsDesc.setPartName(partName);
+ statsDesc.setIsTblLevel(isTblLevel);
+
+ colStats = new ColumnStatistics();
+ colStats.setStatsDesc(statsDesc);
+ colStats.setStatsObj(statsObjs);
+
+ client.updatePartitionColumnStatistics(colStats);
+
+ colStats2 = client.getPartitionColumnStatistics(dbName, tblName, partName, colName[1]);
+
+ // compare stats obj to ensure what we get is what we wrote
+ assertNotNull(colStats2);
+ assertEquals(dbName, colStats2.getStatsDesc().getDbName());
+ assertEquals(tblName, colStats2.getStatsDesc().getTableName());
+ // check the partition name on the object read back (colStats2), not on the one we wrote
+ assertEquals(partName, colStats2.getStatsDesc().getPartName());
+ assertEquals(colName[1], colStats2.getStatsObj().get(0).getColName());
+ assertEquals(maxColLen,
+ colStats2.getStatsObj().get(0).getStatsData().getStringStats().getMaxColLen());
+ assertEquals(avgColLen,
+ colStats2.getStatsObj().get(0).getStatsData().getStringStats().getAvgColLen());
+ assertEquals(numNulls,
+ colStats2.getStatsObj().get(0).getStatsData().getStringStats().getNumNulls());
+ assertEquals(numDVs,
+ colStats2.getStatsObj().get(0).getStatsData().getStringStats().getNumDVs());
+ assertEquals(isTblLevel, colStats2.getStatsDesc().isIsTblLevel());
+
+ // test stats deletion at partition level
+ client.deletePartitionColumnStatistics(dbName, tblName, partName, colName[1]);
+
+ // stats for the other column must still be retrievable after the single-column delete
+ colStats2 = client.getPartitionColumnStatistics(dbName, tblName, partName, colName[0]);
+
+ // test get stats on a column for which stats doesn't exist
+ // NOTE(review): as above, assertTrue(true) cannot fail; consider fail(...) once the
+ // expected exception behaviour is confirmed.
+ try {
+ colStats2 = client.getPartitionColumnStatistics(dbName, tblName, partName, colName[1]);
+ assertTrue(true);
+ } catch (NoSuchObjectException e) {
+ System.out.println("Statistics for column=" + colName[1] + " not found");
+ }
+
+ } catch (Exception e) {
+ System.err.println(StringUtils.stringifyException(e));
+ System.err.println("testColumnStatistics() failed.");
+ throw e;
+ } finally {
+ cleanUp(dbName, tblName, typeName);
+ }
+ }
+
public void testAlterTable() throws Exception {
String dbName = "alterdb";
String invTblName = "alter-tbl";
Modified: hive/trunk/ql/build.xml
URL: http://svn.apache.org/viewvc/hive/trunk/ql/build.xml?rev=1406465&r1=1406464&r2=1406465&view=diff
==============================================================================
--- hive/trunk/ql/build.xml (original)
+++ hive/trunk/ql/build.xml Wed Nov 7 04:55:00 2012
@@ -214,6 +214,7 @@
<exclude name="META-INF/MANIFEST.MF"/>
</patternset>
</unzip>
+
<!-- jar jarfile="${build.dir}/hive_${name}.jar" basedir="${build.classes}" / -->
<jar jarfile="${build.dir}/hive-exec-${version}.jar">
<fileset dir="${build.dir.hive}/common/classes" includes="**/*.class"/>
Modified: hive/trunk/ql/if/queryplan.thrift
URL: http://svn.apache.org/viewvc/hive/trunk/ql/if/queryplan.thrift?rev=1406465&r1=1406464&r2=1406465&view=diff
==============================================================================
--- hive/trunk/ql/if/queryplan.thrift (original)
+++ hive/trunk/ql/if/queryplan.thrift Wed Nov 7 04:55:00 2012
@@ -91,6 +91,7 @@ enum StageType {
MOVE,
STATS,
DEPENDENCY_COLLECTION,
+ COLUMNSTATS,
}
struct Stage {
Modified: hive/trunk/ql/ivy.xml
URL: http://svn.apache.org/viewvc/hive/trunk/ql/ivy.xml?rev=1406465&r1=1406464&r2=1406465&view=diff
==============================================================================
--- hive/trunk/ql/ivy.xml (original)
+++ hive/trunk/ql/ivy.xml Wed Nov 7 04:55:00 2012
@@ -74,7 +74,7 @@
<dependency org="org.datanucleus" name="datanucleus-rdbms" rev="${datanucleus-rdbms.version}"
transitive="false"/>
<dependency org="javolution" name="javolution" rev="${javolution.version}"/>
-
+
<!-- Hack to get jobclient tests dependency in. -->
<dependency org="org.apache.hadoop" name="hadoop-mapreduce-client-jobclient" rev="${hadoop-0.23.version}"
conf="hadoop23.test->default" transitive="false">
Modified: hive/trunk/ql/src/gen/thrift/gen-cpp/queryplan_types.cpp
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/gen/thrift/gen-cpp/queryplan_types.cpp?rev=1406465&r1=1406464&r2=1406465&view=diff
==============================================================================
--- hive/trunk/ql/src/gen/thrift/gen-cpp/queryplan_types.cpp (original)
+++ hive/trunk/ql/src/gen/thrift/gen-cpp/queryplan_types.cpp Wed Nov 7 04:55:00 2012
@@ -92,7 +92,8 @@ int _kStageTypeValues[] = {
StageType::MAPREDLOCAL,
StageType::MOVE,
StageType::STATS,
- StageType::DEPENDENCY_COLLECTION
+ StageType::DEPENDENCY_COLLECTION,
+ StageType::COLUMNSTATS
};
const char* _kStageTypeNames[] = {
"CONDITIONAL",
@@ -105,9 +106,10 @@ const char* _kStageTypeNames[] = {
"MAPREDLOCAL",
"MOVE",
"STATS",
- "DEPENDENCY_COLLECTION"
+ "DEPENDENCY_COLLECTION",
+ "COLUMNSTATS"
};
-const std::map<int, const char*> _StageType_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(11, _kStageTypeValues, _kStageTypeNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL));
+const std::map<int, const char*> _StageType_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(12, _kStageTypeValues, _kStageTypeNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL));
const char* Adjacency::ascii_fingerprint = "BC4F8C394677A1003AA9F56ED26D8204";
const uint8_t Adjacency::binary_fingerprint[16] = {0xBC,0x4F,0x8C,0x39,0x46,0x77,0xA1,0x00,0x3A,0xA9,0xF5,0x6E,0xD2,0x6D,0x82,0x04};
Modified: hive/trunk/ql/src/gen/thrift/gen-cpp/queryplan_types.h
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/gen/thrift/gen-cpp/queryplan_types.h?rev=1406465&r1=1406464&r2=1406465&view=diff
==============================================================================
--- hive/trunk/ql/src/gen/thrift/gen-cpp/queryplan_types.h (original)
+++ hive/trunk/ql/src/gen/thrift/gen-cpp/queryplan_types.h Wed Nov 7 04:55:00 2012
@@ -80,7 +80,8 @@ struct StageType {
MAPREDLOCAL = 7,
MOVE = 8,
STATS = 9,
- DEPENDENCY_COLLECTION = 10
+ DEPENDENCY_COLLECTION = 10,
+ COLUMNSTATS = 11
};
};
Modified: hive/trunk/ql/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/ql/plan/api/StageType.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/ql/plan/api/StageType.java?rev=1406465&r1=1406464&r2=1406465&view=diff
==============================================================================
--- hive/trunk/ql/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/ql/plan/api/StageType.java (original)
+++ hive/trunk/ql/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/ql/plan/api/StageType.java Wed Nov 7 04:55:00 2012
@@ -21,7 +21,8 @@ public enum StageType implements org.apa
MAPREDLOCAL(7),
MOVE(8),
STATS(9),
- DEPENDENCY_COLLECTION(10);
+ DEPENDENCY_COLLECTION(10),
+ COLUMNSTATS(11);
private final int value;
@@ -64,6 +65,8 @@ public enum StageType implements org.apa
return STATS;
case 10:
return DEPENDENCY_COLLECTION;
+ case 11:
+ return COLUMNSTATS;
default:
return null;
}
Modified: hive/trunk/ql/src/gen/thrift/gen-php/queryplan/queryplan_types.php
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/gen/thrift/gen-php/queryplan/queryplan_types.php?rev=1406465&r1=1406464&r2=1406465&view=diff
==============================================================================
--- hive/trunk/ql/src/gen/thrift/gen-php/queryplan/queryplan_types.php (original)
+++ hive/trunk/ql/src/gen/thrift/gen-php/queryplan/queryplan_types.php Wed Nov 7 04:55:00 2012
@@ -126,6 +126,7 @@ $GLOBALS['E_StageType'] = array(
'MOVE' => 8,
'STATS' => 9,
'DEPENDENCY_COLLECTION' => 10,
+ 'COLUMNSTATS' => 11,
);
final class StageType {
@@ -140,6 +141,7 @@ final class StageType {
const MOVE = 8;
const STATS = 9;
const DEPENDENCY_COLLECTION = 10;
+ const COLUMNSTATS = 11;
static public $__names = array(
0 => 'CONDITIONAL',
1 => 'COPY',
@@ -152,6 +154,7 @@ final class StageType {
8 => 'MOVE',
9 => 'STATS',
10 => 'DEPENDENCY_COLLECTION',
+ 11 => 'COLUMNSTATS',
);
}
Modified: hive/trunk/ql/src/gen/thrift/gen-py/queryplan/ttypes.py
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/gen/thrift/gen-py/queryplan/ttypes.py?rev=1406465&r1=1406464&r2=1406465&view=diff
==============================================================================
--- hive/trunk/ql/src/gen/thrift/gen-py/queryplan/ttypes.py (original)
+++ hive/trunk/ql/src/gen/thrift/gen-py/queryplan/ttypes.py Wed Nov 7 04:55:00 2012
@@ -133,6 +133,7 @@ class StageType:
MOVE = 8
STATS = 9
DEPENDENCY_COLLECTION = 10
+ COLUMNSTATS = 11
_VALUES_TO_NAMES = {
0: "CONDITIONAL",
@@ -146,6 +147,7 @@ class StageType:
8: "MOVE",
9: "STATS",
10: "DEPENDENCY_COLLECTION",
+ 11: "COLUMNSTATS",
}
_NAMES_TO_VALUES = {
@@ -160,6 +162,7 @@ class StageType:
"MOVE": 8,
"STATS": 9,
"DEPENDENCY_COLLECTION": 10,
+ "COLUMNSTATS": 11,
}
Modified: hive/trunk/ql/src/gen/thrift/gen-rb/queryplan_types.rb
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/gen/thrift/gen-rb/queryplan_types.rb?rev=1406465&r1=1406464&r2=1406465&view=diff
==============================================================================
--- hive/trunk/ql/src/gen/thrift/gen-rb/queryplan_types.rb (original)
+++ hive/trunk/ql/src/gen/thrift/gen-rb/queryplan_types.rb Wed Nov 7 04:55:00 2012
@@ -62,8 +62,9 @@ module StageType
MOVE = 8
STATS = 9
DEPENDENCY_COLLECTION = 10
- VALUE_MAP = {0 => "CONDITIONAL", 1 => "COPY", 2 => "DDL", 3 => "MAPRED", 4 => "EXPLAIN", 5 => "FETCH", 6 => "FUNC", 7 => "MAPREDLOCAL", 8 => "MOVE", 9 => "STATS", 10 => "DEPENDENCY_COLLECTION"}
- VALID_VALUES = Set.new([CONDITIONAL, COPY, DDL, MAPRED, EXPLAIN, FETCH, FUNC, MAPREDLOCAL, MOVE, STATS, DEPENDENCY_COLLECTION]).freeze
+ COLUMNSTATS = 11
+ VALUE_MAP = {0 => "CONDITIONAL", 1 => "COPY", 2 => "DDL", 3 => "MAPRED", 4 => "EXPLAIN", 5 => "FETCH", 6 => "FUNC", 7 => "MAPREDLOCAL", 8 => "MOVE", 9 => "STATS", 10 => "DEPENDENCY_COLLECTION", 11 => "COLUMNSTATS"}
+ VALID_VALUES = Set.new([CONDITIONAL, COPY, DDL, MAPRED, EXPLAIN, FETCH, FUNC, MAPREDLOCAL, MOVE, STATS, DEPENDENCY_COLLECTION, COLUMNSTATS]).freeze
end
class Adjacency
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java?rev=1406465&r1=1406464&r2=1406465&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java Wed Nov 7 04:55:00 2012
@@ -314,6 +314,16 @@ public enum ErrorMsg {
"might help. If you dont want the query to fail because accurate statistics " +
"could not be collected, set hive.stats.reliable=false"),
+ COLUMNSTATSCOLLECTOR_INVALID_PART_KEY(30005, "Invalid partitioning key specified in ANALYZE " +
+ "statement"),
+ COLUMNSTATSCOLLECTOR_INCORRECT_NUM_PART_KEY(30006, "Incorrect number of partitioning key " +
+ "specified in ANALYZE statement"),
+ COLUMNSTATSCOLLECTOR_INVALID_PARTITION(30007, "Invalid partitioning key/value specified in " +
+ "ANALYZE statement"),
+ COLUMNSTATSCOLLECTOR_INVALID_SYNTAX(30008, "Dynamic partitioning is not supported yet while " +
+ "gathering column statistics through ANALYZE statement"),
+ COLUMNSTATSCOLLECTOR_PARSE_ERROR(30009, "Encountered parse error while parsing rewritten query"),
+ COLUMNSTATSCOLLECTOR_IO_ERROR(30010, "Encountered I/O exception while parsing rewritten query")
;
private int errorCode;