You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by xu...@apache.org on 2014/06/02 00:12:02 UTC
svn commit: r1599068 - in /hive/trunk/ql/src:
java/org/apache/hadoop/hive/ql/exec/
java/org/apache/hadoop/hive/ql/metadata/formatting/
java/org/apache/hadoop/hive/ql/parse/ java/org/apache/hadoop/hive/ql/plan/
test/queries/clientpositive/ test/results/...
Author: xuefu
Date: Sun Jun 1 22:12:01 2014
New Revision: 1599068
URL: http://svn.apache.org/r1599068
Log:
HIVE-7050: Display table level column stats in DESCRIBE EXTENDED/FORMATTED TABLE (Prasanth J via Xuefu)
Added:
hive/trunk/ql/src/test/queries/clientpositive/display_colstats_tbllvl.q
hive/trunk/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/JsonMetaDataFormatter.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatter.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataPrettyFormatUtils.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/DescTableDesc.java
hive/trunk/ql/src/test/results/clientpositive/describe_syntax.q.out
hive/trunk/ql/src/test/results/clientpositive/describe_table.q.out
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java?rev=1599068&r1=1599067&r2=1599068&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java Sun Jun 1 22:12:01 2014
@@ -38,14 +38,12 @@ import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
-import java.util.Properties;
import java.util.Map.Entry;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;
-import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.StringEscapeUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
@@ -63,6 +61,7 @@ import org.apache.hadoop.hive.metastore.
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.GetOpenTxnsInfoResponse;
@@ -117,7 +116,6 @@ import org.apache.hadoop.hive.ql.metadat
import org.apache.hadoop.hive.ql.metadata.formatting.MetaDataFormatter;
import org.apache.hadoop.hive.ql.parse.AlterTablePartMergeFilesDesc;
import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
-import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.AddPartitionDesc;
import org.apache.hadoop.hive.ql.plan.AlterDatabaseDesc;
import org.apache.hadoop.hive.ql.plan.AlterIndexDesc;
@@ -601,14 +599,9 @@ public class DDLTask extends Task<DDLWor
Database dbObj = null;
if (hiveObjectDesc.getTable()) {
- String[] dbTab = obj.split("\\.");
- if (dbTab.length == 2) {
- dbName = dbTab[0];
- tableName = dbTab[1];
- } else {
- dbName = SessionState.get().getCurrentDatabase();
- tableName = obj;
- }
+ String[] dbTab = splitTableName(obj);
+ dbName = dbTab[0];
+ tableName = dbTab[1];
dbObj = db.getDatabase(dbName);
tableObj = db.getTable(dbName, tableName);
notFound = (dbObj == null || tableObj == null);
@@ -670,6 +663,19 @@ public class DDLTask extends Task<DDLWor
return 0;
}
+ private static String[] splitTableName(String fullName) {
+ String[] dbTab = fullName.split("\\.");
+ String[] result = new String[2];
+ if (dbTab.length == 2) {
+ result[0] = dbTab[0];
+ result[1] = dbTab[1];
+ } else {
+ result[0] = SessionState.get().getCurrentDatabase();
+ result[1] = fullName;
+ }
+ return result;
+ }
+
private int showGrantsV2(ShowGrantDesc showGrantDesc) throws HiveException {
HiveAuthorizer authorizer = SessionState.get().getAuthorizerV2();
try {
@@ -2590,7 +2596,7 @@ public class DDLTask extends Task<DDLWor
// as HiveServer2 output is consumed by JDBC/ODBC clients.
boolean isOutputPadded = !SessionState.get().isHiveServerQuery();
outStream.writeBytes(MetaDataFormatUtils.getAllColumnsInformation(
- cols, false, isOutputPadded));
+ cols, false, isOutputPadded, null));
outStream.close();
outStream = null;
} catch (IOException e) {
@@ -3415,6 +3421,7 @@ public class DDLTask extends Task<DDLWor
outStream = fs.create(resFile);
List<FieldSchema> cols = null;
+ List<ColumnStatisticsObj> colStats = null;
if (colPath.equals(tableName)) {
cols = (part == null || tbl.getTableType() == TableType.VIRTUAL_VIEW) ?
tbl.getCols() : part.getCols();
@@ -3424,6 +3431,16 @@ public class DDLTask extends Task<DDLWor
}
} else {
cols = Hive.getFieldsFromDeserializer(colPath, tbl.getDeserializer());
+ if (descTbl.isFormatted()) {
+ // when column name is specified in describe table DDL, colPath
+ // will be table_name.column_name
+ String colName = colPath.split("\\.")[1];
+ String[] dbTab = splitTableName(tableName);
+ List<String> colNames = new ArrayList<String>();
+ colNames.add(colName.toLowerCase());
+ colStats = db.getTableColumnStatistics(dbTab[0].toLowerCase(),
+ dbTab[1].toLowerCase(), colNames);
+ }
}
fixDecimalColumnTypeName(cols);
@@ -3432,7 +3449,7 @@ public class DDLTask extends Task<DDLWor
boolean isOutputPadded = !SessionState.get().isHiveServerQuery();
formatter.describeTable(outStream, colPath, tableName, tbl, part,
cols, descTbl.isFormatted(), descTbl.isExt(),
- descTbl.isPretty(), isOutputPadded);
+ descTbl.isPretty(), isOutputPadded, colStats);
LOG.info("DDLTask: written data for " + tbl.getTableName());
outStream.close();
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/JsonMetaDataFormatter.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/JsonMetaDataFormatter.java?rev=1599068&r1=1599067&r2=1599068&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/JsonMetaDataFormatter.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/JsonMetaDataFormatter.java Sun Jun 1 22:12:01 2014
@@ -36,6 +36,7 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.TableType;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -101,7 +102,7 @@ public class JsonMetaDataFormatter imple
public void describeTable(DataOutputStream out, String colPath,
String tableName, Table tbl, Partition part, List<FieldSchema> cols,
boolean isFormatted, boolean isExt, boolean isPretty,
- boolean isOutputPadded) throws HiveException {
+ boolean isOutputPadded, List<ColumnStatisticsObj> colStats) throws HiveException {
MapBuilder builder = MapBuilder.create();
builder.put("columns", makeColsUnformatted(cols));
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java?rev=1599068&r1=1599067&r2=1599068&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java Sun Jun 1 22:12:01 2014
@@ -31,9 +31,17 @@ import org.apache.commons.lang.StringEsc
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.metastore.TableType;
+import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
+import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.Index;
+import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
+import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
import org.apache.hadoop.hive.ql.index.HiveIndex;
import org.apache.hadoop.hive.ql.index.HiveIndex.IndexType;
import org.apache.hadoop.hive.ql.metadata.Partition;
@@ -58,9 +66,10 @@ public final class MetaDataFormatUtils {
private MetaDataFormatUtils() {
}
- private static void formatColumnsHeader(StringBuilder columnInformation) {
+ private static void formatColumnsHeader(StringBuilder columnInformation,
+ List<ColumnStatisticsObj> colStats) {
columnInformation.append("# "); // Easy for shell scripts to ignore
- formatOutput(getColumnsHeader(), columnInformation);
+ formatOutput(getColumnsHeader(colStats), columnInformation);
columnInformation.append(LINE_DELIM);
}
@@ -70,15 +79,17 @@ public final class MetaDataFormatUtils {
* @param printHeader - if header should be included
* @param isOutputPadded - make it more human readable by setting indentation
* with spaces. Turned off for use by HiveServer2
+ * @param colStats
* @return string with formatted column information
*/
public static String getAllColumnsInformation(List<FieldSchema> cols,
- boolean printHeader, boolean isOutputPadded) {
+ boolean printHeader, boolean isOutputPadded, List<ColumnStatisticsObj> colStats) {
StringBuilder columnInformation = new StringBuilder(DEFAULT_STRINGBUILDER_SIZE);
if(printHeader){
- formatColumnsHeader(columnInformation);
+ formatColumnsHeader(columnInformation, colStats);
}
- formatAllFields(columnInformation, cols, isOutputPadded);
+
+ formatAllFields(columnInformation, cols, isOutputPadded, colStats);
return columnInformation.toString();
}
@@ -96,15 +107,15 @@ public final class MetaDataFormatUtils {
List<FieldSchema> partCols, boolean printHeader, boolean isOutputPadded, boolean showPartColsSep) {
StringBuilder columnInformation = new StringBuilder(DEFAULT_STRINGBUILDER_SIZE);
if(printHeader){
- formatColumnsHeader(columnInformation);
+ formatColumnsHeader(columnInformation, null);
}
- formatAllFields(columnInformation, cols, isOutputPadded);
+ formatAllFields(columnInformation, cols, isOutputPadded, null);
if ((partCols != null) && !partCols.isEmpty() && showPartColsSep) {
columnInformation.append(LINE_DELIM).append("# Partition Information")
.append(LINE_DELIM);
- formatColumnsHeader(columnInformation);
- formatAllFields(columnInformation, partCols, isOutputPadded);
+ formatColumnsHeader(columnInformation, null);
+ formatAllFields(columnInformation, partCols, isOutputPadded, null);
}
return columnInformation.toString();
@@ -116,25 +127,72 @@ public final class MetaDataFormatUtils {
* @param cols - list of columns
* @param isOutputPadded - make it more human readable by setting indentation
* with spaces. Turned off for use by HiveServer2
+ * @param colStats
*/
private static void formatAllFields(StringBuilder tableInfo,
- List<FieldSchema> cols, boolean isOutputPadded) {
+ List<FieldSchema> cols, boolean isOutputPadded, List<ColumnStatisticsObj> colStats) {
for (FieldSchema col : cols) {
if(isOutputPadded) {
- formatWithIndentation(col.getName(), col.getType(), getComment(col), tableInfo);
+ formatWithIndentation(col.getName(), col.getType(), getComment(col), tableInfo, colStats);
}
else {
- formatWithoutIndentation(col.getName(), col.getType(), col.getComment(), tableInfo);
+ formatWithoutIndentation(col.getName(), col.getType(), col.getComment(), tableInfo, colStats);
+ }
+ }
+ }
+
+ private static ColumnStatisticsObj getColumnStatisticsObject(String colName,
+ String colType, List<ColumnStatisticsObj> colStats) {
+ if (colStats != null && !colStats.isEmpty()) {
+ for (ColumnStatisticsObj cso : colStats) {
+ if (cso.getColName().equalsIgnoreCase(colName)
+ && cso.getColType().equalsIgnoreCase(colType)) {
+ return cso;
+ }
}
}
+ return null;
}
private static void formatWithoutIndentation(String name, String type, String comment,
- StringBuilder colBuffer) {
+ StringBuilder colBuffer, List<ColumnStatisticsObj> colStats) {
colBuffer.append(name);
colBuffer.append(FIELD_DELIM);
colBuffer.append(type);
colBuffer.append(FIELD_DELIM);
+ if (colStats != null) {
+ ColumnStatisticsObj cso = getColumnStatisticsObject(name, type, colStats);
+ if (cso != null) {
+ ColumnStatisticsData csd = cso.getStatsData();
+ if (csd.isSetBinaryStats()) {
+ BinaryColumnStatsData bcsd = csd.getBinaryStats();
+ appendColumnStatsNoFormatting(colBuffer, "", "", bcsd.getNumNulls(), "",
+ bcsd.getAvgColLen(), bcsd.getMaxColLen(), "", "");
+ } else if (csd.isSetStringStats()) {
+ StringColumnStatsData scsd = csd.getStringStats();
+ appendColumnStatsNoFormatting(colBuffer, "", "", scsd.getNumNulls(), scsd.getNumDVs(),
+ scsd.getAvgColLen(), scsd.getMaxColLen(), "", "");
+ } else if (csd.isSetBooleanStats()) {
+ BooleanColumnStatsData bcsd = csd.getBooleanStats();
+ appendColumnStatsNoFormatting(colBuffer, "", "", bcsd.getNumNulls(), "", "", "",
+ bcsd.getNumTrues(), bcsd.getNumFalses());
+ } else if (csd.isSetDecimalStats()) {
+ DecimalColumnStatsData dcsd = csd.getDecimalStats();
+ appendColumnStatsNoFormatting(colBuffer, dcsd.getLowValue(), dcsd.getHighValue(),
+ dcsd.getNumNulls(), dcsd.getNumDVs(), "", "", "", "");
+ } else if (csd.isSetDoubleStats()) {
+ DoubleColumnStatsData dcsd = csd.getDoubleStats();
+ appendColumnStatsNoFormatting(colBuffer, dcsd.getLowValue(), dcsd.getHighValue(),
+ dcsd.getNumNulls(), dcsd.getNumDVs(), "", "", "", "");
+ } else if (csd.isSetLongStats()) {
+ LongColumnStatsData lcsd = csd.getLongStats();
+ appendColumnStatsNoFormatting(colBuffer, lcsd.getLowValue(), lcsd.getHighValue(),
+ lcsd.getNumNulls(), lcsd.getNumDVs(), "", "", "", "");
+ }
+ } else {
+ appendColumnStatsNoFormatting(colBuffer, "", "", "", "", "", "", "", "");
+ }
+ }
colBuffer.append(comment == null ? "" : comment);
colBuffer.append(LINE_DELIM);
}
@@ -341,10 +399,44 @@ public final class MetaDataFormatUtils {
}
private static void formatWithIndentation(String colName, String colType, String colComment,
- StringBuilder tableInfo) {
+ StringBuilder tableInfo, List<ColumnStatisticsObj> colStats) {
tableInfo.append(String.format("%-" + ALIGNMENT + "s", colName)).append(FIELD_DELIM);
tableInfo.append(String.format("%-" + ALIGNMENT + "s", colType)).append(FIELD_DELIM);
+ if (colStats != null) {
+ ColumnStatisticsObj cso = getColumnStatisticsObject(colName, colType, colStats);
+ if (cso != null) {
+ ColumnStatisticsData csd = cso.getStatsData();
+ if (csd.isSetBinaryStats()) {
+ BinaryColumnStatsData bcsd = csd.getBinaryStats();
+ appendColumnStats(tableInfo, "", "", bcsd.getNumNulls(), "", bcsd.getAvgColLen(),
+ bcsd.getMaxColLen(), "", "");
+ } else if (csd.isSetStringStats()) {
+ StringColumnStatsData scsd = csd.getStringStats();
+ appendColumnStats(tableInfo, "", "", scsd.getNumNulls(), scsd.getNumDVs(),
+ scsd.getAvgColLen(), scsd.getMaxColLen(), "", "");
+ } else if (csd.isSetBooleanStats()) {
+ BooleanColumnStatsData bcsd = csd.getBooleanStats();
+ appendColumnStats(tableInfo, "", "", bcsd.getNumNulls(), "", "", "",
+ bcsd.getNumTrues(), bcsd.getNumFalses());
+ } else if (csd.isSetDecimalStats()) {
+ DecimalColumnStatsData dcsd = csd.getDecimalStats();
+ appendColumnStats(tableInfo, dcsd.getLowValue(), dcsd.getHighValue(), dcsd.getNumNulls(),
+ dcsd.getNumDVs(), "", "", "", "");
+ } else if (csd.isSetDoubleStats()) {
+ DoubleColumnStatsData dcsd = csd.getDoubleStats();
+ appendColumnStats(tableInfo, dcsd.getLowValue(), dcsd.getHighValue(), dcsd.getNumNulls(),
+ dcsd.getNumDVs(), "", "", "", "");
+ } else if (csd.isSetLongStats()) {
+ LongColumnStatsData lcsd = csd.getLongStats();
+ appendColumnStats(tableInfo, lcsd.getLowValue(), lcsd.getHighValue(), lcsd.getNumNulls(),
+ lcsd.getNumDVs(), "", "", "", "");
+ }
+ } else {
+ appendColumnStats(tableInfo, "", "", "", "", "", "", "", "");
+ }
+ }
+
// comment indent processing for multi-line comments
// comments should be indented the same amount on each line
// if the first line comment starts indented by k,
@@ -359,8 +451,37 @@ public final class MetaDataFormatUtils {
}
}
- public static String[] getColumnsHeader() {
- return DescTableDesc.getSchema().split("#")[0].split(",");
+ private static void appendColumnStats(StringBuilder sb, Object min, Object max, Object numNulls,
+ Object ndv, Object avgColLen, Object maxColLen, Object numTrues, Object numFalses) {
+ sb.append(String.format("%-" + ALIGNMENT + "s", min)).append(FIELD_DELIM);
+ sb.append(String.format("%-" + ALIGNMENT + "s", max)).append(FIELD_DELIM);
+ sb.append(String.format("%-" + ALIGNMENT + "s", numNulls)).append(FIELD_DELIM);
+ sb.append(String.format("%-" + ALIGNMENT + "s", ndv)).append(FIELD_DELIM);
+ sb.append(String.format("%-" + ALIGNMENT + "s", avgColLen)).append(FIELD_DELIM);
+ sb.append(String.format("%-" + ALIGNMENT + "s", maxColLen)).append(FIELD_DELIM);
+ sb.append(String.format("%-" + ALIGNMENT + "s", numTrues)).append(FIELD_DELIM);
+ sb.append(String.format("%-" + ALIGNMENT + "s", numFalses)).append(FIELD_DELIM);
+ }
+
+ private static void appendColumnStatsNoFormatting(StringBuilder sb, Object min,
+ Object max, Object numNulls, Object ndv, Object avgColLen, Object maxColLen,
+ Object numTrues, Object numFalses) {
+ sb.append(min).append(FIELD_DELIM);
+ sb.append(max).append(FIELD_DELIM);
+ sb.append(numNulls).append(FIELD_DELIM);
+ sb.append(ndv).append(FIELD_DELIM);
+ sb.append(avgColLen).append(FIELD_DELIM);
+ sb.append(maxColLen).append(FIELD_DELIM);
+ sb.append(numTrues).append(FIELD_DELIM);
+ sb.append(numFalses).append(FIELD_DELIM);
+ }
+
+ public static String[] getColumnsHeader(List<ColumnStatisticsObj> colStats) {
+ boolean showColStats = false;
+ if (colStats != null) {
+ showColStats = true;
+ }
+ return DescTableDesc.getSchema(showColStats).split("#")[0].split(",");
}
public static String getIndexColumnsHeader() {
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatter.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatter.java?rev=1599068&r1=1599067&r2=1599068&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatter.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatter.java Sun Jun 1 22:12:01 2014
@@ -25,6 +25,7 @@ import java.util.Map;
import java.util.Set;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -69,12 +70,13 @@ public interface MetaDataFormatter {
* @param isExt
* @param isPretty
* @param isOutputPadded - if true, add spacing and indentation
+ * @param colStats
* @throws HiveException
*/
public void describeTable(DataOutputStream out, String colPath,
String tableName, Table tbl, Partition part, List<FieldSchema> cols,
boolean isFormatted, boolean isExt, boolean isPretty,
- boolean isOutputPadded)
+ boolean isOutputPadded, List<ColumnStatisticsObj> colStats)
throws HiveException;
/**
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataPrettyFormatUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataPrettyFormatUtils.java?rev=1599068&r1=1599067&r2=1599068&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataPrettyFormatUtils.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataPrettyFormatUtils.java Sun Jun 1 22:12:01 2014
@@ -86,7 +86,7 @@ public final class MetaDataPrettyFormatU
*/
private static void formatColumnsHeaderPretty(StringBuilder columnInformation,
int maxColNameLen, int prettyOutputNumCols) {
- String columnHeaders[] = MetaDataFormatUtils.getColumnsHeader();
+ String columnHeaders[] = MetaDataFormatUtils.getColumnsHeader(null);
formatOutputPretty(columnHeaders[0], columnHeaders[1], columnHeaders[2],
columnInformation, maxColNameLen, prettyOutputNumCols);
columnInformation.append(MetaDataFormatUtils.LINE_DELIM);
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java?rev=1599068&r1=1599067&r2=1599068&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java Sun Jun 1 22:12:01 2014
@@ -35,6 +35,7 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.MetaStoreUtils;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.metadata.Hive;
@@ -116,7 +117,7 @@ class TextMetaDataFormatter implements M
public void describeTable(DataOutputStream outStream, String colPath,
String tableName, Table tbl, Partition part, List<FieldSchema> cols,
boolean isFormatted, boolean isExt, boolean isPretty,
- boolean isOutputPadded) throws HiveException {
+ boolean isOutputPadded, List<ColumnStatisticsObj> colStats) throws HiveException {
try {
String output;
if (colPath.equals(tableName)) {
@@ -127,7 +128,7 @@ class TextMetaDataFormatter implements M
:
MetaDataFormatUtils.getAllColumnsInformation(cols, partCols, isFormatted, isOutputPadded, showPartColsSeparately);
} else {
- output = MetaDataFormatUtils.getAllColumnsInformation(cols, isFormatted, isOutputPadded);
+ output = MetaDataFormatUtils.getAllColumnsInformation(cols, isFormatted, isOutputPadded, colStats);
}
outStream.write(output.getBytes("UTF-8"));
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java?rev=1599068&r1=1599067&r2=1599068&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java Sun Jun 1 22:12:01 2014
@@ -57,6 +57,7 @@ import org.apache.hadoop.hive.metastore.
import org.apache.hadoop.hive.ql.Driver;
import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.exec.ArchiveUtils;
+import org.apache.hadoop.hive.ql.exec.DDLTask;
import org.apache.hadoop.hive.ql.exec.FetchTask;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.exec.Task;
@@ -1980,17 +1981,27 @@ public class DDLSemanticAnalyzer extends
DescTableDesc descTblDesc = new DescTableDesc(
ctx.getResFile(), tableName, partSpec, colPath);
+ boolean showColStats = false;
if (ast.getChildCount() == 2) {
int descOptions = ast.getChild(1).getType();
descTblDesc.setFormatted(descOptions == HiveParser.KW_FORMATTED);
descTblDesc.setExt(descOptions == HiveParser.KW_EXTENDED);
descTblDesc.setPretty(descOptions == HiveParser.KW_PRETTY);
+ // in case of "DESCRIBE FORMATTED tablename column_name" statement, colPath
+ // will contain tablename.column_name. If column_name is not specified
+ // colPath will be equal to tableName. This is how we can differentiate
+ // if we are describing a table or column
+ if (!colPath.equalsIgnoreCase(tableName) && descTblDesc.isFormatted()) {
+ showColStats = true;
+ }
}
inputs.add(new ReadEntity(getTable(tableName)));
- rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(),
- descTblDesc), conf));
- setFetchTask(createFetchTask(DescTableDesc.getSchema()));
+ Task ddlTask = TaskFactory.get(new DDLWork(getInputs(), getOutputs(),
+ descTblDesc), conf);
+ rootTasks.add(ddlTask);
+ String schema = DescTableDesc.getSchema(showColStats);
+ setFetchTask(createFetchTask(schema));
LOG.info("analyzeDescribeTable done");
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/DescTableDesc.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/DescTableDesc.java?rev=1599068&r1=1599067&r2=1599068&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/DescTableDesc.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/DescTableDesc.java Sun Jun 1 22:12:01 2014
@@ -56,6 +56,9 @@ public class DescTableDesc extends DDLDe
* thrift ddl for the result of describe table.
*/
private static final String schema = "col_name,data_type,comment#string:string:string";
+ private static final String colStatsSchema = "col_name,data_type,min,max,num_nulls,"
+ + "distinct_count,avg_col_len,max_col_len,num_trues,num_falses,comment"
+ + "#string:string:string:string:string:string:string:string:string:string:string";
public DescTableDesc() {
}
@@ -80,7 +83,10 @@ public class DescTableDesc extends DDLDe
return table;
}
- public static String getSchema() {
+ public static String getSchema(boolean colStats) {
+ if (colStats) {
+ return colStatsSchema;
+ }
return schema;
}
Added: hive/trunk/ql/src/test/queries/clientpositive/display_colstats_tbllvl.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/display_colstats_tbllvl.q?rev=1599068&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/display_colstats_tbllvl.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/display_colstats_tbllvl.q Sun Jun 1 22:12:01 2014
@@ -0,0 +1,75 @@
+DROP TABLE IF EXISTS UserVisits_web_text_none;
+
+CREATE TABLE UserVisits_web_text_none (
+ sourceIP string,
+ destURL string,
+ visitDate string,
+ adRevenue float,
+ userAgent string,
+ cCode string,
+ lCode string,
+ sKeyword string,
+ avgTimeOnSite int)
+row format delimited fields terminated by '|' stored as textfile;
+
+LOAD DATA LOCAL INPATH "../../data/files/UserVisits.dat" INTO TABLE UserVisits_web_text_none;
+
+desc extended UserVisits_web_text_none sourceIP;
+desc formatted UserVisits_web_text_none sourceIP;
+
+explain
+analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue;
+
+explain extended
+analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue;
+
+analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue;
+desc formatted UserVisits_web_text_none sourceIP;
+desc formatted UserVisits_web_text_none avgTimeOnSite;
+desc formatted UserVisits_web_text_none adRevenue;
+
+CREATE TABLE empty_tab(
+ a int,
+ b double,
+ c string,
+ d boolean,
+ e binary)
+row format delimited fields terminated by '|' stored as textfile;
+
+desc formatted empty_tab a;
+explain
+analyze table empty_tab compute statistics for columns a,b,c,d,e;
+
+analyze table empty_tab compute statistics for columns a,b,c,d,e;
+desc formatted empty_tab a;
+desc formatted empty_tab b;
+
+CREATE DATABASE test;
+USE test;
+
+CREATE TABLE UserVisits_web_text_none (
+ sourceIP string,
+ destURL string,
+ visitDate string,
+ adRevenue float,
+ userAgent string,
+ cCode string,
+ lCode string,
+ sKeyword string,
+ avgTimeOnSite int)
+row format delimited fields terminated by '|' stored as textfile;
+
+LOAD DATA LOCAL INPATH "../../data/files/UserVisits.dat" INTO TABLE UserVisits_web_text_none;
+
+desc extended UserVisits_web_text_none sourceIP;
+desc extended test.UserVisits_web_text_none sourceIP;
+desc extended default.UserVisits_web_text_none sourceIP;
+desc formatted UserVisits_web_text_none sourceIP;
+desc formatted test.UserVisits_web_text_none sourceIP;
+desc formatted default.UserVisits_web_text_none sourceIP;
+
+analyze table UserVisits_web_text_none compute statistics for columns sKeyword;
+desc extended UserVisits_web_text_none sKeyword;
+desc formatted UserVisits_web_text_none sKeyword;
+desc formatted test.UserVisits_web_text_none sKeyword;
+
Modified: hive/trunk/ql/src/test/results/clientpositive/describe_syntax.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/describe_syntax.q.out?rev=1599068&r1=1599067&r2=1599068&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/describe_syntax.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/describe_syntax.q.out Sun Jun 1 22:12:01 2014
@@ -202,9 +202,9 @@ PREHOOK: Input: db1@t1
POSTHOOK: query: DESCRIBE FORMATTED t1 key1
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: db1@t1
-# col_name data_type comment
-
-key1 int from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
+
+key1 int from deserializer
PREHOOK: query: -- describe database.table column
DESCRIBE db1.t1 key1
PREHOOK: type: DESCTABLE
@@ -227,9 +227,9 @@ PREHOOK: Input: db1@t1
POSTHOOK: query: DESCRIBE FORMATTED db1.t1 key1
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: db1@t1
-# col_name data_type comment
-
-key1 int from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
+
+key1 int from deserializer
PREHOOK: query: -- describe table.column
-- after first checking t1.key1 for database.table not valid
-- fall back to the old syntax table.column
@@ -256,9 +256,9 @@ PREHOOK: Input: db1@t1
POSTHOOK: query: DESCRIBE FORMATTED t1.key1
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: db1@t1
-# col_name data_type comment
-
-key1 int from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
+
+key1 int from deserializer
PREHOOK: query: -- describe table partition
DESCRIBE t1 PARTITION(ds='4', part='5')
PREHOOK: type: DESCTABLE
Modified: hive/trunk/ql/src/test/results/clientpositive/describe_table.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/describe_table.q.out?rev=1599068&r1=1599067&r2=1599068&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/describe_table.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/describe_table.q.out Sun Jun 1 22:12:01 2014
@@ -205,9 +205,9 @@ PREHOOK: Input: default@srcpart
POSTHOOK: query: describe formatted srcpart.key
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@srcpart
-# col_name data_type comment
-
-key string from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
+
+key string from deserializer
PREHOOK: query: describe formatted srcpart PARTITION(ds='2008-04-08', hr='12')
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@srcpart
@@ -293,9 +293,9 @@ PREHOOK: Input: default@srcpart
POSTHOOK: query: describe formatted `srcpart`.`key`
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@srcpart
-# col_name data_type comment
-
-key string from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
+
+key string from deserializer
PREHOOK: query: describe formatted `srcpart` PARTITION(ds='2008-04-08', hr='12')
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@srcpart
Added: hive/trunk/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out?rev=1599068&view=auto
==============================================================================
Files hive/trunk/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out (added) and hive/trunk/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out Sun Jun 1 22:12:01 2014 differ