Posted to commits@hive.apache.org by na...@apache.org on 2012/08/25 04:44:25 UTC

svn commit: r1377187 [8/8] - in /hive/trunk: common/src/java/org/apache/hadoop/hive/conf/ metastore/if/ metastore/scripts/upgrade/derby/ metastore/scripts/upgrade/mysql/ metastore/scripts/upgrade/oracle/ metastore/scripts/upgrade/postgres/ metastore/sr...

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java?rev=1377187&r1=1377186&r2=1377187&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java Sat Aug 25 02:44:22 2012
@@ -39,6 +39,10 @@ public class ColumnInfo implements Seria
 
   private String alias = null; // [optional] alias of the column (external name
   // as seen by the users)
+  /**
+   * Indicates whether the column is a skewed column.
+   */
+  private boolean isSkewedCol;
 
   /**
    * Store the alias of the table where available.
@@ -73,17 +77,17 @@ public class ColumnInfo implements Seria
       boolean isVirtualCol, boolean isHiddenVirtualCol) {
     this(internalName,
          TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(type),
-         tabAlias, 
+         tabAlias,
          isVirtualCol,
          isHiddenVirtualCol);
   }
 
-  public ColumnInfo(String internalName, ObjectInspector objectInspector, 
+  public ColumnInfo(String internalName, ObjectInspector objectInspector,
       String tabAlias, boolean isVirtualCol) {
     this(internalName, objectInspector, tabAlias, isVirtualCol, false);
   }
 
-  public ColumnInfo(String internalName, ObjectInspector objectInspector, 
+  public ColumnInfo(String internalName, ObjectInspector objectInspector,
       String tabAlias, boolean isVirtualCol, boolean isHiddenVirtualCol) {
     this.internalName = internalName;
     this.objectInspector = objectInspector;
@@ -153,5 +157,17 @@ public class ColumnInfo implements Seria
     this.isHiddenVirtualCol = isHiddenVirtualCol;
   }
 
+  /**
+   * @return the isSkewedCol
+   */
+  public boolean isSkewedCol() {
+    return isSkewedCol;
+  }
 
+  /**
+   * @param isSkewedCol the isSkewedCol to set
+   */
+  public void setSkewedCol(boolean isSkewedCol) {
+    this.isSkewedCol = isSkewedCol;
+  }
 }

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java?rev=1377187&r1=1377186&r2=1377187&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java Sat Aug 25 02:44:22 2012
@@ -2792,6 +2792,7 @@ public class DDLTask extends Task<DDLWor
       }
 
       tbl.getTTable().getSd().setCols(newCols);
+
     } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.REPLACECOLS) {
       // change SerDe to LazySimpleSerDe if it is columnsetSerDe
       if (tbl.getSerializationLib().equals(
@@ -3327,6 +3328,13 @@ public class DDLTask extends Task<DDLWor
       tbl.setDataLocation(new Path(crtTbl.getLocation()).toUri());
     }
 
+    if (crtTbl.getSkewedColNames() != null) {
+      tbl.setSkewedColNames(crtTbl.getSkewedColNames());
+    }
+    if (crtTbl.getSkewedColValues() != null) {
+      tbl.setSkewedColValues(crtTbl.getSkewedColValues());
+    }
+
     tbl.setInputFormatClass(crtTbl.getInputFormat());
     tbl.setOutputFormatClass(crtTbl.getOutputFormat());
 

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java?rev=1377187&r1=1377186&r2=1377187&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java Sat Aug 25 02:44:22 2012
@@ -42,6 +42,7 @@ import org.apache.hadoop.hive.metastore.
 import org.apache.hadoop.hive.metastore.api.MetaException;
 import org.apache.hadoop.hive.metastore.api.Order;
 import org.apache.hadoop.hive.metastore.api.SerDeInfo;
+import org.apache.hadoop.hive.metastore.api.SkewedInfo;
 import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
 import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
 import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
@@ -138,6 +139,11 @@ public class Table implements Serializab
       sd.getSerdeInfo().getParameters().put(Constants.SERIALIZATION_FORMAT, "1");
       sd.setInputFormat(SequenceFileInputFormat.class.getName());
       sd.setOutputFormat(HiveSequenceFileOutputFormat.class.getName());
+      SkewedInfo skewInfo = new SkewedInfo();
+      skewInfo.setSkewedColNames(new ArrayList<String>());
+      skewInfo.setSkewedColValues(new ArrayList<List<String>>());
+      skewInfo.setSkewedColValueLocationMaps(new HashMap<List<String>, String>());
+      sd.setSkewedInfo(skewInfo);
     }
 
     org.apache.hadoop.hive.metastore.api.Table t = new org.apache.hadoop.hive.metastore.api.Table();
@@ -410,6 +416,43 @@ public class Table implements Serializab
     return tTable.getTableName();
   }
 
+  /* (non-Javadoc)
+   * @see java.lang.Object#hashCode()
+   */
+  @Override
+  public int hashCode() {
+    final int prime = 31;
+    int result = 1;
+    result = prime * result + ((tTable == null) ? 0 : tTable.hashCode());
+    return result;
+  }
+
+  /* (non-Javadoc)
+   * @see java.lang.Object#equals(java.lang.Object)
+   */
+  @Override
+  public boolean equals(Object obj) {
+    if (this == obj) {
+      return true;
+    }
+    if (obj == null) {
+      return false;
+    }
+    if (getClass() != obj.getClass()) {
+      return false;
+    }
+    Table other = (Table) obj;
+    if (tTable == null) {
+      if (other.tTable != null) {
+        return false;
+      }
+    } else if (!tTable.equals(other.tTable)) {
+      return false;
+    }
+    return true;
+  }
+
   public List<FieldSchema> getPartCols() {
     List<FieldSchema> partKeys = tTable.getPartitionKeys();
     if (partKeys == null) {
@@ -471,6 +514,40 @@ public class Table implements Serializab
     tTable.getSd().setSortCols(sortOrder);
   }
 
+  public void setSkewedValueLocationMap(List<String> valList, String dirName)
+      throws HiveException {
+    Map<List<String>, String> mappings = tTable.getSd().getSkewedInfo()
+        .getSkewedColValueLocationMaps();
+    if (null == mappings) {
+      mappings = new HashMap<List<String>, String>();
+      tTable.getSd().getSkewedInfo().setSkewedColValueLocationMaps(mappings);
+    }
+
+    // Add or update new mapping
+    mappings.put(valList, dirName);
+  }
+
+  public Map<List<String>,String> getSkewedColValueLocationMaps() {
+    return tTable.getSd().getSkewedInfo().getSkewedColValueLocationMaps();
+  }
+
+  public void setSkewedColValues(List<List<String>> skewedValues) throws HiveException {
+    tTable.getSd().getSkewedInfo().setSkewedColValues(skewedValues);
+  }
+
+  public List<List<String>> getSkewedColValues() {
+    return tTable.getSd().getSkewedInfo().getSkewedColValues();
+  }
+
+  public void setSkewedColNames(List<String> skewedColNames) throws HiveException {
+    tTable.getSd().getSkewedInfo().setSkewedColNames(skewedColNames);
+  }
+
+  public List<String> getSkewedColName() {
+    return tTable.getSd().getSkewedInfo().getSkewedColNames();
+  }
+
   private boolean isField(String col) {
     for (FieldSchema field : getCols()) {
       if (field.getName().equals(col)) {

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java?rev=1377187&r1=1377186&r2=1377187&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java Sat Aug 25 02:44:22 2012
@@ -36,6 +36,7 @@ import org.apache.hadoop.hive.ql.metadat
 import org.apache.hadoop.hive.ql.plan.DescTableDesc;
 import org.apache.hadoop.hive.ql.plan.ShowIndexesDesc;
 
+
 /**
  * This class provides methods to format table and index information.
  *
@@ -191,6 +192,25 @@ public final class MetaDataFormatUtils {
     formatOutput("Bucket Columns:", storageDesc.getBucketCols().toString(), tableInfo);
     formatOutput("Sort Columns:", storageDesc.getSortCols().toString(), tableInfo);
 
+    if (null != storageDesc.getSkewedInfo()) {
+      List<String> skewedColNames = storageDesc.getSkewedInfo().getSkewedColNames();
+      if ((skewedColNames != null) && (skewedColNames.size() > 0)) {
+        formatOutput("Skewed Columns:", skewedColNames.toString(), tableInfo);
+      }
+
+      List<List<String>> skewedColValues = storageDesc.getSkewedInfo().getSkewedColValues();
+      if ((skewedColValues != null) && (skewedColValues.size() > 0)) {
+        formatOutput("Skewed Values:", skewedColValues.toString(), tableInfo);
+      }
+
+      Map<List<String>, String> skewedColMap = storageDesc.getSkewedInfo()
+          .getSkewedColValueLocationMaps();
+      if ((skewedColMap != null) && (skewedColMap.size() > 0)) {
+        formatOutput("Skewed Value to Location Mapping:", skewedColMap.toString(),
+            tableInfo);
+      }
+    }
+
     if (storageDesc.getSerdeInfo().getParametersSize() > 0) {
       tableInfo.append("Storage Desc Params:").append(LINE_DELIM);
       displayAllParameters(storageDesc.getSerdeInfo().getParameters(), tableInfo);

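For illustration, the block added above surfaces skewed-table metadata ("Skewed Columns:", "Skewed Values:", and, when present, the skewed-value-to-location mapping) in describe formatted output. A minimal HiveQL sketch with a hypothetical table name (list bucketing must be enabled, as in the test queries below):

set hive.internal.ddl.list.bucketing.enable=true;
CREATE TABLE skew_fmt (key STRING, value STRING) SKEWED BY (key) ON (1,5,6);
describe formatted skew_fmt; -- output now includes the skewed column names and values
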
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java?rev=1377187&r1=1377186&r2=1377187&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java Sat Aug 25 02:44:22 2012
@@ -49,6 +49,7 @@ import org.apache.hadoop.hive.metastore.
 import org.apache.hadoop.hive.metastore.api.MetaException;
 import org.apache.hadoop.hive.metastore.api.Order;
 import org.apache.hadoop.hive.metastore.api.PrincipalType;
+import org.apache.hadoop.hive.metastore.api.SkewedInfo;
 import org.apache.hadoop.hive.ql.Driver;
 import org.apache.hadoop.hive.ql.ErrorMsg;
 import org.apache.hadoop.hive.ql.exec.ArchiveUtils;
@@ -1856,9 +1857,27 @@ public class DDLSemanticAnalyzer extends
       }
     }
 
+    String oldColName = ast.getChild(1).getText();
+    String newColName = ast.getChild(2).getText();
+
+    /* Validate the operation of renaming a column name. */
+    Table tab = null;
+    try {
+      tab = db.getTable(tblName);
+    } catch (HiveException e) {
+      throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(tblName), e);
+    }
+    SkewedInfo skewInfo = tab.getTTable().getSd().getSkewedInfo();
+    if ((null != skewInfo)
+        && (null != skewInfo.getSkewedColNames())
+        && skewInfo.getSkewedColNames().contains(oldColName)) {
+      throw new SemanticException(oldColName
+        + ErrorMsg.ALTER_TABLE_NOT_ALLOWED_RENAME_SKEWED_COLUMN.getMsg());
+    }
+
     AlterTableDesc alterTblDesc = new AlterTableDesc(tblName,
-        unescapeIdentifier(ast.getChild(1).getText()), unescapeIdentifier(ast
-        .getChild(2).getText()), newType, newComment, first, flagCol);
+        unescapeIdentifier(oldColName), unescapeIdentifier(newColName),
+        newType, newComment, first, flagCol);
     addInputsOutputsAlterTable(tblName, null, alterTblDesc);
 
     rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(),

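For illustration, the check added above rejects renaming a skewed column at analysis time. A minimal HiveQL sketch with a hypothetical table name (this mirrors the column_rename5.q test added below):

set hive.internal.ddl.list.bucketing.enable=true;
CREATE TABLE skew_demo (key STRING, value STRING) SKEWED BY (key) ON (1,5,6);
-- fails with ALTER_TABLE_NOT_ALLOWED_RENAME_SKEWED_COLUMN: key is a skewed column
ALTER TABLE skew_demo CHANGE key key_new STRING;
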
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g?rev=1377187&r1=1377186&r2=1377187&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g Sat Aug 25 02:44:22 2012
@@ -259,6 +259,10 @@ TOK_TABNAME;
 TOK_TABSRC;
 TOK_RESTRICT;
 TOK_CASCADE;
+TOK_TABLESKEWED;
+TOK_TABCOLVALUE;
+TOK_TABCOLVALUE_PAIR;
+TOK_TABCOLVALUES;
 }
 
 
@@ -451,6 +455,7 @@ createTableStatement
          tableComment?
          tablePartition?
          tableBuckets?
+         tableSkewed?
          tableRowFormat?
          tableFileFormat?
          tableLocation?
@@ -463,6 +468,7 @@ createTableStatement
          tableComment?
          tablePartition?
          tableBuckets?
+         tableSkewed?
          tableRowFormat?
          tableFileFormat?
          tableLocation?
@@ -1080,6 +1086,14 @@ tableBuckets
     -> ^(TOK_TABLEBUCKETS $bucketCols $sortCols? $num)
     ;
 
+tableSkewed
+@init { msgs.push("table skewed specification"); }
+@after { msgs.pop(); }
+    :
+     KW_SKEWED KW_BY LPAREN skewedCols=columnNameList RPAREN KW_ON LPAREN (skewedValues=skewedValueElement) RPAREN
+    -> ^(TOK_TABLESKEWED $skewedCols $skewedValues)
+    ;
+
 rowFormat
 @init { msgs.push("serde specification"); }
 @after { msgs.pop(); }
@@ -1241,6 +1255,41 @@ columnNameOrderList
     : columnNameOrder (COMMA columnNameOrder)* -> ^(TOK_TABCOLNAME columnNameOrder+)
     ;
 
+skewedValueElement
+@init { msgs.push("skewed value element"); }
+@after { msgs.pop(); }
+    :
+      skewedColumnValues
+    | skewedColumnValuePairList
+    ;
+
+skewedColumnValuePairList
+@init { msgs.push("column value pair list"); }
+@after { msgs.pop(); }
+    : skewedColumnValuePair (COMMA skewedColumnValuePair)* -> ^(TOK_TABCOLVALUE_PAIR skewedColumnValuePair+)
+    ;
+
+skewedColumnValuePair
+@init { msgs.push("column value pair"); }
+@after { msgs.pop(); }
+    : 
+      LPAREN colValues=skewedColumnValues RPAREN 
+      -> ^(TOK_TABCOLVALUES $colValues)
+    ;
+
+skewedColumnValues
+@init { msgs.push("column values"); }
+@after { msgs.pop(); }
+    : skewedColumnValue (COMMA skewedColumnValue)* -> ^(TOK_TABCOLVALUE skewedColumnValue+)
+    ;
+
+skewedColumnValue
+@init { msgs.push("column value"); }
+@after { msgs.pop(); }
+    :
+      constant
+    ;
+
 columnNameOrder
 @init { msgs.push("column name order"); }
 @after { msgs.pop(); }
@@ -2367,6 +2416,7 @@ KW_SHOW_DATABASE: 'SHOW_DATABASE';
 KW_UPDATE: 'UPDATE';
 KW_RESTRICT: 'RESTRICT';
 KW_CASCADE: 'CASCADE';
+KW_SKEWED: 'SKEWED';
 
 
 // Operators

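For illustration, the new tableSkewed production accepts two forms of SKEWED BY ... ON ...: a flat value list when a single column is skewed (skewedColumnValues), and a list of parenthesized value tuples when several columns are skewed (skewedColumnValuePairList). A minimal HiveQL sketch with hypothetical table names:

set hive.internal.ddl.list.bucketing.enable=true;
-- single skewed column: flat value list
CREATE TABLE skew_single (key STRING, value STRING) SKEWED BY (key) ON (1,5,6);
-- multiple skewed columns: one value tuple per skewed combination
CREATE TABLE skew_multi (col1 STRING, col2 INT) SKEWED BY (col1, col2) ON (('s1',1), ('s3',3));
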
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java?rev=1377187&r1=1377186&r2=1377187&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java Sat Aug 25 02:44:22 2012
@@ -112,7 +112,11 @@ public class ImportSemanticAnalyzer exte
             table.getSd().getSerdeInfo().getSerializationLib(),
             null, // storagehandler passed as table params
             table.getSd().getSerdeInfo().getParameters(),
-            table.getParameters(), false);
+            table.getParameters(), false,
+            (null == table.getSd().getSkewedInfo()) ? null : table.getSd().getSkewedInfo()
+                .getSkewedColNames(),
+            (null == table.getSd().getSkewedInfo()) ? null : table.getSd().getSkewedInfo()
+                .getSkewedColValues());
 
 
         List<FieldSchema> partCols = tblDesc.getPartCols();

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java?rev=1377187&r1=1377186&r2=1377187&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java Sat Aug 25 02:44:22 2012
@@ -43,10 +43,10 @@ import org.apache.hadoop.hive.ql.metadat
 import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
 import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.FilterDesc.sampleDesc;
 import org.apache.hadoop.hive.ql.plan.LoadFileDesc;
 import org.apache.hadoop.hive.ql.plan.LoadTableDesc;
 import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
-import org.apache.hadoop.hive.ql.plan.FilterDesc.sampleDesc;
 
 /**
  * Parse Context: The current parse context. This is passed to the optimizer
@@ -64,6 +64,7 @@ public class ParseContext {
   private HashMap<TableScanOperator, ExprNodeDesc> opToPartPruner;
   private HashMap<TableScanOperator, PrunedPartitionList> opToPartList;
   private HashMap<TableScanOperator, sampleDesc> opToSamplePruner;
+  private Map<TableScanOperator, ExprNodeDesc> opToSkewedPruner;
   private HashMap<String, Operator<? extends Serializable>> topOps;
   private HashMap<String, Operator<? extends Serializable>> topSelOps;
   private LinkedHashMap<Operator<? extends Serializable>, OpParseContext> opParseCtx;
@@ -166,7 +167,8 @@ public class ParseContext {
       HashMap<TableScanOperator, sampleDesc> opToSamplePruner,
       GlobalLimitCtx globalLimitCtx,
       HashMap<String, SplitSample> nameToSplitSample,
-      HashSet<ReadEntity> semanticInputs, List<Task<? extends Serializable>> rootTasks) {
+      HashSet<ReadEntity> semanticInputs, List<Task<? extends Serializable>> rootTasks,
+      Map<TableScanOperator, ExprNodeDesc> opToSkewedPruner) {
     this.conf = conf;
     this.qb = qb;
     this.ast = ast;
@@ -192,6 +194,7 @@ public class ParseContext {
     this.globalLimitCtx = globalLimitCtx;
     this.semanticInputs = semanticInputs;
     this.rootTasks = rootTasks;
+    this.opToSkewedPruner = opToSkewedPruner;
   }
 
   /**
@@ -557,4 +560,19 @@ public class ParseContext {
     }
     return partsList;
   }
+
+  /**
+   * @return the opToSkewedPruner
+   */
+  public Map<TableScanOperator, ExprNodeDesc> getOpToSkewedPruner() {
+    return opToSkewedPruner;
+  }
+
+  /**
+   * @param opToSkewedPruner the opToSkewedPruner to set
+   */
+  public void setOpToSkewedPruner(HashMap<TableScanOperator, ExprNodeDesc> opToSkewedPruner) {
+    this.opToSkewedPruner = opToSkewedPruner;
+  }
+
 }

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java?rev=1377187&r1=1377186&r2=1377187&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java Sat Aug 25 02:44:22 2012
@@ -18,6 +18,13 @@
 
 package org.apache.hadoop.hive.ql.parse;
 
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.ql.ErrorMsg;
+
 
 /**
  * Library of utility functions used in the parse code.
@@ -63,4 +70,30 @@ public final class ParseUtils {
   private ParseUtils() {
     // prevent instantiation
   }
+
+  public static List<String> validateColumnNameUniqueness(
+      List<FieldSchema> fieldSchemas) throws SemanticException {
+
+    // no duplicate column names
+    // currently, it is a simple n*n algorithm - this can be optimized later if
+    // need be
+    // but it should not be a major bottleneck as the number of columns are
+    // anyway not so big
+    Iterator<FieldSchema> iterCols = fieldSchemas.iterator();
+    List<String> colNames = new ArrayList<String>();
+    while (iterCols.hasNext()) {
+      String colName = iterCols.next().getName();
+      Iterator<String> iter = colNames.iterator();
+      while (iter.hasNext()) {
+        String oldColName = iter.next();
+        if (colName.equalsIgnoreCase(oldColName)) {
+          throw new SemanticException(ErrorMsg.DUPLICATE_COLUMN_NAMES
+              .getMsg(oldColName));
+        }
+      }
+      colNames.add(colName);
+    }
+    return colNames;
+  }
+
 }

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java?rev=1377187&r1=1377186&r2=1377187&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java Sat Aug 25 02:44:22 2012
@@ -25,6 +25,7 @@ import java.util.Set;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.plan.CreateTableDesc;
 
 /**
@@ -206,4 +207,20 @@ public class QB {
   public boolean isCTAS() {
     return tblDesc != null;
   }
+
+  /**
+   * Retrieve the skewed column names for a table.
+   * @param alias table alias
+   * @return the list of skewed column names, or null if table metadata is unavailable
+   */
+  public List<String> getSkewedColumnNames(String alias) {
+    List<String> skewedColNames = null;
+    if (null != qbm && null != qbm.getAliasToTable()
+        && qbm.getAliasToTable().size() > 0) {
+      Table tbl = getMetaData().getTableForAlias(alias);
+      skewedColNames = tbl.getSkewedColName();
+    }
+    return skewedColNames;
+  }
 }

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1377187&r1=1377186&r2=1377187&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Sat Aug 25 02:44:22 2012
@@ -21,6 +21,7 @@ package org.apache.hadoop.hive.ql.parse;
 import java.io.IOException;
 import java.io.Serializable;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
@@ -77,7 +78,6 @@ import org.apache.hadoop.hive.ql.exec.Ut
 import org.apache.hadoop.hive.ql.hooks.ReadEntity;
 import org.apache.hadoop.hive.ql.hooks.WriteEntity;
 import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
-import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
 import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
 import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
 import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
@@ -161,14 +161,11 @@ import org.apache.hadoop.hive.serde.Cons
 import org.apache.hadoop.hive.serde2.Deserializer;
 import org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe;
 import org.apache.hadoop.hive.serde2.SerDeException;
-import org.apache.hadoop.hive.serde2.SerDeUtils;
 import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
@@ -194,6 +191,7 @@ public class SemanticAnalyzer extends Ba
   private UnionProcContext uCtx;
   List<AbstractMapJoinOperator<? extends MapJoinDesc>> listMapJoinOpsNoReducer;
   private HashMap<TableScanOperator, sampleDesc> opToSamplePruner;
+  private final Map<TableScanOperator, ExprNodeDesc> opToSkewedPruner;
   /**
   * a map for the split sampling, from alias to an instance of SplitSample
    * that describes percentage and number.
@@ -245,6 +243,7 @@ public class SemanticAnalyzer extends Ba
     autogenColAliasPrfxIncludeFuncName = HiveConf.getBoolVar(conf,
                          HiveConf.ConfVars.HIVE_AUTOGEN_COLUMNALIAS_PREFIX_INCLUDEFUNCNAME);
     queryProperties = new QueryProperties();
+    opToSkewedPruner = new HashMap<TableScanOperator, ExprNodeDesc>();
   }
 
   @Override
@@ -292,7 +291,7 @@ public class SemanticAnalyzer extends Ba
         topSelOps, opParseCtx, joinContext, topToTable, loadTableWork,
         loadFileWork, ctx, idToTableNameMap, destTableId, uCtx,
         listMapJoinOpsNoReducer, groupOpToInputTables, prunedPartitions,
-        opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks);
+        opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks, opToSkewedPruner);
   }
 
   @SuppressWarnings("nls")
@@ -1590,7 +1589,7 @@ public class SemanticAnalyzer extends Ba
         }
 
         ExprNodeColumnDesc expr = new ExprNodeColumnDesc(colInfo.getType(),
-            name, colInfo.getTabAlias(), colInfo.getIsVirtualCol());
+            name, colInfo.getTabAlias(), colInfo.getIsVirtualCol(), colInfo.isSkewedCol());
         col_list.add(expr);
         output.put(tmp[0], tmp[1],
             new ColumnInfo(getColumnInternalName(pos), colInfo.getType(),
@@ -2279,9 +2278,11 @@ public class SemanticAnalyzer extends Ba
           throw new SemanticException(ErrorMsg.AMBIGUOUS_COLUMN.getMsg(colAlias));
         }
 
-        out_rwsch.put(tabAlias, colAlias, new ColumnInfo(
-            getColumnInternalName(pos), exp.getWritableObjectInspector(),
-            tabAlias, false));
+        ColumnInfo colInfo = new ColumnInfo(getColumnInternalName(pos),
+          exp.getWritableObjectInspector(), tabAlias, false);
+        colInfo.setSkewedCol((exp instanceof ExprNodeColumnDesc)
+          && ((ExprNodeColumnDesc) exp).isSkewedCol());
+        out_rwsch.put(tabAlias, colAlias, colInfo);
 
         pos = Integer.valueOf(pos.intValue() + 1);
       }
@@ -4375,7 +4376,7 @@ public class SemanticAnalyzer extends Ba
             .getTypeInfoFromObjectInspector(tableFieldOI);
         TypeInfo rowFieldTypeInfo = rowFields.get(i).getType();
         ExprNodeDesc column = new ExprNodeColumnDesc(rowFieldTypeInfo,
-            rowFields.get(i).getInternalName(), "", false);
+            rowFields.get(i).getInternalName(), "", false, rowFields.get(i).isSkewedCol());
         // LazySimpleSerDe can convert any types to String type using
         // JSON-format.
         if (!tableFieldTypeInfo.equals(rowFieldTypeInfo)
@@ -6567,10 +6568,15 @@ public class SemanticAnalyzer extends Ba
         List<? extends StructField> fields = rowObjectInspector
             .getAllStructFieldRefs();
         for (int i = 0; i < fields.size(); i++) {
-          rwsch.put(alias, fields.get(i).getFieldName(), new ColumnInfo(fields
-              .get(i).getFieldName(), TypeInfoUtils
-              .getTypeInfoFromObjectInspector(fields.get(i)
-              .getFieldObjectInspector()), alias, false));
+          // if the underlying table column is a skewed column, mark the
+          // ColumnInfo accordingly
+          ColumnInfo colInfo = new ColumnInfo(fields.get(i).getFieldName(),
+            TypeInfoUtils.getTypeInfoFromObjectInspector(fields.get(i)
+              .getFieldObjectInspector()), alias, false);
+          colInfo.setSkewedCol(isSkewedCol(alias, qb, fields.get(i).getFieldName()));
+          rwsch.put(alias, fields.get(i).getFieldName(), colInfo);
         }
       } catch (SerDeException e) {
         throw new RuntimeException(e);
@@ -6751,6 +6757,17 @@ public class SemanticAnalyzer extends Ba
     return output;
   }
 
+  private boolean isSkewedCol(String alias, QB qb, String colName) {
+    // getSkewedColumnNames may return null when no table metadata is available
+    List<String> skewedCols = qb.getSkewedColumnNames(alias);
+    if (skewedCols == null) {
+      return false;
+    }
+    for (String skewedCol : skewedCols) {
+      if (skewedCol.equalsIgnoreCase(colName)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
   private void setupStats(TableScanDesc tsDesc, QBParseInfo qbp, Table tab, String alias, RowResolver rwsch)
       throws SemanticException {
 
@@ -7186,7 +7203,7 @@ public class SemanticAnalyzer extends Ba
       // generate a DDL task and make it a dependent task of the leaf
       CreateTableDesc crtTblDesc = qb.getTableDesc();
 
-      validateCreateTable(crtTblDesc);
+      crtTblDesc.validate();
 
       // Clear the output for CTAS since we don't need the output from the
       // mapredWork, the
@@ -7456,7 +7473,7 @@ public class SemanticAnalyzer extends Ba
         opToPartList, topOps, topSelOps, opParseCtx, joinContext, topToTable,
         loadTableWork, loadFileWork, ctx, idToTableNameMap, destTableId, uCtx,
         listMapJoinOpsNoReducer, groupOpToInputTables, prunedPartitions,
-        opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks);
+        opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks, opToSkewedPruner);
 
     Optimizer optm = new Optimizer();
     optm.setPctx(pCtx);
@@ -7483,7 +7500,7 @@ public class SemanticAnalyzer extends Ba
     // modify it below as part of imposing view column names.
     List<FieldSchema> derivedSchema =
         new ArrayList<FieldSchema>(resultSchema);
-    validateColumnNameUniqueness(derivedSchema);
+    ParseUtils.validateColumnNameUniqueness(derivedSchema);
 
     List<FieldSchema> imposedSchema = createVwDesc.getSchema();
     if (imposedSchema != null) {
@@ -7653,7 +7670,7 @@ public class SemanticAnalyzer extends Ba
       }
       return new ExprNodeColumnDesc(colInfo.getType(), colInfo
           .getInternalName(), colInfo.getTabAlias(), colInfo
-          .getIsVirtualCol());
+          .getIsVirtualCol(), colInfo.isSkewedCol());
     }
 
     // Create the walker and  the rules dispatcher.
@@ -7883,6 +7900,9 @@ public class SemanticAnalyzer extends Ba
     final int CTLT = 1; // CREATE TABLE LIKE ... (CTLT)
     final int CTAS = 2; // CREATE TABLE AS SELECT ... (CTAS)
     int command_type = CREATE_TABLE;
+    List<String> skewedColNames = new ArrayList<String>();
+    List<List<String>> skewedValues = new ArrayList<List<String>>();
+    Map<List<String>, String> listBucketColValuesMapping = new HashMap<List<String>, String>();
 
     RowFormatParams rowFormatParams = new RowFormatParams();
     StorageFormat storageFormat = new StorageFormat();
@@ -7990,7 +8010,60 @@ public class SemanticAnalyzer extends Ba
       case HiveParser.TOK_FILEFORMAT_GENERIC:
         handleGenericFileFormat(child);
         break;
-
+      case HiveParser.TOK_TABLESKEWED:
+        /**
+         * Throw an error if the user tries to use the DDL with
+         * hive.internal.ddl.list.bucketing.enable set to false.
+         */
+        HiveConf hiveConf = SessionState.get().getConf();
+        if (!(hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_INTERNAL_DDL_LIST_BUCKETING_ENABLE))) {
+          throw new SemanticException(ErrorMsg.HIVE_INTERNAL_DDL_LIST_BUCKETING_DISABLED.getMsg());
+        }
+
+        // skewed column names
+        skewedColNames = analyzeCreateTableSkewedColNames(skewedColNames, child);
+        // skewed value
+        Tree vNode = child.getChild(1);
+        if (vNode == null) {
+          throw new SemanticException(ErrorMsg.CREATE_SKEWED_TABLE_NO_COLUMN_VALUE.getMsg());
+        } else {
+          ASTNode vAstNode = (ASTNode) vNode;
+          switch (vAstNode.getToken().getType()) {
+            case HiveParser.TOK_TABCOLVALUE:
+              for (String str : getSkewedColumnValuesFromASTNode(vAstNode)) {
+                List<String> sList = new ArrayList<String>(Arrays.asList(str));
+                skewedValues.add(sList);
+              }
+              break;
+            case HiveParser.TOK_TABCOLVALUE_PAIR:
+              ArrayList<Node> vLNodes = vAstNode.getChildren();
+              for (Node node : vLNodes) {
+                if ( ((ASTNode) node).getToken().getType() != HiveParser.TOK_TABCOLVALUES) {
+                  throw new SemanticException(
+                      ErrorMsg.CREATE_SKEWED_TABLE_NO_COLUMN_VALUE.getMsg());
+                } else {
+                  Tree leafVNode = ((ASTNode) node).getChild(0);
+                  if (leafVNode == null) {
+                    throw new SemanticException(
+                        ErrorMsg.CREATE_SKEWED_TABLE_NO_COLUMN_VALUE.getMsg());
+                  } else {
+                    ASTNode lVAstNode = (ASTNode) leafVNode;
+                    if (lVAstNode.getToken().getType() != HiveParser.TOK_TABCOLVALUE) {
+                      throw new SemanticException(
+                          ErrorMsg.CREATE_SKEWED_TABLE_NO_COLUMN_VALUE.getMsg());
+                    } else {
+                      skewedValues.add(new ArrayList<String>(
+                          getSkewedColumnValuesFromASTNode(lVAstNode)));
+                    }
+                  }
+                }
+              }
+              break;
+            default:
+              break;
+          }
+        }
+        break;
       default:
         assert false;
       }
@@ -8025,9 +8098,10 @@ public class SemanticAnalyzer extends Ba
           bucketCols, sortCols, numBuckets, rowFormatParams.fieldDelim, rowFormatParams.fieldEscape,
           rowFormatParams.collItemDelim, rowFormatParams.mapKeyDelim, rowFormatParams.lineDelim, comment,
           storageFormat.inputFormat, storageFormat.outputFormat, location, shared.serde,
-          storageFormat.storageHandler, shared.serdeProps, tblProps, ifNotExists);
+          storageFormat.storageHandler, shared.serdeProps, tblProps, ifNotExists, skewedColNames,
+          skewedValues);
 
-      validateCreateTable(crtTblDesc);
+      crtTblDesc.validate();
       // outputs is empty, which means this create table happens in the current
       // database.
       SessionState.get().setCommandType(HiveOperation.CREATETABLE);
@@ -8067,7 +8141,7 @@ public class SemanticAnalyzer extends Ba
           bucketCols, sortCols, numBuckets, rowFormatParams.fieldDelim, rowFormatParams.fieldEscape,
           rowFormatParams.collItemDelim, rowFormatParams.mapKeyDelim, rowFormatParams.lineDelim, comment, storageFormat.inputFormat,
           storageFormat.outputFormat, location, shared.serde, storageFormat.storageHandler, shared.serdeProps,
-          tblProps, ifNotExists);
+          tblProps, ifNotExists, skewedColNames, skewedValues);
       qb.setTableDesc(crtTblDesc);
 
       SessionState.get().setCommandType(HiveOperation.CREATETABLE_AS_SELECT);
@@ -8079,6 +8153,50 @@ public class SemanticAnalyzer extends Ba
     return null;
   }
 
+  /**
+   * Analyze list bucketing (skewed) column names.
+   *
+   * @param skewedColNames
+   * @param child
+   * @return the skewed column names parsed from the AST
+   * @throws SemanticException
+   */
+  private List<String> analyzeCreateTableSkewedColNames(List<String> skewedColNames,
+      ASTNode child) throws SemanticException {
+    Tree nNode = child.getChild(0);
+    if (nNode == null) {
+      throw new SemanticException(ErrorMsg.CREATE_SKEWED_TABLE_NO_COLUMN_NAME.getMsg());
+    } else {
+      ASTNode nAstNode = (ASTNode) nNode;
+      if (nAstNode.getToken().getType() != HiveParser.TOK_TABCOLNAME) {
+        throw new SemanticException(ErrorMsg.CREATE_SKEWED_TABLE_NO_COLUMN_NAME.getMsg());
+      } else {
+        skewedColNames = getColumnNames(nAstNode);
+      }
+    }
+    return skewedColNames;
+  }
+
+  /**
+   * Given an ASTNode, return a list of values.
+   *
+   * use case:
+   *   CREATE TABLE xyz (...) SKEWED BY (col1) ON (1,2,5)
+   *   the AST node covers (1,2,5)
+   * @param ast
+   * @return the list of skewed column values
+   */
+  protected List<String> getSkewedColumnValuesFromASTNode(ASTNode ast) {
+    List<String> colList = new ArrayList<String>();
+    int numCh = ast.getChildCount();
+    for (int i = 0; i < numCh; i++) {
+      ASTNode child = (ASTNode) ast.getChild(i);
+      colList.add(unescapeIdentifier(child.getText()).toLowerCase());
+    }
+    return colList;
+  }
+
   private ASTNode analyzeCreateView(ASTNode ast, QB qb)
       throws SemanticException {
     String tableName = getUnescapedName((ASTNode)ast.getChild(0));
@@ -8135,124 +8253,6 @@ public class SemanticAnalyzer extends Ba
     return selectStmt;
   }
 
-  private List<String> validateColumnNameUniqueness(
-      List<FieldSchema> fieldSchemas) throws SemanticException {
-
-    // no duplicate column names
-    // currently, it is a simple n*n algorithm - this can be optimized later if
-    // need be
-    // but it should not be a major bottleneck as the number of columns are
-    // anyway not so big
-    Iterator<FieldSchema> iterCols = fieldSchemas.iterator();
-    List<String> colNames = new ArrayList<String>();
-    while (iterCols.hasNext()) {
-      String colName = iterCols.next().getName();
-      Iterator<String> iter = colNames.iterator();
-      while (iter.hasNext()) {
-        String oldColName = iter.next();
-        if (colName.equalsIgnoreCase(oldColName)) {
-          throw new SemanticException(ErrorMsg.DUPLICATE_COLUMN_NAMES
-              .getMsg(oldColName));
-        }
-      }
-      colNames.add(colName);
-    }
-    return colNames;
-  }
-
-  private void validateCreateTable(CreateTableDesc crtTblDesc)
-      throws SemanticException {
-
-    if ((crtTblDesc.getCols() == null) || (crtTblDesc.getCols().size() == 0)) {
-      // for now make sure that serde exists
-      if (StringUtils.isEmpty(crtTblDesc.getSerName())
-          || !SerDeUtils.shouldGetColsFromSerDe(crtTblDesc.getSerName())) {
-        throw new SemanticException(ErrorMsg.INVALID_TBL_DDL_SERDE.getMsg());
-      }
-      return;
-    }
-
-    if (crtTblDesc.getStorageHandler() == null) {
-      try {
-        Class<?> origin = Class.forName(crtTblDesc.getOutputFormat(), true,
-          JavaUtils.getClassLoader());
-        Class<? extends HiveOutputFormat> replaced = HiveFileFormatUtils
-          .getOutputFormatSubstitute(origin);
-        if (replaced == null) {
-          throw new SemanticException(ErrorMsg.INVALID_OUTPUT_FORMAT_TYPE
-            .getMsg());
-        }
-      } catch (ClassNotFoundException e) {
-        throw new SemanticException(ErrorMsg.INVALID_OUTPUT_FORMAT_TYPE.getMsg());
-      }
-    }
-
-    List<String> colNames = validateColumnNameUniqueness(crtTblDesc.getCols());
-
-    if (crtTblDesc.getBucketCols() != null) {
-      // all columns in cluster and sort are valid columns
-      Iterator<String> bucketCols = crtTblDesc.getBucketCols().iterator();
-      while (bucketCols.hasNext()) {
-        String bucketCol = bucketCols.next();
-        boolean found = false;
-        Iterator<String> colNamesIter = colNames.iterator();
-        while (colNamesIter.hasNext()) {
-          String colName = colNamesIter.next();
-          if (bucketCol.equalsIgnoreCase(colName)) {
-            found = true;
-            break;
-          }
-        }
-        if (!found) {
-          throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg());
-        }
-      }
-    }
-
-    if (crtTblDesc.getSortCols() != null) {
-      // all columns in cluster and sort are valid columns
-      Iterator<Order> sortCols = crtTblDesc.getSortCols().iterator();
-      while (sortCols.hasNext()) {
-        String sortCol = sortCols.next().getCol();
-        boolean found = false;
-        Iterator<String> colNamesIter = colNames.iterator();
-        while (colNamesIter.hasNext()) {
-          String colName = colNamesIter.next();
-          if (sortCol.equalsIgnoreCase(colName)) {
-            found = true;
-            break;
-          }
-        }
-        if (!found) {
-          throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg());
-        }
-      }
-    }
-
-    if (crtTblDesc.getPartCols() != null) {
-      // there is no overlap between columns and partitioning columns
-      Iterator<FieldSchema> partColsIter = crtTblDesc.getPartCols().iterator();
-      while (partColsIter.hasNext()) {
-        FieldSchema fs = partColsIter.next();
-        String partCol = fs.getName();
-        PrimitiveTypeEntry pte = PrimitiveObjectInspectorUtils.getTypeEntryFromTypeName(
-            fs.getType());
-        if(null == pte){
-          throw new SemanticException(ErrorMsg.PARTITION_COLUMN_NON_PRIMITIVE.getMsg() + " Found "
-        + partCol + " of type: " + fs.getType());
-        }
-        Iterator<String> colNamesIter = colNames.iterator();
-        while (colNamesIter.hasNext()) {
-          String colName = unescapeIdentifier(colNamesIter.next());
-          if (partCol.equalsIgnoreCase(colName)) {
-            throw new SemanticException(
-                ErrorMsg.COLUMN_REPEATED_IN_PARTITIONING_COLS.getMsg());
-          }
-        }
-      }
-    }
-  }
-
   private void decideExecMode(List<Task<? extends Serializable>> rootTasks, Context ctx,
       GlobalLimitCtx globalLimitCtx)
     throws SemanticException {

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/CreateTableDesc.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/CreateTableDesc.java?rev=1377187&r1=1377186&r2=1377187&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/CreateTableDesc.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/CreateTableDesc.java Sat Aug 25 02:44:22 2012
@@ -20,12 +20,25 @@ package org.apache.hadoop.hive.ql.plan;
 
 import java.io.Serializable;
 import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.hive.common.JavaUtils;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.Order;
+import org.apache.hadoop.hive.ql.ErrorMsg;
 import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
+import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
+import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
+import org.apache.hadoop.hive.ql.parse.ParseUtils;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.serde2.SerDeUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
 
 /**
  * CreateTableDesc.
@@ -56,6 +69,8 @@ public class CreateTableDesc extends DDL
   Map<String, String> serdeProps;
   Map<String, String> tblProps;
   boolean ifNotExists;
+  List<String> skewedColNames;
+  List<List<String>> skewedColValues;
 
   public CreateTableDesc() {
   }
@@ -69,13 +84,13 @@ public class CreateTableDesc extends DDL
       String storageHandler,
       Map<String, String> serdeProps,
       Map<String, String> tblProps,
-      boolean ifNotExists) {
+      boolean ifNotExists, List<String> skewedColNames, List<List<String>> skewedColValues) {
 
     this(tableName, isExternal, cols, partCols,
         bucketCols, sortCols, numBuckets, fieldDelim, fieldEscape,
         collItemDelim, mapKeyDelim, lineDelim, comment, inputFormat,
         outputFormat, location, serName, storageHandler, serdeProps,
-        tblProps, ifNotExists);
+        tblProps, ifNotExists, skewedColNames, skewedColValues);
 
     this.databaseName = databaseName;
   }
@@ -89,7 +104,7 @@ public class CreateTableDesc extends DDL
       String storageHandler,
       Map<String, String> serdeProps,
       Map<String, String> tblProps,
-      boolean ifNotExists) {
+      boolean ifNotExists, List<String> skewedColNames, List<List<String>> skewedColValues) {
     this.tableName = tableName;
     this.isExternal = isExternal;
     this.bucketCols = new ArrayList<String>(bucketCols);
@@ -111,6 +126,8 @@ public class CreateTableDesc extends DDL
     this.serdeProps = serdeProps;
     this.tblProps = tblProps;
     this.ifNotExists = ifNotExists;
+    // guard against null: callers (e.g. ImportSemanticAnalyzer) may pass null lists
+    this.skewedColNames = (skewedColNames == null) ? new ArrayList<String>()
+        : new ArrayList<String>(skewedColNames);
+    this.skewedColValues = (skewedColValues == null) ? new ArrayList<List<String>>()
+        : new ArrayList<List<String>>(skewedColValues);
   }
 
   @Explain(displayName = "columns")
@@ -342,4 +359,219 @@ public class CreateTableDesc extends DDL
     this.tblProps = tblProps;
   }
 
+  /**
+   * @return the skewedColNames
+   */
+  public List<String> getSkewedColNames() {
+    return skewedColNames;
+  }
+
+  /**
+   * @param skewedColNames the skewedColNames to set
+   */
+  public void setSkewedColNames(ArrayList<String> skewedColNames) {
+    this.skewedColNames = skewedColNames;
+  }
+
+  /**
+   * @return the skewedColValues
+   */
+  public List<List<String>> getSkewedColValues() {
+    return skewedColValues;
+  }
+
+  /**
+   * @param skewedColValues the skewedColValues to set
+   */
+  public void setSkewedColValues(ArrayList<List<String>> skewedColValues) {
+    this.skewedColValues = skewedColValues;
+  }
+
+  public void validate()
+      throws SemanticException {
+
+    if ((this.getCols() == null) || (this.getCols().size() == 0)) {
+      // for now make sure that serde exists
+      if (StringUtils.isEmpty(this.getSerName())
+          || !SerDeUtils.shouldGetColsFromSerDe(this.getSerName())) {
+        throw new SemanticException(ErrorMsg.INVALID_TBL_DDL_SERDE.getMsg());
+      }
+      return;
+    }
+
+    if (this.getStorageHandler() == null) {
+      try {
+        Class<?> origin = Class.forName(this.getOutputFormat(), true,
+          JavaUtils.getClassLoader());
+        Class<? extends HiveOutputFormat> replaced = HiveFileFormatUtils
+          .getOutputFormatSubstitute(origin);
+        if (replaced == null) {
+          throw new SemanticException(ErrorMsg.INVALID_OUTPUT_FORMAT_TYPE
+            .getMsg());
+        }
+      } catch (ClassNotFoundException e) {
+        throw new SemanticException(ErrorMsg.INVALID_OUTPUT_FORMAT_TYPE.getMsg());
+      }
+    }
+
+    List<String> colNames = ParseUtils.validateColumnNameUniqueness(this.getCols());
+
+    if (this.getBucketCols() != null) {
+      // all columns in cluster and sort are valid columns
+      Iterator<String> bucketCols = this.getBucketCols().iterator();
+      while (bucketCols.hasNext()) {
+        String bucketCol = bucketCols.next();
+        boolean found = false;
+        Iterator<String> colNamesIter = colNames.iterator();
+        while (colNamesIter.hasNext()) {
+          String colName = colNamesIter.next();
+          if (bucketCol.equalsIgnoreCase(colName)) {
+            found = true;
+            break;
+          }
+        }
+        if (!found) {
+          throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg());
+        }
+      }
+    }
+
+    if (this.getSortCols() != null) {
+      // all columns in cluster and sort are valid columns
+      Iterator<Order> sortCols = this.getSortCols().iterator();
+      while (sortCols.hasNext()) {
+        String sortCol = sortCols.next().getCol();
+        boolean found = false;
+        Iterator<String> colNamesIter = colNames.iterator();
+        while (colNamesIter.hasNext()) {
+          String colName = colNamesIter.next();
+          if (sortCol.equalsIgnoreCase(colName)) {
+            found = true;
+            break;
+          }
+        }
+        if (!found) {
+          throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg());
+        }
+      }
+    }
+
+    if (this.getPartCols() != null) {
+      // there is no overlap between columns and partitioning columns
+      Iterator<FieldSchema> partColsIter = this.getPartCols().iterator();
+      while (partColsIter.hasNext()) {
+        FieldSchema fs = partColsIter.next();
+        String partCol = fs.getName();
+        PrimitiveObjectInspectorUtils.PrimitiveTypeEntry pte = PrimitiveObjectInspectorUtils
+            .getTypeEntryFromTypeName(fs.getType());
+        if (null == pte) {
+          throw new SemanticException(ErrorMsg.PARTITION_COLUMN_NON_PRIMITIVE.getMsg()
+              + " Found " + partCol + " of type: " + fs.getType());
+        }
+        Iterator<String> colNamesIter = colNames.iterator();
+        while (colNamesIter.hasNext()) {
+          String colName = BaseSemanticAnalyzer.unescapeIdentifier(colNamesIter.next());
+          if (partCol.equalsIgnoreCase(colName)) {
+            throw new SemanticException(
+                ErrorMsg.COLUMN_REPEATED_IN_PARTITIONING_COLS.getMsg());
+          }
+        }
+      }
+    }
+
+    validateSkewedInformation(colNames);
+  }
+
+  /**
+   * Validate skewed table creation.
+   *
+   * @param colNames
+   * @throws SemanticException
+   */
+  private void validateSkewedInformation(List<String> colNames)
+      throws SemanticException {
+    if (this.getSkewedColNames().size() > 0) {
+      /**
+       * all columns in skewed column name are valid columns
+       */
+      validateSkewedColNames(colNames);
+
+      /**
+       * find out duplicate skewed column name
+       */
+      validateSkewedColumnNameUniqueness(this.getSkewedColNames());
+
+      if (this.getSkewedColValues() == null || this.getSkewedColValues().size() == 0) {
+        /**
+         * Skewed column values are empty but skewed column names are not; something is wrong.
+         */
+        throw new SemanticException(
+            ErrorMsg.CREATE_SKEWED_TABLE_SKEWED_COL_NAME_VALUE_MISMATCH_2.getMsg());
+
+      } else {
+        /**
+         * each skewed value tuple must have as many entries as there are skewed column names
+         */
+        validateSkewedColNameValueNumberMatch();
+
+      }
+    } else if (this.getSkewedColValues().size() > 0) {
+      /**
+       * Skewed column names are empty but skewed column values are not; something is wrong.
+       */
+      throw new SemanticException(
+          ErrorMsg.CREATE_SKEWED_TABLE_SKEWED_COL_NAME_VALUE_MISMATCH_1.getMsg());
+    }
+  }
+
+  private void validateSkewedColNameValueNumberMatch()
+      throws SemanticException {
+    for (List<String> colValue : this.getSkewedColValues()) {
+      if (colValue.size() != this.getSkewedColNames().size()) {
+        throw new SemanticException(
+            ErrorMsg.CREATE_SKEWED_TABLE_SKEWED_COL_NAME_VALUE_MISMATCH_3.getMsg()
+                + this.getSkewedColNames().size() + " : "
+                + colValue.size());
+      }
+    }
+  }
+
+  private void validateSkewedColNames(List<String> colNames)
+      throws SemanticException {
+    // make a copy
+    List<String> copySkewedColNames = new ArrayList<String>(this.getSkewedColNames());
+    // remove valid columns
+    copySkewedColNames.removeAll(colNames);
+    if (copySkewedColNames.size() > 0) {
+      StringBuilder invalidColNames = new StringBuilder();
+      for (String name : copySkewedColNames) {
+        invalidColNames.append(name);
+        invalidColNames.append(" ");
+      }
+      throw new SemanticException(
+          ErrorMsg.CREATE_SKEWED_TABLE_INVALID_COLUMN.getMsg(invalidColNames.toString()));
+    }
+  }
+
+  /**
+   * Find duplicate skewed column names.
+   * @param names
+   * @throws SemanticException
+   */
+  private void validateSkewedColumnNameUniqueness(
+      List<String> names) throws SemanticException {
+
+    Set<String> lookup = new HashSet<String>();
+    for (String name : names) {
+      if (lookup.contains(name)) {
+        throw new SemanticException(ErrorMsg.CREATE_SKEWED_TABLE_DUPLICATE_COLUMN_NAMES
+            .getMsg(name));
+      } else {
+        lookup.add(name);
+      }
+    }
+  }
 }

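For illustration, validateSkewedColNameValueNumberMatch requires every skewed value tuple to carry exactly one value per skewed column name. A minimal HiveQL sketch with hypothetical table names (the failing form mirrors the negative test added below):

set hive.internal.ddl.list.bucketing.enable=true;
-- valid: two skewed column names, two entries in each value tuple
CREATE TABLE skew_ok (col1 STRING, col2 INT) SKEWED BY (col1, col2) ON (('s1',1), ('s3',3));
-- invalid: one skewed column name, but tuple (5,8) carries two values
CREATE TABLE skew_bad (key STRING, value STRING) SKEWED BY (key) ON ((1),(5,8),(6));
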
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeColumnDesc.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeColumnDesc.java?rev=1377187&r1=1377186&r2=1377187&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeColumnDesc.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeColumnDesc.java Sat Aug 25 02:44:22 2012
@@ -47,6 +47,11 @@ public class ExprNodeColumnDesc extends 
    */
   private boolean isPartitionColOrVirtualCol;
 
+  /**
+   * Is the column a skewed column
+   */
+  private boolean isSkewedCol;
+
   public ExprNodeColumnDesc() {
   }
 
@@ -66,6 +71,15 @@ public class ExprNodeColumnDesc extends 
     this.isPartitionColOrVirtualCol = isPartitionColOrVirtualCol;
   }
 
+  public ExprNodeColumnDesc(TypeInfo typeInfo, String column, String tabAlias,
+      boolean isPartitionColOrVirtualCol, boolean isSkewedCol) {
+    super(typeInfo);
+    this.column = column;
+    this.tabAlias = tabAlias;
+    this.isPartitionColOrVirtualCol = isPartitionColOrVirtualCol;
+    this.isSkewedCol = isSkewedCol;
+  }
+
   public String getColumn() {
     return column;
   }
@@ -110,7 +124,8 @@ public class ExprNodeColumnDesc extends 
 
   @Override
   public ExprNodeDesc clone() {
-    return new ExprNodeColumnDesc(typeInfo, column, tabAlias, isPartitionColOrVirtualCol);
+    return new ExprNodeColumnDesc(typeInfo, column, tabAlias, isPartitionColOrVirtualCol,
+        isSkewedCol);
   }
 
   @Override
@@ -127,4 +142,18 @@ public class ExprNodeColumnDesc extends 
     }
     return true;
   }
+
+  /**
+   * @return the isSkewedCol
+   */
+  public boolean isSkewedCol() {
+    return isSkewedCol;
+  }
+
+  /**
+   * @param isSkewedCol the isSkewedCol to set
+   */
+  public void setSkewedCol(boolean isSkewedCol) {
+    this.isSkewedCol = isSkewedCol;
+  }
 }

Added: hive/trunk/ql/src/test/queries/clientnegative/column_rename5.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientnegative/column_rename5.q?rev=1377187&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientnegative/column_rename5.q (added)
+++ hive/trunk/ql/src/test/queries/clientnegative/column_rename5.q Sat Aug 25 02:44:22 2012
@@ -0,0 +1,6 @@
+set hive.internal.ddl.list.bucketing.enable=true;
+
+CREATE TABLE skewedtable (key STRING, value STRING) SKEWED BY (key) ON (1,5,6);
+
+ALTER TABLE skewedtable CHANGE key key_new STRING;
+

Added: hive/trunk/ql/src/test/queries/clientnegative/create_skewed_table_col_name_value_no_mismatch.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientnegative/create_skewed_table_col_name_value_no_mismatch.q?rev=1377187&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientnegative/create_skewed_table_col_name_value_no_mismatch.q (added)
+++ hive/trunk/ql/src/test/queries/clientnegative/create_skewed_table_col_name_value_no_mismatch.q Sat Aug 25 02:44:22 2012
@@ -0,0 +1,3 @@
+set hive.internal.ddl.list.bucketing.enable=true;
+
+CREATE TABLE skewed_table (key STRING, value STRING) SKEWED BY (key) ON ((1),(5,8),(6));

Added: hive/trunk/ql/src/test/queries/clientnegative/create_skewed_table_dup_col_name.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientnegative/create_skewed_table_dup_col_name.q?rev=1377187&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientnegative/create_skewed_table_dup_col_name.q (added)
+++ hive/trunk/ql/src/test/queries/clientnegative/create_skewed_table_dup_col_name.q Sat Aug 25 02:44:22 2012
@@ -0,0 +1,3 @@
+set hive.internal.ddl.list.bucketing.enable=true;
+
+CREATE TABLE skewed_table (key STRING, value STRING) SKEWED BY (key,key) ON ((1),(5),(6));

Added: hive/trunk/ql/src/test/queries/clientnegative/create_skewed_table_failure_invalid_col_name.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientnegative/create_skewed_table_failure_invalid_col_name.q?rev=1377187&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientnegative/create_skewed_table_failure_invalid_col_name.q (added)
+++ hive/trunk/ql/src/test/queries/clientnegative/create_skewed_table_failure_invalid_col_name.q Sat Aug 25 02:44:22 2012
@@ -0,0 +1,4 @@
+set hive.internal.ddl.list.bucketing.enable=true;
+
+CREATE TABLE skewed_table (key STRING, value STRING) SKEWED BY (key_non) ON ((1),(5),(6));
+ 
\ No newline at end of file
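
This case triggers Error 10203: key_non is not a column of the table. A hypothetical sketch of the name validation, assuming the table's column names are available as a list:

    import java.util.List;
    import org.apache.hadoop.hive.ql.parse.SemanticException;

    // Hypothetical helper: every skewed column must exist in the schema.
    static void checkNamesExist(List<String> skewedColNames,
        List<String> tableColNames) throws SemanticException {
      for (String name : skewedColNames) {
        if (!tableColNames.contains(name)) {
          throw new SemanticException("Invalid skewed column name: " + name);
        }
      }
    }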

Added: hive/trunk/ql/src/test/queries/clientpositive/create_skewed_table.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/create_skewed_table.q?rev=1377187&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/create_skewed_table.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/create_skewed_table.q Sat Aug 25 02:44:22 2012
@@ -0,0 +1,10 @@
+set hive.internal.ddl.list.bucketing.enable=true;
+CREATE TABLE list_bucket_single (key STRING, value STRING) SKEWED BY (key) ON (1,5,6);
+CREATE TABLE list_bucket_single_2 (key STRING, value STRING) SKEWED BY (key) ON ((1),(5),(6));
+CREATE TABLE list_bucket_multiple (col1 STRING, col2 int, col3 STRING) SKEWED BY (col1, col2) ON (('s1',1), ('s3',3), ('s13',13), ('s78',78));
+describe formatted list_bucket_single_2;
+describe formatted list_bucket_single;
+describe formatted list_bucket_multiple;
+drop table list_bucket_single;
+drop table list_bucket_multiple;
+drop table list_bucket_single_2;
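
Note that list_bucket_single and list_bucket_single_2 describe identically below (Skewed Values: [[1], [5], [6]]): for a single skewed column, ON (1,5,6) is shorthand for ON ((1),(5),(6)). A hypothetical sketch of the normalization this implies, with each scalar value promoted to a one-element tuple:

    import java.util.ArrayList;
    import java.util.Collections;
    import java.util.List;

    // Hypothetical helper: promote scalar skew values to singleton tuples
    // so both syntactic forms store the same metastore representation.
    static List<List<String>> normalizeScalars(List<String> scalarValues) {
      List<List<String>> tuples = new ArrayList<List<String>>();
      for (String v : scalarValues) {
        tuples.add(Collections.singletonList(v));
      }
      return tuples;
    }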

Added: hive/trunk/ql/src/test/results/clientnegative/column_rename5.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientnegative/column_rename5.q.out?rev=1377187&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientnegative/column_rename5.q.out (added)
+++ hive/trunk/ql/src/test/results/clientnegative/column_rename5.q.out Sat Aug 25 02:44:22 2012
@@ -0,0 +1,6 @@
+PREHOOK: query: CREATE TABLE skewedtable (key STRING, value STRING) SKEWED BY (key) ON (1,5,6)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE skewedtable (key STRING, value STRING) SKEWED BY (key) ON (1,5,6)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@skewedtable
+FAILED: SemanticException key is a skewed column. It's not allowed to rename skewed column.

Added: hive/trunk/ql/src/test/results/clientnegative/create_skewed_table_col_name_value_no_mismatch.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientnegative/create_skewed_table_col_name_value_no_mismatch.q.out?rev=1377187&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientnegative/create_skewed_table_col_name_value_no_mismatch.q.out (added)
+++ hive/trunk/ql/src/test/results/clientnegative/create_skewed_table_col_name_value_no_mismatch.q.out Sat Aug 25 02:44:22 2012
@@ -0,0 +1 @@
+FAILED: SemanticException [Error 10206]: The number of skewed column names and the number of skewed column values are different: 1 : 2

Added: hive/trunk/ql/src/test/results/clientnegative/create_skewed_table_dup_col_name.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientnegative/create_skewed_table_dup_col_name.q.out?rev=1377187&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientnegative/create_skewed_table_dup_col_name.q.out (added)
+++ hive/trunk/ql/src/test/results/clientnegative/create_skewed_table_dup_col_name.q.out Sat Aug 25 02:44:22 2012
@@ -0,0 +1 @@
+FAILED: SemanticException [Error 10202]: Duplicate skewed column name: key

Added: hive/trunk/ql/src/test/results/clientnegative/create_skewed_table_failure_invalid_col_name.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientnegative/create_skewed_table_failure_invalid_col_name.q.out?rev=1377187&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientnegative/create_skewed_table_failure_invalid_col_name.q.out (added)
+++ hive/trunk/ql/src/test/results/clientnegative/create_skewed_table_failure_invalid_col_name.q.out Sat Aug 25 02:44:22 2012
@@ -0,0 +1 @@
+FAILED: SemanticException [Error 10203]: Invalid skewed column name: key_non 

Added: hive/trunk/ql/src/test/results/clientpositive/create_skewed_table.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/create_skewed_table.q.out?rev=1377187&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/create_skewed_table.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/create_skewed_table.q.out Sat Aug 25 02:44:22 2012
@@ -0,0 +1,133 @@
+PREHOOK: query: CREATE TABLE list_bucket_single (key STRING, value STRING) SKEWED BY (key) ON (1,5,6)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE list_bucket_single (key STRING, value STRING) SKEWED BY (key) ON (1,5,6)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@list_bucket_single
+PREHOOK: query: CREATE TABLE list_bucket_single_2 (key STRING, value STRING) SKEWED BY (key) ON ((1),(5),(6))
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE list_bucket_single_2 (key STRING, value STRING) SKEWED BY (key) ON ((1),(5),(6))
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@list_bucket_single_2
+PREHOOK: query: CREATE TABLE list_bucket_multiple (col1 STRING, col2 int, col3 STRING) SKEWED BY (col1, col2) ON (('s1',1), ('s3',3), ('s13',13), ('s78',78))
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE list_bucket_multiple (col1 STRING, col2 int, col3 STRING) SKEWED BY (col1, col2) ON (('s1',1), ('s3',3), ('s13',13), ('s78',78))
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@list_bucket_multiple
+PREHOOK: query: describe formatted list_bucket_single_2
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe formatted list_bucket_single_2
+POSTHOOK: type: DESCTABLE
+# col_name            	data_type           	comment             
+	 	 
+key                 	string              	None                
+value               	string              	None                
+	 	 
+# Detailed Table Information	 	 
+Database:           	default             	 
+#### A masked pattern was here ####
+Protect Mode:       	None                	 
+Retention:          	0                   	 
+#### A masked pattern was here ####
+Table Type:         	MANAGED_TABLE       	 
+Table Parameters:	 	 
+#### A masked pattern was here ####
+	 	 
+# Storage Information	 	 
+SerDe Library:      	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe	 
+InputFormat:        	org.apache.hadoop.mapred.TextInputFormat	 
+OutputFormat:       	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat	 
+Compressed:         	No                  	 
+Num Buckets:        	-1                  	 
+Bucket Columns:     	[]                  	 
+Sort Columns:       	[]                  	 
+Skewed Columns:     	[key]               	 
+Skewed Values:      	[[1], [5], [6]]     	 
+Storage Desc Params:	 	 
+	serialization.format	1                   
+PREHOOK: query: describe formatted list_bucket_single
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe formatted list_bucket_single
+POSTHOOK: type: DESCTABLE
+# col_name            	data_type           	comment             
+	 	 
+key                 	string              	None                
+value               	string              	None                
+	 	 
+# Detailed Table Information	 	 
+Database:           	default             	 
+#### A masked pattern was here ####
+Protect Mode:       	None                	 
+Retention:          	0                   	 
+#### A masked pattern was here ####
+Table Type:         	MANAGED_TABLE       	 
+Table Parameters:	 	 
+#### A masked pattern was here ####
+	 	 
+# Storage Information	 	 
+SerDe Library:      	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe	 
+InputFormat:        	org.apache.hadoop.mapred.TextInputFormat	 
+OutputFormat:       	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat	 
+Compressed:         	No                  	 
+Num Buckets:        	-1                  	 
+Bucket Columns:     	[]                  	 
+Sort Columns:       	[]                  	 
+Skewed Columns:     	[key]               	 
+Skewed Values:      	[[1], [5], [6]]     	 
+Storage Desc Params:	 	 
+	serialization.format	1                   
+PREHOOK: query: describe formatted list_bucket_multiple
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe formatted list_bucket_multiple
+POSTHOOK: type: DESCTABLE
+# col_name            	data_type           	comment             
+	 	 
+col1                	string              	None                
+col2                	int                 	None                
+col3                	string              	None                
+	 	 
+# Detailed Table Information	 	 
+Database:           	default             	 
+#### A masked pattern was here ####
+Protect Mode:       	None                	 
+Retention:          	0                   	 
+#### A masked pattern was here ####
+Table Type:         	MANAGED_TABLE       	 
+Table Parameters:	 	 
+#### A masked pattern was here ####
+	 	 
+# Storage Information	 	 
+SerDe Library:      	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe	 
+InputFormat:        	org.apache.hadoop.mapred.TextInputFormat	 
+OutputFormat:       	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat	 
+Compressed:         	No                  	 
+Num Buckets:        	-1                  	 
+Bucket Columns:     	[]                  	 
+Sort Columns:       	[]                  	 
+Skewed Columns:     	[col1, col2]        	 
+Skewed Values:      	[['s1', 1], ['s3', 3], ['s13', 13], ['s78', 78]]	 
+Storage Desc Params:	 	 
+	serialization.format	1                   
+PREHOOK: query: drop table list_bucket_single
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@list_bucket_single
+PREHOOK: Output: default@list_bucket_single
+POSTHOOK: query: drop table list_bucket_single
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@list_bucket_single
+POSTHOOK: Output: default@list_bucket_single
+PREHOOK: query: drop table list_bucket_multiple
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@list_bucket_multiple
+PREHOOK: Output: default@list_bucket_multiple
+POSTHOOK: query: drop table list_bucket_multiple
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@list_bucket_multiple
+POSTHOOK: Output: default@list_bucket_multiple
+PREHOOK: query: drop table list_bucket_single_2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@list_bucket_single_2
+PREHOOK: Output: default@list_bucket_single_2
+POSTHOOK: query: drop table list_bucket_single_2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@list_bucket_single_2
+POSTHOOK: Output: default@list_bucket_single_2