Posted to commits@hive.apache.org by na...@apache.org on 2012/12/05 12:59:26 UTC

svn commit: r1417374 [2/11] - in /hive/trunk: common/src/java/org/apache/hadoop/hive/common/ common/src/java/org/apache/hadoop/hive/conf/ conf/ ql/src/java/org/apache/hadoop/hive/ql/ ql/src/java/org/apache/hadoop/hive/ql/exec/ ql/src/java/org/apache/ha...

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java?rev=1417374&r1=1417373&r2=1417374&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java Wed Dec  5 11:59:15 2012
@@ -35,7 +35,6 @@ import org.antlr.runtime.tree.Tree;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.metastore.MetaStoreUtils;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.Order;
 import org.apache.hadoop.hive.ql.Context;
@@ -56,8 +55,9 @@ import org.apache.hadoop.hive.ql.metadat
 import org.apache.hadoop.hive.ql.metadata.InvalidTableException;
 import org.apache.hadoop.hive.ql.metadata.Partition;
 import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.optimizer.listbucketingpruner.ListBucketingPrunerUtils;
+import org.apache.hadoop.hive.ql.plan.ListBucketingCtx;
 import org.apache.hadoop.hive.ql.plan.PlanUtils;
-import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
 import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe;
@@ -915,6 +915,28 @@ public abstract class BaseSemanticAnalyz
   }
 
   /**
+   * Construct list bucketing context.
+   *
+   * @param skewedColNames skewed column names
+   * @param skewedValues skewed column value tuples
+   * @param skewedColValueLocationMaps map from skewed value tuple to location
+   * @param isStoredAsSubDirectories whether the skewed data is stored as sub-directories
+   * @param conf Hive configuration
+   * @return the constructed list bucketing context
+   */
+  protected ListBucketingCtx constructListBucketingCtx(List<String> skewedColNames,
+      List<List<String>> skewedValues, Map<List<String>, String> skewedColValueLocationMaps,
+      boolean isStoredAsSubDirectories, HiveConf conf) {
+    ListBucketingCtx lbCtx = new ListBucketingCtx();
+    lbCtx.setSkewedColNames(skewedColNames);
+    lbCtx.setSkewedColValues(skewedValues);
+    lbCtx.setLbLocationMap(skewedColValueLocationMaps);
+    lbCtx.setStoredAsSubDirectories(isStoredAsSubDirectories);
+    lbCtx.setDefaultKey(ListBucketingPrunerUtils.HIVE_LIST_BUCKETING_DEFAULT_KEY);
+    lbCtx.setDefaultDirName(ListBucketingPrunerUtils.HIVE_LIST_BUCKETING_DEFAULT_DIR_NAME);
+    return lbCtx;
+  }
+
+  /**
    * Given a ASTNode, return list of values.
    *
    * use case:
@@ -1036,4 +1058,5 @@ public abstract class BaseSemanticAnalyz
     }
     return storedAsDirs;
   }
+
 }
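
The new constructListBucketingCtx helper centralizes how a ListBucketingCtx is built from skewed-column metadata. A minimal sketch of a call site, mirroring the call added to DDLSemanticAnalyzer later in this commit (a Table named tblObj and a HiveConf named conf are assumed to be in scope inside a BaseSemanticAnalyzer subclass):

    // Sketch only: tblObj and conf are assumed to be available in the analyzer subclass.
    ListBucketingCtx lbCtx = constructListBucketingCtx(
        tblObj.getSkewedColNames(),
        tblObj.getSkewedColValues(),
        tblObj.getSkewedColValueLocationMaps(),
        tblObj.isStoredAsSubDirectories(),
        conf);
    // lbCtx now carries the skewed column names/values, the value-to-location map, and the
    // default key/dir constants taken from ListBucketingPrunerUtils.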

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java?rev=1417374&r1=1417373&r2=1417374&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java Wed Dec  5 11:59:15 2012
@@ -18,13 +18,8 @@
 
 package org.apache.hadoop.hive.ql.parse;
 
-import static org.apache.hadoop.hive.ql.parse.HiveParser.TOK_CASCADE;
-import static org.apache.hadoop.hive.ql.parse.HiveParser.TOK_DATABASECOMMENT;
 import static org.apache.hadoop.hive.ql.parse.HiveParser.TOK_DATABASELOCATION;
 import static org.apache.hadoop.hive.ql.parse.HiveParser.TOK_DATABASEPROPERTIES;
-import static org.apache.hadoop.hive.ql.parse.HiveParser.TOK_IFEXISTS;
-import static org.apache.hadoop.hive.ql.parse.HiveParser.TOK_IFNOTEXISTS;
-import static org.apache.hadoop.hive.ql.parse.HiveParser.TOK_SHOWDATABASES;
 
 import java.io.Serializable;
 import java.net.URI;
@@ -94,6 +89,7 @@ import org.apache.hadoop.hive.ql.plan.Dr
 import org.apache.hadoop.hive.ql.plan.FetchWork;
 import org.apache.hadoop.hive.ql.plan.GrantDesc;
 import org.apache.hadoop.hive.ql.plan.GrantRevokeRoleDDL;
+import org.apache.hadoop.hive.ql.plan.ListBucketingCtx;
 import org.apache.hadoop.hive.ql.plan.LoadTableDesc;
 import org.apache.hadoop.hive.ql.plan.LockTableDesc;
 import org.apache.hadoop.hive.ql.plan.MoveWork;
@@ -1217,6 +1213,7 @@ public class DDLSemanticAnalyzer extends
     Path oldTblPartLoc = null;
     Path newTblPartLoc = null;
     Table tblObj = null;
+    ListBucketingCtx lbCtx = null;
 
     try {
       tblObj = db.getTable(tableName);
@@ -1258,6 +1255,9 @@ public class DDLSemanticAnalyzer extends
               .getAuthority(), partPath.toUri().getPath());
 
           oldTblPartLoc = partPath;
+
+          lbCtx = constructListBucketingCtx(part.getSkewedColNames(), part.getSkewedColValues(),
+              part.getSkewedColValueLocationMaps(), part.isStoredAsSubDirectories(), conf);
         }
       } else {
         inputFormatClass = tblObj.getInputFormatClass();
@@ -1266,6 +1266,9 @@ public class DDLSemanticAnalyzer extends
         // input and output are the same
         oldTblPartLoc = tblObj.getPath();
         newTblPartLoc = tblObj.getPath();
+
+        lbCtx = constructListBucketingCtx(tblObj.getSkewedColNames(), tblObj.getSkewedColValues(),
+            tblObj.getSkewedColValueLocationMaps(), tblObj.isStoredAsSubDirectories(), conf);
       }
 
       // throw a HiveException for non-rcfile.
@@ -1290,6 +1293,8 @@ public class DDLSemanticAnalyzer extends
 
       mergeDesc.setInputDir(inputDir);
 
+      mergeDesc.setLbCtx(lbCtx);
+
       addInputsOutputsAlterTable(tableName, partSpec);
       DDLWork ddlWork = new DDLWork(getInputs(), getOutputs(), mergeDesc);
       ddlWork.setNeedLock(true);
@@ -1299,6 +1304,7 @@ public class DDLSemanticAnalyzer extends
       mergeDesc.setOutputDir(queryTmpdir);
       LoadTableDesc ltd = new LoadTableDesc(queryTmpdir, queryTmpdir, tblDesc,
           partSpec == null ? new HashMap<String, String>() : partSpec);
+      ltd.setLbCtx(lbCtx);
       Task<MoveWork> moveTsk = TaskFactory.get(new MoveWork(null, null, ltd, null, false),
           conf);
       mergeTask.addDependentTask(moveTsk);
@@ -2711,9 +2717,6 @@ public class DDLSemanticAnalyzer extends
      * hive.internal.ddl.list.bucketing.enable set to false.
      */
     HiveConf hiveConf = SessionState.get().getConf();
-    if (!(hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_INTERNAL_DDL_LIST_BUCKETING_ENABLE))) {
-      throw new SemanticException(ErrorMsg.HIVE_INTERNAL_DDL_LIST_BUCKETING_DISABLED.getMsg());
-    }
 
     String tableName = getUnescapedName((ASTNode) ast.getChild(0));
     Table tab = null;
@@ -2863,9 +2866,6 @@ public class DDLSemanticAnalyzer extends
      * hive.internal.ddl.list.bucketing.enable set to false.
      */
     HiveConf hiveConf = SessionState.get().getConf();
-    if (!(hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_INTERNAL_DDL_LIST_BUCKETING_ENABLE))) {
-      throw new SemanticException(ErrorMsg.HIVE_INTERNAL_DDL_LIST_BUCKETING_DISABLED.getMsg());
-    }
     /**
      * Retrieve mappings from parser
      */

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1417374&r1=1417373&r2=1417374&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Wed Dec  5 11:59:15 2012
@@ -141,6 +141,7 @@ import org.apache.hadoop.hive.ql.plan.Jo
 import org.apache.hadoop.hive.ql.plan.LateralViewForwardDesc;
 import org.apache.hadoop.hive.ql.plan.LateralViewJoinDesc;
 import org.apache.hadoop.hive.ql.plan.LimitDesc;
+import org.apache.hadoop.hive.ql.plan.ListBucketingCtx;
 import org.apache.hadoop.hive.ql.plan.LoadFileDesc;
 import org.apache.hadoop.hive.ql.plan.LoadTableDesc;
 import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
@@ -4492,6 +4493,7 @@ public class SemanticAnalyzer extends Ba
     DynamicPartitionCtx dpCtx = null;
     LoadTableDesc ltd = null;
     boolean holdDDLTime = checkHoldDDLTime(qb);
+    ListBucketingCtx lbCtx = null;
 
     switch (dest_type.intValue()) {
     case QBMetaData.DEST_TABLE: {
@@ -4579,6 +4581,10 @@ public class SemanticAnalyzer extends Ba
       currentTableId = destTableId;
       destTableId++;
 
+      lbCtx = constructListBucketingCtx(dest_tab.getSkewedColNames(),
+          dest_tab.getSkewedColValues(), dest_tab.getSkewedColValueLocationMaps(),
+          dest_tab.isStoredAsSubDirectories(), conf);
+
       // Create the work for moving the table
       // NOTE: specify Dynamic partitions in dest_tab for WriteEntity
       if (!isNonNativeTable) {
@@ -4586,6 +4592,7 @@ public class SemanticAnalyzer extends Ba
             table_desc, dpCtx);
         ltd.setReplace(!qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(),
             dest_tab.getTableName()));
+        ltd.setLbCtx(lbCtx);
 
         if (holdDDLTime) {
           LOG.info("this query will not update transient_lastDdlTime!");
@@ -4655,10 +4662,14 @@ public class SemanticAnalyzer extends Ba
       currentTableId = destTableId;
       destTableId++;
 
+      lbCtx = constructListBucketingCtx(dest_part.getSkewedColNames(),
+          dest_part.getSkewedColValues(), dest_part.getSkewedColValueLocationMaps(),
+          dest_part.isStoredAsSubDirectories(), conf);
       ltd = new LoadTableDesc(queryTmpdir, ctx.getExternalTmpFileURI(dest_path.toUri()),
           table_desc, dest_part.getSpec());
       ltd.setReplace(!qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(),
           dest_tab.getTableName()));
+      ltd.setLbCtx(lbCtx);
 
       if (holdDDLTime) {
         try {
@@ -4832,6 +4843,13 @@ public class SemanticAnalyzer extends Ba
       rsCtx.getPartnCols(),
       dpCtx);
 
+    /* Set List Bucketing context. */
+    if (lbCtx != null) {
+      lbCtx.processRowSkewedIndex(fsRS);
+      lbCtx.calculateSkewedValueSubDirList();
+    }
+    fileSinkDesc.setLbCtx(lbCtx);
+
     // set the stats publishing/aggregating key prefix
     // the same as directory name. The directory name
     // can be changed in the optimizer  but the key should not be changed
@@ -4865,7 +4883,6 @@ public class SemanticAnalyzer extends Ba
     return output;
   }
 
-
   /**
    * Generate the conversion SelectOperator that converts the columns into the
    * types that are expected by the table_desc.
@@ -8705,9 +8722,6 @@ public class SemanticAnalyzer extends Ba
          * hive.internal.ddl.list.bucketing.enable set to false.
          */
         HiveConf hiveConf = SessionState.get().getConf();
-        if (!(hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_INTERNAL_DDL_LIST_BUCKETING_ENABLE))) {
-          throw new SemanticException(ErrorMsg.HIVE_INTERNAL_DDL_LIST_BUCKETING_DISABLED.getMsg());
-        }
 
         // skewed column names
         skewedColNames = analyzeSkewedTablDDLColNames(skewedColNames, child);

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java?rev=1417374&r1=1417373&r2=1417374&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java Wed Dec  5 11:59:15 2012
@@ -53,6 +53,7 @@ public class ConditionalResolverMergeFil
     List<Task<? extends Serializable>> listTasks;
     private String dir;
     private DynamicPartitionCtx dpCtx; // merge task could be after dynamic partition insert
+    private ListBucketingCtx lbCtx;
 
     public ConditionalResolverMergeFilesCtx() {
     }
@@ -103,6 +104,20 @@ public class ConditionalResolverMergeFil
     public void setDPCtx(DynamicPartitionCtx dp) {
       dpCtx = dp;
     }
+
+    /**
+     * @return the lbCtx
+     */
+    public ListBucketingCtx getLbCtx() {
+      return lbCtx;
+    }
+
+    /**
+     * @param lbCtx the lbCtx to set
+     */
+    public void setLbCtx(ListBucketingCtx lbCtx) {
+      this.lbCtx = lbCtx;
+    }
   }
 
   public List<Task<? extends Serializable>> getTasks(HiveConf conf,
@@ -131,104 +146,39 @@ public class ConditionalResolverMergeFil
         // For each dynamic partition, check if it needs to be merged.
         MapredWork work = (MapredWork) mrTask.getWork();
 
+        int lbLevel = (ctx.getLbCtx() == null) ? 0 : ctx.getLbCtx().calculateListBucketingLevel();
+
+        /**
+         * To keep the code easy to read, the logic below is structured as follows:
+         * 1. the first if clause separates dynamic partitions from static partitions
+         * 2. within the static-partition branch, list bucketing is separated from
+         *    non-list bucketing.
+         * An alternative would be to fold static partitions with list bucketing into the
+         * dynamic-partition branch, but the two cases would still need to be told apart,
+         * since one uses lbLevel and the other lbLevel + numDPCols.
+         * The first structure was chosen mainly for readability.
+         */
         // Dynamic partition: replace input path (root to dp paths) with dynamic partition
         // input paths.
         if (dpCtx != null &&  dpCtx.getNumDPCols() > 0) {
+          int numDPCols = dpCtx.getNumDPCols();
+          int dpLbLevel = numDPCols + lbLevel;
 
-          // get list of dynamic partitions
-          FileStatus[] status = Utilities.getFileStatusRecurse(dirPath,
-              dpCtx.getNumDPCols(), inpFs);
-
-          // cleanup pathToPartitionInfo
-          Map<String, PartitionDesc> ptpi = work.getPathToPartitionInfo();
-          assert ptpi.size() == 1;
-          String path = ptpi.keySet().iterator().next();
-          TableDesc tblDesc = ptpi.get(path).getTableDesc();
-          ptpi.remove(path); // the root path is not useful anymore
-
-          // cleanup pathToAliases
-          Map<String, ArrayList<String>> pta = work.getPathToAliases();
-          assert pta.size() == 1;
-          path = pta.keySet().iterator().next();
-          ArrayList<String> aliases = pta.get(path);
-          pta.remove(path); // the root path is not useful anymore
-
-          // populate pathToPartitionInfo and pathToAliases w/ DP paths
-          long totalSz = 0;
-          boolean doMerge = false;
-          // list of paths that don't need to merge but need to move to the dest location
-          List<String> toMove = new ArrayList<String>();
-          for (int i = 0; i < status.length; ++i) {
-            long len = getMergeSize(inpFs, status[i].getPath(), avgConditionSize);
-            if (len >= 0) {
-              doMerge = true;
-              totalSz += len;
-              Map<String, String> fullPartSpec = new LinkedHashMap<String, String>(
-                  dpCtx.getPartSpec());
-              Warehouse.makeSpecFromName(fullPartSpec, status[i].getPath());
-              PartitionDesc pDesc = new PartitionDesc(tblDesc, (LinkedHashMap) fullPartSpec);
-
-              work.resolveDynamicPartitionMerge(conf, status[i].getPath(), tblDesc,
-                  aliases, pDesc);
-            } else {
-              toMove.add(status[i].getPath().toString());
-            }
-          }
-          if (doMerge) {
-            // add the merge MR job
-            setupMapRedWork(conf, work, trgtSize, totalSz);
-
-            // add the move task for those partitions that do not need merging
-            if (toMove.size() > 0) {
-          	  // modify the existing move task as it is already in the candidate running tasks
-
-          	  // running the MoveTask and MR task in parallel may
-              // cause the mvTask write to /ds=1 and MR task write
-              // to /ds=1_1 for the same partition.
-              // make the MoveTask as the child of the MR Task
-          	  resTsks.add(mrAndMvTask);
-
-          	  MoveWork mvWork = (MoveWork) mvTask.getWork();
-          	  LoadFileDesc lfd = mvWork.getLoadFileWork();
-
-          	  String targetDir = lfd.getTargetDir();
-          	  List<String> targetDirs = new ArrayList<String>(toMove.size());
-          	  int numDPCols = dpCtx.getNumDPCols();
-
-              for (int i = 0; i < toMove.size(); i++) {
-                String toMoveStr = toMove.get(i);
-                if (toMoveStr.endsWith(Path.SEPARATOR)) {
-                  toMoveStr = toMoveStr.substring(0, toMoveStr.length() - 1);
-                }
-                String [] moveStrSplits = toMoveStr.split(Path.SEPARATOR);
-                int dpIndex = moveStrSplits.length - numDPCols;
-                String target = targetDir;
-                while (dpIndex < moveStrSplits.length) {
-                  target = target + Path.SEPARATOR + moveStrSplits[dpIndex];
-                  dpIndex ++;
-                }
-
-                targetDirs.add(target);
-              }
-
-          	  LoadMultiFilesDesc lmfd = new LoadMultiFilesDesc(toMove,
-          	      targetDirs, lfd.getIsDfsDir(), lfd.getColumns(), lfd.getColumnTypes());
-          	  mvWork.setLoadFileWork(null);
-          	  mvWork.setLoadTableWork(null);
-          	  mvWork.setMultiFilesDesc(lmfd);
-          	} else {
-          	  resTsks.add(mrTask);
-          	}
-          } else { // add the move task
-            resTsks.add(mvTask);
-          }
+          generateActualTasks(conf, resTsks, trgtSize, avgConditionSize, mvTask, mrTask,
+              mrAndMvTask, dirPath, inpFs, ctx, work, dpLbLevel);
         } else { // no dynamic partitions
-          long totalSz = getMergeSize(inpFs, dirPath, avgConditionSize);
-          if (totalSz >= 0) { // add the merge job
-            setupMapRedWork(conf, work, trgtSize, totalSz);
-            resTsks.add(mrTask);
-          } else { // don't need to merge, add the move job
-            resTsks.add(mvTask);
+          if (lbLevel == 0) {
+            // static partition without list bucketing
+            long totalSz = getMergeSize(inpFs, dirPath, avgConditionSize);
+            if (totalSz >= 0) { // add the merge job
+              setupMapRedWork(conf, work, trgtSize, totalSz);
+              resTsks.add(mrTask);
+            } else { // don't need to merge, add the move job
+              resTsks.add(mvTask);
+            }
+          } else {
+            // static partition and list bucketing
+            generateActualTasks(conf, resTsks, trgtSize, avgConditionSize, mvTask, mrTask,
+                mrAndMvTask, dirPath, inpFs, ctx, work, lbLevel);
           }
         }
       } else {
@@ -244,6 +194,131 @@ public class ConditionalResolverMergeFil
     return resTsks;
   }
 
+  /**
+   * This method generates the actual tasks for a conditional task. The result can be
+   * 1. a move task only
+   * 2. a merge task only
+   * 3. a merge task followed by a move task.
+   * Previously this applied to dynamic partitions only, since a static partition never
+   * needed #3. That changes with list bucketing: a static partition defined as skewed
+   * and stored as directories no longer keeps all files in one directory, but creates a
+   * sub-directory per skewed value plus a default directory, so #3 is needed for static
+   * partitions as well. The logic is therefore factored into a method shared by SP and DP.
+   * @param conf
+   * @param resTsks
+   * @param trgtSize
+   * @param avgConditionSize
+   * @param mvTask
+   * @param mrTask
+   * @param mrAndMvTask
+   * @param dirPath
+   * @param inpFs
+   * @param ctx
+   * @param work
+   * @param dpLbLevel
+   * @throws IOException
+   */
+  private void generateActualTasks(HiveConf conf, List<Task<? extends Serializable>> resTsks,
+      long trgtSize, long avgConditionSize, Task<? extends Serializable> mvTask,
+      Task<? extends Serializable> mrTask, Task<? extends Serializable> mrAndMvTask, Path dirPath,
+      FileSystem inpFs, ConditionalResolverMergeFilesCtx ctx, MapredWork work, int dpLbLevel)
+      throws IOException {
+    DynamicPartitionCtx dpCtx = ctx.getDPCtx();
+    // get list of dynamic partitions
+    FileStatus[] status = Utilities.getFileStatusRecurse(dirPath, dpLbLevel, inpFs);
+
+    // cleanup pathToPartitionInfo
+    Map<String, PartitionDesc> ptpi = work.getPathToPartitionInfo();
+    assert ptpi.size() == 1;
+    String path = ptpi.keySet().iterator().next();
+    PartitionDesc partDesc = ptpi.get(path);
+    TableDesc tblDesc = partDesc.getTableDesc();
+    ptpi.remove(path); // the root path is not useful anymore
+
+    // cleanup pathToAliases
+    Map<String, ArrayList<String>> pta = work.getPathToAliases();
+    assert pta.size() == 1;
+    path = pta.keySet().iterator().next();
+    ArrayList<String> aliases = pta.get(path);
+    pta.remove(path); // the root path is not useful anymore
+
+    // populate pathToPartitionInfo and pathToAliases w/ DP paths
+    long totalSz = 0;
+    boolean doMerge = false;
+    // list of paths that don't need to merge but need to move to the dest location
+    List<String> toMove = new ArrayList<String>();
+    for (int i = 0; i < status.length; ++i) {
+      long len = getMergeSize(inpFs, status[i].getPath(), avgConditionSize);
+      if (len >= 0) {
+        doMerge = true;
+        totalSz += len;
+        PartitionDesc pDesc = (dpCtx != null) ? generateDPFullPartSpec(dpCtx, status, tblDesc, i)
+            : partDesc;
+        work.resolveDynamicPartitionStoredAsSubDirsMerge(conf, status[i].getPath(), tblDesc,
+            aliases, pDesc);
+      } else {
+        toMove.add(status[i].getPath().toString());
+      }
+    }
+    if (doMerge) {
+      // add the merge MR job
+      setupMapRedWork(conf, work, trgtSize, totalSz);
+
+      // add the move task for those partitions that do not need merging
+      if (toMove.size() > 0) {
+        // modify the existing move task as it is already in the candidate running tasks
+
+        // running the MoveTask and MR task in parallel may
+        // cause the mvTask write to /ds=1 and MR task write
+        // to /ds=1_1 for the same partition.
+        // make the MoveTask as the child of the MR Task
+        resTsks.add(mrAndMvTask);
+
+        MoveWork mvWork = (MoveWork) mvTask.getWork();
+        LoadFileDesc lfd = mvWork.getLoadFileWork();
+
+        String targetDir = lfd.getTargetDir();
+        List<String> targetDirs = new ArrayList<String>(toMove.size());
+
+        for (int i = 0; i < toMove.size(); i++) {
+          String toMoveStr = toMove.get(i);
+          if (toMoveStr.endsWith(Path.SEPARATOR)) {
+            toMoveStr = toMoveStr.substring(0, toMoveStr.length() - 1);
+          }
+          String[] moveStrSplits = toMoveStr.split(Path.SEPARATOR);
+          int dpIndex = moveStrSplits.length - dpLbLevel;
+          String target = targetDir;
+          while (dpIndex < moveStrSplits.length) {
+            target = target + Path.SEPARATOR + moveStrSplits[dpIndex];
+            dpIndex++;
+          }
+
+          targetDirs.add(target);
+        }
+
+        LoadMultiFilesDesc lmfd = new LoadMultiFilesDesc(toMove,
+            targetDirs, lfd.getIsDfsDir(), lfd.getColumns(), lfd.getColumnTypes());
+        mvWork.setLoadFileWork(null);
+        mvWork.setLoadTableWork(null);
+        mvWork.setMultiFilesDesc(lmfd);
+      } else {
+        resTsks.add(mrTask);
+      }
+    } else { // add the move task
+      resTsks.add(mvTask);
+    }
+  }
+
+  private PartitionDesc generateDPFullPartSpec(DynamicPartitionCtx dpCtx, FileStatus[] status,
+      TableDesc tblDesc, int i) {
+    Map<String, String> fullPartSpec = new LinkedHashMap<String, String>(
+        dpCtx.getPartSpec());
+    Warehouse.makeSpecFromName(fullPartSpec, status[i].getPath());
+    PartitionDesc pDesc = new PartitionDesc(tblDesc, (LinkedHashMap) fullPartSpec);
+    return pDesc;
+  }
+
   private void setupMapRedWork(HiveConf conf, MapredWork work, long targetSize, long totalSize) {
     if (work.getNumReduceTasks() > 0) {
       int maxReducers = conf.getIntVar(HiveConf.ConfVars.MAXREDUCERS);
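
A hedged worked example of the level arithmetic used in getTasks()/generateActualTasks(), with assumed column counts: for a table partitioned by (ds, hr) and skewed by (key, value) stored as directories, the resolver descends numDPCols + lbLevel directory levels when listing merge candidates.

    // Illustration only; the column counts are assumptions, not taken from a specific table.
    int numDPCols = 2;                    // dynamic partition columns, e.g. ds and hr
    int lbLevel   = 2;                    // skewed columns stored as directories: key, value
    int dpLbLevel = numDPCols + lbLevel;  // 4 levels: ds=.../hr=.../key=.../value=...
    // getFileStatusRecurse(dirPath, dpLbLevel, inpFs) then returns paths such as
    //   ds=2008-04-08/hr=11/key=484/value=val_484
    // and each returned path is checked independently against the merge-size threshold.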

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java?rev=1417374&r1=1417373&r2=1417374&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java Wed Dec  5 11:59:15 2012
@@ -60,6 +60,7 @@ public class FileSinkDesc extends Abstra
   transient private List<FileSinkDesc> linkedFileSinkDesc;
 
   private boolean statsReliable;
+  private ListBucketingCtx lbCtx;
   private int maxStatsKeyPrefixLength = -1;
 
   public FileSinkDesc() {
@@ -309,6 +310,20 @@ public class FileSinkDesc extends Abstra
     this.statsReliable = statsReliable;
   }
 
+  /**
+   * @return the lbCtx
+   */
+  public ListBucketingCtx getLbCtx() {
+    return lbCtx;
+  }
+
+  /**
+   * @param lbCtx the lbCtx to set
+   */
+  public void setLbCtx(ListBucketingCtx lbCtx) {
+    this.lbCtx = lbCtx;
+  }
+
   public List<FileSinkDesc> getLinkedFileSinkDesc() {
     return linkedFileSinkDesc;
   }
@@ -324,4 +339,5 @@ public class FileSinkDesc extends Abstra
   public void setMaxStatsKeyPrefixLength(int maxStatsKeyPrefixLength) {
     this.maxStatsKeyPrefixLength = maxStatsKeyPrefixLength;
   }
+
 }

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ListBucketingCtx.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ListBucketingCtx.java?rev=1417374&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ListBucketingCtx.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ListBucketingCtx.java Wed Dec  5 11:59:15 2012
@@ -0,0 +1,238 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.plan;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.hive.common.FileUtils;
+import org.apache.hadoop.hive.ql.exec.ColumnInfo;
+import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
+import org.apache.hadoop.hive.ql.exec.RowSchema;
+import org.apache.hadoop.hive.ql.optimizer.listbucketingpruner.ListBucketingPrunerUtils;
+/**
+ * Context for list bucketing.
+ * It's created in SemanticAnalyzer.genFileSinkPlan().
+ * It's used in FileSinkOperator.processOp(), merging files, alter table ...concatenate etc.
+ */
+public class ListBucketingCtx implements Serializable {
+  /**
+   * default serialization ID.
+   */
+  private static final long serialVersionUID = 1L;
+  private List<String> skewedColNames;
+  private List<List<String>> skewedColValues;
+  private Map<List<String>, String> lbLocationMap;
+  private List<Integer> rowSkewedIndex;
+  private boolean isStoredAsSubDirectories;
+  private String defaultKey;
+  private String defaultDirName;
+  private List<String> skewedValuesDirNames;
+
+  public ListBucketingCtx() {
+    rowSkewedIndex = new ArrayList<Integer>();
+    skewedValuesDirNames = new ArrayList<String>();
+  }
+
+  /**
+   * @return the skewedColNames
+   */
+  public List<String> getSkewedColNames() {
+    return skewedColNames;
+  }
+
+  /**
+   * @param skewedColNames the skewedColNames to set
+   */
+  public void setSkewedColNames(List<String> skewedColNames) {
+    this.skewedColNames = skewedColNames;
+  }
+
+  /**
+   * @return the skewedColValues
+   */
+  public List<List<String>> getSkewedColValues() {
+    return skewedColValues;
+  }
+
+  /**
+   * @param skewedColValues the skewedColValues to set
+   */
+  public void setSkewedColValues(List<List<String>> skewedColValues) {
+    this.skewedColValues = skewedColValues;
+  }
+
+  /**
+   * @return the lbLocationMap
+   */
+  public Map<List<String>, String> getLbLocationMap() {
+    return lbLocationMap;
+  }
+
+  /**
+   * @param lbLocationMap the lbLocationMap to set
+   */
+  public void setLbLocationMap(Map<List<String>, String> lbLocationMap) {
+    this.lbLocationMap = lbLocationMap;
+  }
+
+  /**
+   * Match columns against the skewed column list and record their positions.
+   * The positions are used in {@link FileSinkOperator} generateListBucketingDirName().
+   * Note that skewed column names and skewed values are matched by position.
+   *
+   * @param rowSch the row schema to scan for skewed columns
+   */
+  public void processRowSkewedIndex(RowSchema rowSch) {
+    if ((this.skewedColNames != null) && (this.skewedColNames.size() > 0) && (rowSch != null)
+        && (rowSch.getSignature() != null) && (rowSch.getSignature().size() > 0)) {
+      List<ColumnInfo> cols = rowSch.getSignature();
+      int hitNo = 0;
+      for (int i = 0; i < cols.size(); i++) {
+        int index = this.skewedColNames.indexOf(cols.get(i).getInternalName());
+        if (index > -1) {
+          hitNo++;
+          rowSkewedIndex.add(index);
+        }
+      }
+      assert (hitNo == this.skewedColNames.size()) : "RowSchema doesn't have all skewed columns. "
+          + "Skewed column: " + this.skewedColNames.toString() + ". Rowschema has columns: " + cols;
+    }
+  }
+
+  /**
+   * Calculate the skewed value sub-directory names, which are used in
+   * FileSinkOperator.java createKeyForStatsPublisher().
+   * For example, given: create table test skewed by (key, value) on (('484','val_484'))
+   * stored as DIRECTORIES;
+   * after this method, skewedValuesDirNames will contain 2 elements:
+   * key=484/value=val_484
+   * HIVE_LIST_BUCKETING_DEFAULT_DIR_NAME/HIVE_LIST_BUCKETING_DEFAULT_DIR_NAME
+   */
+  public void calculateSkewedValueSubDirList() {
+    if (isSkewedStoredAsDir()) {
+      for (List<String> value : this.skewedColValues) {
+        skewedValuesDirNames.add(FileUtils.makeListBucketingDirName(this.skewedColNames, value));
+      }
+      // create the default dir
+      skewedValuesDirNames.add(FileUtils.makeDefaultListBucketingDirName(
+          this.skewedColNames,
+          ListBucketingPrunerUtils.HIVE_LIST_BUCKETING_DEFAULT_DIR_NAME));
+    }
+  }
+
+  /**
+   * @return the rowSkewedIndex
+   */
+  public List<Integer> getRowSkewedIndex() {
+    return rowSkewedIndex;
+  }
+
+  /**
+   * @param rowSkewedIndex the rowSkewedIndex to set
+   */
+  public void setRowSkewedIndex(List<Integer> rowSkewedIndex) {
+    this.rowSkewedIndex = rowSkewedIndex;
+  }
+
+  /**
+   * @return the isStoredAsSubDirectories
+   */
+  public boolean isStoredAsSubDirectories() {
+    return isStoredAsSubDirectories;
+  }
+
+  /**
+   * @param isStoredAsSubDirectories the isStoredAsSubDirectories to set
+   */
+  public void setStoredAsSubDirectories(boolean isStoredAsSubDirectories) {
+    this.isStoredAsSubDirectories = isStoredAsSubDirectories;
+  }
+
+  /**
+   * @return the defaultKey
+   */
+  public String getDefaultKey() {
+    return defaultKey;
+  }
+
+  /**
+   * @param defaultKey the defaultKey to set
+   */
+  public void setDefaultKey(String defaultKey) {
+    this.defaultKey = defaultKey;
+  }
+
+  /**
+   * @return the defaultDirName
+   */
+  public String getDefaultDirName() {
+    return defaultDirName;
+  }
+
+  /**
+   * @param defaultDirName the defaultDirName to set
+   */
+  public void setDefaultDirName(String defaultDirName) {
+    this.defaultDirName = defaultDirName;
+  }
+
+  /**
+   * Check whether this context describes a skewed table or partition stored as
+   * sub-directories, i.e. whether list bucketing is in effect.
+   *
+   * @return true if skewed column names and values are defined and the data is
+   *         stored as sub-directories
+   */
+  public  boolean isSkewedStoredAsDir() {
+    return (this.getSkewedColNames() != null)
+        && (this.getSkewedColNames().size() > 0)
+        && (this.getSkewedColValues() != null)
+        && (this.getSkewedColValues().size() > 0)
+        && (this.isStoredAsSubDirectories());
+  }
+
+  /**
+   * Calculate the list bucketing level:
+   * 0 if list bucketing is not in effect, otherwise the number of skewed columns.
+   *
+   * @return the list bucketing level
+   */
+  public  int calculateListBucketingLevel() {
+    int lbLevel = isSkewedStoredAsDir() ? this.getSkewedColNames().size() : 0;
+    return lbLevel;
+  }
+
+  /**
+   * @return the skewedValuesDirNames
+   */
+  public List<String> getSkewedValuesDirNames() {
+    return skewedValuesDirNames;
+  }
+
+  /**
+   * @param skewedValuesDirNames the skewedValuesDirNames to set
+   */
+  public void setSkewedValuesDirNames(List<String> skewedValuesDirNames) {
+    this.skewedValuesDirNames = skewedValuesDirNames;
+  }
+}
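
A minimal sketch of how the new class behaves, using only the setters and helpers added above. The skewed values mirror the list_bucket_dml_2.q test added in this commit; the commented results are inferred from the code and javadoc, not from an executed run.

    import java.util.Arrays;
    import java.util.HashMap;
    import java.util.List;

    import org.apache.hadoop.hive.ql.plan.ListBucketingCtx;

    public class ListBucketingCtxSketch {
      public static void main(String[] args) {
        ListBucketingCtx lbCtx = new ListBucketingCtx();
        lbCtx.setSkewedColNames(Arrays.asList("key", "value"));
        lbCtx.setSkewedColValues(Arrays.asList(
            Arrays.asList("484", "val_484"),
            Arrays.asList("103", "val_103")));
        lbCtx.setLbLocationMap(new HashMap<List<String>, String>());
        lbCtx.setStoredAsSubDirectories(true);

        // Two skewed columns stored as directories => list bucketing is in effect, level 2.
        System.out.println(lbCtx.isSkewedStoredAsDir());         // true
        System.out.println(lbCtx.calculateListBucketingLevel()); // 2

        // Per the javadoc, this adds one dir name per skewed value tuple plus a default dir,
        // e.g. key=484/value=val_484 and key=103/value=val_103.
        lbCtx.calculateSkewedValueSubDirList();
        System.out.println(lbCtx.getSkewedValuesDirNames());
      }
    }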

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/LoadTableDesc.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/LoadTableDesc.java?rev=1417374&r1=1417373&r2=1417374&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/LoadTableDesc.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/LoadTableDesc.java Wed Dec  5 11:59:15 2012
@@ -32,8 +32,9 @@ public class LoadTableDesc extends org.a
   private boolean replace;
   private String tmpDir;
   private DynamicPartitionCtx dpCtx;
+  private ListBucketingCtx lbCtx;
   private boolean holdDDLTime;
-  private boolean inheritTableSpecs = true; //For partitions, flag controlling whether the current 
+  private boolean inheritTableSpecs = true; //For partitions, flag controlling whether the current
                                             //table specs are to be used
 
   // TODO: the below seems like they should just be combined into partitionDesc
@@ -138,4 +139,18 @@ public class LoadTableDesc extends org.a
   public void setInheritTableSpecs(boolean inheritTableSpecs) {
     this.inheritTableSpecs = inheritTableSpecs;
   }
+
+  /**
+   * @return the lbCtx
+   */
+  public ListBucketingCtx getLbCtx() {
+    return lbCtx;
+  }
+
+  /**
+   * @param lbCtx the lbCtx to set
+   */
+  public void setLbCtx(ListBucketingCtx lbCtx) {
+    this.lbCtx = lbCtx;
+  }
 }

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java?rev=1417374&r1=1417373&r2=1417374&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java Wed Dec  5 11:59:15 2012
@@ -34,7 +34,6 @@ import org.apache.hadoop.hive.ql.exec.Ut
 import org.apache.hadoop.hive.ql.parse.OpParseContext;
 import org.apache.hadoop.hive.ql.parse.QBJoinTree;
 import org.apache.hadoop.hive.ql.parse.SplitSample;
-import org.apache.hadoop.hive.ql.session.SessionState;
 
 /**
  * MapredWork.
@@ -161,31 +160,13 @@ public class MapredWork extends Abstract
     while (itr.hasNext()) {
       final Entry<String, ArrayList<String>> entry = itr.next();
       String origiKey = entry.getKey();
-      String newKey = removePrefixFromWarehouseConfig(origiKey);
+      String newKey = PlanUtils.removePrefixFromWarehouseConfig(origiKey);
       ArrayList<String> value = entry.getValue();
       trunPathToAliases.put(newKey, value);
     }
     return trunPathToAliases;
   }
 
-  /**
-   * Remove prefix from "Path -> Alias"
-   *
-   * @param origiKey
-   * @return
-   */
-  private String removePrefixFromWarehouseConfig(String origiKey) {
-    String prefix = SessionState.get().getConf().getVar(HiveConf.ConfVars.METASTOREWAREHOUSE);
-    if ((prefix != null) && (prefix.length() > 0)) {
-      //Local file system is using pfile:/// {@link ProxyLocalFileSystem}
-      prefix = prefix.replace("pfile:///", "pfile:/");
-      int index = origiKey.indexOf(prefix);
-      if (index > -1) {
-        origiKey = origiKey.substring(index + prefix.length());
-      }
-    }
-    return origiKey;
-  }
 
 
   @Explain(displayName = "Path -> Partition", normalExplain = false)
@@ -499,7 +480,7 @@ public class MapredWork extends Abstract
     this.inputFormatSorted = inputFormatSorted;
   }
 
-  public void resolveDynamicPartitionMerge(HiveConf conf, Path path,
+  public void resolveDynamicPartitionStoredAsSubDirsMerge(HiveConf conf, Path path,
       TableDesc tblDesc, ArrayList<String> aliases, PartitionDesc partDesc) {
     pathToAliases.put(path.toString(), aliases);
     pathToPartitionInfo.put(path.toString(), partDesc);

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java?rev=1417374&r1=1417373&r2=1417374&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java Wed Dec  5 11:59:15 2012
@@ -45,6 +45,7 @@ import org.apache.hadoop.hive.ql.metadat
 import org.apache.hadoop.hive.ql.metadata.HiveUtils;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.parse.TypeCheckProcFactory;
+import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.DelimitedJSONSerDe;
 import org.apache.hadoop.hive.serde2.Deserializer;
@@ -731,6 +732,28 @@ public final class PlanUtils {
     return val;
   }
 
+  /**
+   * Remove the warehouse prefix from a "Path -> Alias" key.
+   * This is needed for testing: to verify that a path is correct it must appear in the
+   * expected test output, but the test mask patterns would otherwise mask it. Stripping
+   * the warehouse prefix keeps the remaining path from triggering the mask patterns.
+   * @param origiKey the original path key
+   * @return the key with the warehouse prefix removed
+   */
+  public static String removePrefixFromWarehouseConfig(String origiKey) {
+    String prefix = SessionState.get().getConf().getVar(HiveConf.ConfVars.METASTOREWAREHOUSE);
+    if ((prefix != null) && (prefix.length() > 0)) {
+      //Local file system is using pfile:/// {@link ProxyLocalFileSystem}
+      prefix = prefix.replace("pfile:///", "pfile:/");
+      int index = origiKey.indexOf(prefix);
+      if (index > -1) {
+        origiKey = origiKey.substring(index + prefix.length());
+      }
+    }
+    return origiKey;
+  }
+
   private PlanUtils() {
     // prevent instantiation
   }
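
A small self-contained sketch of the prefix-stripping logic now in PlanUtils. The warehouse value here is hypothetical, and the prefix is passed explicitly for illustration; the real method reads METASTOREWAREHOUSE from the SessionState conf.

    public class RemovePrefixSketch {
      // Mirrors removePrefixFromWarehouseConfig, but takes the prefix as a parameter.
      static String removePrefix(String origiKey, String prefix) {
        // Local file system uses pfile:/// (ProxyLocalFileSystem), so normalize the prefix first.
        prefix = prefix.replace("pfile:///", "pfile:/");
        int index = origiKey.indexOf(prefix);
        if ((prefix.length() > 0) && (index > -1)) {
          origiKey = origiKey.substring(index + prefix.length());
        }
        return origiKey;
      }

      public static void main(String[] args) {
        String prefix = "pfile:///tmp/warehouse";  // hypothetical METASTOREWAREHOUSE value
        String key = "pfile:/tmp/warehouse/list_bucketing_static_part/ds=2008-04-08/hr=11";
        System.out.println(removePrefix(key, prefix));
        // prints: /list_bucketing_static_part/ds=2008-04-08/hr=11
      }
    }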

Modified: hive/trunk/ql/src/test/queries/clientnegative/column_change_skewedcol_type1.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientnegative/column_change_skewedcol_type1.q?rev=1417374&r1=1417373&r2=1417374&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientnegative/column_change_skewedcol_type1.q (original)
+++ hive/trunk/ql/src/test/queries/clientnegative/column_change_skewedcol_type1.q Wed Dec  5 11:59:15 2012
@@ -1,5 +1,4 @@
 set hive.mapred.supports.subdirectories=true;
-set hive.internal.ddl.list.bucketing.enable=true;
 
 CREATE TABLE skewedtable (key STRING, value STRING) SKEWED BY (key) ON (1,5,6);
 

Modified: hive/trunk/ql/src/test/queries/clientnegative/column_rename5.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientnegative/column_rename5.q?rev=1417374&r1=1417373&r2=1417374&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientnegative/column_rename5.q (original)
+++ hive/trunk/ql/src/test/queries/clientnegative/column_rename5.q Wed Dec  5 11:59:15 2012
@@ -1,5 +1,4 @@
 set hive.mapred.supports.subdirectories=true;
-set hive.internal.ddl.list.bucketing.enable=true;
 
 CREATE TABLE skewedtable (key STRING, value STRING) SKEWED BY (key) ON (1,5,6);
 

Modified: hive/trunk/ql/src/test/queries/clientnegative/create_skewed_table_col_name_value_no_mismatch.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientnegative/create_skewed_table_col_name_value_no_mismatch.q?rev=1417374&r1=1417373&r2=1417374&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientnegative/create_skewed_table_col_name_value_no_mismatch.q (original)
+++ hive/trunk/ql/src/test/queries/clientnegative/create_skewed_table_col_name_value_no_mismatch.q Wed Dec  5 11:59:15 2012
@@ -1,4 +1,3 @@
 set hive.mapred.supports.subdirectories=true;
-set hive.internal.ddl.list.bucketing.enable=true;
 
 CREATE TABLE skewed_table (key STRING, value STRING) SKEWED BY (key) ON ((1),(5,8),(6));

Modified: hive/trunk/ql/src/test/queries/clientnegative/create_skewed_table_dup_col_name.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientnegative/create_skewed_table_dup_col_name.q?rev=1417374&r1=1417373&r2=1417374&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientnegative/create_skewed_table_dup_col_name.q (original)
+++ hive/trunk/ql/src/test/queries/clientnegative/create_skewed_table_dup_col_name.q Wed Dec  5 11:59:15 2012
@@ -1,4 +1,3 @@
 set hive.mapred.supports.subdirectories=true;
-set hive.internal.ddl.list.bucketing.enable=true;
 
 CREATE TABLE skewed_table (key STRING, value STRING) SKEWED BY (key,key) ON ((1),(5),(6));

Modified: hive/trunk/ql/src/test/queries/clientnegative/create_skewed_table_failure_invalid_col_name.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientnegative/create_skewed_table_failure_invalid_col_name.q?rev=1417374&r1=1417373&r2=1417374&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientnegative/create_skewed_table_failure_invalid_col_name.q (original)
+++ hive/trunk/ql/src/test/queries/clientnegative/create_skewed_table_failure_invalid_col_name.q Wed Dec  5 11:59:15 2012
@@ -1,5 +1,4 @@
 set hive.mapred.supports.subdirectories=true;
-set hive.internal.ddl.list.bucketing.enable=true;
 
 CREATE TABLE skewed_table (key STRING, value STRING) SKEWED BY (key_non) ON ((1),(5),(6));
  
\ No newline at end of file

Modified: hive/trunk/ql/src/test/queries/clientnegative/invalid_config1.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientnegative/invalid_config1.q?rev=1417374&r1=1417373&r2=1417374&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientnegative/invalid_config1.q (original)
+++ hive/trunk/ql/src/test/queries/clientnegative/invalid_config1.q Wed Dec  5 11:59:15 2012
@@ -1,4 +1,3 @@
-
-set hive.internal.ddl.list.bucketing.enable=true;
+set mapred.input.dir.recursive=true;
 
 CREATE TABLE skewedtable (key STRING, value STRING) SKEWED BY (key) ON (1,5,6);

Modified: hive/trunk/ql/src/test/queries/clientnegative/load_stored_as_dirs.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientnegative/load_stored_as_dirs.q?rev=1417374&r1=1417373&r2=1417374&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientnegative/load_stored_as_dirs.q (original)
+++ hive/trunk/ql/src/test/queries/clientnegative/load_stored_as_dirs.q Wed Dec  5 11:59:15 2012
@@ -1,5 +1,4 @@
 set hive.mapred.supports.subdirectories=true;
-set hive.internal.ddl.list.bucketing.enable=true;
 
 -- Load data can't work with table with stored as directories
 CREATE TABLE  if not exists stored_as_dirs_multiple (col1 STRING, col2 int, col3 STRING) 

Modified: hive/trunk/ql/src/test/queries/clientpositive/alter_skewed_table.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/alter_skewed_table.q?rev=1417374&r1=1417373&r2=1417374&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/alter_skewed_table.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/alter_skewed_table.q Wed Dec  5 11:59:15 2012
@@ -1,5 +1,4 @@
 set hive.mapred.supports.subdirectories=true;
-set hive.internal.ddl.list.bucketing.enable=true;
 
 create table original (key STRING, value STRING); 
 

Modified: hive/trunk/ql/src/test/queries/clientpositive/create_alter_list_bucketing_table1.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/create_alter_list_bucketing_table1.q?rev=1417374&r1=1417373&r2=1417374&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/create_alter_list_bucketing_table1.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/create_alter_list_bucketing_table1.q Wed Dec  5 11:59:15 2012
@@ -1,5 +1,4 @@
 set hive.mapred.supports.subdirectories=true;
-set hive.internal.ddl.list.bucketing.enable=true;
 
 -- Test stored as directories
 -- it covers a few cases

Modified: hive/trunk/ql/src/test/queries/clientpositive/create_skewed_table1.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/create_skewed_table1.q?rev=1417374&r1=1417373&r2=1417374&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/create_skewed_table1.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/create_skewed_table1.q Wed Dec  5 11:59:15 2012
@@ -1,5 +1,4 @@
 set hive.mapred.supports.subdirectories=true;
-set hive.internal.ddl.list.bucketing.enable=true;
 CREATE TABLE list_bucket_single (key STRING, value STRING) SKEWED BY (key) ON ('1','5','6');
 CREATE TABLE list_bucket_single_2 (key STRING, value STRING) SKEWED BY (key) ON ((1),(5),(6));
 CREATE TABLE list_bucket_multiple (col1 STRING, col2 int, col3 STRING) SKEWED BY (col1, col2) ON (('s1',1), ('s3',3), ('s13',13), ('s78',78));

Added: hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_1.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_1.q?rev=1417374&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_1.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_1.q Wed Dec  5 11:59:15 2012
@@ -0,0 +1,40 @@
+set hive.mapred.supports.subdirectories=true;
+set hive.exec.dynamic.partition=true;
+set hive.exec.dynamic.partition.mode=nonstrict;
+set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+set hive.merge.mapfiles=false;	
+set hive.merge.mapredfiles=false; 
+set mapred.input.dir.recursive=true;
+
+-- list bucketing DML : dynamic partition and 2 stage query plan.
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+
+-- create a skewed table
+create table list_bucketing_dynamic_part (key String, value String) 
+partitioned by (ds String, hr String) 
+skewed by (key) on ("484")
+stored as DIRECTORIES
+;
+
+-- list bucketing DML
+explain extended
+insert overwrite table list_bucketing_dynamic_part partition (ds='2008-04-08', hr) select key, value, hr from srcpart where ds='2008-04-08';
+insert overwrite table list_bucketing_dynamic_part partition (ds='2008-04-08', hr) select key, value, hr from srcpart where ds='2008-04-08';
+
+-- check DML result
+desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='11');
+desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='12');
+
+select count(1) from srcpart where ds='2008-04-08';
+select count(1) from list_bucketing_dynamic_part where ds='2008-04-08';
+
+select key, value from srcpart where ds='2008-04-08' and hr='11' and key = "484";
+set hive.optimize.listbucketing=true;
+explain extended
+select key, value from list_bucketing_dynamic_part where ds='2008-04-08' and hr='11' and key = "484";
+select key, value from list_bucketing_dynamic_part where ds='2008-04-08' and hr='11' and key = "484";
+
+-- clean up resources
+drop table list_bucketing_dynamic_part;
+

Added: hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_2.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_2.q?rev=1417374&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_2.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_2.q Wed Dec  5 11:59:15 2012
@@ -0,0 +1,71 @@
+set hive.mapred.supports.subdirectories=true;
+set hive.exec.dynamic.partition=true;
+set hive.exec.dynamic.partition.mode=nonstrict;
+set hive.input.format=org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
+set hive.merge.smallfiles.avgsize=200;
+set mapred.input.dir.recursive=true;
+set hive.merge.mapfiles=false;	
+set hive.merge.mapredfiles=false;
+set hive.stats.reliable=true;
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+
+-- list bucketing DML: static partition. multiple skewed columns.
+-- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME:
+--  5263 000000_0
+--  5263 000001_0
+-- ds=2008-04-08/hr=11/key=103/value=val_103:
+-- 99 000000_0
+-- 99 000001_0
+-- ds=2008-04-08/hr=11/key=484/value=val_484:
+-- 87 000000_0
+-- 87 000001_0
+
+-- create a skewed table
+create table list_bucketing_static_part (key String, value String) 
+    partitioned by (ds String, hr String) 
+    skewed by (key, value) on (('484','val_484'),('51','val_14'),('103','val_103'))
+    stored as DIRECTORIES
+    STORED AS RCFILE;
+
+-- list bucketing DML without merge. use bucketize to generate a few small files.
+explain extended
+insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08',  hr = '11')
+select key, value from srcpart where ds = '2008-04-08';
+
+insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11')
+select key, value from srcpart where ds = '2008-04-08';
+
+-- check DML result
+show partitions list_bucketing_static_part;
+desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11');	
+
+select count(1) from srcpart where ds = '2008-04-08';
+select count(*) from list_bucketing_static_part;
+
+set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+set hive.optimize.listbucketing=true;
+explain extended
+select * from list_bucketing_static_part where ds = '2008-04-08' and  hr = '11' and key = '484' and value = 'val_484';
+select * from list_bucketing_static_part where ds = '2008-04-08' and  hr = '11' and key = '484' and value = 'val_484';
+select * from srcpart where ds = '2008-04-08' and key = '484' and value = 'val_484';
+
+-- the table contains 51 with val_51, so there should be no skewed data for (51, val_14)
+-- but query should succeed for 51 or 51 and val_14
+select * from srcpart where ds = '2008-04-08' and key = '51';
+select * from list_bucketing_static_part where key = '51';
+select * from srcpart where ds = '2008-04-08' and key = '51' and value = 'val_14';
+select * from list_bucketing_static_part where key = '51' and value = 'val_14';
+
+-- queries with < <= > >= should work for skewed test although we don't benefit from pruning
+select count(1) from srcpart where ds = '2008-04-08' and key < '51';
+select count(1) from list_bucketing_static_part where key < '51';
+select count(1) from srcpart where ds = '2008-04-08' and key <= '51';
+select count(1) from list_bucketing_static_part where key <= '51';
+select count(1) from srcpart where ds = '2008-04-08' and key > '51';
+select count(1) from list_bucketing_static_part where key > '51';
+select count(1) from srcpart where ds = '2008-04-08' and key >= '51';
+select count(1) from list_bucketing_static_part where key >= '51';
+
+-- clean up
+drop table list_bucketing_static_part;

Added: hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_3.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_3.q?rev=1417374&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_3.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_3.q Wed Dec  5 11:59:15 2012
@@ -0,0 +1,33 @@
+set hive.mapred.supports.subdirectories=true;
+set hive.exec.dynamic.partition=true;
+set hive.exec.dynamic.partition.mode=nonstrict;
+set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+set hive.merge.mapfiles=false;	
+set hive.merge.mapredfiles=false;
+set mapred.input.dir.recursive=true;
+
+-- list bucketing DML : static partition and 2 stage query plan.
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+
+-- create a skewed table
+create table list_bucketing_static_part (key String, value String) partitioned by (ds String, hr String) skewed by (key) on ("484") stored as DIRECTORIES;
+
+-- list bucketing DML
+explain extended
+insert overwrite table list_bucketing_static_part partition (ds='2008-04-08', hr='11') select key, value from srcpart where ds='2008-04-08';
+insert overwrite table list_bucketing_static_part partition (ds='2008-04-08', hr='11') select key, value from srcpart where ds='2008-04-08';
+
+-- check DML result
+desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11');
+
+select count(1) from srcpart where ds='2008-04-08';
+select count(1) from list_bucketing_static_part where ds='2008-04-08';
+
+select key, value from srcpart where ds='2008-04-08' and hr='11' and key = "484";
+set hive.optimize.listbucketing=true;
+explain extended
+select key, value from list_bucketing_static_part where ds='2008-04-08' and hr='11' and key = "484";
+select key, value from list_bucketing_static_part where ds='2008-04-08' and hr='11' and key = "484";
+-- clean up resources
+drop table list_bucketing_static_part;

Added: hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_4.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_4.q?rev=1417374&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_4.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_4.q Wed Dec  5 11:59:15 2012
@@ -0,0 +1,71 @@
+set hive.mapred.supports.subdirectories=true;
+set hive.exec.dynamic.partition=true;
+set hive.exec.dynamic.partition.mode=nonstrict;
+set hive.input.format=org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
+set hive.merge.smallfiles.avgsize=200;
+set mapred.input.dir.recursive=true;
+set hive.merge.mapfiles=false;	
+set hive.merge.mapredfiles=false;
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+
+-- list bucketing DML: static partition. multiple skewed columns. merge.
+-- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME:
+--  5263 000000_0
+--  5263 000001_0
+-- ds=2008-04-08/hr=11/key=103/value=val_103:
+-- 99 000000_0
+-- 99 000001_0
+-- after merge
+-- 142 000000_0
+-- ds=2008-04-08/hr=11/key=484/value=val_484:
+-- 87 000000_0
+-- 87 000001_0
+-- after merge
+-- 118 000001_0
+
+-- create a skewed table
+create table list_bucketing_static_part (key String, value String) 
+    partitioned by (ds String, hr String) 
+    skewed by (key, value) on (('484','val_484'),('51','val_14'),('103','val_103'))
+    stored as DIRECTORIES
+    STORED AS RCFILE;
+
+-- list bucketing DML without merge. use bucketize to generate a few small files.
+explain extended
+insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08',  hr = '11')
+select key, value from srcpart where ds = '2008-04-08';
+
+insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11')
+select key, value from srcpart where ds = '2008-04-08';
+
+-- check DML result
+show partitions list_bucketing_static_part;
+desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11');	
+
+set hive.merge.mapfiles=true;	
+set hive.merge.mapredfiles=true; 
+-- list bucketing DML with merge. use bucketize to generate a few small files.
+explain extended
+insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08',  hr = '11')
+select key, value from srcpart where ds = '2008-04-08';
+
+insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08',  hr = '11')
+select key, value from srcpart where ds = '2008-04-08';
+
+-- check DML result
+show partitions list_bucketing_static_part;
+desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11');	
+
+select count(1) from srcpart where ds = '2008-04-08';
+select count(*) from list_bucketing_static_part;
+
+set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+set hive.optimize.listbucketing=true;
+explain extended
+select * from list_bucketing_static_part where ds = '2008-04-08' and  hr = '11' and key = '484' and value = 'val_484';
+select * from list_bucketing_static_part where ds = '2008-04-08' and  hr = '11' and key = '484' and value = 'val_484';
+select * from srcpart where ds = '2008-04-08' and key = '484' and value = 'val_484';
+
+-- clean up
+drop table list_bucketing_static_part;
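
The before/after file sizes listed in the header comment are driven by the merge settings toggled in this script; a sketch of the relevant knobs (the threshold semantics below are the generally documented behaviour, stated here as an assumption rather than something this test asserts):

set hive.merge.mapfiles=true;          -- merge small files produced by map-only jobs
set hive.merge.mapredfiles=true;       -- merge small files produced by map-reduce jobs
set hive.merge.smallfiles.avgsize=200; -- trigger the merge job when average output file size falls below this (bytes)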

Added: hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_5.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_5.q?rev=1417374&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_5.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_5.q Wed Dec  5 11:59:15 2012
@@ -0,0 +1,38 @@
+set hive.mapred.supports.subdirectories=true;
+set hive.exec.dynamic.partition=true;
+set hive.exec.dynamic.partition.mode=nonstrict;
+set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+set hive.merge.mapfiles=false;	
+set hive.merge.mapredfiles=false; 
+set mapred.input.dir.recursive=true;
+
+-- list bucketing DML: multiple skewed columns. 2 stages
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+
+-- create a skewed table
+create table list_bucketing_dynamic_part (key String, value String) 
+partitioned by (ds String, hr String) 
+skewed by (key, value) on (('484','val_484'),('51','val_14'),('103','val_103')) 
+stored as DIRECTORIES;
+
+-- list bucketing DML
+explain extended
+insert overwrite table list_bucketing_dynamic_part partition (ds='2008-04-08', hr) select key, value, hr from srcpart where ds='2008-04-08';
+insert overwrite table list_bucketing_dynamic_part partition (ds='2008-04-08', hr) select key, value, hr from srcpart where ds='2008-04-08';
+
+-- check DML result
+desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='11');
+desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='12');
+
+select count(1) from srcpart where ds='2008-04-08';
+select count(1) from list_bucketing_dynamic_part where ds='2008-04-08';
+
+select key, value from srcpart where ds='2008-04-08' and key = "103" and value ="val_103";
+set hive.optimize.listbucketing=true;
+explain extended
+select key, value from list_bucketing_dynamic_part where ds='2008-04-08' and key = "103" and value ="val_103";
+select key, value from list_bucketing_dynamic_part where ds='2008-04-08' and key = "103" and value ="val_103";
+
+-- clean up resources
+drop table list_bucketing_dynamic_part;
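
With two skewed columns, each skewed (key, value) pair maps to a nested subdirectory and non-skewed rows fall into a doubly nested default directory; an illustrative layout for one of the dynamic partitions, following the directory names shown in the later tests of this commit:

--   ds=2008-04-08/hr=11/key=484/value=val_484/
--   ds=2008-04-08/hr=11/key=103/value=val_103/
--   ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/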

Added: hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_6.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_6.q?rev=1417374&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_6.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_6.q Wed Dec  5 11:59:15 2012
@@ -0,0 +1,97 @@
+set hive.mapred.supports.subdirectories=true;
+set hive.exec.dynamic.partition=true;
+set hive.exec.dynamic.partition.mode=nonstrict;
+set hive.input.format=org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
+set hive.merge.smallfiles.avgsize=200;
+set mapred.input.dir.recursive=true;
+set hive.merge.mapfiles=false;	
+set hive.merge.mapredfiles=false;
+
+-- list bucketing DML: dynamic partition. multiple skewed columns. merge.
+-- The following explains merge example used in this test case
+-- DML will generate 2 partitions
+-- ds=2008-04-08/hr=a1
+-- ds=2008-04-08/hr=b1
+-- without merge, each partition has more files
+-- ds=2008-04-08/hr=a1 has 2 files
+-- ds=2008-04-08/hr=b1 has 6 files
+-- with merge each partition has fewer files
+-- ds=2008-04-08/hr=a1 has 1 file
+-- ds=2008-04-08/hr=b1 has 4 files
+-- The following shows file size and name in each directory
+-- hr=a1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME:
+-- without merge
+-- 155 000000_0
+-- 155 000001_0
+-- with merge
+-- 254 000000_0
+-- hr=b1/key=103/value=val_103:
+-- without merge
+-- 99 000000_0
+-- 99 000001_0
+-- with merge
+-- 142 000001_0
+-- hr=b1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME:
+-- without merge
+-- 5181 000000_0
+-- 5181 000001_0
+-- with merge
+-- 5181 000000_0
+-- 5181 000001_0
+-- hr=b1/key=484/value=val_484
+-- without merge
+-- 87 000000_0
+-- 87 000001_0
+-- with merge
+-- 118 000002_0 
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+
+-- create a skewed table
+create table list_bucketing_dynamic_part (key String, value String) 
+    partitioned by (ds String, hr String) 
+    skewed by (key, value) on (('484','val_484'),('51','val_14'),('103','val_103'))
+    stored as DIRECTORIES
+    STORED AS RCFILE;
+
+-- list bucketing DML without merge. use bucketize to generate a few small files.
+explain extended
+insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr)
+select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08';
+
+insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr)
+select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08';
+
+-- check DML result
+show partitions list_bucketing_dynamic_part;
+desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='a1');	
+desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='b1');
+
+set hive.merge.mapfiles=true;	
+set hive.merge.mapredfiles=true; 
+-- list bucketing DML with merge. use bucketize to generate a few small files.
+explain extended
+insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr)
+select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08';
+
+insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr)
+select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08';
+
+-- check DML result
+show partitions list_bucketing_dynamic_part;
+desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='a1');	
+desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='b1');
+
+select count(1) from srcpart where ds = '2008-04-08';
+select count(*) from list_bucketing_dynamic_part;
+
+set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+set hive.optimize.listbucketing=true;
+explain extended
+select * from list_bucketing_dynamic_part where key = '484' and value = 'val_484';
+select * from list_bucketing_dynamic_part where key = '484' and value = 'val_484';
+select * from srcpart where ds = '2008-04-08' and key = '484' and value = 'val_484';
+
+-- clean up
+drop table list_bucketing_dynamic_part;
+
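
Because hr is computed as if(key % 100 == 0, 'a1', 'b1'), the two dynamic partitions receive very different row counts, which is what produces the file-size pattern in the header comment; a quick sketch (a helper query for illustration, not part of the test) to see the split:

select if(key % 100 == 0, 'a1', 'b1') as hr, count(*)
from srcpart
where ds = '2008-04-08'
group by if(key % 100 == 0, 'a1', 'b1');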

Added: hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_7.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_7.q?rev=1417374&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_7.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_7.q Wed Dec  5 11:59:15 2012
@@ -0,0 +1,70 @@
+set hive.mapred.supports.subdirectories=true;
+set hive.exec.dynamic.partition=true;
+set hive.exec.dynamic.partition.mode=nonstrict;
+set hive.input.format=org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
+set hive.merge.smallfiles.avgsize=200;
+set mapred.input.dir.recursive=true;
+set hive.merge.mapfiles=false;	
+set hive.merge.mapredfiles=false;
+set hive.merge.rcfile.block.level=true;
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+
+-- list bucketing DML: dynamic partition (one level), merge, one skewed column
+-- DML without merge leaves a mix of small and big files:
+-- ds=2008-04-08/hr=a1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/
+-- 155 000000_0
+-- ds=2008-04-08/hr=b1/key=484
+-- 87 000000_0
+-- 87 000001_0
+-- ds=2008-04-08/hr=b1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/
+-- 5201 000000_0
+-- 5201 000001_0
+-- DML with merge will merge small files
+
+-- skewed table
+CREATE TABLE list_bucketing_dynamic_part (key String, value STRING)
+    PARTITIONED BY (ds string, hr string)
+    skewed by (key) on ('484')
+    stored as DIRECTORIES
+    STORED AS RCFILE;
+   
+-- list bucketing DML without merge. use bucketize to generate a few small files.
+explain extended
+insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr)
+select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08';
+
+insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr)
+select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08';
+
+-- check DML result
+show partitions list_bucketing_dynamic_part;
+desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='a1');	
+desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='b1');
+
+set hive.merge.mapfiles=true;	
+set hive.merge.mapredfiles=true; 
+-- list bucketing DML with merge. use bucketize to generate a few small files.
+explain extended
+insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr)
+select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08';
+
+insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr)
+select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08';
+
+-- check DML result
+show partitions list_bucketing_dynamic_part;
+desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='a1');	
+desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='b1');
+
+select count(1) from srcpart where ds = '2008-04-08';
+select count(*) from list_bucketing_dynamic_part;
+
+set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+explain extended
+select * from list_bucketing_dynamic_part where key = '484' and value = 'val_484';
+select * from list_bucketing_dynamic_part where key = '484' and value = 'val_484';
+select * from srcpart where ds = '2008-04-08' and key = '484' and value = 'val_484';
+
+-- clean up
+drop table list_bucketing_dynamic_part;
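
This is the only script in the set that sets hive.merge.rcfile.block.level; the usual reading of that flag is that RCFile merges copy whole blocks instead of re-reading records, which keeps the merge cheap (stated as an assumption about the flag's documented behaviour, not something this test verifies):

set hive.merge.rcfile.block.level=true;  -- merge RCFiles at block level rather than record level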

Added: hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_8.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_8.q?rev=1417374&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_8.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_8.q Wed Dec  5 11:59:15 2012
@@ -0,0 +1,90 @@
+set hive.mapred.supports.subdirectories=true;
+set hive.exec.dynamic.partition=true;
+set hive.exec.dynamic.partition.mode=nonstrict;
+set hive.input.format=org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
+set hive.merge.smallfiles.avgsize=200;
+set mapred.input.dir.recursive=true;
+set hive.merge.mapfiles=false;	
+set hive.merge.mapredfiles=false;
+
+-- list bucketing alter table ... concatenate: 
+-- Use list bucketing DML to generate multiple files in partitions by turning off merge
+-- dynamic partition. multiple skewed columns. merge.
+-- The following explains merge example used in this test case
+-- DML will generate 2 partitions
+-- ds=2008-04-08/hr=a1
+-- ds=2008-04-08/hr=b1
+-- without merge, each partition has more files
+-- ds=2008-04-08/hr=a1 has 2 files
+-- ds=2008-04-08/hr=b1 has 6 files
+-- with merge each partition has fewer files
+-- ds=2008-04-08/hr=a1 has 1 file
+-- ds=2008-04-08/hr=b1 has 4 files
+-- The following shows file size and name in each directory
+-- hr=a1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME:
+-- without merge
+-- 155 000000_0
+-- 155 000001_0
+-- with merge
+-- 254 000000_0
+-- hr=b1/key=103/value=val_103:
+-- without merge
+-- 99 000000_0
+-- 99 000001_0
+-- with merge
+-- 142 000001_0
+-- hr=b1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME:
+-- without merge
+-- 5181 000000_0
+-- 5181 000001_0
+-- with merge
+-- 5181 000000_0
+-- 5181 000001_0
+-- hr=b1/key=484/value=val_484
+-- without merge
+-- 87 000000_0
+-- 87 000001_0
+-- with merge
+-- 118 000002_0 
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+
+-- create a skewed table
+create table list_bucketing_dynamic_part (key String, value String) 
+    partitioned by (ds String, hr String) 
+    skewed by (key, value) on (('484','val_484'),('51','val_14'),('103','val_103'))
+    stored as DIRECTORIES
+    STORED AS RCFILE;
+
+-- list bucketing DML without merge. use bucketize to generate a few small files.
+explain extended
+insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr)
+select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08';
+
+insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr)
+select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08';
+
+-- check DML result
+show partitions list_bucketing_dynamic_part;
+desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='a1');	
+desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='b1');
+
+set hive.merge.current.job.concatenate.list.bucketing=true;
+-- concatenate the partition and it will merge files
+alter table list_bucketing_dynamic_part partition (ds='2008-04-08', hr='b1') concatenate;
+
+desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='b1');
+
+set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+select count(1) from srcpart where ds = '2008-04-08';
+select count(*) from list_bucketing_dynamic_part;
+explain extended
+select * from list_bucketing_dynamic_part where key = '484' and value = 'val_484';
+select * from list_bucketing_dynamic_part where key = '484' and value = 'val_484';
+select * from srcpart where ds = '2008-04-08' and key = '484' and value = 'val_484';
+
+-- clean up
+drop table list_bucketing_dynamic_part;
+
+
+
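
The concatenate above is only run for hr='b1'; the same statement could be applied to the other dynamic partition as well (an illustrative extension, not part of the test):

set hive.merge.current.job.concatenate.list.bucketing=true;
alter table list_bucketing_dynamic_part partition (ds='2008-04-08', hr='a1') concatenate;
desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='a1');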

Added: hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_9.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_9.q?rev=1417374&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_9.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_9.q Wed Dec  5 11:59:15 2012
@@ -0,0 +1,71 @@
+set hive.mapred.supports.subdirectories=true;
+set hive.exec.dynamic.partition=true;
+set hive.exec.dynamic.partition.mode=nonstrict;
+set hive.input.format=org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
+set hive.merge.smallfiles.avgsize=200;
+set mapred.input.dir.recursive=true;
+set hive.merge.mapfiles=false;	
+set hive.merge.mapredfiles=false;
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+
+-- list bucketing DML: static partition. multiple skewed columns. merge.
+-- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME:
+--  5263 000000_0
+--  5263 000001_0
+-- ds=2008-04-08/hr=11/key=103:
+-- 99 000000_0
+-- 99 000001_0
+-- after merge
+-- 142 000000_0
+-- ds=2008-04-08/hr=11/key=484:
+-- 87 000000_0
+-- 87 000001_0
+-- after merge
+-- 118 000001_0
+
+-- create a skewed table
+create table list_bucketing_static_part (key String, value String) 
+    partitioned by (ds String, hr String) 
+    skewed by (key) on ('484','103')
+    stored as DIRECTORIES
+    STORED AS RCFILE;
+
+-- list bucketing DML without merge. use bucketize to generate a few small files.
+explain extended
+insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08',  hr = '11')
+select key, value from srcpart where ds = '2008-04-08';
+
+insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11')
+select key, value from srcpart where ds = '2008-04-08';
+
+-- check DML result
+show partitions list_bucketing_static_part;
+desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11');	
+
+set hive.merge.mapfiles=true;	
+set hive.merge.mapredfiles=true; 
+-- list bucketing DML with merge. use bucketize to generate a few small files.
+explain extended
+insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08',  hr = '11')
+select key, value from srcpart where ds = '2008-04-08';
+
+insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08',  hr = '11')
+select key, value from srcpart where ds = '2008-04-08';
+
+-- check DML result
+show partitions list_bucketing_static_part;
+desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11');	
+
+select count(1) from srcpart where ds = '2008-04-08';
+select count(*) from list_bucketing_static_part;
+
+set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+set hive.optimize.listbucketing=true;
+explain extended
+select * from list_bucketing_static_part where ds = '2008-04-08' and  hr = '11' and key = '484' and value = 'val_484';
+select * from list_bucketing_static_part where ds = '2008-04-08' and  hr = '11' and key = '484' and value = 'val_484';
+select * from srcpart where ds = '2008-04-08' and key = '484' and value = 'val_484';
+
+-- clean up
+drop table list_bucketing_static_part;
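
For a key outside the skewed list ('484','103'), the pruner is expected to fall back to the default directory only; an illustrative query (not part of the test, and '51' is assumed here purely as a non-skewed value) whose extended plan should show just that directory under Truncated Path -> Alias:

set hive.optimize.listbucketing=true;
explain extended
select key, value from list_bucketing_static_part where ds = '2008-04-08' and hr = '11' and key = '51';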

Modified: hive/trunk/ql/src/test/queries/clientpositive/list_bucket_query_multiskew_1.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/list_bucket_query_multiskew_1.q?rev=1417374&r1=1417373&r2=1417374&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/list_bucket_query_multiskew_1.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/list_bucket_query_multiskew_1.q Wed Dec  5 11:59:15 2012
@@ -1,7 +1,8 @@
-set hive.mapred.supports.subdirectories=true;	
-set hive.internal.ddl.list.bucketing.enable=true;
+set hive.mapred.supports.subdirectories=true;
 set hive.optimize.listbucketing=true;
 set mapred.input.dir.recursive=true;	
+set hive.merge.mapfiles=false;	
+set hive.merge.mapredfiles=false; 
 set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
 
 -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)	
@@ -17,66 +18,39 @@ set hive.input.format=org.apache.hadoop.
 -- 1. pruner only pick up right directory
 -- 2. query result is right
 
--- create 1 table: fact_daily
--- 1. create a few partitions
--- 2. dfs move partition according to list bucketing structure (simulate DML) 
---    $/fact_daily/ds=1/hr=4/x=../y=..
---    notes: waste all partitions except ds=1 and hr=4 for list bucketing query test
--- 3. alter it to skewed table and set up location map
--- 4. list bucketing query
--- fact_daily (ds=1 and hr=4) will be used for list bucketing query	
-CREATE TABLE fact_daily(x int, y STRING) PARTITIONED BY (ds STRING, hr STRING)	
-LOCATION '${hiveconf:hive.metastore.warehouse.dir}/fact_daily';	
-
--- create /fact_daily/ds=1/hr=1 directory	
-INSERT OVERWRITE TABLE fact_daily PARTITION (ds='1', hr='1')	
-SELECT key, value FROM src WHERE key=484;	
-
--- create /fact_daily/ds=1/hr=2 directory	
-INSERT OVERWRITE TABLE fact_daily PARTITION (ds='1', hr='2')	
-SELECT key, value FROM src WHERE key=369 or key=406;
-
--- create /fact_daily/ds=1/hr=3 directory	
-INSERT OVERWRITE TABLE fact_daily PARTITION (ds='1', hr='3')	
-SELECT key, value FROM src WHERE key=238;
-
-dfs -lsr ${hiveconf:hive.metastore.warehouse.dir}/fact_daily/ds=1;
-dfs -mv ${hiveconf:hive.metastore.warehouse.dir}/fact_daily/ds=1/hr=1 ${hiveconf:hive.metastore.warehouse.dir}/fact_daily/ds=1/hr=4/x=484/y=val_484;
-dfs -mv ${hiveconf:hive.metastore.warehouse.dir}/fact_daily/ds=1/hr=2 ${hiveconf:hive.metastore.warehouse.dir}/fact_daily/ds=1/hr=4/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME;
-dfs -mv ${hiveconf:hive.metastore.warehouse.dir}/fact_daily/ds=1/hr=3 ${hiveconf:hive.metastore.warehouse.dir}/fact_daily/ds=1/hr=4/x=238/y=val_238;
-dfs -lsr ${hiveconf:hive.metastore.warehouse.dir}/fact_daily/ds=1;
-
--- switch fact_daily to skewed table and point its location to /fact_daily/ds=1
-alter table fact_daily skewed by (x,y) on ((484,'val_484'),(238,'val_238'));	
-ALTER TABLE fact_daily ADD PARTITION (ds='1', hr='4');	
-
--- set List Bucketing location map
-alter table fact_daily PARTITION (ds = '1', hr='4') set skewed location ((484,'val_484')='${hiveconf:hive.metastore.warehouse.dir}/fact_daily/ds=1/hr=4/x=484/y=val_484',
-(238,'val_238')='${hiveconf:hive.metastore.warehouse.dir}/fact_daily/ds=1/hr=4/x=238/y=val_238');
+-- create a skewed table
+create table fact_daily (key String, value String) 
+partitioned by (ds String, hr String) 
+skewed by (key, value) on (('484','val_484'),('238','val_238')) 
+stored as DIRECTORIES;
+
+insert overwrite table fact_daily partition (ds = '1', hr = '4')
+select key, value from src;
+
 describe formatted fact_daily PARTITION (ds = '1', hr='4');
 	
-SELECT * FROM fact_daily WHERE ds='1' and hr='4';	
+SELECT count(1) FROM fact_daily WHERE ds='1' and hr='4';	
 
 -- pruner only pick up skewed-value directory
 -- explain plan shows which directory selected: Truncated Path -> Alias
-explain extended SELECT x FROM fact_daily WHERE ( ds='1' and hr='4') and (x=484 and y= 'val_484');
+explain extended SELECT key FROM fact_daily WHERE ( ds='1' and hr='4') and (key='484' and value= 'val_484');
 -- List Bucketing Query
-SELECT x FROM fact_daily WHERE ( ds='1' and hr='4') and (x=484 and y= 'val_484');
+SELECT key FROM fact_daily WHERE ( ds='1' and hr='4') and (key='484' and value= 'val_484');
 
 -- pruner only pick up skewed-value directory
 -- explain plan shows which directory selected: Truncated Path -> Alias
-explain extended SELECT x,y FROM fact_daily WHERE ( ds='1' and hr='4') and (x=238 and y= 'val_238');
+explain extended SELECT key,value FROM fact_daily WHERE ( ds='1' and hr='4') and (key='238' and value= 'val_238');
 -- List Bucketing Query
-SELECT x,y FROM fact_daily WHERE ( ds='1' and hr='4') and (x=238 and y= 'val_238');
+SELECT key,value FROM fact_daily WHERE ( ds='1' and hr='4') and (key='238' and value= 'val_238');
 
 -- pruner only pick up default directory
 -- explain plan shows which directory selected: Truncated Path -> Alias
-explain extended SELECT x FROM fact_daily WHERE ( ds='1' and hr='4') and (y = "3");
+explain extended SELECT key FROM fact_daily WHERE ( ds='1' and hr='4') and (value = "3");
 -- List Bucketing Query
-SELECT x FROM fact_daily WHERE ( ds='1' and hr='4') and (y = "3");
+SELECT key FROM fact_daily WHERE ( ds='1' and hr='4') and (value = "3");
 
 -- pruner only pick up default directory
 -- explain plan shows which directory selected: Truncated Path -> Alias
-explain extended SELECT x,y FROM fact_daily WHERE ( ds='1' and hr='4') and x = 495;
+explain extended SELECT key,value FROM fact_daily WHERE ( ds='1' and hr='4') and key = '495';
 -- List Bucketing Query
-SELECT x,y FROM fact_daily WHERE ( ds='1' and hr='4') and x = 369;
+SELECT key,value FROM fact_daily WHERE ( ds='1' and hr='4') and key = '369';
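
The comments above refer to the Truncated Path -> Alias section of the extended plan; for the first skewed-value query it should list only the matching skewed-value subdirectory, roughly of the form below (illustrative; exact paths and formatting depend on the Hive version and warehouse location):

  Truncated Path -> Alias:
    /fact_daily/ds=1/hr=4/key=484/value=val_484 [fact_daily]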

Modified: hive/trunk/ql/src/test/queries/clientpositive/list_bucket_query_multiskew_2.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/list_bucket_query_multiskew_2.q?rev=1417374&r1=1417373&r2=1417374&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/list_bucket_query_multiskew_2.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/list_bucket_query_multiskew_2.q Wed Dec  5 11:59:15 2012
@@ -1,5 +1,4 @@
 	set hive.mapred.supports.subdirectories=true;
-set hive.internal.ddl.list.bucketing.enable=true;
 set hive.optimize.listbucketing=true;
 set mapred.input.dir.recursive=true;	
 set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
@@ -16,64 +15,36 @@ set hive.input.format=org.apache.hadoop.
 -- 1. pruner only pick up right directory
 -- 2. query result is right
 
+-- create a skewed table
+create table fact_daily (key String, value String) 
+partitioned by (ds String, hr String) 
+skewed by (key, value) on (('484','val_484'),('238','val_238')) 
+stored as DIRECTORIES;
+
+insert overwrite table fact_daily partition (ds = '1', hr = '4')
+select key, value from src;
 
--- create 1 table: fact_daily
--- 1. create a few partitions
--- 2. dfs move partition according to list bucketing structure (simulate DML) 
---    $/fact_daily/ds=1/hr=4/x=../y=..
---    notes: waste all partitions except ds=1 and hr=4 for list bucketing query test
--- 3. alter it to skewed table and set up location map
--- 4. list bucketing query
--- fact_daily (ds=1 and hr=4) will be used for list bucketing query	
-CREATE TABLE fact_daily(x int, y STRING) PARTITIONED BY (ds STRING, hr STRING)	
-LOCATION '${hiveconf:hive.metastore.warehouse.dir}/fact_daily';	
-
--- create /fact_daily/ds=1/hr=1 directory	
-INSERT OVERWRITE TABLE fact_daily PARTITION (ds='1', hr='1')	
-SELECT key, value FROM src WHERE key=484;	
-
--- create /fact_daily/ds=1/hr=2 directory	
-INSERT OVERWRITE TABLE fact_daily PARTITION (ds='1', hr='2')	
-SELECT key, value FROM src WHERE key=369 or key=406;
-
--- create /fact_daily/ds=1/hr=3 directory	
-INSERT OVERWRITE TABLE fact_daily PARTITION (ds='1', hr='3')	
-SELECT key, value FROM src WHERE key=238;
-
-dfs -lsr ${hiveconf:hive.metastore.warehouse.dir}/fact_daily/ds=1;
-dfs -mv ${hiveconf:hive.metastore.warehouse.dir}/fact_daily/ds=1/hr=1 ${hiveconf:hive.metastore.warehouse.dir}/fact_daily/ds=1/hr=4/x=484/y=val_484;
-dfs -mv ${hiveconf:hive.metastore.warehouse.dir}/fact_daily/ds=1/hr=2 ${hiveconf:hive.metastore.warehouse.dir}/fact_daily/ds=1/hr=4/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME;
-dfs -mv ${hiveconf:hive.metastore.warehouse.dir}/fact_daily/ds=1/hr=3 ${hiveconf:hive.metastore.warehouse.dir}/fact_daily/ds=1/hr=4/x=238/y=val_238;
-dfs -lsr ${hiveconf:hive.metastore.warehouse.dir}/fact_daily/ds=1;
-
--- switch fact_daily to skewed table and point its location to /fact_daily/ds=1
-alter table fact_daily skewed by (x,y) on ((484,'val_484'),(238,'val_238'));	
-ALTER TABLE fact_daily ADD PARTITION (ds='1', hr='4');	
-
--- set List Bucketing location map
-alter table fact_daily PARTITION (ds = '1', hr='4') set skewed location ((484,'val_484')='${hiveconf:hive.metastore.warehouse.dir}/fact_daily/ds=1/hr=4/x=484/y=val_484',
-(238,'val_238')='${hiveconf:hive.metastore.warehouse.dir}/fact_daily/ds=1/hr=4/x=238/y=val_238');
 describe formatted fact_daily PARTITION (ds = '1', hr='4');
 	
-SELECT * FROM fact_daily WHERE ds='1' and hr='4';	
+SELECT count(1) FROM fact_daily WHERE ds='1' and hr='4';	
 
 -- pruner only pick up default directory
 -- explain plan shows which directory selected: Truncated Path -> Alias
-explain extended SELECT x,y FROM fact_daily WHERE ds='1' and hr='4' and y= 'val_484';
+explain extended SELECT key, value FROM fact_daily WHERE ds='1' and hr='4' and value= 'val_484';
 -- List Bucketing Query
-SELECT x,y FROM fact_daily WHERE ds='1' and hr='4' and y= 'val_484';
+SELECT key, value FROM fact_daily WHERE ds='1' and hr='4' and value= 'val_484';
 
 -- pruner only pick up default directory
 -- explain plan shows which directory selected: Truncated Path -> Alias
-explain extended SELECT x FROM fact_daily WHERE ds='1' and hr='4' and x= 406;
+explain extended SELECT key FROM fact_daily WHERE ds='1' and hr='4' and key= '406';
 -- List Bucketing Query
-SELECT x,y FROM fact_daily WHERE ds='1' and hr='4' and x= 406;
+SELECT key, value FROM fact_daily WHERE ds='1' and hr='4' and key= '406';
 
 -- pruner only pick up skewed-value directory
 -- explain plan shows which directory selected: Truncated Path -> Alias
-explain extended SELECT x,y FROM fact_daily WHERE ds='1' and hr='4' and ( (x=484 and y ='val_484')  or (x=238 and y= 'val_238')) ;
+explain extended SELECT key, value FROM fact_daily WHERE ds='1' and hr='4' and ( (key='484' and value ='val_484')  or (key='238' and value= 'val_238')) ;
 -- List Bucketing Query
-SELECT x,y FROM fact_daily WHERE ds='1' and hr='4' and ( (x=484 and y ='val_484')  or (x=238 and y= 'val_238')) ;
+SELECT key, value FROM fact_daily WHERE ds='1' and hr='4' and ( (key='484' and value ='val_484')  or (key='238' and value= 'val_238')) ;
 
 -- clean up
 drop table fact_daily;
\ No newline at end of file