Posted to commits@hive.apache.org by am...@apache.org on 2013/04/05 12:34:11 UTC

svn commit: r1464915 [2/5] - in /hive/branches/HIVE-4115: ./ common/src/java/org/apache/hadoop/hive/common/ common/src/java/org/apache/hadoop/hive/conf/ conf/ data/files/ hcatalog/build-support/ant/ hcatalog/historical/branches/ hcatalog/historical/sit...

Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java?rev=1464915&r1=1464914&r2=1464915&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java Fri Apr  5 10:34:08 2013
@@ -20,10 +20,12 @@ package org.apache.hadoop.hive.ql.exec;
 
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Map;
 
 import org.apache.hadoop.hive.ql.plan.CollectDesc;
 import org.apache.hadoop.hive.ql.plan.DummyStoreDesc;
 import org.apache.hadoop.hive.ql.plan.ExtractDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
 import org.apache.hadoop.hive.ql.plan.FilterDesc;
 import org.apache.hadoop.hive.ql.plan.ForwardDesc;
@@ -36,6 +38,7 @@ import org.apache.hadoop.hive.ql.plan.La
 import org.apache.hadoop.hive.ql.plan.LimitDesc;
 import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.PTFDesc;
 import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
 import org.apache.hadoop.hive.ql.plan.SMBJoinDesc;
 import org.apache.hadoop.hive.ql.plan.ScriptDesc;
@@ -74,6 +77,7 @@ public final class OperatorFactory {
     opvec.add(new OpTuple<FileSinkDesc>(FileSinkDesc.class, FileSinkOperator.class));
     opvec.add(new OpTuple<CollectDesc>(CollectDesc.class, CollectOperator.class));
     opvec.add(new OpTuple<ScriptDesc>(ScriptDesc.class, ScriptOperator.class));
+    opvec.add(new OpTuple<PTFDesc>(PTFDesc.class, PTFOperator.class));
     opvec.add(new OpTuple<ReduceSinkDesc>(ReduceSinkDesc.class, ReduceSinkOperator.class));
     opvec.add(new OpTuple<ExtractDesc>(ExtractDesc.class, ExtractOperator.class));
     opvec.add(new OpTuple<GroupByDesc>(GroupByDesc.class, GroupByOperator.class));
@@ -251,12 +255,32 @@ public final class OperatorFactory {
    * Returns an operator given the conf and a list of parent operators.
    */
   public static <T extends OperatorDesc> Operator<T> getAndMakeChild(T conf,
+      RowSchema rwsch, Map<String, ExprNodeDesc> colExprMap, Operator... oplist) {
+    Operator<T> ret = getAndMakeChild(conf, rwsch, oplist);
+    ret.setColumnExprMap(colExprMap);    
+    return (ret);
+  }
+
+  /**
+   * Returns an operator given the conf and a list of parent operators.
+   */
+  public static <T extends OperatorDesc> Operator<T> getAndMakeChild(T conf,
       RowSchema rwsch, List<Operator<? extends OperatorDesc>> oplist) {
     Operator<T> ret = getAndMakeChild(conf, oplist);
     ret.setSchema(rwsch);
     return (ret);
   }
 
+  /**
+   * Returns an operator given the conf and a list of parent operators.
+   */
+  public static <T extends OperatorDesc> Operator<T> getAndMakeChild(T conf,
+      RowSchema rwsch, Map<String, ExprNodeDesc> colExprMap, List<Operator<? extends OperatorDesc>> oplist) {
+    Operator<T> ret = getAndMakeChild(conf, rwsch, oplist);
+    ret.setColumnExprMap(colExprMap);
+    return (ret);
+  }
+
   private OperatorFactory() {
     // prevent instantiation
   }

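The two new getAndMakeChild overloads thread a column-to-expression map through to the freshly created child operator, mirroring the existing schema-setting variants. A minimal sketch of a call site, assuming a selectDesc descriptor and a parentOp operator are already in scope (the column name "_col0" is illustrative):

    // Sketch only: build a child operator and attach both its schema and its
    // column expression map in one call, using the overload added above.
    ArrayList<ColumnInfo> cols = new ArrayList<ColumnInfo>();
    cols.add(new ColumnInfo("_col0", TypeInfoFactory.stringTypeInfo, null, false));
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    colExprMap.put("_col0",
        new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "_col0", null, false));
    Operator<SelectDesc> child = OperatorFactory.getAndMakeChild(
        selectDesc, new RowSchema(cols), colExprMap, parentOp);
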
Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/exec/RowSchema.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/exec/RowSchema.java?rev=1464915&r1=1464914&r2=1464915&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/exec/RowSchema.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/exec/RowSchema.java Fri Apr  5 10:34:08 2013
@@ -32,6 +32,10 @@ public class RowSchema implements Serial
   public RowSchema() {
   }
 
+  public RowSchema(RowSchema that) {
+    this.signature = (ArrayList<ColumnInfo>) that.signature.clone();
+  }
+
   public RowSchema(ArrayList<ColumnInfo> signature) {
     this.signature = signature;
   }

Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java?rev=1464915&r1=1464914&r2=1464915&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java Fri Apr  5 10:34:08 2013
@@ -94,6 +94,19 @@ public class TableScanOperator extends O
   @Override
   public void cleanUpInputFileChangedOp() throws HiveException {
     inputFileChanged = true;
+    // If the file name to bucket number mapping is maintained, store the bucket number
+    // in the execution context. This is needed for the following scenario:
+    // insert overwrite table T1 select * from T2;
+    // where T1 and T2 are sorted/bucketed by the same keys into the same number of buckets.
+    // Although one mapper per file is used (BucketizedHiveInputFormat), any mapper can
+    // pick up any file, depending on the size of the files. The bucket number
+    // corresponding to the input file is stored to name the output bucket file appropriately.
+    Map<String, Integer> bucketNameMapping = conf != null ? conf.getBucketFileNameMapping() : null;
+    if ((bucketNameMapping != null) && (!bucketNameMapping.isEmpty())) {
+      String currentInputFile = getExecContext().getCurrentInputFile();
+      getExecContext().setFileId(Integer.toString(bucketNameMapping.get(
+          Utilities.getFileNameFromDirName(currentInputFile))));
+    }
   }
 
   private void gatherStats(Object row) {

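For illustration only (not part of the commit), the mapping-based renaming amounts to the following standalone sketch; the bucket file names and the HDFS path are hypothetical:

    import java.util.HashMap;
    import java.util.Map;

    public class BucketFileIdSketch {
      public static void main(String[] args) {
        // Hypothetical contents of conf.getBucketFileNameMapping():
        // T2's bucket file names mapped to their bucket numbers.
        Map<String, Integer> bucketNameMapping = new HashMap<String, Integer>();
        bucketNameMapping.put("000000_0", 0);
        bucketNameMapping.put("000001_0", 1);

        // Whichever file this mapper was handed, its bucket number is recoverable.
        String currentInputFile = "hdfs://nn/warehouse/t2/000001_0";
        String fileName =
            currentInputFile.substring(currentInputFile.lastIndexOf('/') + 1);
        // The recovered fileId ("1") then names T1's output bucket file.
        System.out.println("fileId = " + bucketNameMapping.get(fileName));
      }
    }
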
Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/hooks/LineageInfo.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/hooks/LineageInfo.java?rev=1464915&r1=1464914&r2=1464915&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/hooks/LineageInfo.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/hooks/LineageInfo.java Fri Apr  5 10:34:08 2013
@@ -47,7 +47,8 @@ public class LineageInfo implements Seri
    *                 set operations like union on columns on other tables
    *                 e.g. T2.c1 = T1.c1 + T3.c1.
    * 4. SCRIPT - Indicates that the column is derived from the output
-   *             of a user script through a TRANSFORM, MAP or REDUCE syntax.
+   *             of a user script through a TRANSFORM, MAP or REDUCE syntax
+   *             or from the output of a PTF chain execution.
    */
   public static enum DependencyType {
     SIMPLE, EXPRESSION, SCRIPT

Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUnion.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUnion.java?rev=1464915&r1=1464914&r2=1464915&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUnion.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUnion.java Fri Apr  5 10:34:08 2013
@@ -116,17 +116,17 @@ final class OrcUnion implements UnionObj
 
     @Override
     public String getTypeName() {
-      StringBuilder builder = new StringBuilder("union{");
+      StringBuilder builder = new StringBuilder("uniontype<");
       boolean first = true;
       for(ObjectInspector child: children) {
         if (first) {
           first = false;
         } else {
-          builder.append(", ");
+          builder.append(",");
         }
         builder.append(child.getTypeName());
       }
-      builder.append("}");
+      builder.append(">");
       return builder.toString();
     }
 

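The union inspector now reports Hive's standard type string. A standalone sketch of the new formatting, with two hypothetical child type names:

    // Reproduces the new getTypeName() loop for children "int" and "string".
    public class UnionTypeNameSketch {
      public static void main(String[] args) {
        String[] childTypes = {"int", "string"};
        StringBuilder builder = new StringBuilder("uniontype<");
        for (int i = 0; i < childTypes.length; i++) {
          if (i > 0) {
            builder.append(",");
          }
          builder.append(childTypes[i]);
        }
        builder.append(">");
        // Prints "uniontype<int,string>"; the old code produced "union{int, string}".
        System.out.println(builder);
      }
    }
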
Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java?rev=1464915&r1=1464914&r2=1464915&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java Fri Apr  5 10:34:08 2013
@@ -384,11 +384,10 @@ public class Partition implements Serial
   }
 
   /**
-   * mapping from bucket number to bucket path
+   * Get all paths for this partition, in sorted order.
    */
-  // TODO: add test case and clean it up
   @SuppressWarnings("nls")
-  public Path getBucketPath(int bucketNum) {
+  public FileStatus[] getSortedPaths() {
     try {
       // Previously, this got the filesystem of the Table, which could be
       // different from the filesystem of the partition.
@@ -407,11 +406,23 @@ public class Partition implements Serial
       if (srcs.length == 0) {
         return null;
       }
-      return srcs[bucketNum].getPath();
+      return srcs;
     } catch (Exception e) {
-      throw new RuntimeException("Cannot get bucket path for bucket "
-          + bucketNum, e);
+      throw new RuntimeException("Cannot get path ", e);
+    }
+  }
+
+  /**
+   * mapping from bucket number to bucket path
+   */
+  // TODO: add test case and clean it up
+  @SuppressWarnings("nls")
+  public Path getBucketPath(int bucketNum) {
+    FileStatus srcs[] = getSortedPaths();
+    if (srcs == null) {
+      return null;
     }
+    return srcs[bucketNum].getPath();
   }
 
   @SuppressWarnings("nls")

Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java?rev=1464915&r1=1464914&r2=1464915&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java Fri Apr  5 10:34:08 2013
@@ -22,6 +22,7 @@ import java.io.IOException;
 import java.io.Serializable;
 import java.net.URI;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.LinkedHashMap;
@@ -31,6 +32,7 @@ import java.util.Properties;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.common.JavaUtils;
@@ -925,4 +927,30 @@ public class Table implements Serializab
     Hive hive = Hive.get();
     return hive.getIndexes(getTTable().getDbName(), getTTable().getTableName(), max);
   }
+
+  @SuppressWarnings("nls")
+  public FileStatus[] getSortedPaths() {
+    try {
+      // Previously, this got the filesystem of the Table, which could be
+      // different from the filesystem of the partition.
+      FileSystem fs = FileSystem.get(getPath().toUri(), Hive.get()
+          .getConf());
+      String pathPattern = getPath().toString();
+      if (getNumBuckets() > 0) {
+        pathPattern = pathPattern + "/*";
+      }
+      LOG.info("Path pattern = " + pathPattern);
+      FileStatus srcs[] = fs.globStatus(new Path(pathPattern));
+      Arrays.sort(srcs);
+      for (FileStatus src : srcs) {
+        LOG.info("Got file: " + src.getPath());
+      }
+      if (srcs.length == 0) {
+        return null;
+      }
+      return srcs;
+    } catch (Exception e) {
+      throw new RuntimeException("Cannot get path ", e);
+    }
+  }
 };

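Note the glob pattern: for a bucketed table rooted at, say, /warehouse/t (path illustrative) the pattern becomes /warehouse/t/*, so the method returns the table's bucket files in sorted order; for an unbucketed table the table directory itself is matched. This is the table-level twin of Partition.getSortedPaths() above.
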
Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java?rev=1464915&r1=1464914&r2=1464915&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java Fri Apr  5 10:34:08 2013
@@ -23,19 +23,19 @@ import java.util.HashMap;
 import java.util.LinkedHashMap;
 import java.util.Map;
 
-import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.CommonJoinOperator;
+import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
 import org.apache.hadoop.hive.ql.exec.FilterOperator;
-import org.apache.hadoop.hive.ql.exec.TableScanOperator;
-import org.apache.hadoop.hive.ql.exec.SelectOperator;
-import org.apache.hadoop.hive.ql.exec.ScriptOperator;
 import org.apache.hadoop.hive.ql.exec.GroupByOperator;
+import org.apache.hadoop.hive.ql.exec.LateralViewForwardOperator;
+import org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator;
 import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
-import org.apache.hadoop.hive.ql.exec.UnionOperator;
-import org.apache.hadoop.hive.ql.exec.CommonJoinOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.PTFOperator;
 import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
-import org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator;
-import org.apache.hadoop.hive.ql.exec.LateralViewForwardOperator;
-import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
+import org.apache.hadoop.hive.ql.exec.ScriptOperator;
+import org.apache.hadoop.hive.ql.exec.SelectOperator;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
 import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
 import org.apache.hadoop.hive.ql.lib.Dispatcher;
@@ -114,6 +114,9 @@ public class ColumnPruner implements Tra
     opRules.put(new RuleRegExp("R9",
       LateralViewForwardOperator.getOperatorName() + "%"),
       ColumnPrunerProcFactory.getLateralViewForwardProc());
+    opRules.put(new RuleRegExp("R10",
+        PTFOperator.getOperatorName() + "%"),
+        ColumnPrunerProcFactory.getPTFProc());
     // The dispatcher fires the processor corresponding to the closest matching
     // rule and passes the context along
     Dispatcher disp = new DefaultRuleDispatcher(ColumnPrunerProcFactory

Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java?rev=1464915&r1=1464914&r2=1464915&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java Fri Apr  5 10:34:08 2013
@@ -19,8 +19,10 @@
 package org.apache.hadoop.hive.ql.optimizer;
 
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
@@ -31,6 +33,7 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.ql.exec.ColumnInfo;
 import org.apache.hadoop.hive.ql.exec.CommonJoinOperator;
+import org.apache.hadoop.hive.ql.exec.ExtractOperator;
 import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
 import org.apache.hadoop.hive.ql.exec.FilterOperator;
 import org.apache.hadoop.hive.ql.exec.GroupByOperator;
@@ -40,6 +43,7 @@ import org.apache.hadoop.hive.ql.exec.La
 import org.apache.hadoop.hive.ql.exec.LimitOperator;
 import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.PTFOperator;
 import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
 import org.apache.hadoop.hive.ql.exec.RowSchema;
 import org.apache.hadoop.hive.ql.exec.ScriptOperator;
@@ -62,11 +66,21 @@ import org.apache.hadoop.hive.ql.plan.Gr
 import org.apache.hadoop.hive.ql.plan.JoinDesc;
 import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.PTFDesc;
+import org.apache.hadoop.hive.ql.plan.PTFDesc.PTFExpressionDef;
+import org.apache.hadoop.hive.ql.plan.PTFDesc.ShapeDetails;
+import org.apache.hadoop.hive.ql.plan.PTFDesc.WindowExpressionDef;
+import org.apache.hadoop.hive.ql.plan.PTFDesc.WindowFunctionDef;
+import org.apache.hadoop.hive.ql.plan.PTFDesc.WindowTableFunctionDef;
 import org.apache.hadoop.hive.ql.plan.PlanUtils;
 import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
 import org.apache.hadoop.hive.ql.plan.SelectDesc;
 import org.apache.hadoop.hive.ql.plan.TableDesc;
 import org.apache.hadoop.hive.ql.plan.TableScanDesc;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 
 /**
  * Factory for generating the different node processors used by ColumnPruner.
@@ -148,6 +162,170 @@ public final class ColumnPrunerProcFacto
   }
 
   /**
+   * - Pruning can only be done for Windowing. PTFs are black boxes;
+   * we assume all of their columns are needed.
+   * - Add column names referenced in WindowFn args and in WindowFn expressions
+   * to the pruned list of the child Select Op.
+   * - Prune the column names & types serde properties in each of the Shapes in the PTF chain:
+   *    - the InputDef's output shape
+   *    - Window Table Functions: window output shape & output shape.
+   * - Why is pruning the column names & types in the serde properties enough?
+   *   Because at runtime the OIs are rebuilt from these properties.
+   * - Finally, set the prunedColList on the ColumnPrunerContext
+   * and update the RR & signature on the PTFOp.
+   */
+  public static class ColumnPrunerPTFProc implements NodeProcessor {
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
+        Object... nodeOutputs) throws SemanticException {
+
+      PTFOperator op = (PTFOperator) nd;
+      PTFDesc conf = op.getConf();
+      //Since we cannot know what columns will be needed by a PTF chain,
+      //we do not prune columns on PTFOperator for PTF chains.
+      if (!conf.forWindowing()) {
+        return getDefaultProc().process(nd, stack, ctx, nodeOutputs);
+      }
+
+      ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx;
+      WindowTableFunctionDef def = (WindowTableFunctionDef) conf.getFuncDef();
+      ArrayList<ColumnInfo> sig = new ArrayList<ColumnInfo>();
+
+      List<String> prunedCols = cppCtx.getPrunedColList(op.getChildOperators().get(0));
+      //we create a copy of prunedCols to create a list of pruned columns for PTFOperator
+      prunedCols = new ArrayList<String>(prunedCols);
+      prunedColumnsList(prunedCols, def);
+      setSerdePropsOfShape(def.getInput().getOutputShape(), prunedCols);
+      setSerdePropsOfShape(def.getOutputFromWdwFnProcessing(), prunedCols);
+      setSerdePropsOfShape(def.getOutputShape(), prunedCols);
+
+      RowResolver oldRR = cppCtx.getOpToParseCtxMap().get(op).getRowResolver();
+      RowResolver newRR = buildPrunedRR(prunedCols, oldRR, sig);
+      cppCtx.getPrunedColLists().put(op, prunedInputList(prunedCols, def));
+      cppCtx.getOpToParseCtxMap().get(op).setRowResolver(newRR);
+      op.getSchema().setSignature(sig);
+      return null;
+    }
+
+    private static RowResolver buildPrunedRR(List<String> prunedCols,
+        RowResolver oldRR, ArrayList<ColumnInfo> sig) throws SemanticException{
+      RowResolver newRR = new RowResolver();
+      HashSet<String> prunedColsSet = new HashSet<String>(prunedCols);
+      for(ColumnInfo cInfo : oldRR.getRowSchema().getSignature()) {
+        if ( prunedColsSet.contains(cInfo.getInternalName())) {
+          String[] nm = oldRR.reverseLookup(cInfo.getInternalName());
+          newRR.put(nm[0], nm[1], cInfo);
+          sig.add(cInfo);
+        }
+      }
+      return newRR;
+    }
+
+    /*
+     * add any input columns referenced in WindowFn args or expressions.
+     */
+    private void prunedColumnsList(List<String> prunedCols, WindowTableFunctionDef tDef) {
+      if ( tDef.getWindowFunctions() != null ) {
+        for(WindowFunctionDef wDef : tDef.getWindowFunctions() ) {
+          if ( wDef.getArgs() == null) {
+            continue;
+          }
+          for(PTFExpressionDef arg : wDef.getArgs()) {
+            ExprNodeDesc exprNode = arg.getExprNode();
+            Utilities.mergeUniqElems(prunedCols, exprNode.getCols());
+          }
+        }
+      }
+      if ( tDef.getWindowExpressions() != null ) {
+        for(WindowExpressionDef expr : tDef.getWindowExpressions()) {
+          ExprNodeDesc exprNode = expr.getExprNode();
+          Utilities.mergeUniqElems(prunedCols, exprNode.getCols());
+        }
+      }
+      if (tDef.getPartition() != null) {
+        for (PTFExpressionDef col : tDef.getPartition().getExpressions()) {
+          ExprNodeDesc exprNode = col.getExprNode();
+          Utilities.mergeUniqElems(prunedCols, exprNode.getCols());
+        }
+      }
+      if (tDef.getOrder() != null) {
+        for (PTFExpressionDef col : tDef.getOrder().getExpressions()) {
+          ExprNodeDesc exprNode = col.getExprNode();
+          Utilities.mergeUniqElems(prunedCols, exprNode.getCols());
+        }
+      }
+    }
+
+    private List<String> getLowerCasePrunedCols(List<String> prunedCols){
+      List<String> lowerCasePrunedCols = new ArrayList<String>();
+      for (String col : prunedCols) {
+        lowerCasePrunedCols.add(col.toLowerCase());
+      }
+      return lowerCasePrunedCols;
+    }
+
+    /*
+     * reconstruct Column names & types list based on the prunedCols list.
+     */
+    private void setSerdePropsOfShape(ShapeDetails shp, List<String> prunedCols) {
+      List<String> columnNames = Arrays.asList(shp.getSerdeProps().get(
+          org.apache.hadoop.hive.serde.serdeConstants.LIST_COLUMNS).split(","));
+      List<TypeInfo> columnTypes = TypeInfoUtils
+          .getTypeInfosFromTypeString(shp.getSerdeProps().get(
+              org.apache.hadoop.hive.serde.serdeConstants.LIST_COLUMN_TYPES));
+      /*
+       * fieldNames in OI are lower-cased. So we compare lower cased names for now.
+       */
+      prunedCols = getLowerCasePrunedCols(prunedCols);
+
+      StringBuilder cNames = new StringBuilder();
+      StringBuilder cTypes = new StringBuilder();
+
+      boolean addComma = false;
+      for(int i=0; i < columnNames.size(); i++) {
+        if ( prunedCols.contains(columnNames.get(i)) ) {
+          cNames.append(addComma ? "," : "");
+          cTypes.append(addComma ? "," : "");
+          cNames.append(columnNames.get(i));
+          cTypes.append(columnTypes.get(i));
+          addComma = true;
+        }
+      }
+      shp.getSerdeProps().put(
+          org.apache.hadoop.hive.serde.serdeConstants.LIST_COLUMNS, cNames.toString());
+      shp.getSerdeProps().put(
+          org.apache.hadoop.hive.serde.serdeConstants.LIST_COLUMN_TYPES, cTypes.toString());
+    }
+
+    /*
+     * from the prunedCols list filter out columns that refer to WindowFns or WindowExprs
+     * the returned list is set as the prunedList needed by the PTFOp.
+     */
+    private ArrayList<String> prunedInputList(List<String> prunedCols,
+        WindowTableFunctionDef tDef) {
+      ArrayList<String> prunedInputCols = new ArrayList<String>();
+
+      StructObjectInspector OI = tDef.getInput().getOutputShape().getOI();
+      for(StructField f : OI.getAllStructFieldRefs()) {
+        String fName = f.getFieldName();
+        if ( prunedCols.contains(fName)) {
+          prunedInputCols.add(fName);
+        }
+      }
+
+      return prunedInputCols;
+    }
+  }
+
+  /**
+   * Factory method to get the ColumnPrunerPTFProc class.
+   *
+   * @return ColumnPrunerPTFProc
+   */
+  public static ColumnPrunerPTFProc getPTFProc() {
+    return new ColumnPrunerPTFProc();
+  }
+
+  /**
    * The Default Node Processor for Column Pruning.
    */
   public static class ColumnPrunerDefaultProc implements NodeProcessor {
@@ -285,6 +463,39 @@ public final class ColumnPrunerProcFacto
         }
         Collections.sort(colLists);
         pruneReduceSinkOperator(flags, op, cppCtx);
+      } else if ((childOperators.size() == 1)
+          && (childOperators.get(0) instanceof ExtractOperator )
+          && (childOperators.get(0).getChildOperators().size() == 1)
+          && (childOperators.get(0).getChildOperators().get(0) instanceof PTFOperator )
+          && ((PTFOperator)childOperators.get(0).
+              getChildOperators().get(0)).getConf().forWindowing() )  {
+
+        /*
+         * For an RS that is followed by an Extract and a PTFOp for windowing,
+         * do the same thing as above: reconstruct the value column list based on
+         * what the PTFOp requires.
+         */
+
+        assert parentOperators.size() == 1;
+
+        PTFOperator ptfOp = (PTFOperator) childOperators.get(0).getChildOperators().get(0);
+        List<String> childCols = cppCtx.getPrunedColList(ptfOp);
+        boolean[] flags = new boolean[conf.getValueCols().size()];
+        for (int i = 0; i < flags.length; i++) {
+          flags[i] = false;
+        }
+        if (childCols != null && childCols.size() > 0) {
+          ArrayList<String> outColNames = op.getConf().getOutputValueColumnNames();
+          for(int i=0; i < outColNames.size(); i++ ) {
+            if ( childCols.contains(outColNames.get(i))) {
+              ExprNodeDesc exprNode = op.getConf().getValueCols().get(i);
+              flags[i] = true;
+              Utilities.mergeUniqElems(colLists, exprNode.getCols());
+            }
+          }
+        }
+        Collections.sort(colLists);
+        pruneReduceSinkOperator(flags, op, cppCtx);
       } else {
         // Reduce Sink contains the columns needed - no need to aggregate from
         // children
@@ -831,4 +1042,4 @@ public final class ColumnPrunerProcFacto
     return new ColumnPrunerMapJoinProc();
   }
 
-}
+}
\ No newline at end of file

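To make the serde-property rewrite in setSerdePropsOfShape concrete, here is a standalone sketch with hypothetical inputs (columns a,b,c of types int,string,double pruned down to a and c); unlike the real method, which round-trips types through TypeInfoUtils, types are kept as plain strings here:

    import java.util.Arrays;
    import java.util.List;

    public class PruneSerdePropsSketch {
      public static void main(String[] args) {
        // Hypothetical shape serde properties before pruning.
        List<String> columnNames = Arrays.asList("a", "b", "c");
        List<String> columnTypes = Arrays.asList("int", "string", "double");
        List<String> prunedCols = Arrays.asList("a", "c");

        StringBuilder cNames = new StringBuilder();
        StringBuilder cTypes = new StringBuilder();
        boolean addComma = false;
        for (int i = 0; i < columnNames.size(); i++) {
          if (prunedCols.contains(columnNames.get(i))) {
            cNames.append(addComma ? "," : "").append(columnNames.get(i));
            cTypes.append(addComma ? "," : "").append(columnTypes.get(i));
            addComma = true;
          }
        }
        // columns -> "a,c", column.types -> "int,double"
        System.out.println(cNames + " / " + cTypes);
      }
    }
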
Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java?rev=1464915&r1=1464914&r2=1464915&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java Fri Apr  5 10:34:08 2013
@@ -178,7 +178,7 @@ public class GroupByOptimizer implements
       // Don't remove the operator for distincts
       if (useMapperSort && !groupByOp.getConf().isDistinct() &&
           (match == GroupByOptimizerSortMatch.COMPLETE_MATCH)) {
-        convertGroupByMapSideSortedGroupBy(groupByOp, depth);
+        convertGroupByMapSideSortedGroupBy(hiveConf, groupByOp, depth);
       }
       else if ((match == GroupByOptimizerSortMatch.PARTIAL_MATCH) ||
           (match == GroupByOptimizerSortMatch.COMPLETE_MATCH)) {
@@ -335,10 +335,13 @@ public class GroupByOptimizer implements
           throw new SemanticException(e.getMessage(), e);
         }
 
+        List<Partition> notDeniedPartns = partsList.getNotDeniedPartns();
+
         GroupByOptimizerSortMatch currentMatch =
-            partsList.getNotDeniedPartns().isEmpty() ? GroupByOptimizerSortMatch.NO_MATCH :
+            notDeniedPartns.isEmpty() ? GroupByOptimizerSortMatch.NO_MATCH :
+              notDeniedPartns.size() > 1 ? GroupByOptimizerSortMatch.PARTIAL_MATCH :
                 GroupByOptimizerSortMatch.COMPLETE_MATCH;
-        for (Partition part : partsList.getNotDeniedPartns()) {
+        for (Partition part : notDeniedPartns) {
           List<String> sortCols = part.getSortColNames();
           List<String> bucketCols = part.getBucketCols();
           GroupByOptimizerSortMatch match = matchBucketSortCols(groupByCols, bucketCols, sortCols);
@@ -452,7 +455,14 @@ public class GroupByOptimizer implements
 
     // Convert the group by to a map-side group by
     // The operators specified by depth and removed from the tree.
-    protected void convertGroupByMapSideSortedGroupBy(GroupByOperator groupByOp, int depth) {
+    protected void convertGroupByMapSideSortedGroupBy(
+        HiveConf conf, GroupByOperator groupByOp, int depth) {
+      // In test mode, don't change the query plan; just record the query property.
+      pGraphContext.getQueryProperties().setHasMapGroupBy(true);
+      if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_MAP_GROUPBY_SORT_TESTMODE)) {
+        return;
+      }
+
       if (groupByOp.removeChildren(depth)) {
         // Use bucketized hive input format - that makes sure that one mapper reads the entire file
         groupByOp.setUseBucketizedHiveInputFormat(true);

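Two behavioural notes fall out of the hunks above: with more than one not-denied partition the sort match is now capped at PARTIAL_MATCH, even if every partition individually matches completely; and under HIVE_MAP_GROUPBY_SORT_TESTMODE the plan is left untouched while the hasMapGroupBy query property is still recorded, so tests can assert the decision without depending on the rewritten plan.
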
Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java?rev=1464915&r1=1464914&r2=1464915&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java Fri Apr  5 10:34:08 2013
@@ -90,6 +90,10 @@ public class Optimizer {
       transformations.add(new SortedMergeBucketMapJoinOptimizer());
     }
 
+    if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTIMIZEBUCKETINGSORTING)) {
+      transformations.add(new BucketingSortingReduceSinkOptimizer());
+    }
+
     transformations.add(new UnionProcessor());
     transformations.add(new JoinReorder());
     if(HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTREDUCEDEDUPLICATION)) {

Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/Generator.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/Generator.java?rev=1464915&r1=1464914&r2=1464915&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/Generator.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/Generator.java Fri Apr  5 10:34:08 2013
@@ -22,17 +22,17 @@ import java.util.ArrayList;
 import java.util.LinkedHashMap;
 import java.util.Map;
 
-import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.exec.TableScanOperator;
-import org.apache.hadoop.hive.ql.exec.SelectOperator;
-import org.apache.hadoop.hive.ql.exec.ScriptOperator;
+import org.apache.hadoop.hive.ql.exec.CommonJoinOperator;
 import org.apache.hadoop.hive.ql.exec.GroupByOperator;
+import org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator;
 import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.PTFOperator;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.exec.ScriptOperator;
+import org.apache.hadoop.hive.ql.exec.SelectOperator;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.exec.UDTFOperator;
 import org.apache.hadoop.hive.ql.exec.UnionOperator;
-import org.apache.hadoop.hive.ql.exec.CommonJoinOperator;
-import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
-import org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator;
 import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
 import org.apache.hadoop.hive.ql.lib.Dispatcher;
 import org.apache.hadoop.hive.ql.lib.GraphWalker;
@@ -82,6 +82,8 @@ public class Generator implements Transf
       OpProcFactory.getReduceSinkProc());
     opRules.put(new RuleRegExp("R9", LateralViewJoinOperator.getOperatorName() + "%"),
       OpProcFactory.getLateralViewJoinProc());
+    opRules.put(new RuleRegExp("R10", PTFOperator.getOperatorName() + "%"),
+        OpProcFactory.getTransformProc());
 
     // The dispatcher fires the processor corresponding to the closest matching rule and passes the context along
     Dispatcher disp = new DefaultRuleDispatcher(OpProcFactory.getDefaultProc(), opRules, lCtx);

Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java?rev=1464915&r1=1464914&r2=1464915&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java Fri Apr  5 10:34:08 2013
@@ -18,19 +18,23 @@
 package org.apache.hadoop.hive.ql.optimizer.unionproc;
 
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 import java.util.Stack;
 
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.OperatorFactory;
+import org.apache.hadoop.hive.ql.exec.RowSchema;
 import org.apache.hadoop.hive.ql.exec.UnionOperator;
 import org.apache.hadoop.hive.ql.lib.Node;
 import org.apache.hadoop.hive.ql.lib.NodeProcessor;
 import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
 import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext.UnionParseContext;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 
@@ -187,8 +191,15 @@ public final class UnionProcFactory {
           for (int p = 0; p < numParents; p++) {
             OperatorDesc cloneDesc = (OperatorDesc)originalOp.getConf().clone();
 
+            RowSchema origSchema = originalOp.getSchema();
+            Map<String, ExprNodeDesc> origColExprMap = originalOp.getColumnExprMap();
+
             Operator<? extends OperatorDesc> cloneOp =
-              OperatorFactory.getAndMakeChild(cloneDesc, originalOp.getSchema(), parents.get(p));
+              OperatorFactory.getAndMakeChild(
+                cloneDesc, 
+                origSchema == null ? null : new RowSchema(origSchema), 
+                origColExprMap == null ? null : new HashMap(origColExprMap), 
+                parents.get(p));
             parents.set(p, cloneOp);
           }
         }

Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/ASTNode.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/ASTNode.java?rev=1464915&r1=1464914&r2=1464915&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/ASTNode.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/ASTNode.java Fri Apr  5 10:34:08 2013
@@ -37,10 +37,6 @@ public class ASTNode extends CommonTree 
   public ASTNode() {
   }
 
-  public ASTNode(ASTNode copy){
-    super(copy);
-  }
-
   /**
    * Constructor.
    *
@@ -51,6 +47,16 @@ public class ASTNode extends CommonTree 
     super(t);
   }
 
+  public ASTNode(ASTNode node) {
+    super(node);
+    this.origin = node.origin;
+  }
+
+  @Override
+  public Tree dupNode() {
+    return new ASTNode(this);
+  }
+
   /*
    * (non-Javadoc)
    *
@@ -95,12 +101,6 @@ public class ASTNode extends CommonTree 
     this.origin = origin;
   }
 
-  @Override
-  public Tree dupNode() {
-
-    return new ASTNode(this);
-  }
-
   public String dump() {
     StringBuilder sb = new StringBuilder();
 

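Besides reordering the members, the ASTNode change is behavioural: the copy constructor now carries over origin, so trees produced via dupNode() no longer lose their ASTNodeOrigin tracking.
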
Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g?rev=1464915&r1=1464914&r2=1464915&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g Fri Apr  5 10:34:08 2013
@@ -139,7 +139,7 @@ fromSource
 @init { gParent.msgs.push("from source"); }
 @after { gParent.msgs.pop(); }
     :
-    (tableSource | subQuerySource) (lateralView^)*
+    ((Identifier LPAREN)=> partitionedTableFunction | tableSource | subQuerySource) (lateralView^)*
     ;
 
 tableBucketSample
@@ -202,6 +202,38 @@ subQuerySource
     LPAREN queryStatementExpression RPAREN identifier -> ^(TOK_SUBQUERY queryStatementExpression identifier)
     ;
 
+//---------------------- Rules for parsing PTF clauses -----------------------------
+partitioningSpec
+@init { gParent.msgs.push("partitioningSpec clause"); }
+@after { gParent.msgs.pop(); } 
+   :
+   partitionByClause orderByClause? -> ^(TOK_PARTITIONINGSPEC partitionByClause orderByClause?) |
+   orderByClause -> ^(TOK_PARTITIONINGSPEC orderByClause) |
+   distributeByClause sortByClause? -> ^(TOK_PARTITIONINGSPEC distributeByClause sortByClause?) |
+   sortByClause -> ^(TOK_PARTITIONINGSPEC sortByClause) |
+   clusterByClause -> ^(TOK_PARTITIONINGSPEC clusterByClause)
+   ;
+
+partitionTableFunctionSource
+@init { gParent.msgs.push("partitionTableFunctionSource clause"); }
+@after { gParent.msgs.pop(); } 
+   :
+   subQuerySource |
+   tableSource |
+   partitionedTableFunction
+   ;
+
+partitionedTableFunction
+@init { gParent.msgs.push("ptf clause"); }
+@after { gParent.msgs.pop(); } 
+   :
+   name=Identifier
+   LPAREN KW_ON ptfsrc=partitionTableFunctionSource partitioningSpec?
+     ((Identifier LPAREN expression RPAREN ) => Identifier LPAREN expression RPAREN ( COMMA Identifier LPAREN expression RPAREN)*)? 
+   RPAREN alias=Identifier?
+   ->   ^(TOK_PTBLFUNCTION $name $alias? partitionTableFunctionSource partitioningSpec? expression*)
+   ; 
+
 //----------------------- Rules for parsing whereClause -----------------------------
 // where a=b and ...
 whereClause

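The syntactic predicate (Identifier LPAREN)=> is what tells a PTF invocation apart from a plain table source. The new rule accepts from-clauses of the shape (function, table, and column names illustrative): select * from noop(on part partition by p_mfgr order by p_name) np, where the optional trailing Identifier LPAREN expression RPAREN groups carry named per-invocation arguments.
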
Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g?rev=1464915&r1=1464914&r2=1464915&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g Fri Apr  5 10:34:08 2013
@@ -242,6 +242,14 @@ KW_ROLLUP: 'ROLLUP';
 KW_CUBE: 'CUBE';
 KW_DIRECTORIES: 'DIRECTORIES';
 KW_FOR: 'FOR';
+KW_WINDOW: 'WINDOW';
+KW_UNBOUNDED: 'UNBOUNDED';
+KW_PRECEDING: 'PRECEDING';
+KW_FOLLOWING: 'FOLLOWING';
+KW_CURRENT: 'CURRENT';
+KW_LESS: 'LESS';
+KW_MORE: 'MORE';
+KW_OVER: 'OVER';
 KW_GROUPING: 'GROUPING';
 KW_SETS: 'SETS';
 KW_TRUNCATE: 'TRUNCATE';

Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g?rev=1464915&r1=1464914&r2=1464915&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g Fri Apr  5 10:34:08 2013
@@ -288,6 +288,12 @@ TOK_SKEWED_LOCATIONS;
 TOK_SKEWED_LOCATION_LIST;
 TOK_SKEWED_LOCATION_MAP;
 TOK_STOREDASDIRS;
+TOK_PARTITIONINGSPEC;
+TOK_PTBLFUNCTION;
+TOK_WINDOWDEF;
+TOK_WINDOWSPEC;
+TOK_WINDOWVALUES;
+TOK_WINDOWRANGE;
 TOK_IGNOREPROTECTION;
 }
 
@@ -392,7 +398,7 @@ import java.util.HashMap;
     xlateMap.put("KW_COLLECTION", "COLLECTION");
     xlateMap.put("KW_ITEMS", "ITEMS");
     xlateMap.put("KW_KEYS", "KEYS");
-    xlateMap.put("KW_KEY_TYPE", "$KEY$");
+    xlateMap.put("KW_KEY_TYPE", "\$KEY\$");
     xlateMap.put("KW_LINES", "LINES");
     xlateMap.put("KW_STORED", "STORED");
     xlateMap.put("KW_SEQUENCEFILE", "SEQUENCEFILE");
@@ -420,8 +426,8 @@ import java.util.HashMap;
     xlateMap.put("KW_LIMIT", "LIMIT");
     xlateMap.put("KW_SET", "SET");
     xlateMap.put("KW_PROPERTIES", "TBLPROPERTIES");
-    xlateMap.put("KW_VALUE_TYPE", "$VALUE$");
-    xlateMap.put("KW_ELEM_TYPE", "$ELEM$");
+    xlateMap.put("KW_VALUE_TYPE", "\$VALUE\$");
+    xlateMap.put("KW_ELEM_TYPE", "\$ELEM\$");
 
     // Operators
     xlateMap.put("DOT", ".");
@@ -446,7 +452,7 @@ import java.util.HashMap;
     xlateMap.put("PLUS", "+");
     xlateMap.put("MINUS", "-");
     xlateMap.put("STAR", "*");
-    xlateMap.put("MOD", "%");
+    xlateMap.put("MOD", "\%");
 
     xlateMap.put("AMPERSAND", "&");
     xlateMap.put("TILDE", "~");
@@ -1792,9 +1798,10 @@ regular_body
    clusterByClause?
    distributeByClause?
    sortByClause?
+   window_clause?
    limitClause? -> ^(TOK_QUERY fromClause ^(TOK_INSERT insertClause
                      selectClause whereClause? groupByClause? havingClause? orderByClause? clusterByClause?
-                     distributeByClause? sortByClause? limitClause?))
+                     distributeByClause? sortByClause? window_clause? limitClause?))
    |
    selectStatement
    ;
@@ -1810,9 +1817,10 @@ selectStatement
    clusterByClause?
    distributeByClause?
    sortByClause?
+   window_clause?
    limitClause? -> ^(TOK_QUERY fromClause ^(TOK_INSERT ^(TOK_DESTINATION ^(TOK_DIR TOK_TMP_FILE))
                      selectClause whereClause? groupByClause? havingClause? orderByClause? clusterByClause?
-                     distributeByClause? sortByClause? limitClause?))
+                     distributeByClause? sortByClause? window_clause? limitClause?))
    ;
 
 
@@ -1827,9 +1835,10 @@ body
    clusterByClause?
    distributeByClause?
    sortByClause?
+   window_clause?
    limitClause? -> ^(TOK_INSERT insertClause?
                      selectClause whereClause? groupByClause? havingClause? orderByClause? clusterByClause?
-                     distributeByClause? sortByClause? limitClause?)
+                     distributeByClause? sortByClause? window_clause? limitClause?)
    |
    selectClause
    whereClause?
@@ -1839,9 +1848,10 @@ body
    clusterByClause?
    distributeByClause?
    sortByClause?
+   window_clause?
    limitClause? -> ^(TOK_INSERT ^(TOK_DESTINATION ^(TOK_DIR TOK_TMP_FILE))
                      selectClause whereClause? groupByClause? havingClause? orderByClause? clusterByClause?
-                     distributeByClause? sortByClause? limitClause?)
+                     distributeByClause? sortByClause? window_clause? limitClause?)
    ;
 
 insertClause

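With window_clause slotted in ahead of limitClause in each body variant, a query may now end with a named window definition, e.g. (illustrative): select p_mfgr, rank() over w from part window w as (partition by p_mfgr order by p_retailprice).
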
Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g?rev=1464915&r1=1464914&r2=1464915&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g Fri Apr  5 10:34:08 2013
@@ -124,7 +124,18 @@ clusterByClause
     |
     KW_CLUSTER KW_BY
     expression
-    ( COMMA expression )* -> ^(TOK_CLUSTERBY expression+)
+    ( (COMMA)=>COMMA expression )* -> ^(TOK_CLUSTERBY expression+)
+    ;
+
+partitionByClause
+@init  { gParent.msgs.push("partition by clause"); }
+@after { gParent.msgs.pop(); }
+    :
+    KW_PARTITION KW_BY
+    LPAREN expression (COMMA expression)* RPAREN -> ^(TOK_DISTRIBUTEBY expression+)
+    |
+    KW_PARTITION KW_BY
+    expression ((COMMA)=> COMMA expression)* -> ^(TOK_DISTRIBUTEBY expression+)
     ;
 
 distributeByClause
@@ -135,7 +146,7 @@ distributeByClause
     LPAREN expression (COMMA expression)* RPAREN -> ^(TOK_DISTRIBUTEBY expression+)
     |
     KW_DISTRIBUTE KW_BY
-    expression (COMMA expression)* -> ^(TOK_DISTRIBUTEBY expression+)
+    expression ((COMMA)=> COMMA expression)* -> ^(TOK_DISTRIBUTEBY expression+)
     ;
 
 sortByClause
@@ -148,7 +159,7 @@ sortByClause
     |
     KW_SORT KW_BY
     columnRefOrder
-    ( COMMA columnRefOrder)* -> ^(TOK_SORTBY columnRefOrder+)
+    ( (COMMA)=> COMMA columnRefOrder)* -> ^(TOK_SORTBY columnRefOrder+)
     ;
 
 // fun(par1, par2, par3)
@@ -501,5 +512,5 @@ identifier
     
 nonReserved
     :
-    KW_TRUE | KW_FALSE | KW_ALL | KW_AND | KW_OR | KW_NOT | KW_LIKE | KW_EXISTS | KW_ASC | KW_DESC | KW_ORDER | KW_GROUP | KW_BY | KW_FROM | KW_AS | KW_DISTINCT | KW_INSERT | KW_OVERWRITE | KW_OUTER | KW_PRESERVE | KW_LEFT | KW_RIGHT | KW_FULL | KW_PARTITION | KW_PARTITIONS | KW_TABLE | KW_TABLES | KW_COLUMNS | KW_INDEX | KW_INDEXES | KW_REBUILD | KW_FUNCTIONS | KW_SHOW | KW_MSCK | KW_REPAIR | KW_DIRECTORY | KW_LOCAL | KW_USING | KW_CLUSTER | KW_DISTRIBUTE | KW_SORT | KW_UNION | KW_LOAD | KW_EXPORT | KW_IMPORT | KW_DATA | KW_INPATH | KW_IS | KW_NULL | KW_CREATE | KW_EXTERNAL | KW_ALTER | KW_CHANGE | KW_COLUMN | KW_FIRST | KW_AFTER | KW_DESCRIBE | KW_DROP | KW_RENAME | KW_IGNORE | KW_PROTECTION | KW_TO | KW_COMMENT | KW_BOOLEAN | KW_TINYINT | KW_SMALLINT | KW_INT | KW_BIGINT | KW_FLOAT | KW_DOUBLE | KW_DATE | KW_DATETIME | KW_TIMESTAMP | KW_DECIMAL | KW_STRING | KW_ARRAY | KW_STRUCT | KW_UNIONTYPE | KW_PARTITIONED | KW_CLUSTERED | KW_SORTED | KW_INTO | KW_BUCKETS | KW_ROW | KW_ROWS | KW_FORMAT | KW_DELIMITED | KW_FIELDS | KW_TERMINATED | KW_ESCAPED | KW_COLLECTION | KW_ITEMS | KW_KEYS | KW_KEY_TYPE | KW_LINES | KW_STORED | KW_FILEFORMAT | KW_SEQUENCEFILE | KW_TEXTFILE | KW_RCFILE | KW_ORCFILE | KW_INPUTFORMAT | KW_OUTPUTFORMAT | KW_INPUTDRIVER | KW_OUTPUTDRIVER | KW_OFFLINE | KW_ENABLE | KW_DISABLE | KW_READONLY | KW_NO_DROP | KW_LOCATION | KW_BUCKET | KW_OUT | KW_OF | KW_PERCENT | KW_ADD | KW_REPLACE | KW_RLIKE | KW_REGEXP | KW_TEMPORARY | KW_EXPLAIN | KW_FORMATTED | KW_PRETTY | KW_DEPENDENCY | KW_SERDE | KW_WITH | KW_DEFERRED | KW_SERDEPROPERTIES | KW_DBPROPERTIES | KW_LIMIT | KW_SET | KW_UNSET | KW_TBLPROPERTIES | KW_IDXPROPERTIES | KW_VALUE_TYPE | KW_ELEM_TYPE | KW_MAPJOIN | KW_STREAMTABLE | KW_HOLD_DDLTIME | KW_CLUSTERSTATUS | KW_UTC | KW_UTCTIMESTAMP | KW_LONG | KW_DELETE | KW_PLUS | KW_MINUS | KW_FETCH | KW_INTERSECT | KW_VIEW | KW_IN | KW_DATABASES | KW_MATERIALIZED | KW_SCHEMA | KW_SCHEMAS | KW_GRANT | KW_REVOKE | KW_SSL | KW_UNDO | KW_LOCK | KW_LOCKS | KW_UNLOCK | KW_SHARED | KW_EXCLUSIVE | KW_PROCEDURE | KW_UNSIGNED | KW_WHILE | KW_READ | KW_READS | KW_PURGE | KW_RANGE | KW_ANALYZE | KW_BEFORE | KW_BETWEEN | KW_BOTH | KW_BINARY | KW_CONTINUE | KW_CURSOR | KW_TRIGGER | KW_RECORDREADER | KW_RECORDWRITER | KW_SEMI | KW_LATERAL | KW_TOUCH | KW_ARCHIVE | KW_UNARCHIVE | KW_COMPUTE | KW_STATISTICS | KW_USE | KW_OPTION | KW_CONCATENATE | KW_SHOW_DATABASE | KW_UPDATE | KW_RESTRICT | KW_CASCADE | KW_SKEWED | KW_ROLLUP | KW_CUBE | KW_DIRECTORIES | KW_FOR | KW_GROUPING | KW_SETS | KW_TRUNCATE | KW_NOSCAN | KW_USER | KW_ROLE | KW_INNER
+    KW_TRUE | KW_FALSE | KW_LIKE | KW_EXISTS | KW_ASC | KW_DESC | KW_ORDER | KW_GROUP | KW_BY | KW_AS | KW_INSERT | KW_OVERWRITE | KW_OUTER | KW_LEFT | KW_RIGHT | KW_FULL | KW_PARTITION | KW_PARTITIONS | KW_TABLE | KW_TABLES | KW_COLUMNS | KW_INDEX | KW_INDEXES | KW_REBUILD | KW_FUNCTIONS | KW_SHOW | KW_MSCK | KW_REPAIR | KW_DIRECTORY | KW_LOCAL | KW_USING | KW_CLUSTER | KW_DISTRIBUTE | KW_SORT | KW_UNION | KW_LOAD | KW_EXPORT | KW_IMPORT | KW_DATA | KW_INPATH | KW_IS | KW_NULL | KW_CREATE | KW_EXTERNAL | KW_ALTER | KW_CHANGE | KW_FIRST | KW_AFTER | KW_DESCRIBE | KW_DROP | KW_RENAME | KW_IGNORE | KW_PROTECTION | KW_TO | KW_COMMENT | KW_BOOLEAN | KW_TINYINT | KW_SMALLINT | KW_INT | KW_BIGINT | KW_FLOAT | KW_DOUBLE | KW_DATE | KW_DATETIME | KW_TIMESTAMP | KW_DECIMAL | KW_STRING | KW_ARRAY | KW_STRUCT | KW_UNIONTYPE | KW_PARTITIONED | KW_CLUSTERED | KW_SORTED | KW_INTO | KW_BUCKETS | KW_ROW | KW_ROWS | KW_FORMAT | KW_DELIMITED | KW_FIELDS | KW_TERMINATED | KW_ESCAPED | KW_COLLECTION | KW_ITEMS | KW_KEYS | KW_KEY_TYPE | KW_LINES | KW_STORED | KW_FILEFORMAT | KW_SEQUENCEFILE | KW_TEXTFILE | KW_RCFILE | KW_ORCFILE | KW_INPUTFORMAT | KW_OUTPUTFORMAT | KW_INPUTDRIVER | KW_OUTPUTDRIVER | KW_OFFLINE | KW_ENABLE | KW_DISABLE | KW_READONLY | KW_NO_DROP | KW_LOCATION | KW_BUCKET | KW_OUT | KW_OF | KW_PERCENT | KW_ADD | KW_REPLACE | KW_RLIKE | KW_REGEXP | KW_TEMPORARY | KW_EXPLAIN | KW_FORMATTED | KW_PRETTY | KW_DEPENDENCY | KW_SERDE | KW_WITH | KW_DEFERRED | KW_SERDEPROPERTIES | KW_DBPROPERTIES | KW_LIMIT | KW_SET | KW_UNSET | KW_TBLPROPERTIES | KW_IDXPROPERTIES | KW_VALUE_TYPE | KW_ELEM_TYPE | KW_MAPJOIN | KW_STREAMTABLE | KW_HOLD_DDLTIME | KW_CLUSTERSTATUS | KW_UTC | KW_UTCTIMESTAMP | KW_LONG | KW_DELETE | KW_PLUS | KW_MINUS | KW_FETCH | KW_INTERSECT | KW_VIEW | KW_IN | KW_DATABASES | KW_MATERIALIZED | KW_SCHEMA | KW_SCHEMAS | KW_GRANT | KW_REVOKE | KW_SSL | KW_UNDO | KW_LOCK | KW_LOCKS | KW_UNLOCK | KW_SHARED | KW_EXCLUSIVE | KW_PROCEDURE | KW_UNSIGNED | KW_WHILE | KW_READ | KW_READS | KW_PURGE | KW_RANGE | KW_ANALYZE | KW_BEFORE | KW_BETWEEN | KW_BOTH | KW_BINARY | KW_CONTINUE | KW_CURSOR | KW_TRIGGER | KW_RECORDREADER | KW_RECORDWRITER | KW_SEMI | KW_LATERAL | KW_TOUCH | KW_ARCHIVE | KW_UNARCHIVE | KW_COMPUTE | KW_STATISTICS | KW_USE | KW_OPTION | KW_CONCATENATE | KW_SHOW_DATABASE | KW_UPDATE | KW_RESTRICT | KW_CASCADE | KW_SKEWED | KW_ROLLUP | KW_CUBE | KW_DIRECTORIES | KW_FOR | KW_GROUPING | KW_SETS | KW_TRUNCATE | KW_NOSCAN | KW_USER | KW_ROLE | KW_INNER
     ;

Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java?rev=1464915&r1=1464914&r2=1464915&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java Fri Apr  5 10:34:08 2013
@@ -28,12 +28,15 @@ import java.util.Set;
 
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.Context;
+import org.apache.hadoop.hive.ql.QueryProperties;
 import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator;
 import org.apache.hadoop.hive.ql.exec.FetchTask;
+import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
 import org.apache.hadoop.hive.ql.exec.GroupByOperator;
 import org.apache.hadoop.hive.ql.exec.JoinOperator;
 import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
 import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator;
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.exec.Task;
@@ -74,6 +77,8 @@ public class ParseContext {
   private Map<MapJoinOperator, QBJoinTree> mapJoinContext;
   private Map<SMBMapJoinOperator, QBJoinTree> smbMapJoinContext;
   private HashMap<TableScanOperator, Table> topToTable;
+  private Map<FileSinkOperator, Table> fsopToTable;
+  private List<ReduceSinkOperator> reduceSinkOperatorsAddedByEnforceBucketingSorting;
   private HashMap<String, SplitSample> nameToSplitSample;
   private List<LoadTableDesc> loadTableWork;
   private List<LoadFileDesc> loadFileWork;
@@ -109,6 +114,7 @@ public class ParseContext {
   private List<Task<? extends Serializable>> rootTasks;
 
   private FetchTask fetchTask;
+  private QueryProperties queryProperties;
 
   public ParseContext() {
   }
@@ -164,6 +170,7 @@ public class ParseContext {
       Map<JoinOperator, QBJoinTree> joinContext,
       Map<SMBMapJoinOperator, QBJoinTree> smbMapJoinContext,
       HashMap<TableScanOperator, Table> topToTable,
+      Map<FileSinkOperator, Table> fsopToTable,
       List<LoadTableDesc> loadTableWork, List<LoadFileDesc> loadFileWork,
       Context ctx, HashMap<String, String> idToTableNameMap, int destTableId,
       UnionProcContext uCtx, List<AbstractMapJoinOperator<? extends MapJoinDesc>> listMapJoinOpsNoReducer,
@@ -174,7 +181,9 @@ public class ParseContext {
       HashMap<String, SplitSample> nameToSplitSample,
       HashSet<ReadEntity> semanticInputs, List<Task<? extends Serializable>> rootTasks,
       Map<TableScanOperator, Map<String, ExprNodeDesc>> opToPartToSkewedPruner,
-      Map<String, ReadEntity> viewAliasToInput) {
+      Map<String, ReadEntity> viewAliasToInput,
+      List<ReduceSinkOperator> reduceSinkOperatorsAddedByEnforceBucketingSorting,
+      QueryProperties queryProperties) {
     this.conf = conf;
     this.qb = qb;
     this.ast = ast;
@@ -183,6 +192,7 @@ public class ParseContext {
     this.joinContext = joinContext;
     this.smbMapJoinContext = smbMapJoinContext;
     this.topToTable = topToTable;
+    this.fsopToTable = fsopToTable;
     this.loadFileWork = loadFileWork;
     this.loadTableWork = loadTableWork;
     this.opParseCtx = opParseCtx;
@@ -203,6 +213,9 @@ public class ParseContext {
     this.rootTasks = rootTasks;
     this.opToPartToSkewedPruner = opToPartToSkewedPruner;
     this.viewAliasToInput = viewAliasToInput;
+    this.reduceSinkOperatorsAddedByEnforceBucketingSorting =
+        reduceSinkOperatorsAddedByEnforceBucketingSorting;
+    this.queryProperties = queryProperties;
   }
 
   /**
@@ -304,6 +317,24 @@ public class ParseContext {
     this.topToTable = topToTable;
   }
 
+  public Map<FileSinkOperator, Table> getFsopToTable() {
+    return fsopToTable;
+  }
+
+  public void setFsopToTable(Map<FileSinkOperator, Table> fsopToTable) {
+    this.fsopToTable = fsopToTable;
+  }
+
+  public List<ReduceSinkOperator> getReduceSinkOperatorsAddedByEnforceBucketingSorting() {
+    return reduceSinkOperatorsAddedByEnforceBucketingSorting;
+  }
+
+  public void setReduceSinkOperatorsAddedByEnforceBucketingSorting(
+      List<ReduceSinkOperator> reduceSinkOperatorsAddedByEnforceBucketingSorting) {
+    this.reduceSinkOperatorsAddedByEnforceBucketingSorting =
+        reduceSinkOperatorsAddedByEnforceBucketingSorting;
+  }
+
   /**
    * @return the topOps
    */
@@ -596,4 +627,12 @@ public class ParseContext {
   public Map<String, ReadEntity> getViewAliasToInput() {
     return viewAliasToInput;
   }
+
+  public QueryProperties getQueryProperties() {
+    return queryProperties;
+  }
+
+  public void setQueryProperties(QueryProperties queryProperties) {
+    this.queryProperties = queryProperties;
+  }
 }

Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseDriver.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseDriver.java?rev=1464915&r1=1464914&r2=1464915&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseDriver.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseDriver.java Fri Apr  5 10:34:08 2013
@@ -203,4 +203,44 @@ public class ParseDriver {
 
     return (ASTNode) r.getTree();
   }
+
+
+  /*
+   * Parse a String as a Select List. This allows table functions to be passed expression
+   * Strings that are translated in the context they define at invocation time. Currently
+   * used by NPath to allow users to specify what output they want. NPath allows expressions
+   * on 'tpath', a column that represents the matched set of rows. This column doesn't exist
+   * in the input schema, and hence the result expressions cannot be analyzed by the regular
+   * Hive translation process.
+   */
+  public ASTNode parseSelect(String command, Context ctx) throws ParseException {
+    LOG.info("Parsing command: " + command);
+
+    HiveLexerX lexer = new HiveLexerX(new ANTLRNoCaseStringStream(command));
+    TokenRewriteStream tokens = new TokenRewriteStream(lexer);
+    if (ctx != null) {
+      ctx.setTokenRewriteStream(tokens);
+    }
+    HiveParser parser = new HiveParser(tokens);
+    parser.setTreeAdaptor(adaptor);
+    HiveParser_SelectClauseParser.selectClause_return r = null;
+    try {
+      r = parser.selectClause();
+    } catch (RecognitionException e) {
+      e.printStackTrace();
+      throw new ParseException(parser.errors);
+    }
+
+    if (lexer.getErrors().size() == 0 && parser.errors.size() == 0) {
+      LOG.info("Parse Completed");
+    } else if (lexer.getErrors().size() != 0) {
+      throw new ParseException(lexer.getErrors());
+    } else {
+      throw new ParseException(parser.errors);
+    }
+
+    return (ASTNode) r.getTree();
+  }
 }
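
A usage sketch for the new entry point (illustrative only; the expression string is made up,
and it assumes the clause text carries the leading SELECT keyword, since parseSelect starts
at the selectClause rule):

    import org.apache.hadoop.hive.ql.parse.ASTNode;
    import org.apache.hadoop.hive.ql.parse.ParseDriver;
    import org.apache.hadoop.hive.ql.parse.ParseException;

    public class ParseSelectSketch {
      public static void main(String[] args) throws ParseException {
        ParseDriver pd = new ParseDriver();
        // 'tpath' is the virtual column NPath exposes; it is not in the input
        // schema, which is why the regular translation path cannot analyze it.
        ASTNode ast = pd.parseSelect(
            "select tpath[0].tstamp as start_ts, count(tpath) as path_len", null);
        System.out.println(ast.toStringTree());
      }
    }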

Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/PrunedPartitionList.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/PrunedPartitionList.java?rev=1464915&r1=1464914&r2=1464915&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/PrunedPartitionList.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/PrunedPartitionList.java Fri Apr  5 10:34:08 2013
@@ -31,7 +31,7 @@ import org.apache.hadoop.hive.ql.metadat
 public class PrunedPartitionList {
 
   // source table
-  private Table source;
+  private final Table source;
 
   // confirmed partitions - satisfy the partition criteria
   private Set<Partition> confirmedPartns;
@@ -44,7 +44,7 @@ public class PrunedPartitionList {
 
   /**
    * @param confirmedPartns
-   *          confirmed paritions
+   *          confirmed partitions
    * @param unknownPartns
    *          unknown partitions
    */
@@ -62,7 +62,7 @@ public class PrunedPartitionList {
 
   /**
    * get confirmed partitions.
-   * 
+   *
   * @return confirmedPartns confirmed partitions
    */
   public Set<Partition> getConfirmedPartns() {
@@ -71,7 +71,7 @@ public class PrunedPartitionList {
 
   /**
    * get unknown partitions.
-   * 
+   *
   * @return unknownPartns unknown partitions
    */
   public Set<Partition> getUnknownPartns() {
@@ -80,7 +80,7 @@ public class PrunedPartitionList {
 
   /**
    * get denied partitions.
-   * 
+   *
   * @return deniedPartns denied partitions
    */
   public Set<Partition> getDeniedPartns() {
@@ -99,7 +99,7 @@ public class PrunedPartitionList {
 
   /**
    * set confirmed partitions.
-   * 
+   *
    * @param confirmedPartns
   *          confirmed partitions
    */
@@ -109,7 +109,7 @@ public class PrunedPartitionList {
 
   /**
    * set unknown partitions.
-   * 
+   *
    * @param unknownPartns
    *          unknown partitions
    */

Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java?rev=1464915&r1=1464914&r2=1464915&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java Fri Apr  5 10:34:08 2013
@@ -52,6 +52,16 @@ public class QB {
   private boolean isAnalyzeRewrite;
   private CreateTableDesc tblDesc = null; // table descriptor of the final
 
+  // used by PTFs
+  /*
+   * This map maintains the PTFInvocationSpec for each PTF chain invocation in this QB.
+   */
+  private HashMap<ASTNode, PTFInvocationSpec> ptfNodeToSpec;
+  /*
+   * Maps each destination clause in this QB to the WindowingSpec for its windowing clauses.
+   */
+  private HashMap<String, WindowingSpec> destToWindowingSpec;
+
   // results
 
   public void print(String msg) {
@@ -76,6 +86,8 @@ public class QB {
     }
     qbp = new QBParseInfo(alias, isSubQ);
     qbm = new QBMetaData();
+    ptfNodeToSpec = new HashMap<ASTNode, PTFInvocationSpec>();
+    destToWindowingSpec = new HashMap<String, WindowingSpec>();
     id = getAppendedAliasFromId(outer_id, alias);
   }
 
@@ -246,4 +258,35 @@ public class QB {
   public void setAnalyzeRewrite(boolean isAnalyzeRewrite) {
     this.isAnalyzeRewrite = isAnalyzeRewrite;
   }
-}
+
+  public PTFInvocationSpec getPTFInvocationSpec(ASTNode node) {
+    return ptfNodeToSpec == null ? null : ptfNodeToSpec.get(node);
+  }
+
+  public void addPTFNodeToSpec(ASTNode node, PTFInvocationSpec spec) {
+    ptfNodeToSpec = ptfNodeToSpec == null ? new HashMap<ASTNode, PTFInvocationSpec>() : ptfNodeToSpec;
+    ptfNodeToSpec.put(node, spec);
+  }
+
+  public HashMap<ASTNode, PTFInvocationSpec> getPTFNodeToSpec() {
+    return ptfNodeToSpec;
+  }
+
+  public WindowingSpec getWindowingSpec(String dest) {
+    return destToWindowingSpec.get(dest);
+  }
+
+  public void addDestToWindowingSpec(String dest, WindowingSpec windowingSpec) {
+    destToWindowingSpec.put(dest, windowingSpec);
+  }
+
+  public boolean hasWindowingSpec(String dest) {
+    return destToWindowingSpec.get(dest) != null;
+  }
+
+  public HashMap<String, WindowingSpec> getAllWindowingSpecs() {
+    return destToWindowingSpec;
+  }
+
+
+}
\ No newline at end of file
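
The two maps give each QB a small registry keyed by PTF invocation node and by destination
clause. Roughly (hypothetical driver code, using only the methods added above and assuming
WindowingSpec has a default constructor, as a spec bean typically does):

    import org.apache.hadoop.hive.ql.parse.QB;
    import org.apache.hadoop.hive.ql.parse.WindowingSpec;

    public class QBWindowingSketch {
      public static void main(String[] args) {
        QB qb = new QB(null, "q1", false);
        String dest = "insclause-0";  // a destination clause name, for illustration
        // During select-clause analysis, record one WindowingSpec per destination...
        qb.addDestToWindowingSpec(dest, new WindowingSpec());
        // ...so that later phases can test for and retrieve it.
        if (qb.hasWindowingSpec(dest)) {
          WindowingSpec spec = qb.getWindowingSpec(dest);
          System.out.println("found windowing spec for " + dest + ": " + spec);
        }
      }
    }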

Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java?rev=1464915&r1=1464914&r2=1464915&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java Fri Apr  5 10:34:08 2013
@@ -68,6 +68,7 @@ public class QBParseInfo {
   private String partName;  // used for column statistics
   private boolean isTblLvl; // used for column statistics
 
+
   /**
    * ClusterBy is a short name for both DistributeBy and SortBy.
    */
@@ -98,6 +99,10 @@ public class QBParseInfo {
   private final LinkedHashMap<String, LinkedHashMap<String, ASTNode>> destToAggregationExprs;
   private final HashMap<String, List<ASTNode>> destToDistinctFuncExprs;
 
+  // used by Windowing
+  private final LinkedHashMap<String, LinkedHashMap<String, ASTNode>> destToWindowingExprs;
+
+
   @SuppressWarnings("unused")
   private static final Log LOG = LogFactory.getLog(QBParseInfo.class.getName());
 
@@ -121,6 +126,7 @@ public class QBParseInfo {
     destGroupingSets = new HashSet<String>();
 
     destToAggregationExprs = new LinkedHashMap<String, LinkedHashMap<String, ASTNode>>();
+    destToWindowingExprs = new LinkedHashMap<String, LinkedHashMap<String, ASTNode>>();
     destToDistinctFuncExprs = new HashMap<String, List<ASTNode>>();
 
     this.alias = alias;
@@ -133,6 +139,14 @@ public class QBParseInfo {
 
   }
 
+  /*
+   * If the aggregation expressions of a QB need to be handled by the Windowing
+   * PTF, we invoke this method to clear the aggregation expressions for the dest clause.
+   */
+  public void clearAggregationExprsForClause(String clause) {
+    destToAggregationExprs.get(clause).clear();
+  }
+
   public void setAggregationExprsForClause(String clause,
       LinkedHashMap<String, ASTNode> aggregationTrees) {
     destToAggregationExprs.put(clause, aggregationTrees);
@@ -160,6 +174,26 @@ public class QBParseInfo {
     return destToAggregationExprs.get(clause);
   }
 
+  public void addWindowingExprToClause(String clause, ASTNode windowingExprNode) {
+    LinkedHashMap<String, ASTNode> windowingExprs = destToWindowingExprs.get(clause);
+    if ( windowingExprs == null ) {
+      windowingExprs = new LinkedHashMap<String, ASTNode>();
+      destToWindowingExprs.put(clause, windowingExprs);
+    }
+    windowingExprs.put(windowingExprNode.toStringTree(), windowingExprNode);
+  }
+
+  public HashMap<String, ASTNode> getWindowingExprsForClause(String clause) {
+    return destToWindowingExprs.get(clause);
+  }
+
+  public void clearDistinctFuncExprsForClause(String clause) {
+    List<ASTNode> l = destToDistinctFuncExprs.get(clause);
+    if ( l != null ) {
+      l.clear();
+    }
+  }
+
   public void setDistinctFuncExprsForClause(String clause, List<ASTNode> ast) {
     destToDistinctFuncExprs.put(clause, ast);
   }
@@ -594,3 +628,5 @@ public class QBParseInfo {
     this.isPartialScanAnalyzeCommand = isPartialScanAnalyzeCommand;
   }
 }
+
+
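
Taken together with the QB changes, the intended flow is: once an aggregation call is
recognized as a windowing invocation, it moves out of the aggregation map and into the
windowing map for the same destination. A rough sketch (hypothetical helper, using only
the methods added above):

    import org.apache.hadoop.hive.ql.parse.ASTNode;
    import org.apache.hadoop.hive.ql.parse.QBParseInfo;

    public class WindowingExprSketch {
      // Reroute a parsed windowing expression from aggregation to windowing handling.
      // Assumes the clause already has an aggregation map registered, since
      // clearAggregationExprsForClause clears the existing entry in place.
      static void moveToWindowing(QBParseInfo qbp, String dest, ASTNode overExpr) {
        qbp.addWindowingExprToClause(dest, overExpr);
        // The Windowing PTF now owns these expressions, so drop them from the
        // regular aggregation path for this destination.
        qbp.clearAggregationExprsForClause(dest);
        qbp.clearDistinctFuncExprsForClause(dest);
      }
    }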

Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/SelectClauseParser.g
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/SelectClauseParser.g?rev=1464915&r1=1464914&r2=1464915&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/SelectClauseParser.g (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/SelectClauseParser.g Fri Apr  5 10:34:08 2013
@@ -125,7 +125,9 @@ selectItem
 @init { gParent.msgs.push("selection target"); }
 @after { gParent.msgs.pop(); }
     :
-    ( selectExpression  ((KW_AS? identifier) | (KW_AS LPAREN identifier (COMMA identifier)* RPAREN))?) -> ^(TOK_SELEXPR selectExpression identifier*)
+    ( selectExpression (KW_OVER ws=window_specification )?
+      ((KW_AS? identifier) | (KW_AS LPAREN identifier (COMMA identifier)* RPAREN))?
+    ) -> ^(TOK_SELEXPR selectExpression identifier* $ws?)
     ;
 
 trfmClause
@@ -155,3 +157,64 @@ selectExpressionList
     selectExpression (COMMA selectExpression)* -> ^(TOK_EXPLIST selectExpression+)
     ;
 
+//---------------------- Rules for windowing clauses -------------------------------
+window_clause 
+@init { gParent.msgs.push("window_clause"); }
+@after { gParent.msgs.pop(); } 
+:
+  KW_WINDOW window_defn (COMMA window_defn)* -> ^(KW_WINDOW window_defn+)
+;  
+
+window_defn 
+@init { gParent.msgs.push("window_defn"); }
+@after { gParent.msgs.pop(); } 
+:
+  Identifier KW_AS window_specification -> ^(TOK_WINDOWDEF Identifier window_specification)
+;  
+
+window_specification 
+@init { gParent.msgs.push("window_specification"); }
+@after { gParent.msgs.pop(); } 
+:
+  (Identifier | ( LPAREN Identifier? partitioningSpec? window_frame? RPAREN)) -> ^(TOK_WINDOWSPEC Identifier? partitioningSpec? window_frame?)
+;
+
+window_frame :
+ window_range_expression |
+ window_value_expression
+;
+
+window_range_expression 
+@init { gParent.msgs.push("window_range_expression"); }
+@after { gParent.msgs.pop(); } 
+:
+ KW_ROWS sb=window_frame_start_boundary -> ^(TOK_WINDOWRANGE $sb) |
+ KW_ROWS KW_BETWEEN s=window_frame_boundary KW_AND end=window_frame_boundary -> ^(TOK_WINDOWRANGE $s $end)
+;
+
+window_value_expression 
+@init { gParent.msgs.push("window_value_expression"); }
+@after { gParent.msgs.pop(); } 
+:
+ KW_RANGE sb=window_frame_start_boundary -> ^(TOK_WINDOWVALUES $sb) |
+ KW_RANGE KW_BETWEEN s=window_frame_boundary KW_AND end=window_frame_boundary -> ^(TOK_WINDOWVALUES $s $end)
+;
+
+window_frame_start_boundary 
+@init { gParent.msgs.push("windowframestartboundary"); }
+@after { gParent.msgs.pop(); } 
+:
+  KW_UNBOUNDED KW_PRECEDING  -> ^(KW_PRECEDING KW_UNBOUNDED) | 
+  KW_CURRENT KW_ROW  -> ^(KW_CURRENT) |
+  Number KW_PRECEDING -> ^(KW_PRECEDING Number)
+;
+
+window_frame_boundary 
+@init { gParent.msgs.push("windowframeboundary"); }
+@after { gParent.msgs.pop(); } 
+:
+  KW_UNBOUNDED (r=KW_PRECEDING|r=KW_FOLLOWING)  -> ^($r KW_UNBOUNDED) | 
+  KW_CURRENT KW_ROW  -> ^(KW_CURRENT) |
+  Number (d=KW_PRECEDING | d=KW_FOLLOWING ) -> ^($d Number)
+;   
+
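
To make the new rules concrete, here are select clauses the grammar above is meant to
accept (illustrative strings, not tests from this patch; fed through the parseSelect
entry point added earlier, and also dependent on lexer keywords added elsewhere in the
patch):

    import org.apache.hadoop.hive.ql.parse.ParseDriver;
    import org.apache.hadoop.hive.ql.parse.ParseException;

    public class WindowGrammarSketch {
      public static void main(String[] args) throws ParseException {
        ParseDriver pd = new ParseDriver();
        String[] clauses = {
            // OVER with an inline window_specification
            "select sum(sales) over (partition by region)",
            // ROWS frame: window_range_expression with BETWEEN boundaries
            "select avg(sales) over (partition by region rows between 2 preceding and current row)",
            // RANGE frame: window_value_expression with a single start boundary
            "select count(*) over (partition by region range unbounded preceding)",
            // reference to a named window defined by a WINDOW clause
            "select rank() over w1",
        };
        for (String c : clauses) {
          System.out.println(pd.parseSelect(c, null).toStringTree());
        }
      }
    }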