Posted to commits@hive.apache.org by am...@apache.org on 2013/04/05 12:34:11 UTC
svn commit: r1464915 [2/5] - in /hive/branches/HIVE-4115: ./
common/src/java/org/apache/hadoop/hive/common/
common/src/java/org/apache/hadoop/hive/conf/ conf/ data/files/
hcatalog/build-support/ant/ hcatalog/historical/branches/
hcatalog/historical/sit...
Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java?rev=1464915&r1=1464914&r2=1464915&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java Fri Apr 5 10:34:08 2013
@@ -20,10 +20,12 @@ package org.apache.hadoop.hive.ql.exec;
import java.util.ArrayList;
import java.util.List;
+import java.util.Map;
import org.apache.hadoop.hive.ql.plan.CollectDesc;
import org.apache.hadoop.hive.ql.plan.DummyStoreDesc;
import org.apache.hadoop.hive.ql.plan.ExtractDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
import org.apache.hadoop.hive.ql.plan.FilterDesc;
import org.apache.hadoop.hive.ql.plan.ForwardDesc;
@@ -36,6 +38,7 @@ import org.apache.hadoop.hive.ql.plan.La
import org.apache.hadoop.hive.ql.plan.LimitDesc;
import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.PTFDesc;
import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
import org.apache.hadoop.hive.ql.plan.SMBJoinDesc;
import org.apache.hadoop.hive.ql.plan.ScriptDesc;
@@ -74,6 +77,7 @@ public final class OperatorFactory {
opvec.add(new OpTuple<FileSinkDesc>(FileSinkDesc.class, FileSinkOperator.class));
opvec.add(new OpTuple<CollectDesc>(CollectDesc.class, CollectOperator.class));
opvec.add(new OpTuple<ScriptDesc>(ScriptDesc.class, ScriptOperator.class));
+ opvec.add(new OpTuple<PTFDesc>(PTFDesc.class, PTFOperator.class));
opvec.add(new OpTuple<ReduceSinkDesc>(ReduceSinkDesc.class, ReduceSinkOperator.class));
opvec.add(new OpTuple<ExtractDesc>(ExtractDesc.class, ExtractOperator.class));
opvec.add(new OpTuple<GroupByDesc>(GroupByDesc.class, GroupByOperator.class));
@@ -251,12 +255,32 @@ public final class OperatorFactory {
* Returns an operator given the conf and a list of parent operators.
*/
public static <T extends OperatorDesc> Operator<T> getAndMakeChild(T conf,
+ RowSchema rwsch, Map<String, ExprNodeDesc> colExprMap, Operator... oplist) {
+ Operator<T> ret = getAndMakeChild(conf, rwsch, oplist);
+ ret.setColumnExprMap(colExprMap);
+ return (ret);
+ }
+
+ /**
+ * Returns an operator given the conf and a list of parent operators.
+ */
+ public static <T extends OperatorDesc> Operator<T> getAndMakeChild(T conf,
RowSchema rwsch, List<Operator<? extends OperatorDesc>> oplist) {
Operator<T> ret = getAndMakeChild(conf, oplist);
ret.setSchema(rwsch);
return (ret);
}
+ /**
+ * Returns an operator given the conf and a list of parent operators.
+ */
+ public static <T extends OperatorDesc> Operator<T> getAndMakeChild(T conf,
+ RowSchema rwsch, Map<String, ExprNodeDesc> colExprMap, List<Operator<? extends OperatorDesc>> oplist) {
+ Operator<T> ret = getAndMakeChild(conf, rwsch, oplist);
+ ret.setColumnExprMap(colExprMap);
+ return (ret);
+ }
+
private OperatorFactory() {
// prevent instantiation
}
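For illustration (not part of the commit), the new overloads let a caller attach the row schema and the column-to-expression map in one call instead of mutating the operator afterwards. A rough sketch, where selectDesc, colInfos, someExpr and parentOp are hypothetical:

  Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
  colExprMap.put("_col0", someExpr);   // hypothetical internal column name
  Operator<SelectDesc> sel = OperatorFactory.getAndMakeChild(
      selectDesc,               // operator descriptor
      new RowSchema(colInfos),  // output row schema
      colExprMap,               // column -> expression mapping
      parentOp);                // parent operator(s), varargs
  // equivalent to getAndMakeChild(selectDesc, rwsch, parentOp) followed by
  // sel.setColumnExprMap(colExprMap)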
Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/exec/RowSchema.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/exec/RowSchema.java?rev=1464915&r1=1464914&r2=1464915&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/exec/RowSchema.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/exec/RowSchema.java Fri Apr 5 10:34:08 2013
@@ -32,6 +32,10 @@ public class RowSchema implements Serial
public RowSchema() {
}
+ public RowSchema(RowSchema that) {
+ this.signature = (ArrayList<ColumnInfo>) that.signature.clone();
+ }
+
public RowSchema(ArrayList<ColumnInfo> signature) {
this.signature = signature;
}
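A note on the new copy constructor: it clones the signature list but, assuming ColumnInfo instances are not deep-copied, the elements remain shared. A sketch of the consequence:

  RowSchema copy = new RowSchema(orig);  // clones the ArrayList
  copy.getSignature().remove(0);         // list changes don't affect orig
  // but both schemas still reference the same ColumnInfo objects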
Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java?rev=1464915&r1=1464914&r2=1464915&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java Fri Apr 5 10:34:08 2013
@@ -94,6 +94,19 @@ public class TableScanOperator extends O
@Override
public void cleanUpInputFileChangedOp() throws HiveException {
inputFileChanged = true;
+ // If the file name to bucket number mapping is maintained, store the bucket number
+ // in the execution context. This is needed for the following scenario:
+ // insert overwrite table T1 select * from T2;
+ // where T1 and T2 are sorted/bucketed by the same keys into the same number of buckets
+ // Although one mapper per file is used (BucketizedHiveInputFormat), it is possible that
+ // any mapper can pick up any file (depending on the size of the files). The bucket number
+ // corresponding to the input file is stored to name the output bucket file appropriately.
+ Map<String, Integer> bucketNameMapping = conf != null ? conf.getBucketFileNameMapping() : null;
+ if ((bucketNameMapping != null) && (!bucketNameMapping.isEmpty())) {
+ String currentInputFile = getExecContext().getCurrentInputFile();
+ getExecContext().setFileId(Integer.toString(bucketNameMapping.get(
+ Utilities.getFileNameFromDirName(currentInputFile))));
+ }
}
private void gatherStats(Object row) {
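For illustration, hypothetical contents of the mapping consulted in cleanUpInputFileChangedOp above:

  // Hypothetical bucket file name mapping for a table with two buckets:
  Map<String, Integer> bucketFileNameMapping = new HashMap<String, Integer>();
  bucketFileNameMapping.put("000000_0", 0);
  bucketFileNameMapping.put("000001_0", 1);
  // If a mapper reads .../t2/000001_0, fileId is set to "1" so the output
  // bucket file written for T1 can be named to line up with bucket 1.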
Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/hooks/LineageInfo.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/hooks/LineageInfo.java?rev=1464915&r1=1464914&r2=1464915&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/hooks/LineageInfo.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/hooks/LineageInfo.java Fri Apr 5 10:34:08 2013
@@ -47,7 +47,8 @@ public class LineageInfo implements Seri
* set operations like union on columns on other tables
* e.g. T2.c1 = T1.c1 + T3.c1.
* 4. SCRIPT - Indicates that the column is derived from the output
- * of a user script through a TRANSFORM, MAP or REDUCE syntax.
+ * of a user script through a TRANSFORM, MAP or REDUCE syntax
+ * or from the output of a PTF chain execution.
*/
public static enum DependencyType {
SIMPLE, EXPRESSION, SCRIPT
Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUnion.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUnion.java?rev=1464915&r1=1464914&r2=1464915&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUnion.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUnion.java Fri Apr 5 10:34:08 2013
@@ -116,17 +116,17 @@ final class OrcUnion implements UnionObj
@Override
public String getTypeName() {
- StringBuilder builder = new StringBuilder("union{");
+ StringBuilder builder = new StringBuilder("uniontype<");
boolean first = true;
for(ObjectInspector child: children) {
if (first) {
first = false;
} else {
- builder.append(", ");
+ builder.append(",");
}
builder.append(child.getTypeName());
}
- builder.append("}");
+ builder.append(">");
return builder.toString();
}
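The effect of this change, for a hypothetical union of an int child and a string child:

  // Before: "union{int, string}"
  // After:  "uniontype<int,string>"  -- matching the uniontype<...> syntax
  // Hive uses in DDL type strings.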
Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java?rev=1464915&r1=1464914&r2=1464915&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java Fri Apr 5 10:34:08 2013
@@ -384,11 +384,10 @@ public class Partition implements Serial
}
/**
- * mapping from bucket number to bucket path
+ * get all paths for this partition in a sorted manner
*/
- // TODO: add test case and clean it up
@SuppressWarnings("nls")
- public Path getBucketPath(int bucketNum) {
+ public FileStatus[] getSortedPaths() {
try {
// Previously, this got the filesystem of the Table, which could be
// different from the filesystem of the partition.
@@ -407,11 +406,23 @@ public class Partition implements Serial
if (srcs.length == 0) {
return null;
}
- return srcs[bucketNum].getPath();
+ return srcs;
} catch (Exception e) {
- throw new RuntimeException("Cannot get bucket path for bucket "
- + bucketNum, e);
+ throw new RuntimeException("Cannot get path ", e);
+ }
+ }
+
+ /**
+ * mapping from bucket number to bucket path
+ */
+ // TODO: add test case and clean it up
+ @SuppressWarnings("nls")
+ public Path getBucketPath(int bucketNum) {
+ FileStatus srcs[] = getSortedPaths();
+ if (srcs == null) {
+ return null;
}
+ return srcs[bucketNum].getPath();
}
@SuppressWarnings("nls")
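With this refactoring, getBucketPath is a thin wrapper over getSortedPaths. A hypothetical usage sketch (partition and bucket index invented):

  FileStatus[] files = partition.getSortedPaths(); // all files, sorted by path
  Path bucket2 = partition.getBucketPath(2);       // same as files[2].getPath()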
Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java?rev=1464915&r1=1464914&r2=1464915&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java Fri Apr 5 10:34:08 2013
@@ -22,6 +22,7 @@ import java.io.IOException;
import java.io.Serializable;
import java.net.URI;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
@@ -31,6 +32,7 @@ import java.util.Properties;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.JavaUtils;
@@ -925,4 +927,30 @@ public class Table implements Serializab
Hive hive = Hive.get();
return hive.getIndexes(getTTable().getDbName(), getTTable().getTableName(), max);
}
+
+ @SuppressWarnings("nls")
+ public FileStatus[] getSortedPaths() {
+ try {
+ // Previously, this got the filesystem of the Table, which could be
+ // different from the filesystem of the partition.
+ FileSystem fs = FileSystem.get(getPath().toUri(), Hive.get()
+ .getConf());
+ String pathPattern = getPath().toString();
+ if (getNumBuckets() > 0) {
+ pathPattern = pathPattern + "/*";
+ }
+ LOG.info("Path pattern = " + pathPattern);
+ FileStatus srcs[] = fs.globStatus(new Path(pathPattern));
+ Arrays.sort(srcs);
+ for (FileStatus src : srcs) {
+ LOG.info("Got file: " + src.getPath());
+ }
+ if (srcs.length == 0) {
+ return null;
+ }
+ return srcs;
+ } catch (Exception e) {
+ throw new RuntimeException("Cannot get path ", e);
+ }
+ }
};
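For illustration, how the glob behaves under the two branches above (paths hypothetical): for a bucketed table stored at /warehouse/t1 the pattern becomes /warehouse/t1/*, matching one file per bucket, while for a non-bucketed table only /warehouse/t1 itself is matched. Sorting by path is what lets callers treat index i as bucket i:

  FileStatus[] srcs = tbl.getSortedPaths();
  Path bucket0 = srcs[0].getPath();  // lowest-sorted file, i.e. bucket 0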
Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java?rev=1464915&r1=1464914&r2=1464915&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java Fri Apr 5 10:34:08 2013
@@ -23,19 +23,19 @@ import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
-import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.CommonJoinOperator;
+import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.exec.FilterOperator;
-import org.apache.hadoop.hive.ql.exec.TableScanOperator;
-import org.apache.hadoop.hive.ql.exec.SelectOperator;
-import org.apache.hadoop.hive.ql.exec.ScriptOperator;
import org.apache.hadoop.hive.ql.exec.GroupByOperator;
+import org.apache.hadoop.hive.ql.exec.LateralViewForwardOperator;
+import org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator;
import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
-import org.apache.hadoop.hive.ql.exec.UnionOperator;
-import org.apache.hadoop.hive.ql.exec.CommonJoinOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.PTFOperator;
import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
-import org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator;
-import org.apache.hadoop.hive.ql.exec.LateralViewForwardOperator;
-import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
+import org.apache.hadoop.hive.ql.exec.ScriptOperator;
+import org.apache.hadoop.hive.ql.exec.SelectOperator;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
import org.apache.hadoop.hive.ql.lib.Dispatcher;
@@ -114,6 +114,9 @@ public class ColumnPruner implements Tra
opRules.put(new RuleRegExp("R9",
LateralViewForwardOperator.getOperatorName() + "%"),
ColumnPrunerProcFactory.getLateralViewForwardProc());
+ opRules.put(new RuleRegExp("R10",
+ PTFOperator.getOperatorName() + "%"),
+ ColumnPrunerProcFactory.getPTFProc());
// The dispatcher fires the processor corresponding to the closest matching
// rule and passes the context along
Dispatcher disp = new DefaultRuleDispatcher(ColumnPrunerProcFactory
Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java?rev=1464915&r1=1464914&r2=1464915&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java Fri Apr 5 10:34:08 2013
@@ -19,8 +19,10 @@
package org.apache.hadoop.hive.ql.optimizer;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
@@ -31,6 +33,7 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.CommonJoinOperator;
+import org.apache.hadoop.hive.ql.exec.ExtractOperator;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.exec.FilterOperator;
import org.apache.hadoop.hive.ql.exec.GroupByOperator;
@@ -40,6 +43,7 @@ import org.apache.hadoop.hive.ql.exec.La
import org.apache.hadoop.hive.ql.exec.LimitOperator;
import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.PTFOperator;
import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.exec.RowSchema;
import org.apache.hadoop.hive.ql.exec.ScriptOperator;
@@ -62,11 +66,21 @@ import org.apache.hadoop.hive.ql.plan.Gr
import org.apache.hadoop.hive.ql.plan.JoinDesc;
import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.PTFDesc;
+import org.apache.hadoop.hive.ql.plan.PTFDesc.PTFExpressionDef;
+import org.apache.hadoop.hive.ql.plan.PTFDesc.ShapeDetails;
+import org.apache.hadoop.hive.ql.plan.PTFDesc.WindowExpressionDef;
+import org.apache.hadoop.hive.ql.plan.PTFDesc.WindowFunctionDef;
+import org.apache.hadoop.hive.ql.plan.PTFDesc.WindowTableFunctionDef;
import org.apache.hadoop.hive.ql.plan.PlanUtils;
import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
import org.apache.hadoop.hive.ql.plan.SelectDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
/**
* Factory for generating the different node processors used by ColumnPruner.
@@ -148,6 +162,170 @@ public final class ColumnPrunerProcFacto
}
/**
+ * - Pruning can only be done for Windowing. PTFs are black boxes,
+ * so we assume all of their columns are needed.
+ * - Add column names referenced in WindowFn args and in WindowFn expressions
+ * to the pruned list of the child Select Op.
+ * - Prune the column names & types serde properties in each of the Shapes in the PTF chain:
+ * - the InputDef's output shape
+ * - Window Table Functions: window output shape & output shape.
+ * - Why is pruning the column names & types in the serde properties enough?
+ * - Because at runtime we rebuild the OIs from these properties.
+ * - Finally we set the prunedColList on the ColumnPrunerProcCtx
+ * and update the RR & signature on the PTFOp.
+ */
+ public static class ColumnPrunerPTFProc implements NodeProcessor {
+ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
+ Object... nodeOutputs) throws SemanticException {
+
+ PTFOperator op = (PTFOperator) nd;
+ PTFDesc conf = op.getConf();
+ //Since we cannot know what columns will be needed by a PTF chain,
+ //we do not prune columns on PTFOperator for PTF chains.
+ if (!conf.forWindowing()) {
+ return getDefaultProc().process(nd, stack, ctx, nodeOutputs);
+ }
+
+ ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx;
+ WindowTableFunctionDef def = (WindowTableFunctionDef) conf.getFuncDef();
+ ArrayList<ColumnInfo> sig = new ArrayList<ColumnInfo>();
+
+ List<String> prunedCols = cppCtx.getPrunedColList(op.getChildOperators().get(0));
+ //we create a copy of prunedCols to create a list of pruned columns for PTFOperator
+ prunedCols = new ArrayList<String>(prunedCols);
+ prunedColumnsList(prunedCols, def);
+ setSerdePropsOfShape(def.getInput().getOutputShape(), prunedCols);
+ setSerdePropsOfShape(def.getOutputFromWdwFnProcessing(), prunedCols);
+ setSerdePropsOfShape(def.getOutputShape(), prunedCols);
+
+ RowResolver oldRR = cppCtx.getOpToParseCtxMap().get(op).getRowResolver();
+ RowResolver newRR = buildPrunedRR(prunedCols, oldRR, sig);
+ cppCtx.getPrunedColLists().put(op, prunedInputList(prunedCols, def));
+ cppCtx.getOpToParseCtxMap().get(op).setRowResolver(newRR);
+ op.getSchema().setSignature(sig);
+ return null;
+ }
+
+ private static RowResolver buildPrunedRR(List<String> prunedCols,
+ RowResolver oldRR, ArrayList<ColumnInfo> sig) throws SemanticException{
+ RowResolver newRR = new RowResolver();
+ HashSet<String> prunedColsSet = new HashSet<String>(prunedCols);
+ for(ColumnInfo cInfo : oldRR.getRowSchema().getSignature()) {
+ if ( prunedColsSet.contains(cInfo.getInternalName())) {
+ String[] nm = oldRR.reverseLookup(cInfo.getInternalName());
+ newRR.put(nm[0], nm[1], cInfo);
+ sig.add(cInfo);
+ }
+ }
+ return newRR;
+ }
+
+ /*
+ * add any input columns referenced in WindowFn args or expressions.
+ */
+ private void prunedColumnsList(List<String> prunedCols, WindowTableFunctionDef tDef) {
+ if ( tDef.getWindowFunctions() != null ) {
+ for(WindowFunctionDef wDef : tDef.getWindowFunctions() ) {
+ if ( wDef.getArgs() == null) {
+ continue;
+ }
+ for(PTFExpressionDef arg : wDef.getArgs()) {
+ ExprNodeDesc exprNode = arg.getExprNode();
+ Utilities.mergeUniqElems(prunedCols, exprNode.getCols());
+ }
+ }
+ }
+ if ( tDef.getWindowExpressions() != null ) {
+ for(WindowExpressionDef expr : tDef.getWindowExpressions()) {
+ ExprNodeDesc exprNode = expr.getExprNode();
+ Utilities.mergeUniqElems(prunedCols, exprNode.getCols());
+ }
+ }
+ if(tDef.getPartition() != null){
+ for(PTFExpressionDef col : tDef.getPartition().getExpressions()){
+ ExprNodeDesc exprNode = col.getExprNode();
+ Utilities.mergeUniqElems(prunedCols, exprNode.getCols());
+ }
+ }
+ if(tDef.getOrder() != null){
+ for(PTFExpressionDef col : tDef.getOrder().getExpressions()){
+ ExprNodeDesc exprNode = col.getExprNode();
+ Utilities.mergeUniqElems(prunedCols, exprNode.getCols());
+ }
+ }
+ }
+
+ private List<String> getLowerCasePrunedCols(List<String> prunedCols){
+ List<String> lowerCasePrunedCols = new ArrayList<String>();
+ for (String col : prunedCols) {
+ lowerCasePrunedCols.add(col.toLowerCase());
+ }
+ return lowerCasePrunedCols;
+ }
+
+ /*
+ * reconstruct Column names & types list based on the prunedCols list.
+ */
+ private void setSerdePropsOfShape(ShapeDetails shp, List<String> prunedCols) {
+ List<String> columnNames = Arrays.asList(shp.getSerdeProps().get(
+ org.apache.hadoop.hive.serde.serdeConstants.LIST_COLUMNS).split(","));
+ List<TypeInfo> columnTypes = TypeInfoUtils
+ .getTypeInfosFromTypeString(shp.getSerdeProps().get(
+ org.apache.hadoop.hive.serde.serdeConstants.LIST_COLUMN_TYPES));
+ /*
+ * fieldNames in OI are lower-cased. So we compare lower cased names for now.
+ */
+ prunedCols = getLowerCasePrunedCols(prunedCols);
+
+ StringBuilder cNames = new StringBuilder();
+ StringBuilder cTypes = new StringBuilder();
+
+ boolean addComma = false;
+ for(int i=0; i < columnNames.size(); i++) {
+ if ( prunedCols.contains(columnNames.get(i)) ) {
+ cNames.append(addComma ? "," : "");
+ cTypes.append(addComma ? "," : "");
+ cNames.append(columnNames.get(i));
+ cTypes.append(columnTypes.get(i));
+ addComma = true;
+ }
+ }
+ shp.getSerdeProps().put(
+ org.apache.hadoop.hive.serde.serdeConstants.LIST_COLUMNS, cNames.toString());
+ shp.getSerdeProps().put(
+ org.apache.hadoop.hive.serde.serdeConstants.LIST_COLUMN_TYPES, cTypes.toString());
+ }
+
+ /*
+ * from the prunedCols list, filter out columns that refer to WindowFns or WindowExprs;
+ * the returned list (input columns only) is set as the pruned list needed by the PTFOp.
+ */
+ private ArrayList<String> prunedInputList(List<String> prunedCols,
+ WindowTableFunctionDef tDef) {
+ ArrayList<String> prunedInputCols = new ArrayList<String>();
+
+ StructObjectInspector OI = tDef.getInput().getOutputShape().getOI();
+ for(StructField f : OI.getAllStructFieldRefs()) {
+ String fName = f.getFieldName();
+ if ( prunedCols.contains(fName)) {
+ prunedInputCols.add(fName);
+ }
+ }
+
+ return prunedInputCols;
+ }
+ }
+
+ /**
+ * Factory method to get the ColumnPrunerGroupByProc class.
+ *
+ * @return ColumnPrunerGroupByProc
+ */
+ public static ColumnPrunerPTFProc getPTFProc() {
+ return new ColumnPrunerPTFProc();
+ }
+
+ /**
* The Default Node Processor for Column Pruning.
*/
public static class ColumnPrunerDefaultProc implements NodeProcessor {
@@ -285,6 +463,39 @@ public final class ColumnPrunerProcFacto
}
Collections.sort(colLists);
pruneReduceSinkOperator(flags, op, cppCtx);
+ } else if ((childOperators.size() == 1)
+ && (childOperators.get(0) instanceof ExtractOperator )
+ && (childOperators.get(0).getChildOperators().size() == 1)
+ && (childOperators.get(0).getChildOperators().get(0) instanceof PTFOperator )
+ && ((PTFOperator)childOperators.get(0).
+ getChildOperators().get(0)).getConf().forWindowing() ) {
+
+ /*
+ * For RS that are followed by Extract & PTFOp for windowing
+ * - do the same thing as above. Reconstruct ValueColumn list based on what is required
+ * by the PTFOp.
+ */
+
+ assert parentOperators.size() == 1;
+
+ PTFOperator ptfOp = (PTFOperator) childOperators.get(0).getChildOperators().get(0);
+ List<String> childCols = cppCtx.getPrunedColList(ptfOp);
+ boolean[] flags = new boolean[conf.getValueCols().size()];
+ for (int i = 0; i < flags.length; i++) {
+ flags[i] = false;
+ }
+ if (childCols != null && childCols.size() > 0) {
+ ArrayList<String> outColNames = op.getConf().getOutputValueColumnNames();
+ for(int i=0; i < outColNames.size(); i++ ) {
+ if ( childCols.contains(outColNames.get(i))) {
+ ExprNodeDesc exprNode = op.getConf().getValueCols().get(i);
+ flags[i] = true;
+ Utilities.mergeUniqElems(colLists, exprNode.getCols());
+ }
+ }
+ }
+ Collections.sort(colLists);
+ pruneReduceSinkOperator(flags, op, cppCtx);
} else {
// Reduce Sink contains the columns needed - no need to aggregate from
// children
@@ -831,4 +1042,4 @@ public final class ColumnPrunerProcFacto
return new ColumnPrunerMapJoinProc();
}
-}
+}
\ No newline at end of file
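For illustration, what setSerdePropsOfShape does to a shape's serde properties, with hypothetical columns:

  // before pruning:
  //   columns       = "ts,userid,page,duration"
  //   columns.types = "timestamp,int,string,bigint"
  // prunedCols = [ts, duration] (lower-cased for comparison, as noted above)
  // after pruning:
  //   columns       = "ts,duration"
  //   columns.types = "timestamp,bigint"
  // Pruning only these properties is enough because the shape's
  // ObjectInspector is rebuilt from them at runtime.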
Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java?rev=1464915&r1=1464914&r2=1464915&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java Fri Apr 5 10:34:08 2013
@@ -178,7 +178,7 @@ public class GroupByOptimizer implements
// Dont remove the operator for distincts
if (useMapperSort && !groupByOp.getConf().isDistinct() &&
(match == GroupByOptimizerSortMatch.COMPLETE_MATCH)) {
- convertGroupByMapSideSortedGroupBy(groupByOp, depth);
+ convertGroupByMapSideSortedGroupBy(hiveConf, groupByOp, depth);
}
else if ((match == GroupByOptimizerSortMatch.PARTIAL_MATCH) ||
(match == GroupByOptimizerSortMatch.COMPLETE_MATCH)) {
@@ -335,10 +335,13 @@ public class GroupByOptimizer implements
throw new SemanticException(e.getMessage(), e);
}
+ List<Partition> notDeniedPartns = partsList.getNotDeniedPartns();
+
GroupByOptimizerSortMatch currentMatch =
- partsList.getNotDeniedPartns().isEmpty() ? GroupByOptimizerSortMatch.NO_MATCH :
+ notDeniedPartns.isEmpty() ? GroupByOptimizerSortMatch.NO_MATCH :
+ notDeniedPartns.size() > 1 ? GroupByOptimizerSortMatch.PARTIAL_MATCH :
GroupByOptimizerSortMatch.COMPLETE_MATCH;
- for (Partition part : partsList.getNotDeniedPartns()) {
+ for (Partition part : notDeniedPartns) {
List<String> sortCols = part.getSortColNames();
List<String> bucketCols = part.getBucketCols();
GroupByOptimizerSortMatch match = matchBucketSortCols(groupByCols, bucketCols, sortCols);
@@ -452,7 +455,14 @@ public class GroupByOptimizer implements
// Convert the group by to a map-side group by
// The operators specified by depth and removed from the tree.
- protected void convertGroupByMapSideSortedGroupBy(GroupByOperator groupByOp, int depth) {
+ protected void convertGroupByMapSideSortedGroupBy(
+ HiveConf conf, GroupByOperator groupByOp, int depth) {
+ // In test mode, don't change the query plan. However, set up a query property
+ pGraphContext.getQueryProperties().setHasMapGroupBy(true);
+ if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_MAP_GROUPBY_SORT_TESTMODE)) {
+ return;
+ }
+
if (groupByOp.removeChildren(depth)) {
// Use bucketized hive input format - that makes sure that one mapper reads the entire file
groupByOp.setUseBucketizedHiveInputFormat(true);
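To spell out the new starting value for the match computed above (a summary, not code from the commit):

  // 0 not-denied partitions  -> NO_MATCH
  // 1 not-denied partition   -> start at COMPLETE_MATCH (may still be
  //    downgraded by matchBucketSortCols on that partition)
  // >1 not-denied partitions -> start at PARTIAL_MATCH, since rows sharing a
  //    group-by key may now be spread across partitions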
Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java?rev=1464915&r1=1464914&r2=1464915&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java Fri Apr 5 10:34:08 2013
@@ -90,6 +90,10 @@ public class Optimizer {
transformations.add(new SortedMergeBucketMapJoinOptimizer());
}
+ if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTIMIZEBUCKETINGSORTING)) {
+ transformations.add(new BucketingSortingReduceSinkOptimizer());
+ }
+
transformations.add(new UnionProcessor());
transformations.add(new JoinReorder());
if(HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTREDUCEDEDUPLICATION)) {
Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/Generator.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/Generator.java?rev=1464915&r1=1464914&r2=1464915&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/Generator.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/Generator.java Fri Apr 5 10:34:08 2013
@@ -22,17 +22,17 @@ import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.Map;
-import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.exec.TableScanOperator;
-import org.apache.hadoop.hive.ql.exec.SelectOperator;
-import org.apache.hadoop.hive.ql.exec.ScriptOperator;
+import org.apache.hadoop.hive.ql.exec.CommonJoinOperator;
import org.apache.hadoop.hive.ql.exec.GroupByOperator;
+import org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator;
import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.PTFOperator;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.exec.ScriptOperator;
+import org.apache.hadoop.hive.ql.exec.SelectOperator;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.exec.UDTFOperator;
import org.apache.hadoop.hive.ql.exec.UnionOperator;
-import org.apache.hadoop.hive.ql.exec.CommonJoinOperator;
-import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
-import org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator;
import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
import org.apache.hadoop.hive.ql.lib.Dispatcher;
import org.apache.hadoop.hive.ql.lib.GraphWalker;
@@ -82,6 +82,8 @@ public class Generator implements Transf
OpProcFactory.getReduceSinkProc());
opRules.put(new RuleRegExp("R9", LateralViewJoinOperator.getOperatorName() + "%"),
OpProcFactory.getLateralViewJoinProc());
+ opRules.put(new RuleRegExp("R10", PTFOperator.getOperatorName() + "%"),
+ OpProcFactory.getTransformProc());
// The dispatcher fires the processor corresponding to the closest matching rule and passes the context along
Dispatcher disp = new DefaultRuleDispatcher(OpProcFactory.getDefaultProc(), opRules, lCtx);
Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java?rev=1464915&r1=1464914&r2=1464915&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java Fri Apr 5 10:34:08 2013
@@ -18,19 +18,23 @@
package org.apache.hadoop.hive.ql.optimizer.unionproc;
import java.util.ArrayList;
+import java.util.HashMap;
import java.util.List;
+import java.util.Map;
import java.util.Stack;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.OperatorFactory;
+import org.apache.hadoop.hive.ql.exec.RowSchema;
import org.apache.hadoop.hive.ql.exec.UnionOperator;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext.UnionParseContext;
import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
@@ -187,8 +191,15 @@ public final class UnionProcFactory {
for (int p = 0; p < numParents; p++) {
OperatorDesc cloneDesc = (OperatorDesc)originalOp.getConf().clone();
+ RowSchema origSchema = originalOp.getSchema();
+ Map<String, ExprNodeDesc> origColExprMap = originalOp.getColumnExprMap();
+
Operator<? extends OperatorDesc> cloneOp =
- OperatorFactory.getAndMakeChild(cloneDesc, originalOp.getSchema(), parents.get(p));
+ OperatorFactory.getAndMakeChild(
+ cloneDesc,
+ origSchema == null ? null : new RowSchema(origSchema),
+ origColExprMap == null ? null : new HashMap(origColExprMap),
+ parents.get(p));
parents.set(p, cloneOp);
}
}
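The point of the copies above, sketched (the follow-on optimization is hypothetical): origSchema and origColExprMap used to be shared by every clone, so a later per-branch rewrite, e.g. column pruning on one cloned FileSinkOperator, would have silently mutated its siblings' schema and expression map. Giving each clone

  new RowSchema(origSchema)    // via the new RowSchema copy constructor
  new HashMap(origColExprMap)  // a private map

keeps the branches independent.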
Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/ASTNode.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/ASTNode.java?rev=1464915&r1=1464914&r2=1464915&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/ASTNode.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/ASTNode.java Fri Apr 5 10:34:08 2013
@@ -37,10 +37,6 @@ public class ASTNode extends CommonTree
public ASTNode() {
}
- public ASTNode(ASTNode copy){
- super(copy);
- }
-
/**
* Constructor.
*
@@ -51,6 +47,16 @@ public class ASTNode extends CommonTree
super(t);
}
+ public ASTNode(ASTNode node) {
+ super(node);
+ this.origin = node.origin;
+ }
+
+ @Override
+ public Tree dupNode() {
+ return new ASTNode(this);
+ }
+
/*
* (non-Javadoc)
*
@@ -95,12 +101,6 @@ public class ASTNode extends CommonTree
this.origin = origin;
}
- @Override
- public Tree dupNode() {
-
- return new ASTNode(this);
- }
-
public String dump() {
StringBuilder sb = new StringBuilder();
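A note on the change above: dupNode() is the hook ANTLR's tree adaptor uses when duplicating subtrees, so having the copy constructor also carry 'origin' means duplicated nodes keep pointing error messages at the original source text. A sketch of the assumption:

  ASTNode dup = (ASTNode) node.dupNode();
  // dup.getOrigin() == node.getOrigin() -- provenance survives duplication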
Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g?rev=1464915&r1=1464914&r2=1464915&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g Fri Apr 5 10:34:08 2013
@@ -139,7 +139,7 @@ fromSource
@init { gParent.msgs.push("from source"); }
@after { gParent.msgs.pop(); }
:
- (tableSource | subQuerySource) (lateralView^)*
+ ((Identifier LPAREN)=> partitionedTableFunction | tableSource | subQuerySource) (lateralView^)*
;
tableBucketSample
@@ -202,6 +202,38 @@ subQuerySource
LPAREN queryStatementExpression RPAREN identifier -> ^(TOK_SUBQUERY queryStatementExpression identifier)
;
+//---------------------- Rules for parsing PTF clauses -----------------------------
+partitioningSpec
+@init { gParent.msgs.push("partitioningSpec clause"); }
+@after { gParent.msgs.pop(); }
+ :
+ partitionByClause orderByClause? -> ^(TOK_PARTITIONINGSPEC partitionByClause orderByClause?) |
+ orderByClause -> ^(TOK_PARTITIONINGSPEC orderByClause) |
+ distributeByClause sortByClause? -> ^(TOK_PARTITIONINGSPEC distributeByClause sortByClause?) |
+ sortByClause -> ^(TOK_PARTITIONINGSPEC sortByClause) |
+ clusterByClause -> ^(TOK_PARTITIONINGSPEC clusterByClause)
+ ;
+
+partitionTableFunctionSource
+@init { gParent.msgs.push("partitionTableFunctionSource clause"); }
+@after { gParent.msgs.pop(); }
+ :
+ subQuerySource |
+ tableSource |
+ partitionedTableFunction
+ ;
+
+partitionedTableFunction
+@init { gParent.msgs.push("ptf clause"); }
+@after { gParent.msgs.pop(); }
+ :
+ name=Identifier
+ LPAREN KW_ON ptfsrc=partitionTableFunctionSource partitioningSpec?
+ ((Identifier LPAREN expression RPAREN ) => Identifier LPAREN expression RPAREN ( COMMA Identifier LPAREN expression RPAREN)*)?
+ RPAREN alias=Identifier?
+ -> ^(TOK_PTBLFUNCTION $name $alias? partitionTableFunctionSource partitioningSpec? expression*)
+ ;
+
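For illustration, a query shape the partitionedTableFunction rule is written to accept (function, table and column names hypothetical):

  SELECT ...
  FROM noop(ON clicks
            PARTITION BY userid
            ORDER BY ts) c

The (Identifier LPAREN) syntactic predicate added to fromSource is what lets the parser distinguish this call form from a plain table or subquery source.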
//----------------------- Rules for parsing whereClause -----------------------------
// where a=b and ...
whereClause
Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g?rev=1464915&r1=1464914&r2=1464915&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g Fri Apr 5 10:34:08 2013
@@ -242,6 +242,14 @@ KW_ROLLUP: 'ROLLUP';
KW_CUBE: 'CUBE';
KW_DIRECTORIES: 'DIRECTORIES';
KW_FOR: 'FOR';
+KW_WINDOW: 'WINDOW';
+KW_UNBOUNDED: 'UNBOUNDED';
+KW_PRECEDING: 'PRECEDING';
+KW_FOLLOWING: 'FOLLOWING';
+KW_CURRENT: 'CURRENT';
+KW_LESS: 'LESS';
+KW_MORE: 'MORE';
+KW_OVER: 'OVER';
KW_GROUPING: 'GROUPING';
KW_SETS: 'SETS';
KW_TRUNCATE: 'TRUNCATE';
Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g?rev=1464915&r1=1464914&r2=1464915&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g Fri Apr 5 10:34:08 2013
@@ -288,6 +288,12 @@ TOK_SKEWED_LOCATIONS;
TOK_SKEWED_LOCATION_LIST;
TOK_SKEWED_LOCATION_MAP;
TOK_STOREDASDIRS;
+TOK_PARTITIONINGSPEC;
+TOK_PTBLFUNCTION;
+TOK_WINDOWDEF;
+TOK_WINDOWSPEC;
+TOK_WINDOWVALUES;
+TOK_WINDOWRANGE;
TOK_IGNOREPROTECTION;
}
@@ -392,7 +398,7 @@ import java.util.HashMap;
xlateMap.put("KW_COLLECTION", "COLLECTION");
xlateMap.put("KW_ITEMS", "ITEMS");
xlateMap.put("KW_KEYS", "KEYS");
- xlateMap.put("KW_KEY_TYPE", "$KEY$");
+ xlateMap.put("KW_KEY_TYPE", "\$KEY\$");
xlateMap.put("KW_LINES", "LINES");
xlateMap.put("KW_STORED", "STORED");
xlateMap.put("KW_SEQUENCEFILE", "SEQUENCEFILE");
@@ -420,8 +426,8 @@ import java.util.HashMap;
xlateMap.put("KW_LIMIT", "LIMIT");
xlateMap.put("KW_SET", "SET");
xlateMap.put("KW_PROPERTIES", "TBLPROPERTIES");
- xlateMap.put("KW_VALUE_TYPE", "$VALUE$");
- xlateMap.put("KW_ELEM_TYPE", "$ELEM$");
+ xlateMap.put("KW_VALUE_TYPE", "\$VALUE\$");
+ xlateMap.put("KW_ELEM_TYPE", "\$ELEM\$");
// Operators
xlateMap.put("DOT", ".");
@@ -446,7 +452,7 @@ import java.util.HashMap;
xlateMap.put("PLUS", "+");
xlateMap.put("MINUS", "-");
xlateMap.put("STAR", "*");
- xlateMap.put("MOD", "%");
+ xlateMap.put("MOD", "\%");
xlateMap.put("AMPERSAND", "&");
xlateMap.put("TILDE", "~");
@@ -1792,9 +1798,10 @@ regular_body
clusterByClause?
distributeByClause?
sortByClause?
+ window_clause?
limitClause? -> ^(TOK_QUERY fromClause ^(TOK_INSERT insertClause
selectClause whereClause? groupByClause? havingClause? orderByClause? clusterByClause?
- distributeByClause? sortByClause? limitClause?))
+ distributeByClause? sortByClause? window_clause? limitClause?))
|
selectStatement
;
@@ -1810,9 +1817,10 @@ selectStatement
clusterByClause?
distributeByClause?
sortByClause?
+ window_clause?
limitClause? -> ^(TOK_QUERY fromClause ^(TOK_INSERT ^(TOK_DESTINATION ^(TOK_DIR TOK_TMP_FILE))
selectClause whereClause? groupByClause? havingClause? orderByClause? clusterByClause?
- distributeByClause? sortByClause? limitClause?))
+ distributeByClause? sortByClause? window_clause? limitClause?))
;
@@ -1827,9 +1835,10 @@ body
clusterByClause?
distributeByClause?
sortByClause?
+ window_clause?
limitClause? -> ^(TOK_INSERT insertClause?
selectClause whereClause? groupByClause? havingClause? orderByClause? clusterByClause?
- distributeByClause? sortByClause? limitClause?)
+ distributeByClause? sortByClause? window_clause? limitClause?)
|
selectClause
whereClause?
@@ -1839,9 +1848,10 @@ body
clusterByClause?
distributeByClause?
sortByClause?
+ window_clause?
limitClause? -> ^(TOK_INSERT ^(TOK_DESTINATION ^(TOK_DIR TOK_TMP_FILE))
selectClause whereClause? groupByClause? havingClause? orderByClause? clusterByClause?
- distributeByClause? sortByClause? limitClause?)
+ distributeByClause? sortByClause? window_clause? limitClause?)
;
insertClause
Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g?rev=1464915&r1=1464914&r2=1464915&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g Fri Apr 5 10:34:08 2013
@@ -124,7 +124,18 @@ clusterByClause
|
KW_CLUSTER KW_BY
expression
- ( COMMA expression )* -> ^(TOK_CLUSTERBY expression+)
+ ( (COMMA)=>COMMA expression )* -> ^(TOK_CLUSTERBY expression+)
+ ;
+
+partitionByClause
+@init { gParent.msgs.push("partition by clause"); }
+@after { gParent.msgs.pop(); }
+ :
+ KW_PARTITION KW_BY
+ LPAREN expression (COMMA expression)* RPAREN -> ^(TOK_DISTRIBUTEBY expression+)
+ |
+ KW_PARTITION KW_BY
+ expression ((COMMA)=> COMMA expression)* -> ^(TOK_DISTRIBUTEBY expression+)
;
distributeByClause
@@ -135,7 +146,7 @@ distributeByClause
LPAREN expression (COMMA expression)* RPAREN -> ^(TOK_DISTRIBUTEBY expression+)
|
KW_DISTRIBUTE KW_BY
- expression (COMMA expression)* -> ^(TOK_DISTRIBUTEBY expression+)
+ expression ((COMMA)=> COMMA expression)* -> ^(TOK_DISTRIBUTEBY expression+)
;
sortByClause
@@ -148,7 +159,7 @@ sortByClause
|
KW_SORT KW_BY
columnRefOrder
- ( COMMA columnRefOrder)* -> ^(TOK_SORTBY columnRefOrder+)
+ ( (COMMA)=> COMMA columnRefOrder)* -> ^(TOK_SORTBY columnRefOrder+)
;
// fun(par1, par2, par3)
@@ -501,5 +512,5 @@ identifier
nonReserved
:
-    KW_TRUE | KW_FALSE | KW_ALL | KW_AND | KW_OR | KW_NOT | KW_LIKE | KW_EXISTS | KW_ASC | KW_DESC | KW_ORDER | KW_GROUP | KW_BY | KW_FROM | KW_AS | KW_DISTINCT | KW_INSERT | KW_OVERWRITE | KW_OUTER | KW_PRESERVE | KW_LEFT | KW_RIGHT | KW_FULL | KW_PARTITION | KW_PARTITIONS | KW_TABLE | KW_TABLES | KW_COLUMNS | KW_INDEX | KW_INDEXES | KW_REBUILD | KW_FUNCTIONS | KW_SHOW | KW_MSCK | KW_REPAIR | KW_DIRECTORY | KW_LOCAL | KW_USING | KW_CLUSTER | KW_DISTRIBUTE | KW_SORT | KW_UNION | KW_LOAD | KW_EXPORT | KW_IMPORT | KW_DATA | KW_INPATH | KW_IS | KW_NULL | KW_CREATE | KW_EXTERNAL | KW_ALTER | KW_CHANGE | KW_COLUMN | KW_FIRST | KW_AFTER | KW_DESCRIBE | KW_DROP | KW_RENAME | KW_IGNORE | KW_PROTECTION | KW_TO | KW_COMMENT | KW_BOOLEAN | KW_TINYINT | KW_SMALLINT | KW_INT | KW_BIGINT | KW_FLOAT | KW_DOUBLE | KW_DATE | KW_DATETIME | KW_TIMESTAMP | KW_DECIMAL | KW_STRING | KW_ARRAY | KW_STRUCT | KW_UNIONTYPE | KW_PARTITIONED | KW_CLUSTERED | KW_SORTED | KW_INTO | KW_BUCKETS | KW_ROW | KW_ROWS | KW_FORMAT | KW_DELIMITED | KW_FIELDS | KW_TERMINATED | KW_ESCAPED | KW_COLLECTION | KW_ITEMS | KW_KEYS | KW_KEY_TYPE | KW_LINES | KW_STORED | KW_FILEFORMAT | KW_SEQUENCEFILE | KW_TEXTFILE | KW_RCFILE | KW_ORCFILE | KW_INPUTFORMAT | KW_OUTPUTFORMAT | KW_INPUTDRIVER | KW_OUTPUTDRIVER | KW_OFFLINE | KW_ENABLE | KW_DISABLE | KW_READONLY | KW_NO_DROP | KW_LOCATION | KW_BUCKET | KW_OUT | KW_OF | KW_PERCENT | KW_ADD | KW_REPLACE | KW_RLIKE | KW_REGEXP | KW_TEMPORARY | KW_EXPLAIN | KW_FORMATTED | KW_PRETTY | KW_DEPENDENCY | KW_SERDE | KW_WITH | KW_DEFERRED | KW_SERDEPROPERTIES | KW_DBPROPERTIES | KW_LIMIT | KW_SET | KW_UNSET | KW_TBLPROPERTIES | KW_IDXPROPERTIES | KW_VALUE_TYPE | KW_ELEM_TYPE | KW_MAPJOIN | KW_STREAMTABLE | KW_HOLD_DDLTIME | KW_CLUSTERSTATUS | KW_UTC | KW_UTCTIMESTAMP | KW_LONG | KW_DELETE | KW_PLUS | KW_MINUS | KW_FETCH | KW_INTERSECT | KW_VIEW | KW_IN | KW_DATABASES | KW_MATERIALIZED | KW_SCHEMA | KW_SCHEMAS | KW_GRANT | KW_REVOKE | KW_SSL | KW_UNDO | KW_LOCK | KW_LOCKS | KW_UNLOCK | KW_SHARED | KW_EXCLUSIVE | KW_PROCEDURE | KW_UNSIGNED | KW_WHILE | KW_READ | KW_READS | KW_PURGE | KW_RANGE | KW_ANALYZE | KW_BEFORE | KW_BETWEEN | KW_BOTH | KW_BINARY | KW_CONTINUE | KW_CURSOR | KW_TRIGGER | KW_RECORDREADER | KW_RECORDWRITER | KW_SEMI | KW_LATERAL | KW_TOUCH | KW_ARCHIVE | KW_UNARCHIVE | KW_COMPUTE | KW_STATISTICS | KW_USE | KW_OPTION | KW_CONCATENATE | KW_SHOW_DATABASE | KW_UPDATE | KW_RESTRICT | KW_CASCADE | KW_SKEWED | KW_ROLLUP | KW_CUBE | KW_DIRECTORIES | KW_FOR | KW_GROUPING | KW_SETS | KW_TRUNCATE | KW_NOSCAN | KW_USER | KW_ROLE | KW_INNER
+    KW_TRUE | KW_FALSE | KW_LIKE | KW_EXISTS | KW_ASC | KW_DESC | KW_ORDER | KW_GROUP | KW_BY | KW_AS | KW_INSERT | KW_OVERWRITE | KW_OUTER | KW_LEFT | KW_RIGHT | KW_FULL | KW_PARTITION | KW_PARTITIONS | KW_TABLE | KW_TABLES | KW_COLUMNS | KW_INDEX | KW_INDEXES | KW_REBUILD | KW_FUNCTIONS | KW_SHOW | KW_MSCK | KW_REPAIR | KW_DIRECTORY | KW_LOCAL | KW_USING | KW_CLUSTER | KW_DISTRIBUTE | KW_SORT | KW_UNION | KW_LOAD | KW_EXPORT | KW_IMPORT | KW_DATA | KW_INPATH | KW_IS | KW_NULL | KW_CREATE | KW_EXTERNAL | KW_ALTER | KW_CHANGE | KW_FIRST | KW_AFTER | KW_DESCRIBE | KW_DROP | KW_RENAME | KW_IGNORE | KW_PROTECTION | KW_TO | KW_COMMENT | KW_BOOLEAN | KW_TINYINT | KW_SMALLINT | KW_INT | KW_BIGINT | KW_FLOAT | KW_DOUBLE | KW_DATE | KW_DATETIME | KW_TIMESTAMP | KW_DECIMAL | KW_STRING | KW_ARRAY | KW_STRUCT | KW_UNIONTYPE | KW_PARTITIONED | KW_CLUSTERED | KW_SORTED | KW_INTO | KW_BUCKETS | KW_ROW | KW_ROWS | KW_FORMAT | KW_DELIMITED | KW_FIELDS | KW_TERMINATED | KW_ESCAPED | KW_COLLECTION | KW_ITEMS | KW_KEYS | KW_KEY_TYPE | KW_LINES | KW_STORED | KW_FILEFORMAT | KW_SEQUENCEFILE | KW_TEXTFILE | KW_RCFILE | KW_ORCFILE | KW_INPUTFORMAT | KW_OUTPUTFORMAT | KW_INPUTDRIVER | KW_OUTPUTDRIVER | KW_OFFLINE | KW_ENABLE | KW_DISABLE | KW_READONLY | KW_NO_DROP | KW_LOCATION | KW_BUCKET | KW_OUT | KW_OF | KW_PERCENT | KW_ADD | KW_REPLACE | KW_RLIKE | KW_REGEXP | KW_TEMPORARY | KW_EXPLAIN | KW_FORMATTED | KW_PRETTY | KW_DEPENDENCY | KW_SERDE | KW_WITH | KW_DEFERRED | KW_SERDEPROPERTIES | KW_DBPROPERTIES | KW_LIMIT | KW_SET | KW_UNSET | KW_TBLPROPERTIES | KW_IDXPROPERTIES | KW_VALUE_TYPE | KW_ELEM_TYPE | KW_MAPJOIN | KW_STREAMTABLE | KW_HOLD_DDLTIME | KW_CLUSTERSTATUS | KW_UTC | KW_UTCTIMESTAMP | KW_LONG | KW_DELETE | KW_PLUS | KW_MINUS | KW_FETCH | KW_INTERSECT | KW_VIEW | KW_IN | KW_DATABASES | KW_MATERIALIZED | KW_SCHEMA | KW_SCHEMAS | KW_GRANT | KW_REVOKE | KW_SSL | KW_UNDO | KW_LOCK | KW_LOCKS | KW_UNLOCK | KW_SHARED | KW_EXCLUSIVE | KW_PROCEDURE | KW_UNSIGNED | KW_WHILE | KW_READ | KW_READS | KW_PURGE | KW_RANGE | KW_ANALYZE | KW_BEFORE | KW_BETWEEN | KW_BOTH | KW_BINARY | KW_CONTINUE | KW_CURSOR | KW_TRIGGER | KW_RECORDREADER | KW_RECORDWRITER | KW_SEMI | KW_LATERAL | KW_TOUCH | KW_ARCHIVE | KW_UNARCHIVE | KW_COMPUTE | KW_STATISTICS | KW_USE | KW_OPTION | KW_CONCATENATE | KW_SHOW_DATABASE | KW_UPDATE | KW_RESTRICT | KW_CASCADE | KW_SKEWED | KW_ROLLUP | KW_CUBE | KW_DIRECTORIES | KW_FOR | KW_GROUPING | KW_SETS | KW_TRUNCATE | KW_NOSCAN | KW_USER | KW_ROLE | KW_INNER
;
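Both alternatives of the new partitionByClause produce the same AST as DISTRIBUTE BY; for hypothetical columns:

  -- accepted forms:
  ... PARTITION BY (userid, sessionid) ...
  ... PARTITION BY userid, sessionid ...
  -- each rewrites to ^(TOK_DISTRIBUTEBY expression+), i.e. PARTITION BY is a
  -- synonym for DISTRIBUTE BY at the AST level. The (COMMA)=> predicates added
  -- here and in distributeByClause/sortByClause appear to keep ANTLR's
  -- lookahead deterministic now that more clauses can follow these lists.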
Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java?rev=1464915&r1=1464914&r2=1464915&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java Fri Apr 5 10:34:08 2013
@@ -28,12 +28,15 @@ import java.util.Set;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.Context;
+import org.apache.hadoop.hive.ql.QueryProperties;
import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator;
import org.apache.hadoop.hive.ql.exec.FetchTask;
+import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.exec.GroupByOperator;
import org.apache.hadoop.hive.ql.exec.JoinOperator;
import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.exec.Task;
@@ -74,6 +77,8 @@ public class ParseContext {
private Map<MapJoinOperator, QBJoinTree> mapJoinContext;
private Map<SMBMapJoinOperator, QBJoinTree> smbMapJoinContext;
private HashMap<TableScanOperator, Table> topToTable;
+ private Map<FileSinkOperator, Table> fsopToTable;
+ private List<ReduceSinkOperator> reduceSinkOperatorsAddedByEnforceBucketingSorting;
private HashMap<String, SplitSample> nameToSplitSample;
private List<LoadTableDesc> loadTableWork;
private List<LoadFileDesc> loadFileWork;
@@ -109,6 +114,7 @@ public class ParseContext {
private List<Task<? extends Serializable>> rootTasks;
private FetchTask fetchTask;
+ private QueryProperties queryProperties;
public ParseContext() {
}
@@ -164,6 +170,7 @@ public class ParseContext {
Map<JoinOperator, QBJoinTree> joinContext,
Map<SMBMapJoinOperator, QBJoinTree> smbMapJoinContext,
HashMap<TableScanOperator, Table> topToTable,
+ Map<FileSinkOperator, Table> fsopToTable,
List<LoadTableDesc> loadTableWork, List<LoadFileDesc> loadFileWork,
Context ctx, HashMap<String, String> idToTableNameMap, int destTableId,
UnionProcContext uCtx, List<AbstractMapJoinOperator<? extends MapJoinDesc>> listMapJoinOpsNoReducer,
@@ -174,7 +181,9 @@ public class ParseContext {
HashMap<String, SplitSample> nameToSplitSample,
HashSet<ReadEntity> semanticInputs, List<Task<? extends Serializable>> rootTasks,
Map<TableScanOperator, Map<String, ExprNodeDesc>> opToPartToSkewedPruner,
- Map<String, ReadEntity> viewAliasToInput) {
+ Map<String, ReadEntity> viewAliasToInput,
+ List<ReduceSinkOperator> reduceSinkOperatorsAddedByEnforceBucketingSorting,
+ QueryProperties queryProperties) {
this.conf = conf;
this.qb = qb;
this.ast = ast;
@@ -183,6 +192,7 @@ public class ParseContext {
this.joinContext = joinContext;
this.smbMapJoinContext = smbMapJoinContext;
this.topToTable = topToTable;
+ this.fsopToTable = fsopToTable;
this.loadFileWork = loadFileWork;
this.loadTableWork = loadTableWork;
this.opParseCtx = opParseCtx;
@@ -203,6 +213,9 @@ public class ParseContext {
this.rootTasks = rootTasks;
this.opToPartToSkewedPruner = opToPartToSkewedPruner;
this.viewAliasToInput = viewAliasToInput;
+ this.reduceSinkOperatorsAddedByEnforceBucketingSorting =
+ reduceSinkOperatorsAddedByEnforceBucketingSorting;
+ this.queryProperties = queryProperties;
}
/**
@@ -304,6 +317,24 @@ public class ParseContext {
this.topToTable = topToTable;
}
+ public Map<FileSinkOperator, Table> getFsopToTable() {
+ return fsopToTable;
+ }
+
+ public void setFsopToTable(Map<FileSinkOperator, Table> fsopToTable) {
+ this.fsopToTable = fsopToTable;
+ }
+
+ public List<ReduceSinkOperator> getReduceSinkOperatorsAddedByEnforceBucketingSorting() {
+ return reduceSinkOperatorsAddedByEnforceBucketingSorting;
+ }
+
+ public void setReduceSinkOperatorsAddedByEnforceBucketingSorting(
+ List<ReduceSinkOperator> reduceSinkOperatorsAddedByEnforceBucketingSorting) {
+ this.reduceSinkOperatorsAddedByEnforceBucketingSorting =
+ reduceSinkOperatorsAddedByEnforceBucketingSorting;
+ }
+
/**
* @return the topOps
*/
@@ -596,4 +627,12 @@ public class ParseContext {
public Map<String, ReadEntity> getViewAliasToInput() {
return viewAliasToInput;
}
+
+ public QueryProperties getQueryProperties() {
+ return queryProperties;
+ }
+
+ public void setQueryProperties(QueryProperties queryProperties) {
+ this.queryProperties = queryProperties;
+ }
}
Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseDriver.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseDriver.java?rev=1464915&r1=1464914&r2=1464915&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseDriver.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseDriver.java Fri Apr 5 10:34:08 2013
@@ -203,4 +203,44 @@ public class ParseDriver {
return (ASTNode) r.getTree();
}
+
+
+ /*
+ * Parse a String as a Select List. This allows table functions to be passed expression
+ * Strings that are translated in the context they define at invocation time. Currently
+ * used by NPath to allow users to specify what output they want.
+ * NPath allows expressions on 'tpath', a column that represents the matched set of rows.
+ * This column doesn't exist in the input schema, and hence the Result Expression cannot
+ * be analyzed by the regular Hive translation process.
+ */
+ public ASTNode parseSelect(String command, Context ctx) throws ParseException {
+ LOG.info("Parsing command: " + command);
+
+ HiveLexerX lexer = new HiveLexerX(new ANTLRNoCaseStringStream(command));
+ TokenRewriteStream tokens = new TokenRewriteStream(lexer);
+ if (ctx != null) {
+ ctx.setTokenRewriteStream(tokens);
+ }
+ HiveParser parser = new HiveParser(tokens);
+ parser.setTreeAdaptor(adaptor);
+ HiveParser_SelectClauseParser.selectClause_return r = null;
+ try {
+ r = parser.selectClause();
+ } catch (RecognitionException e) {
+ e.printStackTrace();
+ throw new ParseException(parser.errors);
+ }
+
+ if (lexer.getErrors().size() == 0 && parser.errors.size() == 0) {
+ LOG.info("Parse Completed");
+ } else if (lexer.getErrors().size() != 0) {
+ throw new ParseException(lexer.getErrors());
+ } else {
+ throw new ParseException(parser.errors);
+ }
+
+ return (ASTNode) r.getTree();
+ }
}
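Hypothetical usage of the new entry point (the select-list string is invented):

  ParseDriver pd = new ParseDriver();
  ASTNode selectList = pd.parseSelect("tpath.userid, count(*) as cnt", null);
  // returns the selectClause subtree, which a table function such as NPath
  // can analyze against the schema it defines at invocation time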
Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/PrunedPartitionList.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/PrunedPartitionList.java?rev=1464915&r1=1464914&r2=1464915&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/PrunedPartitionList.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/PrunedPartitionList.java Fri Apr 5 10:34:08 2013
@@ -31,7 +31,7 @@ import org.apache.hadoop.hive.ql.metadat
public class PrunedPartitionList {
// source table
- private Table source;
+ private final Table source;
// confirmed partitions - satisfy the partition criteria
private Set<Partition> confirmedPartns;
@@ -44,7 +44,7 @@ public class PrunedPartitionList {
/**
* @param confirmedPartns
- * confirmed paritions
+ * confirmed partitions
* @param unknownPartns
* unknown partitions
*/
@@ -62,7 +62,7 @@ public class PrunedPartitionList {
/**
* get confirmed partitions.
- *
+ *
* @return confirmedPartns confirmed paritions
*/
public Set<Partition> getConfirmedPartns() {
@@ -71,7 +71,7 @@ public class PrunedPartitionList {
/**
* get unknown partitions.
- *
+ *
* @return unknownPartns unknown paritions
*/
public Set<Partition> getUnknownPartns() {
@@ -80,7 +80,7 @@ public class PrunedPartitionList {
/**
* get denied partitions.
- *
+ *
* @return deniedPartns denied paritions
*/
public Set<Partition> getDeniedPartns() {
@@ -99,7 +99,7 @@ public class PrunedPartitionList {
/**
* set confirmed partitions.
- *
+ *
* @param confirmedPartns
* confirmed paritions
*/
@@ -109,7 +109,7 @@ public class PrunedPartitionList {
/**
* set unknown partitions.
- *
+ *
* @param unknownPartns
* unknown partitions
*/
Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java?rev=1464915&r1=1464914&r2=1464915&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java Fri Apr 5 10:34:08 2013
@@ -52,6 +52,16 @@ public class QB {
private boolean isAnalyzeRewrite;
private CreateTableDesc tblDesc = null; // table descriptor of the final
// results
+
+ // used by PTFs
+ /*
+ * This map maintains the PTFInvocationSpec for each PTF chain invocation in this QB.
+ */
+ private HashMap<ASTNode, PTFInvocationSpec> ptfNodeToSpec;
+ /*
+ * the WindowingSpec used for windowing clauses in this QB.
+ */
+ private HashMap<String, WindowingSpec> destToWindowingSpec;
public void print(String msg) {
@@ -76,6 +86,8 @@ public class QB {
}
qbp = new QBParseInfo(alias, isSubQ);
qbm = new QBMetaData();
+ ptfNodeToSpec = new HashMap<ASTNode, PTFInvocationSpec>();
+ destToWindowingSpec = new HashMap<String, WindowingSpec>();
id = getAppendedAliasFromId(outer_id, alias);
}
@@ -246,4 +258,35 @@ public class QB {
public void setAnalyzeRewrite(boolean isAnalyzeRewrite) {
this.isAnalyzeRewrite = isAnalyzeRewrite;
}
-}
+
+ public PTFInvocationSpec getPTFInvocationSpec(ASTNode node) {
+ return ptfNodeToSpec == null ? null : ptfNodeToSpec.get(node);
+ }
+
+ public void addPTFNodeToSpec(ASTNode node, PTFInvocationSpec spec) {
+ ptfNodeToSpec = ptfNodeToSpec == null ? new HashMap<ASTNode, PTFInvocationSpec>() : ptfNodeToSpec;
+ ptfNodeToSpec.put(node, spec);
+ }
+
+ public HashMap<ASTNode, PTFInvocationSpec> getPTFNodeToSpec() {
+ return ptfNodeToSpec;
+ }
+
+ public WindowingSpec getWindowingSpec(String dest) {
+ return destToWindowingSpec.get(dest);
+ }
+
+ public void addDestToWindowingSpec(String dest, WindowingSpec windowingSpec) {
+ destToWindowingSpec.put(dest, windowingSpec);
+ }
+
+ public boolean hasWindowingSpec(String dest) {
+ return destToWindowingSpec.get(dest) != null;
+ }
+
+ public HashMap<String, WindowingSpec> getAllWindowingSpecs() {
+ return destToWindowingSpec;
+ }
+
+
+}
\ No newline at end of file
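
A hedged sketch of how the new per-destination windowing state on QB is meant to be driven from semantic analysis; the QB constructor arguments, the destination id, and the no-arg WindowingSpec constructor are assumptions:

    // Sketch only: record and look up a windowing spec for one
    // destination clause of this query block.
    QB qb = new QB(null, "src", false);      // hypothetical constructor args
    String dest = "insclause-0";             // hypothetical destination id
    if (!qb.hasWindowingSpec(dest)) {
      qb.addDestToWindowingSpec(dest, new WindowingSpec());
    }
    WindowingSpec wSpec = qb.getWindowingSpec(dest);
    // ... window function invocations for 'dest' are added to wSpec ...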
Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java?rev=1464915&r1=1464914&r2=1464915&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java Fri Apr 5 10:34:08 2013
@@ -68,6 +68,7 @@ public class QBParseInfo {
private String partName; // used for column statistics
private boolean isTblLvl; // used for column statistics
+
/**
* ClusterBy is a short name for both DistributeBy and SortBy.
*/
@@ -98,6 +99,10 @@ public class QBParseInfo {
private final LinkedHashMap<String, LinkedHashMap<String, ASTNode>> destToAggregationExprs;
private final HashMap<String, List<ASTNode>> destToDistinctFuncExprs;
+ // used by Windowing
+ private final LinkedHashMap<String, LinkedHashMap<String, ASTNode>> destToWindowingExprs;
+
+
@SuppressWarnings("unused")
private static final Log LOG = LogFactory.getLog(QBParseInfo.class.getName());
@@ -121,6 +126,7 @@ public class QBParseInfo {
destGroupingSets = new HashSet<String>();
destToAggregationExprs = new LinkedHashMap<String, LinkedHashMap<String, ASTNode>>();
+ destToWindowingExprs = new LinkedHashMap<String, LinkedHashMap<String, ASTNode>>();
destToDistinctFuncExprs = new HashMap<String, List<ASTNode>>();
this.alias = alias;
@@ -133,6 +139,14 @@ public class QBParseInfo {
}
+ /*
+ * If a QB is such that the aggregation expressions need to be handled by
+ * the Windowing PTF, we invoke this function to clear the AggExprs on the dest.
+ */
+ public void clearAggregationExprsForClause(String clause) {
+ destToAggregationExprs.get(clause).clear();
+ }
+
public void setAggregationExprsForClause(String clause,
LinkedHashMap<String, ASTNode> aggregationTrees) {
destToAggregationExprs.put(clause, aggregationTrees);
@@ -160,6 +174,26 @@ public class QBParseInfo {
return destToAggregationExprs.get(clause);
}
+ public void addWindowingExprToClause(String clause, ASTNode windowingExprNode) {
+ LinkedHashMap<String, ASTNode> windowingExprs = destToWindowingExprs.get(clause);
+ if ( windowingExprs == null ) {
+ windowingExprs = new LinkedHashMap<String, ASTNode>();
+ destToWindowingExprs.put(clause, windowingExprs);
+ }
+ windowingExprs.put(windowingExprNode.toStringTree(), windowingExprNode);
+ }
+
+ public HashMap<String, ASTNode> getWindowingExprsForClause(String clause) {
+ return destToWindowingExprs.get(clause);
+ }
+
+ public void clearDistinctFuncExprsForClause(String clause) {
+ List<ASTNode> l = destToDistinctFuncExprs.get(clause);
+ if ( l != null ) {
+ l.clear();
+ }
+ }
+
public void setDistinctFuncExprsForClause(String clause, List<ASTNode> ast) {
destToDistinctFuncExprs.put(clause, ast);
}
@@ -594,3 +628,5 @@ public class QBParseInfo {
this.isPartialScanAnalyzeCommand = isPartialScanAnalyzeCommand;
}
}
+
+
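
The new QBParseInfo hooks are designed to be used together: once a destination's functions are handed to the Windowing PTF, the regular aggregation and distinct bookkeeping must be cleared so the same functions are not planned twice. A sketch, with the method name and caller-supplied arguments assumed:

    // Sketch only: route one destination's functions to windowing.
    private void routeToWindowing(QBParseInfo qbp, String dest, ASTNode overExpr) {
      // Stored keyed by the expression's string tree.
      qbp.addWindowingExprToClause(dest, overExpr);
      // clearAggregationExprsForClause() calls clear() on the result of
      // get(), so it assumes aggregations were already set for 'dest'.
      qbp.clearAggregationExprsForClause(dest);
      qbp.clearDistinctFuncExprsForClause(dest);
    }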
Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/SelectClauseParser.g
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/SelectClauseParser.g?rev=1464915&r1=1464914&r2=1464915&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/SelectClauseParser.g (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/SelectClauseParser.g Fri Apr 5 10:34:08 2013
@@ -125,7 +125,9 @@ selectItem
@init { gParent.msgs.push("selection target"); }
@after { gParent.msgs.pop(); }
:
- ( selectExpression ((KW_AS? identifier) | (KW_AS LPAREN identifier (COMMA identifier)* RPAREN))?) -> ^(TOK_SELEXPR selectExpression identifier*)
+ ( selectExpression (KW_OVER ws=window_specification )?
+ ((KW_AS? identifier) | (KW_AS LPAREN identifier (COMMA identifier)* RPAREN))?
+ ) -> ^(TOK_SELEXPR selectExpression identifier* $ws?)
;
trfmClause
@@ -155,3 +157,64 @@ selectExpressionList
selectExpression (COMMA selectExpression)* -> ^(TOK_EXPLIST selectExpression+)
;
+//---------------------- Rules for windowing clauses -------------------------------
+window_clause
+@init { gParent.msgs.push("window_clause"); }
+@after { gParent.msgs.pop(); }
+:
+ KW_WINDOW window_defn (COMMA window_defn)* -> ^(KW_WINDOW window_defn+)
+;
+
+window_defn
+@init { gParent.msgs.push("window_defn"); }
+@after { gParent.msgs.pop(); }
+:
+ Identifier KW_AS window_specification -> ^(TOK_WINDOWDEF Identifier window_specification)
+;
+
+window_specification
+@init { gParent.msgs.push("window_specification"); }
+@after { gParent.msgs.pop(); }
+:
+ (Identifier | ( LPAREN Identifier? partitioningSpec? window_frame? RPAREN)) -> ^(TOK_WINDOWSPEC Identifier? partitioningSpec? window_frame?)
+;
+
+window_frame :
+ window_range_expression |
+ window_value_expression
+;
+
+window_range_expression
+@init { gParent.msgs.push("window_range_expression"); }
+@after { gParent.msgs.pop(); }
+:
+ KW_ROWS sb=window_frame_start_boundary -> ^(TOK_WINDOWRANGE $sb) |
+ KW_ROWS KW_BETWEEN s=window_frame_boundary KW_AND end=window_frame_boundary -> ^(TOK_WINDOWRANGE $s $end)
+;
+
+window_value_expression
+@init { gParent.msgs.push("window_value_expression"); }
+@after { gParent.msgs.pop(); }
+:
+ KW_RANGE sb=window_frame_start_boundary -> ^(TOK_WINDOWVALUES $sb) |
+ KW_RANGE KW_BETWEEN s=window_frame_boundary KW_AND end=window_frame_boundary -> ^(TOK_WINDOWVALUES $s $end)
+;
+
+window_frame_start_boundary
+@init { gParent.msgs.push("windowframestartboundary"); }
+@after { gParent.msgs.pop(); }
+:
+ KW_UNBOUNDED KW_PRECEDING -> ^(KW_PRECEDING KW_UNBOUNDED) |
+ KW_CURRENT KW_ROW -> ^(KW_CURRENT) |
+ Number KW_PRECEDING -> ^(KW_PRECEDING Number)
+;
+
+window_frame_boundary
+@init { gParent.msgs.push("windowframeboundary"); }
+@after { gParent.msgs.pop(); }
+:
+ KW_UNBOUNDED (r=KW_PRECEDING|r=KW_FOLLOWING) -> ^($r KW_UNBOUNDED) |
+ KW_CURRENT KW_ROW -> ^(KW_CURRENT) |
+ Number (d=KW_PRECEDING | d=KW_FOLLOWING ) -> ^($d Number)
+;
+
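
For reference, once these rules are reachable from selectItem via KW_OVER, select clauses of the following shapes should parse; feeding them through the parseSelect() entry point added in this commit is an untested sketch, and the column names are made up:

    // Sketch only: exercise the new OVER / window_specification syntax.
    // (parseSelect throws ParseException on bad input.)
    ParseDriver pd = new ParseDriver();
    // Inline spec with partitioning and a ROWS frame
    // (window_range_expression, BETWEEN form):
    pd.parseSelect("select sum(sal) over (partition by dept"
        + " rows between unbounded preceding and current row)", null);
    // Reference to a named window (the bare Identifier alternative of
    // window_specification, resolved later against a window_clause):
    pd.parseSelect("select avg(sal) over w", null);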