You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by na...@apache.org on 2009/11/10 21:18:29 UTC
svn commit: r834649 [1/2] - in /hadoop/hive/trunk: ./
ql/src/java/org/apache/hadoop/hive/ql/exec/
ql/src/java/org/apache/hadoop/hive/ql/parse/
ql/src/java/org/apache/hadoop/hive/ql/plan/
ql/src/test/queries/clientnegative/ ql/src/test/queries/clientpos...
Author: namit
Date: Tue Nov 10 20:18:28 2009
New Revision: 834649
URL: http://svn.apache.org/viewvc?rev=834649&view=rev
Log:
HIVE-870. Add left semi join. (Ning Zhang via namit)
Added:
hadoop/hive/trunk/ql/src/test/queries/clientnegative/semijoin1.q
hadoop/hive/trunk/ql/src/test/queries/clientnegative/semijoin2.q
hadoop/hive/trunk/ql/src/test/queries/clientnegative/semijoin3.q
hadoop/hive/trunk/ql/src/test/queries/clientnegative/semijoin4.q
hadoop/hive/trunk/ql/src/test/queries/clientpositive/semijoin.q
hadoop/hive/trunk/ql/src/test/results/clientnegative/semijoin1.q.out
hadoop/hive/trunk/ql/src/test/results/clientnegative/semijoin2.q.out
hadoop/hive/trunk/ql/src/test/results/clientnegative/semijoin3.q.out
hadoop/hive/trunk/ql/src/test/results/clientnegative/semijoin4.q.out
hadoop/hive/trunk/ql/src/test/results/clientpositive/semijoin.q.out
Modified:
hadoop/hive/trunk/CHANGES.txt
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecMapper.java
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ASTNode.java
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/QBJoinTree.java
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/joinType.java
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/joinCond.java
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/joinDesc.java
Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=834649&r1=834648&r2=834649&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Tue Nov 10 20:18:28 2009
@@ -77,6 +77,8 @@
HIVE-911. Add UDF WeekOfYear. (Paul Yang via zshao)
+ HIVE-870. Add left semi join. (Ning Zhang via namit)
+
IMPROVEMENTS
HIVE-760. Add version info to META-INF/MANIFEST.MF.
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java?rev=834649&r1=834648&r2=834649&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java Tue Nov 10 20:18:28 2009
@@ -74,6 +74,10 @@
public void popObj() {
curSize--;
}
+
+ public Object topObj() {
+ return objs[curSize-1];
+ }
}
transient protected int numAliases; // number of aliases
@@ -97,7 +101,6 @@
// potential nulls for the concerned
// aliases
transient private ArrayList<ArrayList<Object>>[] dummyObjVectors;
- transient private Stack<Iterator<ArrayList<Object>>> iterators;
transient protected int totalSz; // total size of the composite object
// keys are the column names. basically this maps the position of the column in
@@ -217,9 +220,6 @@
dummyObjVectors[pos] = values;
pos++;
}
-
- iterators = new Stack<Iterator<ArrayList<Object>>>();
-
joinEmitInterval = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEJOINEMITINTERVAL);
forwardCache = new Object[totalSz];
@@ -309,6 +309,31 @@
}
return resNulls;
}
+
+ /**
+ * Implement semi join operator.
+ */
+ private ArrayList<boolean[]> joinObjectsLeftSemiJoin(ArrayList<boolean[]> resNulls,
+ ArrayList<boolean[]> inputNulls,
+ ArrayList<Object> newObj,
+ IntermediateObject intObj,
+ int left,
+ boolean newObjNull) {
+ if (newObjNull)
+ return resNulls;
+ Iterator<boolean[]> nullsIter = inputNulls.iterator();
+ while (nullsIter.hasNext()) {
+ boolean[] oldNulls = nullsIter.next();
+ boolean oldObjNull = oldNulls[left];
+ if (!oldObjNull) {
+ boolean[] newNulls = new boolean[intObj.getCurSize()];
+ copyOldArray(oldNulls, newNulls);
+ newNulls[oldNulls.length] = false;
+ resNulls.add(newNulls);
+ }
+ }
+ return resNulls;
+ }
private ArrayList<boolean[]> joinObjectsLeftOuterJoin(
ArrayList<boolean[]> resNulls, ArrayList<boolean[]> inputNulls,
@@ -452,8 +477,8 @@
* inner join. The outer joins are processed appropriately.
*/
private ArrayList<boolean[]> joinObjects(ArrayList<boolean[]> inputNulls,
- ArrayList<Object> newObj, IntermediateObject intObj,
- int joinPos, boolean firstRow) {
+ ArrayList<Object> newObj, IntermediateObject intObj,
+ int joinPos, boolean firstRow) {
ArrayList<boolean[]> resNulls = new ArrayList<boolean[]>();
boolean newObjNull = newObj == dummyObj[joinPos] ? true : false;
if (joinPos == 0) {
@@ -491,6 +516,10 @@
else if (type == joinDesc.RIGHT_OUTER_JOIN)
return joinObjectsRightOuterJoin(resNulls, inputNulls, newObj, intObj,
left, newObjNull, firstRow);
+ else if (type == joinDesc.LEFT_SEMI_JOIN)
+ return joinObjectsLeftSemiJoin(resNulls, inputNulls, newObj, intObj,
+ left, newObjNull);
+
assert (type == joinDesc.FULL_OUTER_JOIN);
return joinObjectsFullOuterJoin(resNulls, inputNulls, newObj, intObj, left,
newObjNull, firstRow);
@@ -506,20 +535,40 @@
private void genObject(ArrayList<boolean[]> inputNulls, int aliasNum,
IntermediateObject intObj, boolean firstRow) throws HiveException {
boolean childFirstRow = firstRow;
+ boolean skipping = false;
+
if (aliasNum < numAliases) {
- Iterator<ArrayList<Object>> aliasRes = storage.get(order[aliasNum])
- .iterator();
- iterators.push(aliasRes);
+
+ // search for match in the rhs table
+ Iterator<ArrayList<Object>> aliasRes = storage.get(order[aliasNum]).iterator();
while (aliasRes.hasNext()) {
+
ArrayList<Object> newObj = aliasRes.next();
+
+ // check for skipping in case of left semi join
+ if (aliasNum > 0 &&
+ condn[aliasNum - 1].getType() == joinDesc.LEFT_SEMI_JOIN &&
+ newObj != dummyObj[aliasNum] ) { // successful match
+ skipping = true;
+ }
+
intObj.pushObj(newObj);
- ArrayList<boolean[]> newNulls = joinObjects(inputNulls, newObj, intObj,
- aliasNum, childFirstRow);
+
+ // execute the actual join algorithm
+ ArrayList<boolean[]> newNulls = joinObjects(inputNulls, newObj, intObj,
+ aliasNum, childFirstRow);
+
+ // recursively call the join the other rhs tables
genObject(newNulls, aliasNum + 1, intObj, firstRow);
+
intObj.popObj();
firstRow = false;
+
+      // if left-semi-join found a match, skip the rest of the rows in the rhs table of the semijoin
+ if ( skipping ) {
+ break;
+ }
}
- iterators.pop();
} else {
if (inputNulls == null)
return;
@@ -530,7 +579,7 @@
}
}
}
-
+
/**
* Forward a record of join results.
*
@@ -538,6 +587,8 @@
*/
public void endGroup() throws HiveException {
LOG.trace("Join Op: endGroup called: numValues=" + numAliases);
+
+
checkAndGenObject();
}
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecMapper.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecMapper.java?rev=834649&r1=834648&r2=834649&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecMapper.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecMapper.java Tue Nov 10 20:18:28 2009
@@ -135,6 +135,7 @@
while (true) {
InspectableObject row = fetchOp.getNextRow();
if (row == null) {
+ forwardOp.close(false);
break;
}
fetchOpRows++;
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java?rev=834649&r1=834648&r2=834649&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java Tue Nov 10 20:18:28 2009
@@ -22,6 +22,7 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Vector;
@@ -646,16 +647,25 @@
}
public String dump(int level) {
+ return dump(level, new HashSet<Integer>());
+ }
+
+ public String dump(int level, HashSet<Integer> seenOpts) {
+ if ( seenOpts.contains(new Integer(id)))
+ return null;
+ seenOpts.add(new Integer(id));
+
StringBuilder s = new StringBuilder();
String ls = getLevelString(level);
s.append(ls);
s.append("<" + getName() + ">");
s.append("Id =" + id);
+
if (childOperators != null) {
s.append(ls);
s.append(" <Children>");
for (Operator<? extends Serializable> o : childOperators) {
- s.append(o.dump(level+2));
+ s.append(o.dump(level+2, seenOpts));
}
s.append(ls);
s.append(" <\\Children>");
@@ -666,6 +676,7 @@
s.append(" <Parent>");
for (Operator<? extends Serializable> o : parentOperators) {
s.append("Id = " + o.id + " ");
+ s.append(o.dump(level,seenOpts));
}
s.append("<\\Parent>");
}
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ASTNode.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ASTNode.java?rev=834649&r1=834648&r2=834649&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ASTNode.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ASTNode.java Tue Nov 10 20:18:28 2009
@@ -62,4 +62,24 @@
public String getName() {
return (new Integer(super.getToken().getType())).toString();
}
+
+ public String dump() {
+ StringBuffer sb = new StringBuffer();
+
+ sb.append('(');
+ sb.append(this.toString());
+ Vector<Node> children = getChildren();
+ if ( children != null ) {
+ for ( Node node : getChildren() ) {
+ if ( node instanceof ASTNode ) {
+ sb.append(((ASTNode) node).dump());
+ } else {
+ sb.append("NON-ASTNODE!!");
+ }
+ }
+ }
+ sb.append(')');
+ return sb.toString();
+ }
+
}
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g?rev=834649&r1=834648&r2=834649&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g Tue Nov 10 20:18:28 2009
@@ -143,6 +143,7 @@
TOK_USERSCRIPTCOLSCHEMA;
TOK_RECORDREADER;
TOK_RECORDWRITER;
+TOK_LEFTSEMIJOIN;
}
@@ -891,9 +892,10 @@
@after { msgs.pop(); }
:
KW_JOIN -> TOK_JOIN
- | KW_LEFT KW_OUTER KW_JOIN -> TOK_LEFTOUTERJOIN
+ | KW_LEFT KW_OUTER KW_JOIN -> TOK_LEFTOUTERJOIN
| KW_RIGHT KW_OUTER KW_JOIN -> TOK_RIGHTOUTERJOIN
- | KW_FULL KW_OUTER KW_JOIN -> TOK_FULLOUTERJOIN
+ | KW_FULL KW_OUTER KW_JOIN -> TOK_FULLOUTERJOIN
+ | KW_LEFT KW_SEMI KW_JOIN -> TOK_LEFTSEMIJOIN
;
fromSource
@@ -1436,6 +1438,7 @@
KW_TRIGGER: 'TRIGGER';
KW_RECORDREADER: 'RECORDREADER';
KW_RECORDWRITER: 'RECORDWRITER';
+KW_SEMI: 'SEMI';
// Operators
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/QBJoinTree.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/QBJoinTree.java?rev=834649&r1=834648&r2=834649&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/QBJoinTree.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/QBJoinTree.java Tue Nov 10 20:18:28 2009
@@ -18,8 +18,11 @@
package org.apache.hadoop.hive.ql.parse;
+import java.util.HashMap;
import java.util.Vector;
import java.util.List;
+import java.util.ArrayList;
+import java.util.Map.Entry;
/**
* Internal representation of the join tree
@@ -35,6 +38,10 @@
private int nextTag;
private joinCond[] joinCond;
private boolean noOuterJoin;
+ private boolean noSemiJoin;
+
+ // keeps track of the right-hand-side table name of the left-semi-join, and its list of join keys
+ private HashMap<String, ArrayList<ASTNode>> rhsSemijoin;
// join conditions
private Vector<Vector<ASTNode>> expressions;
@@ -52,7 +59,12 @@
/**
* constructor
*/
- public QBJoinTree() { nextTag = 0;}
+ public QBJoinTree() {
+ nextTag = 0;
+ noOuterJoin = true;
+ noSemiJoin = true;
+ rhsSemijoin = new HashMap<String, ArrayList<ASTNode>>();
+ }
/**
* returns left alias if any - this is used for merging later on
@@ -133,20 +145,28 @@
public void setNoOuterJoin(boolean noOuterJoin) {
this.noOuterJoin = noOuterJoin;
}
+
+ public boolean getNoSemiJoin() {
+ return noSemiJoin;
+ }
- /**
- * @return the filters
- */
- public Vector<Vector<ASTNode>> getFilters() {
- return filters;
- }
-
- /**
- * @param filters the filters to set
- */
- public void setFilters(Vector<Vector<ASTNode>> filters) {
- this.filters = filters;
- }
+ public void setNoSemiJoin(boolean semi) {
+ this.noSemiJoin = semi;
+ }
+
+ /**
+ * @return the filters
+ */
+ public Vector<Vector<ASTNode>> getFilters() {
+ return filters;
+ }
+
+ /**
+ * @param filters the filters to set
+ */
+ public void setFilters(Vector<Vector<ASTNode>> filters) {
+ this.filters = filters;
+ }
/**
* @return the mapSidejoin
@@ -183,6 +203,66 @@
public void setStreamAliases(List<String> streamAliases) {
this.streamAliases = streamAliases;
}
+
+ /**
+ * Insert only a key to the semijoin table name to column names map.
+ * @param alias table name alias.
+ */
+ public void addRHSSemijoin(String alias) {
+ if ( ! rhsSemijoin.containsKey(alias) ) {
+ rhsSemijoin.put(alias, null);
+ }
+ }
+
+ /**
+   * Remember the mapping of table alias to set of columns.
+ * @param alias
+ * @param columns
+ */
+ public void addRHSSemijoinColumns(String alias, ArrayList<ASTNode> columns) {
+ ArrayList<ASTNode> cols = rhsSemijoin.get(alias);
+ if ( cols == null ) {
+ rhsSemijoin.put(alias, columns);
+ } else {
+ cols.addAll(columns);
+ }
+ }
+
+ /**
+   * Remember the mapping of table alias to set of columns.
+ * @param alias
+ * @param columns
+ */
+ public void addRHSSemijoinColumns(String alias, ASTNode column) {
+ ArrayList<ASTNode> cols = rhsSemijoin.get(alias);
+ if ( cols == null ) {
+ cols = new ArrayList<ASTNode>();
+ cols.add(column);
+ rhsSemijoin.put(alias, cols);
+ } else {
+ cols.add(column);
+ }
+ }
+
+ public ArrayList<ASTNode> getRHSSemijoinColumns(String alias) {
+ return rhsSemijoin.get(alias);
+ }
+
+ /**
+ * Merge the rhs tables from another join tree.
+ * @param src the source join tree
+ */
+ public void mergeRHSSemijoin(QBJoinTree src) {
+ for (Entry<String, ArrayList<ASTNode>> e: src.rhsSemijoin.entrySet()) {
+ String key = e.getKey();
+ ArrayList<ASTNode> value = this.rhsSemijoin.get(key);
+ if ( value == null ) {
+ this.rhsSemijoin.put(key, e.getValue());
+ } else {
+ value.addAll(e.getValue());
+ }
+ }
+ }
}
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=834649&r1=834648&r2=834649&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Tue Nov 10 20:18:28 2009
@@ -426,10 +426,11 @@
private boolean isJoinToken(ASTNode node)
{
- if ((node.getToken().getType() == HiveParser.TOK_JOIN) ||
- (node.getToken().getType() == HiveParser.TOK_LEFTOUTERJOIN) ||
+ if ((node.getToken().getType() == HiveParser.TOK_JOIN) ||
+ (node.getToken().getType() == HiveParser.TOK_LEFTOUTERJOIN) ||
(node.getToken().getType() == HiveParser.TOK_RIGHTOUTERJOIN) ||
- (node.getToken().getType() == HiveParser.TOK_FULLOUTERJOIN) ||
+ (node.getToken().getType() == HiveParser.TOK_FULLOUTERJOIN) ||
+ (node.getToken().getType() == HiveParser.TOK_LEFTSEMIJOIN) ||
(node.getToken().getType() == HiveParser.TOK_UNIQUEJOIN))
return true;
@@ -725,7 +726,8 @@
@SuppressWarnings("nls")
private void parseJoinCondPopulateAlias(QBJoinTree joinTree,
- ASTNode condn, Vector<String> leftAliases, Vector<String> rightAliases)
+ ASTNode condn, Vector<String> leftAliases, Vector<String> rightAliases,
+ ArrayList<String> fields)
throws SemanticException {
// String[] allAliases = joinTree.getAllAliases();
switch (condn.getToken().getType()) {
@@ -744,9 +746,14 @@
}
break;
+ case HiveParser.Identifier:
+ // it may be a field name, return the identifier and let the caller decide whether it is or not
+ if ( fields != null ) {
+ fields.add(unescapeIdentifier(condn.getToken().getText().toLowerCase()));
+ }
+ break;
case HiveParser.Number:
case HiveParser.StringLiteral:
- case HiveParser.Identifier:
case HiveParser.TOK_CHARSETLITERAL:
case HiveParser.KW_TRUE:
case HiveParser.KW_FALSE:
@@ -756,19 +763,42 @@
// check all the arguments
for (int i = 1; i < condn.getChildCount(); i++)
parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(i),
- leftAliases, rightAliases);
+ leftAliases, rightAliases, null);
break;
default:
// This is an operator - so check whether it is unary or binary operator
if (condn.getChildCount() == 1)
parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(0),
- leftAliases, rightAliases);
- else if (condn.getChildCount() == 2) {
- parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(0),
- leftAliases, rightAliases);
- parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(1),
- leftAliases, rightAliases);
+ leftAliases, rightAliases, null);
+ else if (condn.getChildCount() == 2) {
+
+ ArrayList<String> fields1 = null;
+ // if it is a dot operator, remember the field name of the rhs of the left semijoin
+ if (joinTree.getNoSemiJoin() == false &&
+ condn.getToken().getText().equals("." )) {
+ // get the semijoin rhs table name and field name
+ fields1 = new ArrayList<String>();
+ int rhssize = rightAliases.size();
+ parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(0),
+ leftAliases, rightAliases, null);
+ String rhsAlias = null;
+
+ if ( rightAliases.size() > rhssize ) { // the new table is rhs table
+ rhsAlias = rightAliases.get(rightAliases.size()-1);
+ }
+
+ parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(1),
+ leftAliases, rightAliases, fields1);
+ if ( rhsAlias != null && fields1.size() > 0 ) {
+ joinTree.addRHSSemijoinColumns(rhsAlias, condn);
+ }
+ } else {
+ parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(0),
+ leftAliases, rightAliases, null);
+ parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(1),
+ leftAliases, rightAliases, fields1);
+ }
} else
throw new SemanticException(condn.toStringTree() + " encountered with "
+ condn.getChildCount() + " children");
@@ -827,12 +857,12 @@
ASTNode leftCondn = (ASTNode) joinCond.getChild(0);
Vector<String> leftCondAl1 = new Vector<String>();
Vector<String> leftCondAl2 = new Vector<String>();
- parseJoinCondPopulateAlias(joinTree, leftCondn, leftCondAl1, leftCondAl2);
+ parseJoinCondPopulateAlias(joinTree, leftCondn, leftCondAl1, leftCondAl2, null);
ASTNode rightCondn = (ASTNode) joinCond.getChild(1);
Vector<String> rightCondAl1 = new Vector<String>();
Vector<String> rightCondAl2 = new Vector<String>();
- parseJoinCondPopulateAlias(joinTree, rightCondn, rightCondAl1, rightCondAl2);
+ parseJoinCondPopulateAlias(joinTree, rightCondn, rightCondAl1, rightCondAl2, null);
// is it a filter or a join condition
if (((leftCondAl1.size() != 0) && (leftCondAl2.size() != 0)) ||
@@ -877,7 +907,7 @@
}
for (int ci=childrenBegin; ci<joinCond.getChildCount(); ci++)
- parseJoinCondPopulateAlias(joinTree, (ASTNode)joinCond.getChild(ci), leftAlias.get(ci-childrenBegin), rightAlias.get(ci-childrenBegin));
+ parseJoinCondPopulateAlias(joinTree, (ASTNode)joinCond.getChild(ci), leftAlias.get(ci-childrenBegin), rightAlias.get(ci-childrenBegin), null);
boolean leftAliasNull = true;
for (Vector<String> left : leftAlias) {
@@ -2951,61 +2981,67 @@
return output;
}
- private Operator genJoinOperatorChildren(QBJoinTree join, Operator left, Operator[] right)
+ private Operator genJoinOperatorChildren(QBJoinTree join, Operator left, Operator[] right,
+ HashSet<Integer> omitOpts)
throws SemanticException {
+
RowResolver outputRS = new RowResolver();
ArrayList<String> outputColumnNames = new ArrayList<String>();
// all children are base classes
Operator<?>[] rightOps = new Operator[right.length];
- int pos = 0;
int outputPos = 0;
Map<String, Byte> reversedExprs = new HashMap<String, Byte>();
HashMap<Byte, List<exprNodeDesc>> exprMap = new HashMap<Byte, List<exprNodeDesc>>();
Map<String, exprNodeDesc> colExprMap = new HashMap<String, exprNodeDesc>();
HashMap<Integer, Set<String>> posToAliasMap = new HashMap<Integer, Set<String>>();
- for (Operator input : right)
- {
- ArrayList<exprNodeDesc> keyDesc = new ArrayList<exprNodeDesc>();
+
+ for ( int pos = 0; pos < right.length; ++pos ) {
+
+ Operator input = right[pos];
if (input == null)
input = left;
+
+ ArrayList<exprNodeDesc> keyDesc = new ArrayList<exprNodeDesc>();
Byte tag = Byte.valueOf((byte)(((reduceSinkDesc)(input.getConf())).getTag()));
- RowResolver inputRS = opParseCtx.get(input).getRR();
- Iterator<String> keysIter = inputRS.getTableNames().iterator();
- Set<String> aliases = posToAliasMap.get(pos);
- if(aliases == null) {
- aliases = new HashSet<String>();
- posToAliasMap.put(pos, aliases);
- }
-
- while (keysIter.hasNext())
- {
- String key = keysIter.next();
- aliases.add(key);
- HashMap<String, ColumnInfo> map = inputRS.getFieldMap(key);
- Iterator<String> fNamesIter = map.keySet().iterator();
- while (fNamesIter.hasNext())
- {
- String field = fNamesIter.next();
- ColumnInfo valueInfo = inputRS.get(key, field);
- keyDesc.add(new exprNodeColumnDesc(valueInfo.getType(),
- valueInfo.getInternalName(),
- valueInfo.getTabAlias(),
- valueInfo.getIsPartitionCol()));
- if (outputRS.get(key, field) == null) {
- String colName = getColumnInternalName(outputPos);
- outputPos++;
- outputColumnNames.add(colName);
- colExprMap.put(colName, keyDesc.get(keyDesc.size() - 1));
- outputRS.put(key, field, new ColumnInfo(colName,
- valueInfo.getType(), key, false));
- reversedExprs.put(colName, tag);
+
+ // check whether this input operator produces output
+ if ( omitOpts == null || !omitOpts.contains(pos) ) {
+ // prepare output descriptors for the input opt
+ RowResolver inputRS = opParseCtx.get(input).getRR();
+ Iterator<String> keysIter = inputRS.getTableNames().iterator();
+ Set<String> aliases = posToAliasMap.get(pos);
+ if(aliases == null) {
+ aliases = new HashSet<String>();
+ posToAliasMap.put(pos, aliases);
+ }
+ while (keysIter.hasNext()) {
+ String key = keysIter.next();
+ aliases.add(key);
+ HashMap<String, ColumnInfo> map = inputRS.getFieldMap(key);
+ Iterator<String> fNamesIter = map.keySet().iterator();
+ while (fNamesIter.hasNext()) {
+ String field = fNamesIter.next();
+ ColumnInfo valueInfo = inputRS.get(key, field);
+ keyDesc.add(new exprNodeColumnDesc(valueInfo.getType(),
+ valueInfo.getInternalName(),
+ valueInfo.getTabAlias(),
+ valueInfo.getIsPartitionCol()));
+
+ if (outputRS.get(key, field) == null) {
+ String colName = getColumnInternalName(outputPos);
+ outputPos++;
+ outputColumnNames.add(colName);
+ colExprMap.put(colName, keyDesc.get(keyDesc.size() - 1));
+ outputRS.put(key, field, new ColumnInfo(colName,
+ valueInfo.getType(), key, false));
+ reversedExprs.put(colName, tag);
+ }
}
}
- }
-
+ }
exprMap.put(tag, keyDesc);
- rightOps[pos++] = input;
+ rightOps[pos] = input;
}
org.apache.hadoop.hive.ql.plan.joinCond[] joinCondns = new org.apache.hadoop.hive.ql.plan.joinCond[join.getJoinCond().length];
@@ -3101,10 +3137,30 @@
}
Operator[] srcOps = new Operator[joinTree.getBaseSrc().length];
+
+ HashSet<Integer> omitOpts = null; // set of input to the join that should be omitted by the output
int pos = 0;
for (String src : joinTree.getBaseSrc()) {
if (src != null) {
Operator srcOp = map.get(src);
+
+ // for left-semi join, generate an additional selection & group-by operator before ReduceSink
+ ArrayList<ASTNode> fields = joinTree.getRHSSemijoinColumns(src);
+ if ( fields != null ) {
+ // the RHS table columns should be not be output from the join
+ if ( omitOpts == null ) {
+ omitOpts = new HashSet<Integer>();
+ }
+ omitOpts.add(pos);
+
+ // generate a selection operator for group-by keys only
+ srcOp = insertSelectForSemijoin(fields, srcOp);
+
+ // generate a groupby operator (HASH mode) for a map-side partial aggregation for semijoin
+ srcOp = genMapGroupByForSemijoin(qb, fields, srcOp, groupByDesc.Mode.HASH);
+ }
+
+ // generate a ReduceSink operator for the join
srcOps[pos] = genJoinReduceSinkChild(qb, joinTree, srcOp, src, pos);
pos++;
} else {
@@ -3116,10 +3172,139 @@
// Type checking and implicit type conversion for join keys
genJoinOperatorTypeCheck(joinSrcOp, srcOps);
- JoinOperator joinOp = (JoinOperator)genJoinOperatorChildren(joinTree, joinSrcOp, srcOps);
+ JoinOperator joinOp = (JoinOperator)genJoinOperatorChildren(joinTree, joinSrcOp, srcOps, omitOpts);
joinContext.put(joinOp, joinTree);
return joinOp;
}
+
+ /**
+ * Construct a selection operator for semijoin that filter out all fields other than the group by keys.
+ *
+ * @param fields list of fields need to be output
+ * @param input input operator
+ * @return the selection operator.
+ * @throws SemanticException
+ */
+ private Operator insertSelectForSemijoin(ArrayList<ASTNode> fields, Operator input)
+ throws SemanticException {
+
+ RowResolver inputRR = opParseCtx.get(input).getRR();
+ ArrayList<exprNodeDesc> colList = new ArrayList<exprNodeDesc>();
+ ArrayList<String> columnNames = new ArrayList<String>();
+
+ // construct the list of columns that need to be projected
+ for (ASTNode field: fields) {
+ exprNodeColumnDesc exprNode = (exprNodeColumnDesc) genExprNodeDesc(field, inputRR);
+ colList.add(exprNode);
+ columnNames.add(exprNode.getColumn());
+ }
+
+ // create selection operator
+ Operator output = putOpInsertMap(
+ OperatorFactory.getAndMakeChild(
+ new selectDesc(colList, columnNames, false),
+ new RowSchema(inputRR.getColumnInfos()),
+ input),
+ inputRR);
+
+ output.setColumnExprMap(input.getColumnExprMap());
+ return output;
+ }
+
+ private Operator genMapGroupByForSemijoin(QB qb,
+ ArrayList<ASTNode> fields, // the ASTNode of the join key "tab.col"
+ Operator inputOperatorInfo,
+ groupByDesc.Mode mode)
+ throws SemanticException {
+
+ RowResolver groupByInputRowResolver = opParseCtx.get(inputOperatorInfo).getRR();
+ RowResolver groupByOutputRowResolver = new RowResolver();
+ ArrayList<exprNodeDesc> groupByKeys = new ArrayList<exprNodeDesc>();
+ ArrayList<String> outputColumnNames = new ArrayList<String>();
+ ArrayList<aggregationDesc> aggregations = new ArrayList<aggregationDesc>();
+ Map<String, exprNodeDesc> colExprMap = new HashMap<String, exprNodeDesc>();
+ QBParseInfo parseInfo = qb.getParseInfo();
+
+ groupByOutputRowResolver.setIsExprResolver(true); // join keys should only be columns but not be expressions
+
+ for (int i = 0; i < fields.size(); ++i) {
+ // get the group by keys to ColumnInfo
+ ASTNode colName = fields.get(i);
+ exprNodeDesc grpByExprNode = genExprNodeDesc(colName, groupByInputRowResolver);
+ groupByKeys.add(grpByExprNode);
+
+ // generate output column names
+ String field = getColumnInternalName(i);
+ outputColumnNames.add(field);
+ ColumnInfo colInfo2 = new ColumnInfo(field, grpByExprNode.getTypeInfo(), "", false);
+ groupByOutputRowResolver.put("", colName.toStringTree(), colInfo2);
+
+ // establish mapping from the output column to the input column
+ colExprMap.put(field, grpByExprNode);
+ }
+
+ // Generate group-by operator
+ Operator op = putOpInsertMap(
+ OperatorFactory.getAndMakeChild(
+ new groupByDesc(mode, outputColumnNames, groupByKeys, aggregations, false),
+ new RowSchema(groupByOutputRowResolver.getColumnInfos()),
+ inputOperatorInfo),
+ groupByOutputRowResolver);
+
+ op.setColumnExprMap(colExprMap);
+ return op;
+ }
+
+ private Operator genReduceSinkForSemijoin(QB qb,
+ ArrayList<ASTNode> fields, // semijoin key for the rhs table
+ Operator inputOperatorInfo)
+ throws SemanticException {
+
+ RowResolver reduceSinkInputRowResolver = opParseCtx.get(inputOperatorInfo).getRR();
+ QBParseInfo parseInfo = qb.getParseInfo();
+ RowResolver reduceSinkOutputRowResolver = new RowResolver();
+ Map<String, exprNodeDesc> colExprMap = new HashMap<String, exprNodeDesc>();
+ ArrayList<exprNodeDesc> reduceKeys = new ArrayList<exprNodeDesc>();
+ List<String> outputColumnNames = new ArrayList<String>();
+
+ reduceSinkOutputRowResolver.setIsExprResolver(true);
+
+ // Pre-compute group-by keys and store in reduceKeys
+ for (int i = 0; i < fields.size(); ++i) {
+ // based on the input row resolver, resolve the column names and construct expression node descriptors
+ ASTNode colName = fields.get(i);
+ exprNodeDesc inputExpr = genExprNodeDesc(colName, reduceSinkInputRowResolver);
+
+ reduceKeys.add(inputExpr);
+
+ // create new ColumnInfos for the groupby columns and put them into the output row resolver
+ if (reduceSinkOutputRowResolver.get("", colName.toStringTree()) == null) {
+ outputColumnNames.add(getColumnInternalName(reduceKeys.size() - 1));
+ String field = Utilities.ReduceField.KEY.toString() + "." + getColumnInternalName(reduceKeys.size() - 1);
+ ColumnInfo colInfo1 = new ColumnInfo(field,
+ reduceKeys.get(reduceKeys.size()-1).getTypeInfo(),
+ null, false);
+ reduceSinkOutputRowResolver.put("", colName.toStringTree(), colInfo1);
+ colExprMap.put(colInfo1.getInternalName(), inputExpr);
+ } else {
+ throw new SemanticException(ErrorMsg.DUPLICATE_GROUPBY_KEY.getMsg());
+ }
+ }
+
+ // SEMIJOIN HAS NO AGGREGATIONS, and we don't really use reduce values, so leave it as an empty list
+ ArrayList<exprNodeDesc> reduceValues = new ArrayList<exprNodeDesc>();
+ int numPartitionFields = fields.size();
+
+ // finally generate the ReduceSink operator
+ ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap(
+ OperatorFactory.getAndMakeChild(PlanUtils.getReduceSinkDesc(reduceKeys, reduceValues, outputColumnNames, true, -1, numPartitionFields, -1),
+ new RowSchema(reduceSinkOutputRowResolver.getColumnInfos()),
+ inputOperatorInfo),
+ reduceSinkOutputRowResolver);
+ rsOp.setColumnExprMap(colExprMap);
+
+ return rsOp;
+ }
private void genJoinOperatorTypeCheck(Operator left, Operator[] right) throws SemanticException {
// keys[i] -> ArrayList<exprNodeDesc> for the i-th join operator key list
@@ -3311,26 +3496,28 @@
throws SemanticException {
QBJoinTree joinTree = new QBJoinTree();
joinCond[] condn = new joinCond[1];
-
- if (joinParseTree.getToken().getType() == HiveParser.TOK_LEFTOUTERJOIN)
- {
+
+ switch (joinParseTree.getToken().getType() ) {
+ case HiveParser.TOK_LEFTOUTERJOIN:
joinTree.setNoOuterJoin(false);
condn[0] = new joinCond(0, 1, joinType.LEFTOUTER);
- }
- else if (joinParseTree.getToken().getType() == HiveParser.TOK_RIGHTOUTERJOIN)
- {
+ break;
+ case HiveParser.TOK_RIGHTOUTERJOIN:
joinTree.setNoOuterJoin(false);
condn[0] = new joinCond(0, 1, joinType.RIGHTOUTER);
- }
- else if (joinParseTree.getToken().getType() == HiveParser.TOK_FULLOUTERJOIN)
- {
+ break;
+ case HiveParser.TOK_FULLOUTERJOIN:
joinTree.setNoOuterJoin(false);
condn[0] = new joinCond(0, 1, joinType.FULLOUTER);
- }
- else
- {
+ break;
+ case HiveParser.TOK_LEFTSEMIJOIN:
+ joinTree.setNoSemiJoin(false);
+ condn[0] = new joinCond(0, 1, joinType.LEFTSEMI);
+ break;
+ default:
condn[0] = new joinCond(0, 1, joinType.INNER);
joinTree.setNoOuterJoin(true);
+ break;
}
joinTree.setJoinCond(condn);
@@ -3376,6 +3563,10 @@
children = new String[2];
children[1] = alias;
joinTree.setBaseSrc(children);
+ // remember rhs table for semijoin
+ if (joinTree.getNoSemiJoin() == false) {
+ joinTree.addRHSSemijoin(alias);
+ }
} else
assert false;
@@ -3493,6 +3684,13 @@
else
target.setNoOuterJoin(false);
+ if (node.getNoSemiJoin() && target.getNoSemiJoin())
+ target.setNoSemiJoin(true);
+ else
+ target.setNoSemiJoin(false);
+
+ target.mergeRHSSemijoin(node);
+
joinCond[] nodeCondns = node.getJoinCond();
int nodeCondnsSize = nodeCondns.length;
joinCond[] targetCondns = target.getJoinCond();
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/joinType.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/joinType.java?rev=834649&r1=834648&r2=834649&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/joinType.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/joinType.java Tue Nov 10 20:18:28 2009
@@ -18,4 +18,4 @@
package org.apache.hadoop.hive.ql.parse;
-public enum joinType {INNER, LEFTOUTER, RIGHTOUTER, FULLOUTER, UNIQUE};
+public enum joinType {INNER, LEFTOUTER, RIGHTOUTER, FULLOUTER, UNIQUE, LEFTSEMI};
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/joinCond.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/joinCond.java?rev=834649&r1=834648&r2=834649&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/joinCond.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/joinCond.java Tue Nov 10 20:18:28 2009
@@ -20,6 +20,7 @@
import java.io.Serializable;
import java.util.Vector;
+import org.apache.hadoop.hive.ql.parse.joinType;
/**
* Join conditions Descriptor implementation.
@@ -44,19 +45,28 @@
this.left = condn.getLeft();
this.right = condn.getRight();
this.preserved = condn.getPreserved();
- org.apache.hadoop.hive.ql.parse.joinType itype = condn.getJoinType();
- if (itype == org.apache.hadoop.hive.ql.parse.joinType.INNER)
+ switch ( condn.getJoinType() ) {
+ case INNER:
this.type = joinDesc.INNER_JOIN;
- else if (itype == org.apache.hadoop.hive.ql.parse.joinType.LEFTOUTER)
+ break;
+ case LEFTOUTER:
this.type = joinDesc.LEFT_OUTER_JOIN;
- else if (itype == org.apache.hadoop.hive.ql.parse.joinType.RIGHTOUTER)
+ break;
+ case RIGHTOUTER:
this.type = joinDesc.RIGHT_OUTER_JOIN;
- else if (itype == org.apache.hadoop.hive.ql.parse.joinType.FULLOUTER)
+ break;
+ case FULLOUTER:
this.type = joinDesc.FULL_OUTER_JOIN;
- else if (itype == org.apache.hadoop.hive.ql.parse.joinType.UNIQUE)
+ break;
+ case UNIQUE:
this.type = joinDesc.UNIQUE_JOIN;
- else
+ break;
+ case LEFTSEMI:
+ this.type = joinDesc.LEFT_SEMI_JOIN;
+ break;
+ default:
assert false;
+ }
}
/**
@@ -117,8 +127,11 @@
case joinDesc.UNIQUE_JOIN:
sb.append("Unique Join");
break;
+ case joinDesc.LEFT_SEMI_JOIN:
+ sb.append("Left Semi Join ");
+ break;
default:
- sb.append("Unknow Join");
+ sb.append("Unknown Join ");
break;
}
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/joinDesc.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/joinDesc.java?rev=834649&r1=834648&r2=834649&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/joinDesc.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/joinDesc.java Tue Nov 10 20:18:28 2009
@@ -37,11 +37,12 @@
@explain(displayName="Join Operator")
public class joinDesc implements Serializable {
private static final long serialVersionUID = 1L;
- public static final int INNER_JOIN = 0;
- public static final int LEFT_OUTER_JOIN = 1;
+ public static final int INNER_JOIN = 0;
+ public static final int LEFT_OUTER_JOIN = 1;
public static final int RIGHT_OUTER_JOIN = 2;
- public static final int FULL_OUTER_JOIN = 3;
- public static final int UNIQUE_JOIN = 4;
+ public static final int FULL_OUTER_JOIN = 3;
+ public static final int UNIQUE_JOIN = 4;
+ public static final int LEFT_SEMI_JOIN = 5;
// alias to key mapping
private Map<Byte, List<exprNodeDesc>> exprs;
Added: hadoop/hive/trunk/ql/src/test/queries/clientnegative/semijoin1.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientnegative/semijoin1.q?rev=834649&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientnegative/semijoin1.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientnegative/semijoin1.q Tue Nov 10 20:18:28 2009
@@ -0,0 +1,2 @@
+-- reference rhs of semijoin in select-clause
+select b.value from src a left semi join src b on (b.key = a.key and b.key = '100');
Added: hadoop/hive/trunk/ql/src/test/queries/clientnegative/semijoin2.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientnegative/semijoin2.q?rev=834649&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientnegative/semijoin2.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientnegative/semijoin2.q Tue Nov 10 20:18:28 2009
@@ -0,0 +1,2 @@
+-- rhs table reference in the where clause
+select a.value from src a left semi join src b on a.key = b.key where b.value = 'val_18';
Added: hadoop/hive/trunk/ql/src/test/queries/clientnegative/semijoin3.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientnegative/semijoin3.q?rev=834649&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientnegative/semijoin3.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientnegative/semijoin3.q Tue Nov 10 20:18:28 2009
@@ -0,0 +1,2 @@
+-- rhs table reference in group by
+select * from src a left semi join src b on a.key = b.key group by b.value;
Added: hadoop/hive/trunk/ql/src/test/queries/clientnegative/semijoin4.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientnegative/semijoin4.q?rev=834649&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientnegative/semijoin4.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientnegative/semijoin4.q Tue Nov 10 20:18:28 2009
@@ -0,0 +1,3 @@
+-- rhs table is a view and reference the view in where clause
+select a.value from src a left semi join (select key , value from src where key > 100) b on a.key = b.key where b.value = 'val_108' ;
+
Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/semijoin.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/semijoin.q?rev=834649&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/semijoin.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/semijoin.q Tue Nov 10 20:18:28 2009
@@ -0,0 +1,83 @@
+drop table t1;
+drop table t2;
+drop table t3;
+drop table t4;
+
+create table t1 as select cast(key as int) key, value from src where key <= 10;
+
+select * from t1 sort by key;
+
+create table t2 as select cast(2*key as int) key, value from t1;
+
+select * from t2 sort by key;
+
+create table t3 as select * from (select * from t1 union all select * from t2) b;
+select * from t3 sort by key, value;
+
+create table t4 (key int, value string);
+select * from t4;
+
+explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value;
+select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value;
+
+explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value;
+select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value;
+
+explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value;
+select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value;
+
+explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value;
+select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value;
+
+explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value;
+select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value;
+
+explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value;
+select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value;
+
+explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ;
+select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ;
+
+explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value;
+select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value;
+
+explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key;
+select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key;
+
+explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value;
+select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value;
+
+explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value;
+select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value;
+
+explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value;
+select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value;
+
+explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key;
+select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key;
+
+explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;
+select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;
+
+explain select a.key from t1 a right outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;
+select a.key from t1 a right outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;
+
+explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;
+select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;
+
+explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key;
+select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key;
+
+explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key;
+select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key;
+
+explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key;
+select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key;
+
+explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key;
+select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key;
+
+drop table t1;
+drop table t2;
+drop table t3;
+drop table t4;
Added: hadoop/hive/trunk/ql/src/test/results/clientnegative/semijoin1.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientnegative/semijoin1.q.out?rev=834649&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientnegative/semijoin1.q.out (added)
+++ hadoop/hive/trunk/ql/src/test/results/clientnegative/semijoin1.q.out Tue Nov 10 20:18:28 2009
@@ -0,0 +1 @@
+FAILED: Error in semantic analysis: line 2:7 Invalid Table Alias or Column Reference b
Added: hadoop/hive/trunk/ql/src/test/results/clientnegative/semijoin2.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientnegative/semijoin2.q.out?rev=834649&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientnegative/semijoin2.q.out (added)
+++ hadoop/hive/trunk/ql/src/test/results/clientnegative/semijoin2.q.out Tue Nov 10 20:18:28 2009
@@ -0,0 +1 @@
+FAILED: Error in semantic analysis: line 2:70 Invalid Table Alias or Column Reference b
Added: hadoop/hive/trunk/ql/src/test/results/clientnegative/semijoin3.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientnegative/semijoin3.q.out?rev=834649&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientnegative/semijoin3.q.out (added)
+++ hadoop/hive/trunk/ql/src/test/results/clientnegative/semijoin3.q.out Tue Nov 10 20:18:28 2009
@@ -0,0 +1 @@
+FAILED: Error in semantic analysis: line 2:67 Invalid Table Alias or Column Reference b
Added: hadoop/hive/trunk/ql/src/test/results/clientnegative/semijoin4.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientnegative/semijoin4.q.out?rev=834649&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientnegative/semijoin4.q.out (added)
+++ hadoop/hive/trunk/ql/src/test/results/clientnegative/semijoin4.q.out Tue Nov 10 20:18:28 2009
@@ -0,0 +1 @@
+FAILED: Error in semantic analysis: line 2:112 Invalid Table Alias or Column Reference b