You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by dh...@apache.org on 2008/12/31 21:28:48 UTC
svn commit: r730467 - in /hadoop/hive/trunk: CHANGES.txt
ql/src/java/org/apache/hadoop/hive/ql/tools/LineageInfo.java
ql/src/test/org/apache/hadoop/hive/ql/tool/TestLineageInfo.java
Author: dhruba
Date: Wed Dec 31 12:28:48 2008
New Revision: 730467
URL: http://svn.apache.org/viewvc?rev=730467&view=rev
Log:
HIVE-202. Fix Lineage for join queries.
(Suresh Antony via dhruba)
Modified:
hadoop/hive/trunk/CHANGES.txt
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/tools/LineageInfo.java
hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/tool/TestLineageInfo.java
Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=730467&r1=730466&r2=730467&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Wed Dec 31 12:28:48 2008
@@ -170,3 +170,6 @@
HIVE-196. Two test runs can run simultaneously on the same machine.
(Ashish Thusoo via dhruba)
+ HIVE-202. Fix Lineage for join queries.
+ (Suresh Antony via dhruba)
+
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/tools/LineageInfo.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/tools/LineageInfo.java?rev=730467&r1=730466&r2=730467&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/tools/LineageInfo.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/tools/LineageInfo.java Wed Dec 31 12:28:48 2008
@@ -33,7 +33,6 @@
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.lib.GraphWalker;
import org.apache.hadoop.hive.ql.lib.Rule;
-import org.apache.hadoop.hive.ql.lib.RuleRegExp;
import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.parse.HiveParser;
import org.apache.hadoop.hive.ql.parse.ParseDriver;
@@ -50,82 +49,75 @@
*/
public class LineageInfo implements NodeProcessor {
- /**
- * Stores input tables in sql
- */
- TreeSet<String> inputTableList = new TreeSet<String>();
- /**
- * Stores output tables in sql
- */
- TreeSet<String> OutputTableList= new TreeSet<String>();
-
- /**
- *
- * @return java.util.TreeSet
- */
- public TreeSet<String> getInputTableList() {
- return inputTableList;
- }
-
- /**
- * @return java.util.TreeSet
- */
- public TreeSet<String> getOutputTableList() {
- return OutputTableList;
- }
-
- /**
- * Implements the process method for the NodeProcessor interface.
- */
+ /**
+ * Stores input tables in sql
+ */
+ TreeSet<String> inputTableList = new TreeSet<String>();
+ /**
+ * Stores output tables in sql
+ */
+ TreeSet<String> OutputTableList= new TreeSet<String>();
+
+ /**
+ *
+ * @return java.util.TreeSet
+ */
+ public TreeSet<String> getInputTableList() {
+ return inputTableList;
+ }
+
+ /**
+ * @return java.util.TreeSet
+ */
+ public TreeSet<String> getOutputTableList() {
+ return OutputTableList;
+ }
+
+ /**
+ * Implements the process method for the NodeProcessor interface.
+ */
@Override
public void process(Node nd, NodeProcessorCtx procCtx)
- throws SemanticException {
+ throws SemanticException {
ASTNode pt = (ASTNode)nd;
- switch (pt.getToken().getType()) {
- case HiveParser.TOK_DESTINATION: {
- if (pt.getChild(0).getType() == HiveParser.TOK_TAB) {
- OutputTableList.add(pt.getChild(0).getChild(0).getText()) ;
- }
+ switch (pt.getToken().getType()) {
+ case HiveParser.TOK_TAB:
+ OutputTableList.add(pt.getChild(0).getText()) ;
+ break;
+
+ case HiveParser.TOK_TABREF:
+ String table_name = ((ASTNode)pt.getChild(0)).getText();
+ inputTableList.add(table_name);
+ break;
}
- break;
- case HiveParser.TOK_FROM: {
- if (((ASTNode)pt.getChild(0)).getToken().getType() == HiveParser.TOK_TABREF) {
- ASTNode tabRef = (ASTNode) pt.getChild(0);
- String table_name = tabRef.getChild(0).getText();
- inputTableList.add(table_name);
- }
- }
- break;
- }
-
+
}
-
- /**
- * parses given query and gets the lineage info.
- * @param query
- * @throws ParseException
- */
- public void getLineageInfo(String query) throws ParseException, SemanticException
- {
-
- /*
- * Get the AST tree
- */
- ParseDriver pd = new ParseDriver();
- ASTNode tree = pd.parse(query);
-
- while ((tree.getToken() == null) && (tree.getChildCount() > 0)) {
- tree = (ASTNode) tree.getChild(0);
- }
-
- /*
- * initialize Event Processor and dispatcher.
- */
- inputTableList.clear();
- OutputTableList.clear();
-
+
+ /**
+ * parses given query and gets the lineage info.
+ * @param query
+ * @throws ParseException
+ */
+ public void getLineageInfo(String query) throws ParseException, SemanticException {
+
+ /*
+ * Get the AST tree
+ */
+ ParseDriver pd = new ParseDriver();
+ ASTNode tree = pd.parse(query);
+
+ while ((tree.getToken() == null) && (tree.getChildCount() > 0)) {
+ tree = (ASTNode) tree.getChild(0);
+ }
+
+ /*
+ * initialize Event Processor and dispatcher.
+ */
+ inputTableList.clear();
+ OutputTableList.clear();
+
// create a walker which walks the tree in a DFS manner while maintaining the operator stack. The dispatcher
// generates the plan from the operator tree
Map<Rule, NodeProcessor> rules = new LinkedHashMap<Rule, NodeProcessor>();
@@ -133,28 +125,28 @@
// The dispatcher fires the processor corresponding to the closest matching rule and passes the context along
Dispatcher disp = new DefaultRuleDispatcher(this, rules, null);
GraphWalker ogw = new DefaultGraphWalker(disp);
-
+
// Create a list of topop nodes
ArrayList<Node> topNodes = new ArrayList<Node>();
topNodes.add(tree);
ogw.startWalking(topNodes);
- }
+ }
- public static void main(String[] args) throws IOException, ParseException,
- SemanticException {
+ public static void main(String[] args) throws IOException, ParseException,
+ SemanticException {
- String query = args[0];
+ String query = args[0];
- LineageInfo lep = new LineageInfo();
+ LineageInfo lep = new LineageInfo();
- lep.getLineageInfo(query);
+ lep.getLineageInfo(query);
- for (String tab : lep.getInputTableList()) {
- System.out.println("InputTable=" + tab);
- }
+ for (String tab : lep.getInputTableList()) {
+ System.out.println("InputTable=" + tab);
+ }
- for (String tab : lep.getOutputTableList()) {
- System.out.println("OutputTable=" + tab);
- }
- }
+ for (String tab : lep.getOutputTableList()) {
+ System.out.println("OutputTable=" + tab);
+ }
+ }
}
Modified: hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/tool/TestLineageInfo.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/tool/TestLineageInfo.java?rev=730467&r1=730466&r2=730467&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/tool/TestLineageInfo.java (original)
+++ hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/tool/TestLineageInfo.java Wed Dec 31 12:28:48 2008
@@ -28,86 +28,101 @@
/**
* Checks whether the test outputs match the expected outputs
- * @param lep The LineageInfo extracted from the test
- * @param i The set of input tables
- * @param o The set of output tables
+ *
+ * @param lep
+ * The LineageInfo extracted from the test
+ * @param i
+ * The set of input tables
+ * @param o
+ * The set of output tables
*/
private void checkOutput(LineageInfo lep, TreeSet<String> i, TreeSet<String> o) {
-
- if ( !i.equals(lep.getInputTableList())){
+
+ if (!i.equals(lep.getInputTableList())) {
fail("Input table not same");
}
- if (! o.equals(lep.getOutputTableList())){
+ if (!o.equals(lep.getOutputTableList())) {
fail("Output table not same");
- }
+ }
+ }
+
+ public void testSimpleQuery() {
+ LineageInfo lep = new LineageInfo();
+ try {
+ lep
+ .getLineageInfo("INSERT OVERWRITE TABLE dest1 partition (ds = '111') SELECT s.* FROM srcpart TABLESAMPLE (BUCKET 1 OUT OF 1) s WHERE s.ds='2008-04-08' and s.hr='11'");
+ TreeSet<String> i = new TreeSet<String>();
+ TreeSet<String> o = new TreeSet<String>();
+ i.add("srcpart");
+ o.add("dest1");
+ checkOutput(lep, i, o);
+ } catch (Exception e) {
+ e.printStackTrace();
+ fail("Failed");
+ }
+ }
+
+ public void testSimpleQuery2() {
+ LineageInfo lep = new LineageInfo();
+ try {
+ lep
+ .getLineageInfo("FROM (FROM src select src.key, src.value WHERE src.key < 10 UNION ALL FROM src SELECT src.* WHERE src.key > 10 ) unioninput INSERT OVERWRITE DIRECTORY '../../../../build/contrib/hive/ql/test/data/warehouse/union.out' SELECT unioninput.*");
+ TreeSet<String> i = new TreeSet<String>();
+ TreeSet<String> o = new TreeSet<String>();
+ i.add("src");
+ checkOutput(lep, i, o);
+ } catch (Exception e) {
+ e.printStackTrace();
+ fail("Failed");
+ }
+ }
+
+ public void testSimpleQuery3() {
+ LineageInfo lep = new LineageInfo();
+ try {
+ lep
+ .getLineageInfo("FROM (FROM src select src.key, src.value WHERE src.key < 10 UNION ALL FROM src1 SELECT src1.* WHERE src1.key > 10 ) unioninput INSERT OVERWRITE DIRECTORY '../../../../build/contrib/hive/ql/test/data/warehouse/union.out' SELECT unioninput.*");
+ TreeSet<String> i = new TreeSet<String>();
+ TreeSet<String> o = new TreeSet<String>();
+ i.add("src");
+ i.add("src1");
+ checkOutput(lep, i, o);
+ } catch (Exception e) {
+ e.printStackTrace();
+ fail("Failed");
+ }
+ }
+
+ public void testSimpleQuery4() {
+ LineageInfo lep = new LineageInfo();
+ try {
+ lep
+ .getLineageInfo("FROM ( FROM ( FROM src1 src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20) a RIGHT OUTER JOIN ( FROM src2 src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25) b ON (a.c1 = b.c3) SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4) c SELECT c.c1, c.c2, c.c3, c.c4");
+ TreeSet<String> i = new TreeSet<String>();
+ TreeSet<String> o = new TreeSet<String>();
+ i.add("src1");
+ i.add("src2");
+ checkOutput(lep, i, o);
+ } catch (Exception e) {
+ e.printStackTrace();
+ fail("Failed");
+ }
+ }
+
+ public void testSimpleQuery5() {
+ LineageInfo lep = new LineageInfo();
+ try {
+ lep
+ .getLineageInfo("insert overwrite table x select a.y, b.y from a a full outer join b b on (a.x = b.y)");
+ TreeSet<String> i = new TreeSet<String>();
+ TreeSet<String> o = new TreeSet<String>();
+ i.add("a");
+ i.add("b");
+ o.add("x");
+ checkOutput(lep, i, o);
+ } catch (Exception e) {
+ e.printStackTrace();
+ fail("Failed");
+ }
}
-
- public void testSimpleQuery(){
- LineageInfo lep = new LineageInfo();
- try{
- lep.getLineageInfo(
- "INSERT OVERWRITE TABLE dest1 partition (ds = '111') SELECT s.* FROM srcpart TABLESAMPLE (BUCKET 1 OUT OF 1) s WHERE s.ds='2008-04-08' and s.hr='11'");
- TreeSet<String> i = new TreeSet<String>();
- TreeSet<String> o = new TreeSet<String>();
- i.add("srcpart");
- o.add("dest1");
- checkOutput(lep, i, o);
- }
- catch (Exception e) {
- e.printStackTrace();
- fail("Failed");
- }
- }
-
- public void testSimpleQuery2(){
- LineageInfo lep = new LineageInfo();
- try{
- lep.getLineageInfo(
- "FROM (FROM src select src.key, src.value WHERE src.key < 10 UNION ALL FROM src SELECT src.* WHERE src.key > 10 ) unioninput INSERT OVERWRITE DIRECTORY '../../../../build/contrib/hive/ql/test/data/warehouse/union.out' SELECT unioninput.*"
- );
- TreeSet<String> i = new TreeSet<String>();
- TreeSet<String> o = new TreeSet<String>();
- i.add("src");
- checkOutput(lep, i, o);
- }
- catch (Exception e) {
- e.printStackTrace();
- fail("Failed");
- }
- }
-
- public void testSimpleQuery3(){
- LineageInfo lep = new LineageInfo();
- try{
- lep.getLineageInfo(
- "FROM (FROM src select src.key, src.value WHERE src.key < 10 UNION ALL FROM src1 SELECT src1.* WHERE src1.key > 10 ) unioninput INSERT OVERWRITE DIRECTORY '../../../../build/contrib/hive/ql/test/data/warehouse/union.out' SELECT unioninput.*"
- );
- TreeSet<String> i = new TreeSet<String>();
- TreeSet<String> o = new TreeSet<String>();
- i.add("src");
- i.add("src1");
- checkOutput(lep, i, o);
- }
- catch (Exception e) {
- e.printStackTrace();
- fail("Failed");
- }
- }
-
- public void testSimpleQuery4(){
- LineageInfo lep = new LineageInfo();
- try{
- lep.getLineageInfo(
- "FROM ( FROM ( FROM src1 src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20) a RIGHT OUTER JOIN ( FROM src2 src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25) b ON (a.c1 = b.c3) SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4) c SELECT c.c1, c.c2, c.c3, c.c4" );
- TreeSet<String> i = new TreeSet<String>();
- TreeSet<String> o = new TreeSet<String>();
- i.add("src1");
- i.add("src2");
- checkOutput(lep, i, o);
- }
- catch (Exception e) {
- e.printStackTrace();
- fail("Failed");
- }
- }
}