You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by dh...@apache.org on 2008/12/31 21:28:48 UTC

svn commit: r730467 - in /hadoop/hive/trunk: CHANGES.txt ql/src/java/org/apache/hadoop/hive/ql/tools/LineageInfo.java ql/src/test/org/apache/hadoop/hive/ql/tool/TestLineageInfo.java

Author: dhruba
Date: Wed Dec 31 12:28:48 2008
New Revision: 730467

URL: http://svn.apache.org/viewvc?rev=730467&view=rev
Log:
HIVE-202. Fix Lineage for join queries.
(Suresh Antony via dhruba)


Modified:
    hadoop/hive/trunk/CHANGES.txt
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/tools/LineageInfo.java
    hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/tool/TestLineageInfo.java

Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=730467&r1=730466&r2=730467&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Wed Dec 31 12:28:48 2008
@@ -170,3 +170,6 @@
     HIVE-196. Two test runs can run simultaneously on the same machine.
     (Ashish Thusoo via dhruba)
 
+    HIVE-202. Fix Lineage for join queries.
+    (Suresh Antony via dhruba)
+

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/tools/LineageInfo.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/tools/LineageInfo.java?rev=730467&r1=730466&r2=730467&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/tools/LineageInfo.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/tools/LineageInfo.java Wed Dec 31 12:28:48 2008
@@ -33,7 +33,6 @@
 import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
 import org.apache.hadoop.hive.ql.lib.GraphWalker;
 import org.apache.hadoop.hive.ql.lib.Rule;
-import org.apache.hadoop.hive.ql.lib.RuleRegExp;
 import org.apache.hadoop.hive.ql.parse.ASTNode;
 import org.apache.hadoop.hive.ql.parse.HiveParser;
 import org.apache.hadoop.hive.ql.parse.ParseDriver;
@@ -50,82 +49,75 @@
  */
 public class LineageInfo  implements NodeProcessor {
 
-	/**
-	 * Stores input tables in sql
-	 */
-	TreeSet<String> inputTableList = new TreeSet<String>();
-	/**
-	 * Stores output tables in sql
-	 */
-	TreeSet<String> OutputTableList= new TreeSet<String>();
-
-	/**
-	 * 
-	 * @return java.util.TreeSet 
-	 */
-	public TreeSet<String> getInputTableList() {
-		return inputTableList;
-	}
-
-	/**
-	 * @return java.util.TreeSet
-	 */
-	public TreeSet<String> getOutputTableList() {
-		return OutputTableList;
-	}
-
-	/**
-	 * Implements the process method for the NodeProcessor interface.
-	 */
+  /**
+   * Stores input tables in sql
+   */
+  TreeSet<String> inputTableList = new TreeSet<String>();
+  /**
+   * Stores output tables in sql
+   */
+  TreeSet<String> OutputTableList= new TreeSet<String>();
+
+  /**
+   * 
+   * @return java.util.TreeSet 
+   */
+  public TreeSet<String> getInputTableList() {
+    return inputTableList;
+  }
+
+  /**
+   * @return java.util.TreeSet
+   */
+  public TreeSet<String> getOutputTableList() {
+    return OutputTableList;
+  }
+
+  /**
+   * Implements the process method for the NodeProcessor interface.
+   */
   @Override
   public void process(Node nd, NodeProcessorCtx procCtx)
-      throws SemanticException {
+  throws SemanticException {
     ASTNode pt = (ASTNode)nd;
-    switch (pt.getToken().getType()) {
 
-    case HiveParser.TOK_DESTINATION: {
-      if (pt.getChild(0).getType() == HiveParser.TOK_TAB) {
-        OutputTableList.add(pt.getChild(0).getChild(0).getText()) ;
-      }
+    switch (pt.getToken().getType()) {
 
+    case HiveParser.TOK_TAB:
+      OutputTableList.add(pt.getChild(0).getText()) ;
+      break;
+
+    case HiveParser.TOK_TABREF:
+      String table_name = ((ASTNode)pt.getChild(0)).getText();
+      inputTableList.add(table_name);
+      break;
     }
-    break;
-    case HiveParser.TOK_FROM: {
-      if (((ASTNode)pt.getChild(0)).getToken().getType() == HiveParser.TOK_TABREF) {
-        ASTNode tabRef = (ASTNode) pt.getChild(0);
-        String table_name = tabRef.getChild(0).getText();
-        inputTableList.add(table_name);
-      }
-    }
-    break;
-    }
-    
+
   }
-  
-	/**
-	 *  parses given query and gets the lineage info.
-	 * @param query
-	 * @throws ParseException
-	 */
-	public void getLineageInfo(String query) throws ParseException, SemanticException
-	{
-
-		/*
-		 *  Get the AST tree
-		 */
-		ParseDriver pd = new ParseDriver();
-		ASTNode tree = pd.parse(query);
-
-		while ((tree.getToken() == null) && (tree.getChildCount() > 0)) {
-			tree = (ASTNode) tree.getChild(0);
-		}
-
-		/*
-		 * initialize Event Processor and dispatcher.
-		 */
-		inputTableList.clear();
-		OutputTableList.clear();
-    
+
+  /**
+   *  parses given query and gets the lineage info.
+   * @param query
+   * @throws ParseException
+   */
+  public void getLineageInfo(String query) throws ParseException, SemanticException {
+
+    /*
+     *  Get the AST tree
+     */
+    ParseDriver pd = new ParseDriver();
+    ASTNode tree = pd.parse(query);
+
+    while ((tree.getToken() == null) && (tree.getChildCount() > 0)) {
+      tree = (ASTNode) tree.getChild(0);
+    }
+
+    /*
+     * initialize Event Processor and dispatcher.
+     */
+    inputTableList.clear();
+    OutputTableList.clear();
+
     // create a walker which walks the tree in a DFS manner while maintaining the operator stack. The dispatcher
     // generates the plan from the operator tree
     Map<Rule, NodeProcessor> rules = new LinkedHashMap<Rule, NodeProcessor>();
@@ -133,28 +125,28 @@
     // The dispatcher fires the processor corresponding to the closest matching rule and passes the context along
     Dispatcher disp = new DefaultRuleDispatcher(this, rules, null);
     GraphWalker ogw = new DefaultGraphWalker(disp);
-   
+
     // Create a list of topop nodes
     ArrayList<Node> topNodes = new ArrayList<Node>();
     topNodes.add(tree);
     ogw.startWalking(topNodes);
-	}
+  }
 
-	public static void main(String[] args) throws IOException, ParseException,
-	SemanticException {
+  public static void main(String[] args) throws IOException, ParseException,
+  SemanticException {
 
-		String query = args[0];
+    String query = args[0];
 
-		LineageInfo lep = new LineageInfo();
+    LineageInfo lep = new LineageInfo();
 
-		lep.getLineageInfo(query);
+    lep.getLineageInfo(query);
 
-		for (String tab : lep.getInputTableList()) {
-			System.out.println("InputTable=" + tab);
-		}
+    for (String tab : lep.getInputTableList()) {
+      System.out.println("InputTable=" + tab);
+    }
 
-		for (String tab : lep.getOutputTableList()) {
-			System.out.println("OutputTable=" + tab);
-		}
-	}
+    for (String tab : lep.getOutputTableList()) {
+      System.out.println("OutputTable=" + tab);
+    }
+  }
 }

Modified: hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/tool/TestLineageInfo.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/tool/TestLineageInfo.java?rev=730467&r1=730466&r2=730467&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/tool/TestLineageInfo.java (original)
+++ hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/tool/TestLineageInfo.java Wed Dec 31 12:28:48 2008
@@ -28,86 +28,101 @@
 
   /**
    * Checks whether the test outputs match the expected outputs
-   * @param lep The LineageInfo extracted from the test
-   * @param i The set of input tables
-   * @param o The set of output tables
+   * 
+   * @param lep
+   *          The LineageInfo extracted from the test
+   * @param i
+   *          The set of input tables
+   * @param o
+   *          The set of output tables
    */
   private void checkOutput(LineageInfo lep, TreeSet<String> i, TreeSet<String> o) {
-    
-    if ( !i.equals(lep.getInputTableList())){
+
+    if (!i.equals(lep.getInputTableList())) {
       fail("Input table not same");
     }
-    if (! o.equals(lep.getOutputTableList())){
+    if (!o.equals(lep.getOutputTableList())) {
       fail("Output table not same");
-    }    
+    }
+  }
+
+  public void testSimpleQuery() {
+    LineageInfo lep = new LineageInfo();
+    try {
+      lep
+          .getLineageInfo("INSERT OVERWRITE TABLE dest1 partition (ds = '111')  SELECT s.* FROM srcpart TABLESAMPLE (BUCKET 1 OUT OF 1) s WHERE s.ds='2008-04-08' and s.hr='11'");
+      TreeSet<String> i = new TreeSet<String>();
+      TreeSet<String> o = new TreeSet<String>();
+      i.add("srcpart");
+      o.add("dest1");
+      checkOutput(lep, i, o);
+    } catch (Exception e) {
+      e.printStackTrace();
+      fail("Failed");
+    }
+  }
+
+  public void testSimpleQuery2() {
+    LineageInfo lep = new LineageInfo();
+    try {
+      lep
+          .getLineageInfo("FROM (FROM src select src.key, src.value WHERE src.key < 10 UNION ALL FROM src SELECT src.* WHERE src.key > 10 ) unioninput INSERT OVERWRITE DIRECTORY '../../../../build/contrib/hive/ql/test/data/warehouse/union.out' SELECT unioninput.*");
+      TreeSet<String> i = new TreeSet<String>();
+      TreeSet<String> o = new TreeSet<String>();
+      i.add("src");
+      checkOutput(lep, i, o);
+    } catch (Exception e) {
+      e.printStackTrace();
+      fail("Failed");
+    }
+  }
+
+  public void testSimpleQuery3() {
+    LineageInfo lep = new LineageInfo();
+    try {
+      lep
+          .getLineageInfo("FROM (FROM src select src.key, src.value WHERE src.key < 10 UNION ALL FROM src1 SELECT src1.* WHERE src1.key > 10 ) unioninput INSERT OVERWRITE DIRECTORY '../../../../build/contrib/hive/ql/test/data/warehouse/union.out' SELECT unioninput.*");
+      TreeSet<String> i = new TreeSet<String>();
+      TreeSet<String> o = new TreeSet<String>();
+      i.add("src");
+      i.add("src1");
+      checkOutput(lep, i, o);
+    } catch (Exception e) {
+      e.printStackTrace();
+      fail("Failed");
+    }
+  }
+
+  public void testSimpleQuery4() {
+    LineageInfo lep = new LineageInfo();
+    try {
+      lep
+          .getLineageInfo("FROM ( FROM ( FROM src1 src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20) a RIGHT OUTER JOIN ( FROM src2 src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25) b ON (a.c1 = b.c3) SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4) c SELECT c.c1, c.c2, c.c3, c.c4");
+      TreeSet<String> i = new TreeSet<String>();
+      TreeSet<String> o = new TreeSet<String>();
+      i.add("src1");
+      i.add("src2");
+      checkOutput(lep, i, o);
+    } catch (Exception e) {
+      e.printStackTrace();
+      fail("Failed");
+    }
+  }
+
+  public void testSimpleQuery5() {
+    LineageInfo lep = new LineageInfo();
+    try {
+      lep
+          .getLineageInfo("insert overwrite table x select a.y, b.y from a a full outer join b b on (a.x = b.y)");
+      TreeSet<String> i = new TreeSet<String>();
+      TreeSet<String> o = new TreeSet<String>();
+      i.add("a");
+      i.add("b");
+      o.add("x");
+      checkOutput(lep, i, o);
+    } catch (Exception e) {
+      e.printStackTrace();
+      fail("Failed");
+    }
   }
-  
-	public void testSimpleQuery(){
-		LineageInfo lep = new LineageInfo();
-		try{
-			lep.getLineageInfo(
-			"INSERT OVERWRITE TABLE dest1 partition (ds = '111')  SELECT s.* FROM srcpart TABLESAMPLE (BUCKET 1 OUT OF 1) s WHERE s.ds='2008-04-08' and s.hr='11'");
-			TreeSet<String> i = new TreeSet<String>();
-			TreeSet<String> o = new TreeSet<String>();
-			 i.add("srcpart");
-			 o.add("dest1");
-			 checkOutput(lep, i, o);
-		}
-		catch (Exception e) {
-			e.printStackTrace();
-			fail("Failed");
-		}
-	}
-	
-	public void testSimpleQuery2(){
-		LineageInfo lep = new LineageInfo();
-		try{
-			lep.getLineageInfo(
-			"FROM (FROM src select src.key, src.value WHERE src.key < 10 UNION ALL FROM src SELECT src.* WHERE src.key > 10 ) unioninput INSERT OVERWRITE DIRECTORY '../../../../build/contrib/hive/ql/test/data/warehouse/union.out' SELECT unioninput.*"
-					);
-			TreeSet<String> i = new TreeSet<String>();
-			TreeSet<String> o = new TreeSet<String>();
-			i.add("src");
-			checkOutput(lep, i, o);
-		}
-		catch (Exception e) {
-			e.printStackTrace();
-			fail("Failed");
-		}			
-	}
-	
-	public void testSimpleQuery3(){
-		LineageInfo lep = new LineageInfo();
-		try{
-			lep.getLineageInfo(
-			"FROM (FROM src select src.key, src.value WHERE src.key < 10 UNION ALL FROM src1 SELECT src1.* WHERE src1.key > 10 ) unioninput INSERT OVERWRITE DIRECTORY '../../../../build/contrib/hive/ql/test/data/warehouse/union.out' SELECT unioninput.*"
-					);
-			TreeSet<String> i = new TreeSet<String>();
-			TreeSet<String> o = new TreeSet<String>();
-			i.add("src");
-			i.add("src1");
-			checkOutput(lep, i, o);
-		}
-		catch (Exception e) {
-			e.printStackTrace();
-			fail("Failed");
-		}
-	}
-	
-	public void testSimpleQuery4(){
-		LineageInfo lep = new LineageInfo();
-		try{
-			lep.getLineageInfo(
-					"FROM ( FROM ( FROM src1 src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20) a RIGHT OUTER JOIN ( FROM src2 src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25) b ON (a.c1 = b.c3) SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4) c SELECT c.c1, c.c2, c.c3, c.c4"					);
-			TreeSet<String> i = new TreeSet<String>();
-			TreeSet<String> o = new TreeSet<String>();
-			i.add("src1");
-			i.add("src2");
-			checkOutput(lep, i, o);
-		}
-		catch (Exception e) {
-			e.printStackTrace();
-			fail("Failed");
-		}		
-	}
 }