You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by zs...@apache.org on 2008/12/13 04:07:28 UTC

svn commit: r726162 - in /hadoop/hive/trunk: ./ ql/src/java/org/apache/hadoop/hive/ql/parse/ ql/src/java/org/apache/hadoop/hive/ql/tools/ ql/src/test/org/apache/hadoop/hive/ql/tool/

Author: zshao
Date: Fri Dec 12 19:07:25 2008
New Revision: 726162

URL: http://svn.apache.org/viewvc?rev=726162&view=rev
Log:
HIVE-147. Add a tool for extracting lineage info from hive sql.
(Suresh Antony via zshao)

Added:
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/tools/
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/tools/LineageInfo.java
    hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/tool/
    hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/tool/TestLineageInfo.java
Modified:
    hadoop/hive/trunk/CHANGES.txt
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DefaultASTEventDispatcher.java

Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=726162&r1=726161&r2=726162&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Fri Dec 12 19:07:25 2008
@@ -6,6 +6,9 @@
 
   NEW FEATURES
 
+    HIVE-147. Add a tool for extracting lineage info from hive sql.
+    (Suresh Antony via zshao)
+
     HIVE-140. Event Based Infrastructure for Syntax Trees in the compiler.
     (Ashish Thusoo through zshao)
 
@@ -36,6 +39,9 @@
 
   BUG FIXES
 
+    HIVE-147. Add a tool for extracting lineage info from hive sql.
+    (Suresh Antony via zshao)
+
     HIVE-114. Drop partition does not delete data for external tables now.
     (Johan Oskarsson via zshao)
 

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DefaultASTEventDispatcher.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DefaultASTEventDispatcher.java?rev=726162&r1=726161&r2=726162&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DefaultASTEventDispatcher.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DefaultASTEventDispatcher.java Fri Dec 12 19:07:25 2008
@@ -41,7 +41,7 @@
 	/**
 	 * Constructs the default event dispatcher
 	 */
-	DefaultASTEventDispatcher() {
+	public  DefaultASTEventDispatcher() {
 		dispatchMap = new HashMap<ASTEvent, ArrayList<ASTEventProcessor>>();
 	}
 	

Added: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/tools/LineageInfo.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/tools/LineageInfo.java?rev=726162&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/tools/LineageInfo.java (added)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/tools/LineageInfo.java Fri Dec 12 19:07:25 2008
@@ -0,0 +1,141 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.hadoop.hive.ql.tools;
+
+import java.io.IOException;
+import java.util.TreeSet;
+
+import org.antlr.runtime.tree.CommonTree;
+import org.apache.hadoop.hive.ql.parse.ASTEvent;
+import org.apache.hadoop.hive.ql.parse.ASTEventProcessor;
+import org.apache.hadoop.hive.ql.parse.DefaultASTEventDispatcher;
+import org.apache.hadoop.hive.ql.parse.DefaultASTProcessor;
+import org.apache.hadoop.hive.ql.parse.HiveParser;
+import org.apache.hadoop.hive.ql.parse.ParseDriver;
+import org.apache.hadoop.hive.ql.parse.ParseException;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+
+/**
+ * 
+ * This class extracts lineage info from hive sql.
+ * It takes a sql statement as input and prints its lineage info.
+ * Currently this covers only the input and output tables of the given sql.
+ * Later it can be expanded to cover join tables etc.
+ *
+ */
+public class LineageInfo  implements ASTEventProcessor {
+
+	/**
+	 * Input tables found by the most recent getLineageInfo call.
+	 */
+	TreeSet<String> inputTableList = new TreeSet<String>();
+	/**
+	 * Output tables found by the most recent getLineageInfo call.
+	 */
+	TreeSet<String> OutputTableList= new TreeSet<String>();
+
+	/**
+	 * Returns the live (not copied) set of input table names.
+	 * @return java.util.TreeSet
+	 */
+	public TreeSet<String> getInputTableList() {
+		return inputTableList;
+	}
+
+	/**
+	 * Returns the live (not copied) set of output table names.
+	 * @return java.util.TreeSet
+	 */
+	public TreeSet<String> getOutputTableList() {
+		return OutputTableList;
+	}
+
+	/**
+	 * Records the table named under a TOK_DESTINATION or TOK_FROM node;
+	 * nodes of any other type are ignored.
+	 * @see org.apache.hadoop.hive.ql.parse.ASTEventProcessor#process(org.antlr.runtime.tree.CommonTree)
+	 */
+	public void process(CommonTree pt) {
+		switch (pt.getToken().getType()) {
+		case HiveParser.TOK_DESTINATION:
+			// only destinations whose first child is TOK_TAB are recorded
+			if (pt.getChild(0).getType() == HiveParser.TOK_TAB) {
+				OutputTableList.add(pt.getChild(0).getChild(0).getText());
+			}
+			break;
+		case HiveParser.TOK_FROM:
+			// the table name is the first child of FROM's first child
+			inputTableList.add(pt.getChild(0).getChild(0).getText());
+			break;
+		}
+	}
+
+	/**
+	 * Parses the given query and collects its lineage info, replacing
+	 * whatever an earlier call collected.
+	 * @param query the hive sql statement to analyze
+	 * @throws ParseException propagated from ParseDriver.parse
+	 */
+	public void getLineageInfo(String query) throws ParseException
+	{
+		// reset first so a failed parse cannot leave stale results behind
+		inputTableList.clear();
+		OutputTableList.clear();
+
+		// get the AST tree, descending past nodes that carry no token
+		ParseDriver pd = new ParseDriver();
+		CommonTree tree = pd.parse(query);
+		while ((tree.getToken() == null) && (tree.getChildCount() > 0)) {
+			tree = (CommonTree) tree.getChild(0);
+		}
+
+		// register this object for the SRC_TABLE and DESTINATION events,
+		// then run the event processor over the tree
+		DefaultASTEventDispatcher dispatcher = new DefaultASTEventDispatcher();
+		dispatcher.register(ASTEvent.SRC_TABLE, this);
+		dispatcher.register(ASTEvent.DESTINATION, this);
+		DefaultASTProcessor eventProcessor = new DefaultASTProcessor();
+		eventProcessor.setDispatcher(dispatcher);
+		eventProcessor.process(tree);
+	}
+
+	/**
+	 * Prints the input and output tables of the query passed as args[0].
+	 * @param args args[0] is the hive sql statement to analyze
+	 */
+	public static void main(String[] args) throws IOException, ParseException,
+	SemanticException {
+		// a missing argument gets a usage message, not an exception
+		if (args == null || args.length < 1) {
+			System.err.println("Usage: LineageInfo <query>");
+			System.exit(1);
+		}
+
+		LineageInfo lep = new LineageInfo();
+		lep.getLineageInfo(args[0]);
+
+		for (String tab : lep.getInputTableList()) {
+			System.out.println("InputTable=" + tab);
+		}
+		for (String tab : lep.getOutputTableList()) {
+			System.out.println("OutputTable=" + tab);
+		}
+	}
+}

Added: hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/tool/TestLineageInfo.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/tool/TestLineageInfo.java?rev=726162&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/tool/TestLineageInfo.java (added)
+++ hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/tool/TestLineageInfo.java Fri Dec 12 19:07:25 2008
@@ -0,0 +1,125 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.tool;
+
+import java.util.TreeSet;
+import java.util.Vector;
+
+import org.apache.hadoop.hive.ql.tools.LineageInfo;
+
+import junit.framework.TestCase;
+
+public class TestLineageInfo extends TestCase {
+
+	public void testSimpleQuery(){
+		LineageInfo lep = new LineageInfo();
+		try{
+			lep.getLineageInfo(
+			"INSERT OVERWRITE TABLE dest1 partition (ds = '111')  SELECT s.* FROM srcpart TABLESAMPLE (BUCKET 1 OUT OF 1) s WHERE s.ds='2008-04-08' and s.hr='11'");
+			TreeSet<String> i = new TreeSet<String>();
+			TreeSet<String> o = new TreeSet<String>();
+			 i.add("srcpart");
+			 o.add("dest1");
+			 if ( !i.equals(lep.getInputTableList())){
+				 fail("Input table not same");
+			 }
+			 if (! o.equals(lep.getOutputTableList())){
+				 fail("Output table not same");
+			 }
+						 
+		}
+		catch (Exception e) {
+			e.printStackTrace();
+			fail("Failed");
+		}
+					
+	}
+	
+	public void testSimpleQuery2(){
+		LineageInfo lep = new LineageInfo();
+		try{
+			lep.getLineageInfo(
+			"FROM (FROM src select src.key, src.value WHERE src.key < 10 UNION ALL FROM src SELECT src.* WHERE src.key > 10 ) unioninput INSERT OVERWRITE DIRECTORY '../../../../build/contrib/hive/ql/test/data/warehouse/union.out' SELECT unioninput.*"
+					);
+			TreeSet<String> i = new TreeSet<String>();
+			TreeSet<String> o = new TreeSet<String>();
+			 i.add("src");
+			 
+			 if ( !i.equals(lep.getInputTableList())){
+				 fail("Input table not same");
+			 }
+			 if (! o.equals(lep.getOutputTableList())){
+				 fail("Output table not same");
+			 }
+			 
+		}
+		catch (Exception e) {
+			e.printStackTrace();
+			fail("Failed");
+		}
+					
+	}
+	
+	public void testSimpleQuery3(){
+		LineageInfo lep = new LineageInfo();
+		try{
+			lep.getLineageInfo(
+			"FROM (FROM src select src.key, src.value WHERE src.key < 10 UNION ALL FROM src1 SELECT src1.* WHERE src1.key > 10 ) unioninput INSERT OVERWRITE DIRECTORY '../../../../build/contrib/hive/ql/test/data/warehouse/union.out' SELECT unioninput.*"
+					);
+			TreeSet<String> i = new TreeSet<String>();
+			TreeSet<String> o = new TreeSet<String>();
+			 i.add("src");
+			 i.add("src1");
+			 if ( !i.equals(lep.getInputTableList())){
+				 fail("Input table not same");
+			 }
+			 if (! o.equals(lep.getOutputTableList())){
+				 fail("Output table not same");
+			 }
+			 
+		}
+		catch (Exception e) {
+			e.printStackTrace();
+			fail("Failed");
+		}
+			
+	}
+	
+	public void testSimpleQuery4(){
+		LineageInfo lep = new LineageInfo();
+		try{
+			lep.getLineageInfo(
+					"FROM ( FROM ( FROM src1 src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20) a RIGHT OUTER JOIN ( FROM src2 src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25) b ON (a.c1 = b.c3) SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4) c SELECT c.c1, c.c2, c.c3, c.c4"					);
+			TreeSet<String> i = new TreeSet<String>();
+			TreeSet<String> o = new TreeSet<String>();
+			 i.add("src1");
+			 i.add("src2");
+			 if ( !i.equals(lep.getInputTableList())){
+				 fail("Input table not same");
+			 }
+			 if (! o.equals(lep.getOutputTableList())){
+				 fail("Output table not same");
+			 } 
+		}
+		catch (Exception e) {
+			e.printStackTrace();
+			fail("Failed");
+		}		
+	}
+}