You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by zs...@apache.org on 2008/12/13 04:07:28 UTC
svn commit: r726162 - in /hadoop/hive/trunk: ./
ql/src/java/org/apache/hadoop/hive/ql/parse/
ql/src/java/org/apache/hadoop/hive/ql/tools/
ql/src/test/org/apache/hadoop/hive/ql/tool/
Author: zshao
Date: Fri Dec 12 19:07:25 2008
New Revision: 726162
URL: http://svn.apache.org/viewvc?rev=726162&view=rev
Log:
HIVE-147. Add a tool for extracting lineage info from hive sql.
(Suresh Antony via zshao)
Added:
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/tools/
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/tools/LineageInfo.java
hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/tool/
hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/tool/TestLineageInfo.java
Modified:
hadoop/hive/trunk/CHANGES.txt
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DefaultASTEventDispatcher.java
Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=726162&r1=726161&r2=726162&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Fri Dec 12 19:07:25 2008
@@ -6,6 +6,9 @@
NEW FEATURES
+ HIVE-147. Add a tool for extracting lineage info from hive sql.
+ (Suresh Antony via zshao)
+
HIVE-140. Event Based Infrastructure for Syntax Trees in the compiler.
(Ashish Thusoo through zshao)
@@ -36,6 +39,9 @@
BUG FIXES
+ HIVE-147. Add a tool for extracting lineage info from hive sql.
+ (Suresh Antony via zshao)
+
HIVE-114. Drop partition does not delete data for external tables now.
(Johan Oskarsson via zshao)
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DefaultASTEventDispatcher.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DefaultASTEventDispatcher.java?rev=726162&r1=726161&r2=726162&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DefaultASTEventDispatcher.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DefaultASTEventDispatcher.java Fri Dec 12 19:07:25 2008
@@ -41,7 +41,7 @@
/**
* Constructs the default event dispatcher
*/
- DefaultASTEventDispatcher() {
+ public DefaultASTEventDispatcher() {
dispatchMap = new HashMap<ASTEvent, ArrayList<ASTEventProcessor>>();
}
Added: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/tools/LineageInfo.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/tools/LineageInfo.java?rev=726162&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/tools/LineageInfo.java (added)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/tools/LineageInfo.java Fri Dec 12 19:07:25 2008
@@ -0,0 +1,152 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.hadoop.hive.ql.tools;
+
+import java.io.IOException;
+import java.util.TreeSet;
+
+import org.antlr.runtime.tree.CommonTree;
+import org.apache.hadoop.hive.ql.parse.ASTEvent;
+import org.apache.hadoop.hive.ql.parse.ASTEventProcessor;
+import org.apache.hadoop.hive.ql.parse.DefaultASTEventDispatcher;
+import org.apache.hadoop.hive.ql.parse.DefaultASTProcessor;
+import org.apache.hadoop.hive.ql.parse.HiveParser;
+import org.apache.hadoop.hive.ql.parse.ParseDriver;
+import org.apache.hadoop.hive.ql.parse.ParseException;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+
+/**
+ * Extracts lineage info from a Hive SQL statement.
+ * It takes sql as input and collects the input tables and the output tables
+ * of that statement; main() prints both sets.
+ * Currently only input and output tables are reported.
+ * Later we can expand to add join tables etc.
+ */
+public class LineageInfo implements ASTEventProcessor {
+
+  /** Input tables of the most recently analyzed query, in sorted order. */
+  private final TreeSet<String> inputTableList = new TreeSet<String>();
+
+  /** Output tables of the most recently analyzed query, in sorted order. */
+  private final TreeSet<String> outputTableList = new TreeSet<String>();
+
+  /**
+   * Returns the input tables gathered by the last getLineageInfo call.
+   *
+   * @return the internal set itself, not a copy; the next getLineageInfo
+   *         call clears and refills it
+   */
+  public TreeSet<String> getInputTableList() {
+    return inputTableList;
+  }
+
+  /**
+   * Returns the output tables gathered by the last getLineageInfo call.
+   *
+   * @return the internal set itself, not a copy; the next getLineageInfo
+   *         call clears and refills it
+   */
+  public TreeSet<String> getOutputTableList() {
+    return outputTableList;
+  }
+
+  /**
+   * Records the table named under a TOK_DESTINATION or TOK_FROM node. Nodes
+   * of any other type, or without a child and grandchild, are ignored.
+   *
+   * @param pt the AST node to inspect
+   * @see org.apache.hadoop.hive.ql.parse.ASTEventProcessor#process(org.antlr.runtime.tree.CommonTree)
+   */
+  public void process(CommonTree pt) {
+    // getChild(0) is null on a childless node; skip it rather than hit an NPE.
+    if (pt.getChildCount() == 0 || pt.getChild(0).getChildCount() == 0) {
+      return;
+    }
+
+    // getType() tolerates a node without a token, unlike getToken().getType().
+    switch (pt.getType()) {
+    case HiveParser.TOK_DESTINATION:
+      // Only a TOK_TAB destination contributes an output table.
+      if (pt.getChild(0).getType() == HiveParser.TOK_TAB) {
+        outputTableList.add(pt.getChild(0).getChild(0).getText());
+      }
+      break;
+    case HiveParser.TOK_FROM:
+      inputTableList.add(pt.getChild(0).getChild(0).getText());
+      break;
+    default:
+      break;
+    }
+  }
+
+  /**
+   * Parses the given query and collects its input and output tables,
+   * replacing whatever an earlier call collected.
+   *
+   * @param query the Hive SQL text to analyze
+   * @throws ParseException if the query cannot be parsed
+   */
+  public void getLineageInfo(String query) throws ParseException {
+    // Reset up front so a failed parse cannot leave an earlier query's tables.
+    inputTableList.clear();
+    outputTableList.clear();
+
+    ParseDriver pd = new ParseDriver();
+    CommonTree tree = pd.parse(query);
+
+    // Descend past leading nodes that carry no token.
+    while ((tree.getToken() == null) && (tree.getChildCount() > 0)) {
+      tree = (CommonTree) tree.getChild(0);
+    }
+
+    // Subscribe this object to the source-table and destination events.
+    DefaultASTEventDispatcher dispatcher = new DefaultASTEventDispatcher();
+    dispatcher.register(ASTEvent.SRC_TABLE, this);
+    dispatcher.register(ASTEvent.DESTINATION, this);
+
+    DefaultASTProcessor eventProcessor = new DefaultASTProcessor();
+    eventProcessor.setDispatcher(dispatcher);
+    eventProcessor.process(tree);
+  }
+
+  /**
+   * Prints the input and output tables of the query given as args[0].
+   * Without any argument a usage line goes to stderr and the JVM exits
+   * with status 1.
+   *
+   * @param args command-line arguments; the first one is the query text
+   * @throws ParseException if the query cannot be parsed
+   */
+  public static void main(String[] args) throws IOException, ParseException,
+      SemanticException {
+    if (args == null || args.length == 0) {
+      System.err.println("Usage: LineageInfo <query>");
+      System.exit(1);
+    }
+
+    LineageInfo lep = new LineageInfo();
+    lep.getLineageInfo(args[0]);
+
+    for (String tab : lep.getInputTableList()) {
+      System.out.println("InputTable=" + tab);
+    }
+    for (String tab : lep.getOutputTableList()) {
+      System.out.println("OutputTable=" + tab);
+    }
+  }
+}
Added: hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/tool/TestLineageInfo.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/tool/TestLineageInfo.java?rev=726162&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/tool/TestLineageInfo.java (added)
+++ hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/tool/TestLineageInfo.java Fri Dec 12 19:07:25 2008
@@ -0,0 +1,108 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.tool;
+
+import java.util.TreeSet;
+import java.util.Vector;
+
+import org.apache.hadoop.hive.ql.tools.LineageInfo;
+
+import junit.framework.TestCase;
+
+/**
+ * Tests that LineageInfo reports the expected input and output tables.
+ */
+public class TestLineageInfo extends TestCase {
+
+  /**
+   * Builds the sorted set of table names a test expects.
+   *
+   * @param names the expected table names; pass none for an empty set
+   * @return a new set holding exactly the given names
+   */
+  private static TreeSet<String> tables(String... names) {
+    TreeSet<String> set = new TreeSet<String>();
+    for (String name : names) {
+      set.add(name);
+    }
+    return set;
+  }
+
+  /**
+   * Runs LineageInfo on the query and asserts the tables it reports.
+   * Exceptions are left to propagate so that JUnit reports the real cause
+   * instead of a bare "Failed".
+   *
+   * @param query the Hive SQL text to analyze
+   * @param inputs the input tables the query is expected to yield
+   * @param outputs the output tables the query is expected to yield
+   * @throws Exception if LineageInfo fails on the query
+   */
+  private static void checkLineage(String query, TreeSet<String> inputs,
+      TreeSet<String> outputs) throws Exception {
+    LineageInfo lep = new LineageInfo();
+    lep.getLineageInfo(query);
+    // assertEquals keeps the original messages and adds the differing sets.
+    assertEquals("Input table not same", inputs, lep.getInputTableList());
+    assertEquals("Output table not same", outputs, lep.getOutputTableList());
+  }
+
+  /** INSERT into a table partition from a sampled table. */
+  public void testSimpleQuery() throws Exception {
+    checkLineage(
+        "INSERT OVERWRITE TABLE dest1 partition (ds = '111') "
+        + "SELECT s.* FROM srcpart TABLESAMPLE (BUCKET 1 OUT OF 1) s "
+        + "WHERE s.ds='2008-04-08' and s.hr='11'",
+        tables("srcpart"), tables("dest1"));
+  }
+
+  /** UNION ALL of the same table twice, written to a directory. */
+  public void testSimpleQuery2() throws Exception {
+    checkLineage(
+        "FROM (FROM src select src.key, src.value WHERE src.key < 10 "
+        + "UNION ALL FROM src SELECT src.* WHERE src.key > 10 ) unioninput "
+        + "INSERT OVERWRITE DIRECTORY "
+        + "'../../../../build/contrib/hive/ql/test/data/warehouse/union.out' "
+        + "SELECT unioninput.*",
+        tables("src"), tables());
+  }
+
+  /** UNION ALL of two different tables, written to a directory. */
+  public void testSimpleQuery3() throws Exception {
+    checkLineage(
+        "FROM (FROM src select src.key, src.value WHERE src.key < 10 "
+        + "UNION ALL FROM src1 SELECT src1.* WHERE src1.key > 10 ) unioninput "
+        + "INSERT OVERWRITE DIRECTORY "
+        + "'../../../../build/contrib/hive/ql/test/data/warehouse/union.out' "
+        + "SELECT unioninput.*",
+        tables("src", "src1"), tables());
+  }
+
+  /** RIGHT OUTER JOIN of two subqueries with no INSERT clause. */
+  public void testSimpleQuery4() throws Exception {
+    checkLineage(
+        "FROM ( FROM ( FROM src1 src1 SELECT src1.key AS c1, src1.value AS c2 "
+        + "WHERE src1.key > 10 and src1.key < 20) a "
+        + "RIGHT OUTER JOIN ( FROM src2 src2 SELECT src2.key AS c3, src2.value AS c4 "
+        + "WHERE src2.key > 15 and src2.key < 25) b ON (a.c1 = b.c3) "
+        + "SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4) c "
+        + "SELECT c.c1, c.c2, c.c3, c.c4",
+        tables("src1", "src2"), tables());
+  }
+}