Posted to commits@hive.apache.org by js...@apache.org on 2011/03/21 22:55:19 UTC

svn commit: r1083981 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/ java/org/apache/hadoop/hive/ql/hooks/ test/org/apache/hadoop/hive/ql/hooks/ test/queries/clientpositive/ test/results/clientpositive/

Author: jssarma
Date: Mon Mar 21 21:55:18 2011
New Revision: 1083981

URL: http://svn.apache.org/viewvc?rev=1083981&view=rev
Log:
HIVE-2052 - Add a flag to the PreHook/PostHook API indicating whether a hook is running pre- or post-execution, plus a cache for content summaries (Siying Dong via jssarma)

Added:
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/hooks/VerifyContentSummaryCacheHook.java
    hive/trunk/ql/src/test/queries/clientpositive/hook_context_cs.q
    hive/trunk/ql/src/test/results/clientpositive/hook_context_cs.q.out
Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/Context.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/hooks/HookContext.java
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/hooks/PostExecutePrinter.java
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/hooks/PreExecutePrinter.java

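Taken together, the change lets a single ExecuteWithHookContext implementation tell which phase it is running in and read the content summaries cached for the query's input paths. A minimal sketch of such a hook follows (the class name ExampleHook is hypothetical and not part of this commit; the API calls are the ones added below):

    package org.apache.hadoop.hive.ql.hooks;

    import java.util.Map;

    import org.apache.hadoop.fs.ContentSummary;
    import org.apache.hadoop.hive.ql.hooks.HookContext.HookType;

    public class ExampleHook implements ExecuteWithHookContext {
      @Override
      public void run(HookContext hookContext) throws Exception {
        // New in this commit: hooks can branch on the execution phase.
        if (hookContext.getHookType() == HookType.POST_EXEC_HOOK) {
          // Also new: content summaries cached during execution are exposed
          // to hooks, avoiding extra filesystem round trips.
          Map<String, ContentSummary> cs = hookContext.getInputPathToContentSummary();
          for (Map.Entry<String, ContentSummary> e : cs.entrySet()) {
            System.out.println(e.getKey() + " : " + e.getValue().getLength() + " bytes");
          }
        }
      }
    }
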
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/Context.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/Context.java?rev=1083981&r1=1083980&r2=1083981&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/Context.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/Context.java Mon Mar 21 21:55:18 2011
@@ -35,16 +35,16 @@ import java.util.concurrent.ConcurrentHa
 import org.antlr.runtime.TokenRewriteStream;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.ContentSummary;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.ContentSummary;
 import org.apache.hadoop.hive.common.FileUtils;
 import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.util.StringUtils;
 import org.apache.hadoop.hive.ql.lockmgr.HiveLock;
 import org.apache.hadoop.hive.ql.lockmgr.HiveLockManager;
-import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.util.StringUtils;
 
 /**
  * Context for Semantic Analyzers. Usage: not reusable - construct a new one for
@@ -59,7 +59,7 @@ public class Context {
   private int resDirFilesNum;
   boolean initialized;
   String originalTracker = null;
-  private Map<String, ContentSummary> pathToCS = new ConcurrentHashMap<String, ContentSummary>();
+  private final Map<String, ContentSummary> pathToCS = new ConcurrentHashMap<String, ContentSummary>();
 
   // scratch path to use for all non-local (ie. hdfs) file system tmp folders
   private final Path nonLocalScratchPath;
@@ -70,7 +70,7 @@ public class Context {
   // Keeps track of scratch directories created for different scheme/authority
   private final Map<String, String> fsScratchDirs = new HashMap<String, String>();
 
-  private Configuration conf;
+  private final Configuration conf;
   protected int pathid = 10000;
   protected boolean explain = false;
   private TokenRewriteStream tokenRewriteStream;
@@ -144,9 +144,10 @@ public class Context {
         try {
           FileSystem fs = dirPath.getFileSystem(conf);
           dirPath = new Path(fs.makeQualified(dirPath).toString());
-          if (!fs.mkdirs(dirPath))
+          if (!fs.mkdirs(dirPath)) {
             throw new RuntimeException("Cannot make directory: "
                                        + dirPath.toString());
+          }
         } catch (IOException e) {
           throw new RuntimeException (e);
         }
@@ -181,8 +182,9 @@ public class Context {
 
     // if we are executing entirely on the client side - then
     // just (re)use the local scratch directory
-    if(isLocalOnlyExecutionMode())
+    if(isLocalOnlyExecutionMode()) {
       return getLocalScratchDir(!explain);
+    }
 
     try {
       Path dir = FileUtils.makeQualified(nonLocalScratchPath, conf);
@@ -263,10 +265,11 @@ public class Context {
     Path mrbase = new Path(getMRScratchDir());
 
     URI relURI = mrbase.toUri().relativize(o.toUri());
-    if (relURI.equals(o.toUri()))
+    if (relURI.equals(o.toUri())) {
       throw new RuntimeException
         ("Invalid URI: " + originalURI + ", cannot relativize against" +
          mrbase.toString());
+    }
 
     return getLocalScratchDir(!explain) + Path.SEPARATOR +
       relURI.getPath();
@@ -503,6 +506,10 @@ public class Context {
     return pathToCS.get(path);
   }
 
+  public Map<String, ContentSummary> getPathToCS() {
+    return pathToCS;
+  }
+
   public Configuration getConf() {
     return conf;
   }

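The new getPathToCS() accessor exposes the map that already backs Context's per-path lookup, so the Driver can hand the same cache to HookContext. A rough sketch of the caching pattern the map supports (simplified; assumes a Context ctx, a Configuration conf, and an input path string pathStr in scope, with IOException handling omitted — the actual lookup sites live elsewhere in ql and are not part of this diff):

    // Sketch only: the shared map avoids repeated
    // FileSystem.getContentSummary() calls for the same input path.
    Map<String, ContentSummary> cache = ctx.getPathToCS();
    ContentSummary summary = cache.get(pathStr);
    if (summary == null) {
      Path p = new Path(pathStr);
      summary = p.getFileSystem(conf).getContentSummary(p);  // one namenode call
      cache.put(pathStr, summary);                           // reused by later hooks
    }
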
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/Driver.java?rev=1083981&r1=1083980&r2=1083981&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/Driver.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/Driver.java Mon Mar 21 21:55:18 2011
@@ -67,8 +67,8 @@ import org.apache.hadoop.hive.ql.lockmgr
 import org.apache.hadoop.hive.ql.lockmgr.HiveLockMode;
 import org.apache.hadoop.hive.ql.lockmgr.HiveLockObj;
 import org.apache.hadoop.hive.ql.lockmgr.HiveLockObject;
-import org.apache.hadoop.hive.ql.lockmgr.HiveLockObject.HiveLockObjectData;
 import org.apache.hadoop.hive.ql.lockmgr.LockException;
+import org.apache.hadoop.hive.ql.lockmgr.HiveLockObject.HiveLockObjectData;
 import org.apache.hadoop.hive.ql.metadata.AuthorizationException;
 import org.apache.hadoop.hive.ql.metadata.DummyPartition;
 import org.apache.hadoop.hive.ql.metadata.Hive;
@@ -478,7 +478,7 @@ public class Driver implements CommandPr
 
       Map<Table, List<String>> tab2Cols = new HashMap<Table, List<String>>();
       Map<Partition, List<String>> part2Cols = new HashMap<Partition, List<String>>();
-      
+
       Map<String, Boolean> tableUsePartLevelAuth = new HashMap<String, Boolean>();
       for (ReadEntity read : inputs) {
         if (read.getPartition() != null) {
@@ -551,8 +551,8 @@ public class Driver implements CommandPr
           }
         }
       }
-      
-      
+
+
       //cache the results for table authorization
       Set<String> tableAuthChecked = new HashSet<String>();
       for (ReadEntity read : inputs) {
@@ -575,7 +575,7 @@ public class Driver implements CommandPr
         } else if (read.getTable() != null) {
           tbl = read.getTable();
         }
-        
+
         // if we reach here, it means it needs to do a table authorization
         // check, and the table authorization may already happened because of other
         // partitions
@@ -591,7 +591,7 @@ public class Driver implements CommandPr
           tableAuthChecked.add(tbl.getTableName());
         }
       }
-      
+
     }
   }
 
@@ -738,7 +738,7 @@ public class Driver implements CommandPr
 
       ctx.setHiveLockMgr(hiveLockMgr);
       List<HiveLock> hiveLocks = null;
-      
+
       int tryNum = 1;
       do {
 
@@ -755,7 +755,7 @@ public class Driver implements CommandPr
         } catch (InterruptedException e) {
         }
       } while (tryNum < numRetries);
-      
+
       if (hiveLocks == null) {
         throw new SemanticException(ErrorMsg.LOCK_CANNOT_BE_ACQUIRED.getMsg());
       } else {
@@ -935,7 +935,8 @@ public class Driver implements CommandPr
       }
       resStream = null;
 
-      HookContext hookContext = new HookContext(plan, conf);
+      HookContext hookContext = new HookContext(plan, conf, ctx.getPathToCS());
+      hookContext.setHookType(HookContext.HookType.PRE_EXEC_HOOK);
 
       for (Hook peh : getPreExecHooks()) {
         if (peh instanceof ExecuteWithHookContext) {
@@ -1064,6 +1065,7 @@ public class Driver implements CommandPr
         plan.getOutputs().remove(output);
       }
 
+      hookContext.setHookType(HookContext.HookType.POST_EXEC_HOOK);
       // Get all the post execution hooks and execute them.
       for (Hook peh : getPostExecHooks()) {
         if (peh instanceof ExecuteWithHookContext) {

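Because Driver now reuses one HookContext across both phases, calling setHookType() before each hook loop, the same class can be registered as both a pre- and a post-hook and branch on the phase at run time. A configuration sketch (com.example.ExampleHook is hypothetical; the property names are the same ones the test query file below sets):

    HiveConf conf = new HiveConf();
    conf.set("hive.exec.pre.hooks",  "com.example.ExampleHook");
    conf.set("hive.exec.post.hooks", "com.example.ExampleHook");
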
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/hooks/HookContext.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/hooks/HookContext.java?rev=1083981&r1=1083980&r2=1083981&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/hooks/HookContext.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/hooks/HookContext.java Mon Mar 21 21:55:18 2011
@@ -21,8 +21,11 @@ package org.apache.hadoop.hive.ql.hooks;
 
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Map;
 import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
 
+import org.apache.hadoop.fs.ContentSummary;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.QueryPlan;
 import org.apache.hadoop.hive.ql.exec.TaskRunner;
@@ -34,6 +37,11 @@ import org.apache.hadoop.security.UserGr
  * New implemented hook can get the query plan, job conf and the list of all completed tasks from this hook context
  */
 public class HookContext {
+
+  static public enum HookType {
+    PRE_EXEC_HOOK, POST_EXEC_HOOK
+  }
+
   private QueryPlan queryPlan;
   private HiveConf conf;
   private List<TaskRunner> completeTaskList;
@@ -41,11 +49,18 @@ public class HookContext {
   private Set<WriteEntity> outputs;
   private LineageInfo linfo;
   private UserGroupInformation ugi;
-
+  private HookType hookType;
+  final private Map<String, ContentSummary> inputPathToContentSummary;
 
   public HookContext(QueryPlan queryPlan, HiveConf conf) throws Exception{
+    this(queryPlan, conf, new ConcurrentHashMap<String, ContentSummary>());
+  }
+
+  public HookContext(QueryPlan queryPlan, HiveConf conf,
+      Map<String, ContentSummary> inputPathToContentSummary) throws Exception {
     this.queryPlan = queryPlan;
     this.conf = conf;
+    this.inputPathToContentSummary = inputPathToContentSummary;
     completeTaskList = new ArrayList<TaskRunner>();
     inputs = queryPlan.getInputs();
     outputs = queryPlan.getOutputs();
@@ -116,5 +131,16 @@ public class HookContext {
     this.ugi = ugi;
   }
 
+  public Map<String, ContentSummary> getInputPathToContentSummary() {
+    return inputPathToContentSummary;
+  }
+
+  public HookType getHookType() {
+    return hookType;
+  }
+
+  public void setHookType(HookType hookType) {
+    this.hookType = hookType;
+  }
 
 }

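Note that the original two-argument constructor now delegates to the new three-argument one with an empty ConcurrentHashMap, so existing callers compile and behave as before; only the Driver passes the shared cache. In sketch form (plan, conf, and ctx assumed in scope, exception handling omitted):

    // Both construction paths remain valid after this change:
    HookContext standalone = new HookContext(plan, conf);                     // empty cache
    HookContext fromDriver = new HookContext(plan, conf, ctx.getPathToCS());  // shared cache
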
Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/hooks/PostExecutePrinter.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/hooks/PostExecutePrinter.java?rev=1083981&r1=1083980&r2=1083981&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/hooks/PostExecutePrinter.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/hooks/PostExecutePrinter.java Mon Mar 21 21:55:18 2011
@@ -27,6 +27,7 @@ import java.util.Set;
 
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.Partition;
+import org.apache.hadoop.hive.ql.hooks.HookContext.HookType;
 import org.apache.hadoop.hive.ql.hooks.LineageInfo.BaseColumnInfo;
 import org.apache.hadoop.hive.ql.hooks.LineageInfo.Dependency;
 import org.apache.hadoop.hive.ql.hooks.LineageInfo.DependencyKey;
@@ -95,6 +96,7 @@ public class PostExecutePrinter implemen
 
   @Override
   public void run(HookContext hookContext) throws Exception {
+    assert(hookContext.getHookType() == HookType.POST_EXEC_HOOK);
     SessionState ss = SessionState.get();
     Set<ReadEntity> inputs = hookContext.getInputs();
     Set<WriteEntity> outputs = hookContext.getOutputs();

Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/hooks/PreExecutePrinter.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/hooks/PreExecutePrinter.java?rev=1083981&r1=1083980&r2=1083981&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/hooks/PreExecutePrinter.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/hooks/PreExecutePrinter.java Mon Mar 21 21:55:18 2011
@@ -23,6 +23,7 @@ import java.util.Collections;
 import java.util.List;
 import java.util.Set;
 
+import org.apache.hadoop.hive.ql.hooks.HookContext.HookType;
 import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
 import org.apache.hadoop.security.UserGroupInformation;
@@ -35,6 +36,7 @@ public class PreExecutePrinter implement
 
   @Override
   public void run(HookContext hookContext) throws Exception {
+    assert(hookContext.getHookType() == HookType.PRE_EXEC_HOOK);
     SessionState ss = SessionState.get();
     Set<ReadEntity> inputs = hookContext.getInputs();
     Set<WriteEntity> outputs = hookContext.getOutputs();

Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/hooks/VerifyContentSummaryCacheHook.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/hooks/VerifyContentSummaryCacheHook.java?rev=1083981&view=auto
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/hooks/VerifyContentSummaryCacheHook.java (added)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/hooks/VerifyContentSummaryCacheHook.java Mon Mar 21 21:55:18 2011
@@ -0,0 +1,43 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.hooks;
+
+import java.util.Map;
+
+import junit.framework.Assert;
+
+import org.apache.hadoop.fs.ContentSummary;
+import org.apache.hadoop.hive.ql.hooks.HookContext.HookType;
+
+public class VerifyContentSummaryCacheHook implements ExecuteWithHookContext {
+
+  public void run(HookContext hookContext) {
+    Map<String, ContentSummary> inputToCS = hookContext.getInputPathToContentSummary();
+    if (hookContext.getHookType().equals(HookType.PRE_EXEC_HOOK)) {
+      Assert.assertEquals(0, inputToCS.size());
+    } else {
+      Assert.assertEquals(1, inputToCS.size());
+      for (String key : inputToCS.keySet()) {
+        if (!key.equals("/tmp/VerifyContentSummaryCacheHook") &&
+            !key.equals("pfile:/tmp/VerifyContentSummaryCacheHook")) {
+          Assert.fail("VerifyContentSummaryCacheHook fails the input path check");
+        }
+      }
+    }
+  }
+}

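The assertions encode the expected cache lifecycle: a pre-hook runs before any content summaries have been computed, so the map is still empty, while by post-hook time the single input partition at /tmp/VerifyContentSummaryCacheHook has been summarized exactly once. The hook accepts either the bare path or its pfile: form, depending on how the test filesystem qualifies the partition location.
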
Added: hive/trunk/ql/src/test/queries/clientpositive/hook_context_cs.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/hook_context_cs.q?rev=1083981&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/hook_context_cs.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/hook_context_cs.q Mon Mar 21 21:55:18 2011
@@ -0,0 +1,14 @@
+drop table vcsc;
+CREATE TABLE vcsc (c STRING) PARTITIONED BY (ds STRING);
+ALTER TABLE vcsc ADD partition (ds='dummy') location '/tmp/VerifyContentSummaryCacheHook';
+
+set hive.exec.pre.hooks=org.apache.hadoop.hive.ql.hooks.VerifyContentSummaryCacheHook;
+SELECT a.c, b.c FROM vcsc a JOIN vcsc b ON a.ds = 'dummy' AND b.ds = 'dummy' AND a.c = b.c;
+
+set mapred.job.tracker=local;
+set hive.exec.pre.hooks = ;
+set hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.VerifyContentSummaryCacheHook;
+SELECT a.c, b.c FROM vcsc a JOIN vcsc b ON a.ds = 'dummy' AND b.ds = 'dummy' AND a.c = b.c;
+
+set hive.exec.post.hooks=;
+drop table vcsc;

Added: hive/trunk/ql/src/test/results/clientpositive/hook_context_cs.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/hook_context_cs.q.out?rev=1083981&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/hook_context_cs.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/hook_context_cs.q.out Mon Mar 21 21:55:18 2011
@@ -0,0 +1,20 @@
+PREHOOK: query: drop table vcsc
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table vcsc
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE vcsc (c STRING) PARTITIONED BY (ds STRING)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE vcsc (c STRING) PARTITIONED BY (ds STRING)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@vcsc
+PREHOOK: query: ALTER TABLE vcsc ADD partition (ds='dummy') location '/tmp/VerifyContentSummaryCacheHook'
+PREHOOK: type: ALTERTABLE_ADDPARTS
+PREHOOK: Input: default@vcsc
+POSTHOOK: query: ALTER TABLE vcsc ADD partition (ds='dummy') location '/tmp/VerifyContentSummaryCacheHook'
+POSTHOOK: type: ALTERTABLE_ADDPARTS
+POSTHOOK: Input: default@vcsc
+POSTHOOK: Output: default@vcsc@ds=dummy
+POSTHOOK: query: SELECT a.c, b.c FROM vcsc a JOIN vcsc b ON a.ds = 'dummy' AND b.ds = 'dummy' AND a.c = b.c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@vcsc@ds=dummy
+POSTHOOK: Output: file:/tmp/sdong/hive_2011-03-14_04-46-27_374_2422598099938727948/-mr-10000