You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by js...@apache.org on 2011/03/21 22:55:19 UTC
svn commit: r1083981 - in /hive/trunk/ql/src:
java/org/apache/hadoop/hive/ql/ java/org/apache/hadoop/hive/ql/hooks/
test/org/apache/hadoop/hive/ql/hooks/ test/queries/clientpositive/
test/results/clientpositive/
Author: jssarma
Date: Mon Mar 21 21:55:18 2011
New Revision: 1083981
URL: http://svn.apache.org/viewvc?rev=1083981&view=rev
Log:
HIVE-2052 - PostHook and PreHook API to add flag to indicate it is pre or post hook plus cache for content summary (Siying Dong via jssarma)
Added:
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/hooks/VerifyContentSummaryCacheHook.java
hive/trunk/ql/src/test/queries/clientpositive/hook_context_cs.q
hive/trunk/ql/src/test/results/clientpositive/hook_context_cs.q.out
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/Context.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/hooks/HookContext.java
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/hooks/PostExecutePrinter.java
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/hooks/PreExecutePrinter.java
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/Context.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/Context.java?rev=1083981&r1=1083980&r2=1083981&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/Context.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/Context.java Mon Mar 21 21:55:18 2011
@@ -35,16 +35,16 @@ import java.util.concurrent.ConcurrentHa
import org.antlr.runtime.TokenRewriteStream;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.hive.ql.lockmgr.HiveLock;
import org.apache.hadoop.hive.ql.lockmgr.HiveLockManager;
-import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.util.StringUtils;
/**
* Context for Semantic Analyzers. Usage: not reusable - construct a new one for
@@ -59,7 +59,7 @@ public class Context {
private int resDirFilesNum;
boolean initialized;
String originalTracker = null;
- private Map<String, ContentSummary> pathToCS = new ConcurrentHashMap<String, ContentSummary>();
+ private final Map<String, ContentSummary> pathToCS = new ConcurrentHashMap<String, ContentSummary>();
// scratch path to use for all non-local (ie. hdfs) file system tmp folders
private final Path nonLocalScratchPath;
@@ -70,7 +70,7 @@ public class Context {
// Keeps track of scratch directories created for different scheme/authority
private final Map<String, String> fsScratchDirs = new HashMap<String, String>();
- private Configuration conf;
+ private final Configuration conf;
protected int pathid = 10000;
protected boolean explain = false;
private TokenRewriteStream tokenRewriteStream;
@@ -144,9 +144,10 @@ public class Context {
try {
FileSystem fs = dirPath.getFileSystem(conf);
dirPath = new Path(fs.makeQualified(dirPath).toString());
- if (!fs.mkdirs(dirPath))
+ if (!fs.mkdirs(dirPath)) {
throw new RuntimeException("Cannot make directory: "
+ dirPath.toString());
+ }
} catch (IOException e) {
throw new RuntimeException (e);
}
@@ -181,8 +182,9 @@ public class Context {
// if we are executing entirely on the client side - then
// just (re)use the local scratch directory
- if(isLocalOnlyExecutionMode())
+ if(isLocalOnlyExecutionMode()) {
return getLocalScratchDir(!explain);
+ }
try {
Path dir = FileUtils.makeQualified(nonLocalScratchPath, conf);
@@ -263,10 +265,11 @@ public class Context {
Path mrbase = new Path(getMRScratchDir());
URI relURI = mrbase.toUri().relativize(o.toUri());
- if (relURI.equals(o.toUri()))
+ if (relURI.equals(o.toUri())) {
throw new RuntimeException
("Invalid URI: " + originalURI + ", cannot relativize against" +
mrbase.toString());
+ }
return getLocalScratchDir(!explain) + Path.SEPARATOR +
relURI.getPath();
@@ -503,6 +506,10 @@ public class Context {
return pathToCS.get(path);
}
+ public Map<String, ContentSummary> getPathToCS() { // accessor for the path -> ContentSummary cache held by this Context
+ return pathToCS; // hands out the field itself (no copy), so the caller shares this Context's map instance
+ }
+
public Configuration getConf() {
return conf;
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/Driver.java?rev=1083981&r1=1083980&r2=1083981&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/Driver.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/Driver.java Mon Mar 21 21:55:18 2011
@@ -67,8 +67,8 @@ import org.apache.hadoop.hive.ql.lockmgr
import org.apache.hadoop.hive.ql.lockmgr.HiveLockMode;
import org.apache.hadoop.hive.ql.lockmgr.HiveLockObj;
import org.apache.hadoop.hive.ql.lockmgr.HiveLockObject;
-import org.apache.hadoop.hive.ql.lockmgr.HiveLockObject.HiveLockObjectData;
import org.apache.hadoop.hive.ql.lockmgr.LockException;
+import org.apache.hadoop.hive.ql.lockmgr.HiveLockObject.HiveLockObjectData;
import org.apache.hadoop.hive.ql.metadata.AuthorizationException;
import org.apache.hadoop.hive.ql.metadata.DummyPartition;
import org.apache.hadoop.hive.ql.metadata.Hive;
@@ -478,7 +478,7 @@ public class Driver implements CommandPr
Map<Table, List<String>> tab2Cols = new HashMap<Table, List<String>>();
Map<Partition, List<String>> part2Cols = new HashMap<Partition, List<String>>();
-
+
Map<String, Boolean> tableUsePartLevelAuth = new HashMap<String, Boolean>();
for (ReadEntity read : inputs) {
if (read.getPartition() != null) {
@@ -551,8 +551,8 @@ public class Driver implements CommandPr
}
}
}
-
-
+
+
//cache the results for table authorization
Set<String> tableAuthChecked = new HashSet<String>();
for (ReadEntity read : inputs) {
@@ -575,7 +575,7 @@ public class Driver implements CommandPr
} else if (read.getTable() != null) {
tbl = read.getTable();
}
-
+
// if we reach here, it means it needs to do a table authorization
// check, and the table authorization may already happened because of other
// partitions
@@ -591,7 +591,7 @@ public class Driver implements CommandPr
tableAuthChecked.add(tbl.getTableName());
}
}
-
+
}
}
@@ -738,7 +738,7 @@ public class Driver implements CommandPr
ctx.setHiveLockMgr(hiveLockMgr);
List<HiveLock> hiveLocks = null;
-
+
int tryNum = 1;
do {
@@ -755,7 +755,7 @@ public class Driver implements CommandPr
} catch (InterruptedException e) {
}
} while (tryNum < numRetries);
-
+
if (hiveLocks == null) {
throw new SemanticException(ErrorMsg.LOCK_CANNOT_BE_ACQUIRED.getMsg());
} else {
@@ -935,7 +935,8 @@ public class Driver implements CommandPr
}
resStream = null;
- HookContext hookContext = new HookContext(plan, conf);
+ HookContext hookContext = new HookContext(plan, conf, ctx.getPathToCS());
+ hookContext.setHookType(HookContext.HookType.PRE_EXEC_HOOK);
for (Hook peh : getPreExecHooks()) {
if (peh instanceof ExecuteWithHookContext) {
@@ -1064,6 +1065,7 @@ public class Driver implements CommandPr
plan.getOutputs().remove(output);
}
+ hookContext.setHookType(HookContext.HookType.POST_EXEC_HOOK);
// Get all the post execution hooks and execute them.
for (Hook peh : getPostExecHooks()) {
if (peh instanceof ExecuteWithHookContext) {
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/hooks/HookContext.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/hooks/HookContext.java?rev=1083981&r1=1083980&r2=1083981&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/hooks/HookContext.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/hooks/HookContext.java Mon Mar 21 21:55:18 2011
@@ -21,8 +21,11 @@ package org.apache.hadoop.hive.ql.hooks;
import java.util.ArrayList;
import java.util.List;
+import java.util.Map;
import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.QueryPlan;
import org.apache.hadoop.hive.ql.exec.TaskRunner;
@@ -34,6 +37,11 @@ import org.apache.hadoop.security.UserGr
* New implemented hook can get the query plan, job conf and the list of all completed tasks from this hook context
*/
public class HookContext {
+
+ static public enum HookType { // identifies which hook phase a HookContext is currently being used for
+ PRE_EXEC_HOOK, POST_EXEC_HOOK // pre-execution hooks vs. post-execution hooks
+ }
+
private QueryPlan queryPlan;
private HiveConf conf;
private List<TaskRunner> completeTaskList;
@@ -41,11 +49,18 @@ public class HookContext {
private Set<WriteEntity> outputs;
private LineageInfo linfo;
private UserGroupInformation ugi;
-
+ private HookType hookType;
+ final private Map<String, ContentSummary> inputPathToContentSummary;
public HookContext(QueryPlan queryPlan, HiveConf conf) throws Exception{
+ this(queryPlan, conf, new ConcurrentHashMap<String, ContentSummary>()); // delegate to the three-argument constructor with a fresh, empty content-summary map
+ }
+
+ public HookContext(QueryPlan queryPlan, HiveConf conf,
+ Map<String, ContentSummary> inputPathToContentSummary) throws Exception {
this.queryPlan = queryPlan;
this.conf = conf;
+ this.inputPathToContentSummary = inputPathToContentSummary;
completeTaskList = new ArrayList<TaskRunner>();
inputs = queryPlan.getInputs();
outputs = queryPlan.getOutputs();
@@ -116,5 +131,16 @@ public class HookContext {
this.ugi = ugi;
}
+ public Map<String, ContentSummary> getInputPathToContentSummary() { // content-summary map this context was constructed with
+ return inputPathToContentSummary; // returned as-is (no defensive copy)
+ }
+
+ public HookType getHookType() { // current hook phase, as last stored by setHookType
+ return hookType; // NOTE(review): field has no initializer and the visible constructor lines do not assign it - presumably null until setHookType is called; confirm
+ }
+
+ public void setHookType(HookType hookType) { // records which hook phase is about to run
+ this.hookType = hookType; // plain overwrite; the same context object can be switched from one phase to the other
+ }
}
Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/hooks/PostExecutePrinter.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/hooks/PostExecutePrinter.java?rev=1083981&r1=1083980&r2=1083981&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/hooks/PostExecutePrinter.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/hooks/PostExecutePrinter.java Mon Mar 21 21:55:18 2011
@@ -27,6 +27,7 @@ import java.util.Set;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.Partition;
+import org.apache.hadoop.hive.ql.hooks.HookContext.HookType;
import org.apache.hadoop.hive.ql.hooks.LineageInfo.BaseColumnInfo;
import org.apache.hadoop.hive.ql.hooks.LineageInfo.Dependency;
import org.apache.hadoop.hive.ql.hooks.LineageInfo.DependencyKey;
@@ -95,6 +96,7 @@ public class PostExecutePrinter implemen
@Override
public void run(HookContext hookContext) throws Exception {
+ assert(hookContext.getHookType() == HookType.POST_EXEC_HOOK);
SessionState ss = SessionState.get();
Set<ReadEntity> inputs = hookContext.getInputs();
Set<WriteEntity> outputs = hookContext.getOutputs();
Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/hooks/PreExecutePrinter.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/hooks/PreExecutePrinter.java?rev=1083981&r1=1083980&r2=1083981&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/hooks/PreExecutePrinter.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/hooks/PreExecutePrinter.java Mon Mar 21 21:55:18 2011
@@ -23,6 +23,7 @@ import java.util.Collections;
import java.util.List;
import java.util.Set;
+import org.apache.hadoop.hive.ql.hooks.HookContext.HookType;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
import org.apache.hadoop.security.UserGroupInformation;
@@ -35,6 +36,7 @@ public class PreExecutePrinter implement
@Override
public void run(HookContext hookContext) throws Exception {
+ assert(hookContext.getHookType() == HookType.PRE_EXEC_HOOK);
SessionState ss = SessionState.get();
Set<ReadEntity> inputs = hookContext.getInputs();
Set<WriteEntity> outputs = hookContext.getOutputs();
Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/hooks/VerifyContentSummaryCacheHook.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/hooks/VerifyContentSummaryCacheHook.java?rev=1083981&view=auto
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/hooks/VerifyContentSummaryCacheHook.java (added)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/hooks/VerifyContentSummaryCacheHook.java Mon Mar 21 21:55:18 2011
@@ -0,0 +1,43 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.hooks;
+
+import java.util.Map;
+
+import junit.framework.Assert;
+
+import org.apache.hadoop.fs.ContentSummary;
+import org.apache.hadoop.hive.ql.hooks.HookContext.HookType;
+
+public class VerifyContentSummaryCacheHook implements ExecuteWithHookContext { // test hook: asserts on the content-summary map exposed through HookContext
+
+ public void run(HookContext hookContext) { // expectations depend on whether the context is marked as pre- or post-execution
+ Map<String, ContentSummary> inputToCS = hookContext.getInputPathToContentSummary();
+ if (hookContext.getHookType().equals(HookType.PRE_EXEC_HOOK)) { // NOTE(review): throws NullPointerException if getHookType() returns null
+ Assert.assertEquals(0, inputToCS.size()); // as a pre-execution hook: the map must still be empty
+ } else {
+ Assert.assertEquals(1, inputToCS.size()); // any other hook type: exactly one path must be present
+ for (String key : inputToCS.keySet()) {
+ if (!key.equals("/tmp/VerifyContentSummaryCacheHook") && // accept the bare path ...
+ !key.equals("pfile:/tmp/VerifyContentSummaryCacheHook")) { // ... or the same path carrying the pfile: scheme
+ Assert.fail("VerifyContentSummaryCacheHook fails the input path check"); // any other key fails the test
+ }
+ }
+ }
+ }
+}
Added: hive/trunk/ql/src/test/queries/clientpositive/hook_context_cs.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/hook_context_cs.q?rev=1083981&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/hook_context_cs.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/hook_context_cs.q Mon Mar 21 21:55:18 2011
@@ -0,0 +1,14 @@
+drop table vcsc;
+CREATE TABLE vcsc (c STRING) PARTITIONED BY (ds STRING);
+ALTER TABLE vcsc ADD partition (ds='dummy') location '/tmp/VerifyContentSummaryCacheHook';
+
+set hive.exec.pre.hooks=org.apache.hadoop.hive.ql.hooks.VerifyContentSummaryCacheHook;
+SELECT a.c, b.c FROM vcsc a JOIN vcsc b ON a.ds = 'dummy' AND b.ds = 'dummy' AND a.c = b.c;
+
+set mapred.job.tracker=local;
+set hive.exec.pre.hooks = ;
+set hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.VerifyContentSummaryCacheHook;
+SELECT a.c, b.c FROM vcsc a JOIN vcsc b ON a.ds = 'dummy' AND b.ds = 'dummy' AND a.c = b.c;
+
+set hive.exec.post.hooks=;
+drop table vcsc;
Added: hive/trunk/ql/src/test/results/clientpositive/hook_context_cs.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/hook_context_cs.q.out?rev=1083981&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/hook_context_cs.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/hook_context_cs.q.out Mon Mar 21 21:55:18 2011
@@ -0,0 +1,20 @@
+PREHOOK: query: drop table vcsc
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table vcsc
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE vcsc (c STRING) PARTITIONED BY (ds STRING)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE vcsc (c STRING) PARTITIONED BY (ds STRING)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@vcsc
+PREHOOK: query: ALTER TABLE vcsc ADD partition (ds='dummy') location '/tmp/VerifyContentSummaryCacheHook'
+PREHOOK: type: ALTERTABLE_ADDPARTS
+PREHOOK: Input: default@vcsc
+POSTHOOK: query: ALTER TABLE vcsc ADD partition (ds='dummy') location '/tmp/VerifyContentSummaryCacheHook'
+POSTHOOK: type: ALTERTABLE_ADDPARTS
+POSTHOOK: Input: default@vcsc
+POSTHOOK: Output: default@vcsc@ds=dummy
+POSTHOOK: query: SELECT a.c, b.c FROM vcsc a JOIN vcsc b ON a.ds = 'dummy' AND b.ds = 'dummy' AND a.c = b.c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@vcsc@ds=dummy
+POSTHOOK: Output: file:/tmp/sdong/hive_2011-03-14_04-46-27_374_2422598099938727948/-mr-10000