You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by na...@apache.org on 2009/12/11 06:55:30 UTC
svn commit: r889506 - in /hadoop/hive/trunk: ./
common/src/java/org/apache/hadoop/hive/conf/ conf/
ql/src/java/org/apache/hadoop/hive/ql/exec/
ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/
Author: namit
Date: Fri Dec 11 05:55:30 2009
New Revision: 889506
URL: http://svn.apache.org/viewvc?rev=889506&view=rev
Log:
HIVE-946. Pass context to custom mapper/reducer
(Paul Yang via namit)
Added:
hadoop/hive/trunk/ql/src/test/queries/clientpositive/script_env_var1.q
hadoop/hive/trunk/ql/src/test/queries/clientpositive/script_env_var2.q
hadoop/hive/trunk/ql/src/test/results/clientpositive/script_env_var1.q.out
hadoop/hive/trunk/ql/src/test/results/clientpositive/script_env_var2.q.out
Modified:
hadoop/hive/trunk/CHANGES.txt
hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
hadoop/hive/trunk/conf/hive-default.xml
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ScriptOperator.java
Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=889506&r1=889505&r2=889506&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Fri Dec 11 05:55:30 2009
@@ -84,6 +84,9 @@
HIVE-549. Run independent tasks for a query in parallel.
(Chaitanya Mishra via namit)
+ HIVE-946. Pass context to custom mapper/reducer
+ (Paul Yang via namit)
+
IMPROVEMENTS
HIVE-760. Add version info to META-INF/MANIFEST.MF.
Modified: hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java?rev=889506&r1=889505&r2=889506&view=diff
==============================================================================
--- hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (original)
+++ hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java Fri Dec 11 05:55:30 2009
@@ -119,6 +119,7 @@
HIVETABLENAME("hive.table.name", ""),
HIVEPARTITIONNAME("hive.partition.name", ""),
HIVESCRIPTAUTOPROGRESS("hive.script.auto.progress", false),
+ HIVESCRIPTIDENVVAR("hive.script.operator.id.env.var", "HIVE_SCRIPT_OPERATOR_ID"),
HIVEMAPREDMODE("hive.mapred.mode", "nonstrict"),
HIVEALIAS("hive.alias", ""),
HIVEMAPSIDEAGGREGATE("hive.map.aggr", "true"),
@@ -129,7 +130,7 @@
HIVEGROUPBYMAPINTERVAL("hive.groupby.mapaggr.checkinterval", 100000),
HIVEMAPAGGRHASHMEMORY("hive.map.aggr.hash.percentmemory", (float)0.5),
HIVEMAPAGGRHASHMINREDUCTION("hive.map.aggr.hash.min.reduction", (float)0.5),
-
+
// for hive udtf operator
HIVEUDTFAUTOPROGRESS("hive.udtf.auto.progress", false),
Modified: hadoop/hive/trunk/conf/hive-default.xml
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/conf/hive-default.xml?rev=889506&r1=889505&r2=889506&view=diff
==============================================================================
--- hadoop/hive/trunk/conf/hive-default.xml (original)
+++ hadoop/hive/trunk/conf/hive-default.xml Fri Dec 11 05:55:30 2009
@@ -293,6 +293,13 @@
</property>
<property>
+ <name>hive.script.operator.id.env.var</name>
+ <value>HIVE_SCRIPT_OPERATOR_ID</value>
+ <description> Name of the environment variable that holds the unique script operator ID in the user's transform function (the custom mapper/reducer that the user has specified in the query).
+ </description>
+</property>
+
+<property>
<name>hive.exec.compress.output</name>
<value>false</value>
<description> This controls whether the final outputs of a query (to a local/hdfs file or a hive table) is compressed. The compression codec and other options are determined from hadoop config variables mapred.output.compress* </description>
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ScriptOperator.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ScriptOperator.java?rev=889506&r1=889505&r2=889506&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ScriptOperator.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ScriptOperator.java Fri Dec 11 05:55:30 2009
@@ -250,6 +250,12 @@
Map<String, String> env = pb.environment();
addJobConfToEnvironment(hconf, env);
env.put(safeEnvVarName(HiveConf.ConfVars.HIVEALIAS.varname), String.valueOf(alias));
+
+ // Create an environment variable that uniquely identifies this script operator
+ String idEnvVarName = HiveConf.getVar(hconf, HiveConf.ConfVars.HIVESCRIPTIDENVVAR);
+ String idEnvVarVal = this.getOperatorId();
+ env.put(safeEnvVarName(idEnvVarName), idEnvVarVal);
+
scriptPid = pb.start(); // Runtime.getRuntime().exec(wrappedCmdArgs);
DataOutputStream scriptOut = new DataOutputStream(new BufferedOutputStream(scriptPid.getOutputStream()));
Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/script_env_var1.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/script_env_var1.q?rev=889506&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/script_env_var1.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/script_env_var1.q Fri Dec 11 05:55:30 2009
@@ -0,0 +1,5 @@
+-- Verifies that script operator ID environment variables have unique values
+-- in each instance of the script operator.
+SELECT count(1) FROM
+( SELECT TRANSFORM('echo $HIVE_SCRIPT_OPERATOR_ID') USING 'bash' AS key FROM src LIMIT 1 UNION ALL
+ SELECT TRANSFORM('echo $HIVE_SCRIPT_OPERATOR_ID') USING 'bash' AS key FROM src LIMIT 1 ) a GROUP BY key;
Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/script_env_var2.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/script_env_var2.q?rev=889506&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/script_env_var2.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/script_env_var2.q Fri Dec 11 05:55:30 2009
@@ -0,0 +1,5 @@
+set hive.script.operator.id.env.var = MY_ID;
+-- Same test as script_env_var1, but test setting the variable name
+SELECT count(1) FROM
+( SELECT TRANSFORM('echo $MY_ID') USING 'bash' AS key FROM src LIMIT 1 UNION ALL
+ SELECT TRANSFORM('echo $MY_ID') USING 'bash' AS key FROM src LIMIT 1 ) a GROUP BY key;
Added: hadoop/hive/trunk/ql/src/test/results/clientpositive/script_env_var1.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/script_env_var1.q.out?rev=889506&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/script_env_var1.q.out (added)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/script_env_var1.q.out Fri Dec 11 05:55:30 2009
@@ -0,0 +1,18 @@
+PREHOOK: query: -- Verifies that script operator ID environment variables have unique values
+-- in each instance of the script operator.
+SELECT count(1) FROM
+( SELECT TRANSFORM('echo $HIVE_SCRIPT_OPERATOR_ID') USING 'bash' AS key FROM src LIMIT 1 UNION ALL
+ SELECT TRANSFORM('echo $HIVE_SCRIPT_OPERATOR_ID') USING 'bash' AS key FROM src LIMIT 1 ) a GROUP BY key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: file:/data/users/pyang/hive-946/trunk/VENDOR.hive/trunk/build/ql/tmp/1389301646/10000
+POSTHOOK: query: -- Verifies that script operator ID environment variables have unique values
+-- in each instance of the script operator.
+SELECT count(1) FROM
+( SELECT TRANSFORM('echo $HIVE_SCRIPT_OPERATOR_ID') USING 'bash' AS key FROM src LIMIT 1 UNION ALL
+ SELECT TRANSFORM('echo $HIVE_SCRIPT_OPERATOR_ID') USING 'bash' AS key FROM src LIMIT 1 ) a GROUP BY key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: file:/data/users/pyang/hive-946/trunk/VENDOR.hive/trunk/build/ql/tmp/1389301646/10000
+1
+1
Added: hadoop/hive/trunk/ql/src/test/results/clientpositive/script_env_var2.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/script_env_var2.q.out?rev=889506&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/script_env_var2.q.out (added)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/script_env_var2.q.out Fri Dec 11 05:55:30 2009
@@ -0,0 +1,16 @@
+PREHOOK: query: -- Same test as script_env_var1, but test setting the variable name
+SELECT count(1) FROM
+( SELECT TRANSFORM('echo $MY_ID') USING 'bash' AS key FROM src LIMIT 1 UNION ALL
+ SELECT TRANSFORM('echo $MY_ID') USING 'bash' AS key FROM src LIMIT 1 ) a GROUP BY key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: file:/data/users/pyang/hive-946/trunk/VENDOR.hive/trunk/build/ql/tmp/1036835170/10000
+POSTHOOK: query: -- Same test as script_env_var1, but test setting the variable name
+SELECT count(1) FROM
+( SELECT TRANSFORM('echo $MY_ID') USING 'bash' AS key FROM src LIMIT 1 UNION ALL
+ SELECT TRANSFORM('echo $MY_ID') USING 'bash' AS key FROM src LIMIT 1 ) a GROUP BY key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: file:/data/users/pyang/hive-946/trunk/VENDOR.hive/trunk/build/ql/tmp/1036835170/10000
+1
+1