You are viewing a plain-text version of this content; the canonical link was not preserved in this copy.
Posted to commits@hive.apache.org by na...@apache.org on 2009/12/11 06:55:30 UTC

svn commit: r889506 - in /hadoop/hive/trunk: ./ common/src/java/org/apache/hadoop/hive/conf/ conf/ ql/src/java/org/apache/hadoop/hive/ql/exec/ ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/

Author: namit
Date: Fri Dec 11 05:55:30 2009
New Revision: 889506

URL: http://svn.apache.org/viewvc?rev=889506&view=rev
Log:
HIVE-946. Pass context to custom mapper/reducer
(Paul Yang via namit)


Added:
    hadoop/hive/trunk/ql/src/test/queries/clientpositive/script_env_var1.q
    hadoop/hive/trunk/ql/src/test/queries/clientpositive/script_env_var2.q
    hadoop/hive/trunk/ql/src/test/results/clientpositive/script_env_var1.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/script_env_var2.q.out
Modified:
    hadoop/hive/trunk/CHANGES.txt
    hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
    hadoop/hive/trunk/conf/hive-default.xml
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ScriptOperator.java

Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=889506&r1=889505&r2=889506&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Fri Dec 11 05:55:30 2009
@@ -84,6 +84,9 @@
     HIVE-549. Run independent tasks for a query in parallel.
     (Chaitanya Mishra via namit)
 
+    HIVE-946. Pass context to custom mapper/reducer
+    (Paul Yang via namit)
+
   IMPROVEMENTS
 
     HIVE-760. Add version info to META-INF/MANIFEST.MF.

Modified: hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java?rev=889506&r1=889505&r2=889506&view=diff
==============================================================================
--- hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (original)
+++ hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java Fri Dec 11 05:55:30 2009
@@ -119,6 +119,7 @@
     HIVETABLENAME("hive.table.name", ""),
     HIVEPARTITIONNAME("hive.partition.name", ""),
     HIVESCRIPTAUTOPROGRESS("hive.script.auto.progress", false),
+    HIVESCRIPTIDENVVAR("hive.script.operator.id.env.var", "HIVE_SCRIPT_OPERATOR_ID"),
     HIVEMAPREDMODE("hive.mapred.mode", "nonstrict"),
     HIVEALIAS("hive.alias", ""),
     HIVEMAPSIDEAGGREGATE("hive.map.aggr", "true"),
@@ -129,7 +130,7 @@
     HIVEGROUPBYMAPINTERVAL("hive.groupby.mapaggr.checkinterval", 100000),
     HIVEMAPAGGRHASHMEMORY("hive.map.aggr.hash.percentmemory", (float)0.5),
     HIVEMAPAGGRHASHMINREDUCTION("hive.map.aggr.hash.min.reduction", (float)0.5),
-
+    
     // for hive udtf operator
     HIVEUDTFAUTOPROGRESS("hive.udtf.auto.progress", false),
     

Modified: hadoop/hive/trunk/conf/hive-default.xml
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/conf/hive-default.xml?rev=889506&r1=889505&r2=889506&view=diff
==============================================================================
--- hadoop/hive/trunk/conf/hive-default.xml (original)
+++ hadoop/hive/trunk/conf/hive-default.xml Fri Dec 11 05:55:30 2009
@@ -293,6 +293,13 @@
 </property>
 
 <property>
+  <name>hive.script.operator.id.env.var</name>
+  <value>HIVE_SCRIPT_OPERATOR_ID</value>
+  <description> Name of the environment variable that holds the unique script operator ID in the user's transform function (the custom mapper/reducer that the user has specified in the query)
+  </description>
+</property>
+
+<property>
   <name>hive.exec.compress.output</name>
   <value>false</value>
   <description> This controls whether the final outputs of a query (to a local/hdfs file or a hive table) is compressed. The compression codec and other options are determined from hadoop config variables mapred.output.compress* </description>

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ScriptOperator.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ScriptOperator.java?rev=889506&r1=889505&r2=889506&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ScriptOperator.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ScriptOperator.java Fri Dec 11 05:55:30 2009
@@ -250,6 +250,12 @@
         Map<String, String> env = pb.environment();
         addJobConfToEnvironment(hconf, env);
         env.put(safeEnvVarName(HiveConf.ConfVars.HIVEALIAS.varname), String.valueOf(alias));
+        
+        // Create an environment variable that uniquely identifies this script operator
+        String idEnvVarName = HiveConf.getVar(hconf, HiveConf.ConfVars.HIVESCRIPTIDENVVAR);
+        String idEnvVarVal = this.getOperatorId();
+        env.put(safeEnvVarName(idEnvVarName), idEnvVarVal);
+        
         scriptPid = pb.start();       // Runtime.getRuntime().exec(wrappedCmdArgs);
 
         DataOutputStream scriptOut = new DataOutputStream(new BufferedOutputStream(scriptPid.getOutputStream()));

Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/script_env_var1.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/script_env_var1.q?rev=889506&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/script_env_var1.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/script_env_var1.q Fri Dec 11 05:55:30 2009
@@ -0,0 +1,5 @@
+-- Verifies that script operator ID environment variables have unique values
+-- in each instance of the script operator.
+SELECT count(1) FROM
+( SELECT TRANSFORM('echo $HIVE_SCRIPT_OPERATOR_ID') USING 'bash' AS key FROM src LIMIT 1 UNION ALL
+  SELECT TRANSFORM('echo $HIVE_SCRIPT_OPERATOR_ID') USING 'bash' AS key FROM src LIMIT 1 ) a GROUP BY key;

Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/script_env_var2.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/script_env_var2.q?rev=889506&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/script_env_var2.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/script_env_var2.q Fri Dec 11 05:55:30 2009
@@ -0,0 +1,5 @@
+set hive.script.operator.id.env.var = MY_ID;
+-- Same test as script_env_var1, but test setting the variable name
+SELECT count(1) FROM
+( SELECT TRANSFORM('echo $MY_ID') USING 'bash' AS key FROM src LIMIT 1 UNION ALL
+  SELECT TRANSFORM('echo $MY_ID') USING 'bash' AS key FROM src LIMIT 1 ) a GROUP BY key;

Added: hadoop/hive/trunk/ql/src/test/results/clientpositive/script_env_var1.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/script_env_var1.q.out?rev=889506&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/script_env_var1.q.out (added)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/script_env_var1.q.out Fri Dec 11 05:55:30 2009
@@ -0,0 +1,18 @@
+PREHOOK: query: -- Verifies that script operator ID environment variables have unique values
+-- in each instance of the script operator.
+SELECT count(1) FROM
+( SELECT TRANSFORM('echo $HIVE_SCRIPT_OPERATOR_ID') USING 'bash' AS key FROM src LIMIT 1 UNION ALL
+  SELECT TRANSFORM('echo $HIVE_SCRIPT_OPERATOR_ID') USING 'bash' AS key FROM src LIMIT 1 ) a GROUP BY key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: file:/data/users/pyang/hive-946/trunk/VENDOR.hive/trunk/build/ql/tmp/1389301646/10000
+POSTHOOK: query: -- Verifies that script operator ID environment variables have unique values
+-- in each instance of the script operator.
+SELECT count(1) FROM
+( SELECT TRANSFORM('echo $HIVE_SCRIPT_OPERATOR_ID') USING 'bash' AS key FROM src LIMIT 1 UNION ALL
+  SELECT TRANSFORM('echo $HIVE_SCRIPT_OPERATOR_ID') USING 'bash' AS key FROM src LIMIT 1 ) a GROUP BY key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: file:/data/users/pyang/hive-946/trunk/VENDOR.hive/trunk/build/ql/tmp/1389301646/10000
+1
+1

Added: hadoop/hive/trunk/ql/src/test/results/clientpositive/script_env_var2.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/script_env_var2.q.out?rev=889506&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/script_env_var2.q.out (added)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/script_env_var2.q.out Fri Dec 11 05:55:30 2009
@@ -0,0 +1,16 @@
+PREHOOK: query: -- Same test as script_env_var1, but test setting the variable name
+SELECT count(1) FROM
+( SELECT TRANSFORM('echo $MY_ID') USING 'bash' AS key FROM src LIMIT 1 UNION ALL
+  SELECT TRANSFORM('echo $MY_ID') USING 'bash' AS key FROM src LIMIT 1 ) a GROUP BY key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: file:/data/users/pyang/hive-946/trunk/VENDOR.hive/trunk/build/ql/tmp/1036835170/10000
+POSTHOOK: query: -- Same test as script_env_var1, but test setting the variable name
+SELECT count(1) FROM
+( SELECT TRANSFORM('echo $MY_ID') USING 'bash' AS key FROM src LIMIT 1 UNION ALL
+  SELECT TRANSFORM('echo $MY_ID') USING 'bash' AS key FROM src LIMIT 1 ) a GROUP BY key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: file:/data/users/pyang/hive-946/trunk/VENDOR.hive/trunk/build/ql/tmp/1036835170/10000
+1
+1