You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@oozie.apache.org by rk...@apache.org on 2014/07/14 23:35:21 UTC

git commit: OOZIE-1740 Add a new function hadoop:conf() that can be invoked from the workflow.xml and will return a hadoop configuration option (sam liu via rkanter)

Repository: oozie
Updated Branches:
  refs/heads/master d5b13db2b -> 47d62743a


OOZIE-1740 Add a new function hadoop:conf() that can be invoked from the workflow.xml and will return a hadoop configuration option (sam liu via rkanter)


Project: http://git-wip-us.apache.org/repos/asf/oozie/repo
Commit: http://git-wip-us.apache.org/repos/asf/oozie/commit/47d62743
Tree: http://git-wip-us.apache.org/repos/asf/oozie/tree/47d62743
Diff: http://git-wip-us.apache.org/repos/asf/oozie/diff/47d62743

Branch: refs/heads/master
Commit: 47d62743a8b639f65226c5e78455989605f5aabf
Parents: d5b13db
Author: Robert Kanter <rk...@cloudera.com>
Authored: Mon Jul 14 14:30:27 2014 -0700
Committer: Robert Kanter <rk...@cloudera.com>
Committed: Mon Jul 14 14:30:27 2014 -0700

----------------------------------------------------------------------
 .../oozie/action/hadoop/HadoopELFunctions.java  | 16 +++++++++
 core/src/main/resources/oozie-default.xml       | 31 ++++++++++++------
 .../action/hadoop/TestHadoopELFunctions.java    | 34 ++++++++++++++++++++
 .../src/site/twiki/WorkflowFunctionalSpec.twiki | 30 +++++++++++++++++
 release-log.txt                                 |  1 +
 5 files changed, 102 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/oozie/blob/47d62743/core/src/main/java/org/apache/oozie/action/hadoop/HadoopELFunctions.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/oozie/action/hadoop/HadoopELFunctions.java b/core/src/main/java/org/apache/oozie/action/hadoop/HadoopELFunctions.java
index d325535..83813a1 100644
--- a/core/src/main/java/org/apache/oozie/action/hadoop/HadoopELFunctions.java
+++ b/core/src/main/java/org/apache/oozie/action/hadoop/HadoopELFunctions.java
@@ -17,7 +17,10 @@
  */
 package org.apache.oozie.action.hadoop;
 
+import org.apache.hadoop.conf.Configuration;
 import org.apache.oozie.DagELFunctions;
+import org.apache.oozie.service.HadoopAccessorService;
+import org.apache.oozie.service.Services;
 import org.apache.oozie.util.ELEvaluationException;
 import org.apache.oozie.util.XLog;
 import org.apache.oozie.workflow.WorkflowInstance;
@@ -58,6 +61,19 @@ public class HadoopELFunctions {
         return counters;
     }
 
+    public static String hadoop_conf(String hadoopConfHostPort, String propName) {
+        Configuration conf = Services.get().get(HadoopAccessorService.class)
+            .createJobConf(hadoopConfHostPort);
+        String prop = conf.get(propName);
+        if (prop == null || prop.equals("")) {
+            conf = new Configuration();
+            prop = conf.get(propName);
+        }
+        if (prop == null)
+            prop = "";
+        return prop;
+    }
+
     @SuppressWarnings("unchecked")
     private static Map<String, Map<String, Long>> getCounters(String nodeName) throws ELEvaluationException {
         String jsonCounters = DagELFunctions.getActionVar(nodeName, MapReduceActionExecutor.HADOOP_COUNTERS);

http://git-wip-us.apache.org/repos/asf/oozie/blob/47d62743/core/src/main/resources/oozie-default.xml
----------------------------------------------------------------------
diff --git a/core/src/main/resources/oozie-default.xml b/core/src/main/resources/oozie-default.xml
index b38801f..2b68436 100644
--- a/core/src/main/resources/oozie-default.xml
+++ b/core/src/main/resources/oozie-default.xml
@@ -616,6 +616,7 @@
             wf:actionTrackerUri=org.apache.oozie.DagELFunctions#wf_actionTrackerUri,
             wf:actionExternalStatus=org.apache.oozie.DagELFunctions#wf_actionExternalStatus,
             hadoop:counters=org.apache.oozie.action.hadoop.HadoopELFunctions#hadoop_counters,
+            hadoop:conf=org.apache.oozie.action.hadoop.HadoopELFunctions#hadoop_conf,
             fs:exists=org.apache.oozie.action.hadoop.FsELFunctions#fs_exists,
             fs:isDir=org.apache.oozie.action.hadoop.FsELFunctions#fs_isDir,
             fs:dirSize=org.apache.oozie.action.hadoop.FsELFunctions#fs_dirSize,
@@ -720,7 +721,8 @@
             coord:endOfDays=org.apache.oozie.coord.CoordELFunctions#ph1_coord_endOfDays,
             coord:endOfMonths=org.apache.oozie.coord.CoordELFunctions#ph1_coord_endOfMonths,
             coord:conf=org.apache.oozie.coord.CoordELFunctions#coord_conf,
-            coord:user=org.apache.oozie.coord.CoordELFunctions#coord_user
+            coord:user=org.apache.oozie.coord.CoordELFunctions#coord_user,
+            hadoop:conf=org.apache.oozie.action.hadoop.HadoopELFunctions#hadoop_conf
         </value>
         <description>
             EL functions declarations, separated by commas, format is [PREFIX:]NAME=CLASS#METHOD.
@@ -733,7 +735,8 @@
             coord:days=org.apache.oozie.coord.CoordELFunctions#ph1_coord_days,
             coord:months=org.apache.oozie.coord.CoordELFunctions#ph1_coord_months,
             coord:hours=org.apache.oozie.coord.CoordELFunctions#ph1_coord_hours,
-            coord:minutes=org.apache.oozie.coord.CoordELFunctions#ph1_coord_minutes
+            coord:minutes=org.apache.oozie.coord.CoordELFunctions#ph1_coord_minutes,
+            hadoop:conf=org.apache.oozie.action.hadoop.HadoopELFunctions#hadoop_conf
         </value>
         <description>
             EL functions declarations, separated by commas, format is [PREFIX:]NAME=CLASS#METHOD.
@@ -780,7 +783,8 @@
         <name>oozie.service.ELService.functions.coord-job-submit-nofuncs</name>
         <value>
             coord:conf=org.apache.oozie.coord.CoordELFunctions#coord_conf,
-            coord:user=org.apache.oozie.coord.CoordELFunctions#coord_user
+            coord:user=org.apache.oozie.coord.CoordELFunctions#coord_user,
+            hadoop:conf=org.apache.oozie.action.hadoop.HadoopELFunctions#hadoop_conf
         </value>
         <description>
             EL functions declarations, separated by commas, format is [PREFIX:]NAME=CLASS#METHOD.
@@ -833,7 +837,8 @@
             coord:formatTime=org.apache.oozie.coord.CoordELFunctions#ph1_coord_formatTime_echo,
             coord:conf=org.apache.oozie.coord.CoordELFunctions#coord_conf,
             coord:user=org.apache.oozie.coord.CoordELFunctions#coord_user,
-            coord:absolute=org.apache.oozie.coord.CoordELFunctions#ph1_coord_absolute_echo
+            coord:absolute=org.apache.oozie.coord.CoordELFunctions#ph1_coord_absolute_echo,
+            hadoop:conf=org.apache.oozie.action.hadoop.HadoopELFunctions#hadoop_conf
         </value>
         <description>
             EL functions declarations, separated by commas, format is [PREFIX:]NAME=CLASS#METHOD.
@@ -894,7 +899,8 @@
             coord:dataInPartitionMax=org.apache.oozie.coord.HCatELFunctions#ph1_coord_dataInPartitionMax_echo,
             coord:dataInPartitions=org.apache.oozie.coord.HCatELFunctions#ph1_coord_dataInPartitions_echo,
             coord:dataOutPartitions=org.apache.oozie.coord.HCatELFunctions#ph1_coord_dataOutPartitions_echo,
-            coord:dataOutPartitionValue=org.apache.oozie.coord.HCatELFunctions#ph1_coord_dataOutPartitionValue_echo
+            coord:dataOutPartitionValue=org.apache.oozie.coord.HCatELFunctions#ph1_coord_dataOutPartitionValue_echo,
+            hadoop:conf=org.apache.oozie.action.hadoop.HadoopELFunctions#hadoop_conf
         </value>
         <description>
             EL functions declarations, separated by commas, format is [PREFIX:]NAME=CLASS#METHOD.
@@ -950,7 +956,8 @@
             coord:databaseOut=org.apache.oozie.coord.HCatELFunctions#ph1_coord_databaseOut_echo,
             coord:tableOut=org.apache.oozie.coord.HCatELFunctions#ph1_coord_tableOut_echo,
             coord:dataOutPartitions=org.apache.oozie.coord.HCatELFunctions#ph1_coord_dataOutPartitions_echo,
-            coord:dataOutPartitionValue=org.apache.oozie.coord.HCatELFunctions#ph1_coord_dataOutPartitionValue_echo
+            coord:dataOutPartitionValue=org.apache.oozie.coord.HCatELFunctions#ph1_coord_dataOutPartitionValue_echo,
+            hadoop:conf=org.apache.oozie.action.hadoop.HadoopELFunctions#hadoop_conf
         </value>
         <description>
             EL functions declarations, separated by commas, format is [PREFIX:]NAME=CLASS#METHOD.
@@ -1006,7 +1013,8 @@
             coord:conf=org.apache.oozie.coord.CoordELFunctions#coord_conf,
             coord:user=org.apache.oozie.coord.CoordELFunctions#coord_user,
             coord:absolute=org.apache.oozie.coord.CoordELFunctions#ph2_coord_absolute_echo,
-            coord:absoluteRange=org.apache.oozie.coord.CoordELFunctions#ph2_coord_absolute_range
+            coord:absoluteRange=org.apache.oozie.coord.CoordELFunctions#ph2_coord_absolute_range,
+            hadoop:conf=org.apache.oozie.action.hadoop.HadoopELFunctions#hadoop_conf
         </value>
         <description>
             EL functions declarations, separated by commas, format is [PREFIX:]NAME=CLASS#METHOD.
@@ -1062,7 +1070,8 @@
             coord:conf=org.apache.oozie.coord.CoordELFunctions#coord_conf,
             coord:user=org.apache.oozie.coord.CoordELFunctions#coord_user,
             coord:absolute=org.apache.oozie.coord.CoordELFunctions#ph2_coord_absolute_echo,
-            coord:absoluteRange=org.apache.oozie.coord.CoordELFunctions#ph2_coord_absolute_range
+            coord:absoluteRange=org.apache.oozie.coord.CoordELFunctions#ph2_coord_absolute_range,
+            hadoop:conf=org.apache.oozie.action.hadoop.HadoopELFunctions#hadoop_conf
         </value>
         <description>
             EL functions declarations, separated by commas, format is [PREFIX:]NAME=CLASS#METHOD.
@@ -1117,7 +1126,8 @@
             coord:databaseOut=org.apache.oozie.coord.HCatELFunctions#ph3_coord_databaseOut,
             coord:tableOut=org.apache.oozie.coord.HCatELFunctions#ph3_coord_tableOut,
             coord:dataOutPartitions=org.apache.oozie.coord.HCatELFunctions#ph3_coord_dataOutPartitions,
-            coord:dataOutPartitionValue=org.apache.oozie.coord.HCatELFunctions#ph3_coord_dataOutPartitionValue
+            coord:dataOutPartitionValue=org.apache.oozie.coord.HCatELFunctions#ph3_coord_dataOutPartitionValue,
+            hadoop:conf=org.apache.oozie.action.hadoop.HadoopELFunctions#hadoop_conf
         </value>
         <description>
             EL functions declarations, separated by commas, format is [PREFIX:]NAME=CLASS#METHOD.
@@ -1183,7 +1193,8 @@
             coord:dataInPartitionMax=org.apache.oozie.coord.HCatELFunctions#ph3_coord_dataInPartitionMax,
             coord:dataInPartitions=org.apache.oozie.coord.HCatELFunctions#ph3_coord_dataInPartitions,
             coord:dataOutPartitions=org.apache.oozie.coord.HCatELFunctions#ph3_coord_dataOutPartitions,
-            coord:dataOutPartitionValue=org.apache.oozie.coord.HCatELFunctions#ph3_coord_dataOutPartitionValue
+            coord:dataOutPartitionValue=org.apache.oozie.coord.HCatELFunctions#ph3_coord_dataOutPartitionValue,
+            hadoop:conf=org.apache.oozie.action.hadoop.HadoopELFunctions#hadoop_conf
         </value>
         <description>
             EL functions declarations, separated by commas, format is [PREFIX:]NAME=CLASS#METHOD.

http://git-wip-us.apache.org/repos/asf/oozie/blob/47d62743/core/src/test/java/org/apache/oozie/action/hadoop/TestHadoopELFunctions.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/oozie/action/hadoop/TestHadoopELFunctions.java b/core/src/test/java/org/apache/oozie/action/hadoop/TestHadoopELFunctions.java
index 8212f09..3bb35a5 100644
--- a/core/src/test/java/org/apache/oozie/action/hadoop/TestHadoopELFunctions.java
+++ b/core/src/test/java/org/apache/oozie/action/hadoop/TestHadoopELFunctions.java
@@ -33,6 +33,7 @@ import org.apache.oozie.workflow.lite.EndNodeDef;
 import org.apache.oozie.workflow.lite.LiteWorkflowApp;
 import org.apache.oozie.workflow.lite.LiteWorkflowInstance;
 import org.apache.oozie.workflow.lite.StartNodeDef;
+import java.io.ByteArrayOutputStream;
 import java.util.HashMap;
 
 public class TestHadoopELFunctions extends ActionExecutorTestCase {
@@ -163,4 +164,37 @@ public class TestHadoopELFunctions extends ActionExecutorTestCase {
         assertEquals(new Long(33),
                 eval.evaluate("${hadoop:counters('H')['job_201111300933_0004']['MAP_INPUT_RECORDS']}", Long.class));
     }
+
+    public void testHadoopConfFunctions() throws Exception {
+        XConfiguration jobConf = new XConfiguration();
+        XConfiguration.copy(createJobConf(), jobConf);
+
+        String testHadoopOptionValue = jobConf.get("mapred.tasktracker.map.tasks.maximum");
+        jobConf.set("test.name.node.uri", getNameNodeUri());
+        jobConf.set("test.hadoop.option", "mapred.tasktracker.map.tasks.maximum");
+
+        WorkflowJobBean workflow = new WorkflowJobBean();
+        workflow.setProtoActionConf("<configuration/>");
+        LiteWorkflowApp wfApp = new LiteWorkflowApp("x", "<workflow-app/>",
+                        new StartNodeDef(
+                            LiteWorkflowStoreService.LiteControlNodeHandler.class, "a"));
+        wfApp.addNode(new EndNodeDef("a",
+                          LiteWorkflowStoreService.LiteControlNodeHandler.class));
+        WorkflowInstance wi = new LiteWorkflowInstance(wfApp, jobConf, "1");
+
+        workflow.setWorkflowInstance(wi);
+        ByteArrayOutputStream baos = new ByteArrayOutputStream();
+        jobConf.writeXml(baos);
+        workflow.setProtoActionConf(baos.toString());
+
+        final WorkflowActionBean action = new WorkflowActionBean();
+        ELEvaluator eval = Services.get().get(ELService.class).createEvaluator(
+                    "workflow");
+        DagELFunctions.configureEvaluator(eval, workflow, action);
+
+        assertEquals(testHadoopOptionValue,
+                eval.evaluate("${hadoop:conf(wf:conf('test.name.node.uri'), wf:conf('test.hadoop.option'))}",
+                        String.class));
+    }
+
 }

http://git-wip-us.apache.org/repos/asf/oozie/blob/47d62743/docs/src/site/twiki/WorkflowFunctionalSpec.twiki
----------------------------------------------------------------------
diff --git a/docs/src/site/twiki/WorkflowFunctionalSpec.twiki b/docs/src/site/twiki/WorkflowFunctionalSpec.twiki
index f0eb393..3319bcc 100644
--- a/docs/src/site/twiki/WorkflowFunctionalSpec.twiki
+++ b/docs/src/site/twiki/WorkflowFunctionalSpec.twiki
@@ -2011,6 +2011,36 @@ Below is the workflow that describes how to access specific information using ha
 </workflow-app>
 </verbatim>
 
+#Hadoop Configuration EL Function
+*String hadoop:conf(String hadoopConfHostPort, String propName)*
+
+It returns the value of a property from the Hadoop configuration.
+
+The hadoopConfHostPort is the 'host:port' of a Hadoop cluster, such as 'NameNodeHostAddress:Port' or
+'JobTrackerHostAddress:Port'. The propName is the name of the target property. The function first looks
+up propName in the Hadoop configuration that Oozie creates for hadoopConfHostPort. If the property is
+not set there (or is empty), the function falls back to a default Hadoop configuration object and
+returns the value found there; if the property is still not set, an empty string is returned.
+
+*Example of usage of Hadoop Configuration EL Function:*
+
+<verbatim>
+<action name="mr-node">
+    <map-reduce>
+        <job-tracker>${jobTracker}</job-tracker>
+        <name-node>${nameNode}</name-node>
+        <configuration>
+            <property>
+                <name>mapreduce.map.java.opts</name>
+                <value>${hadoop:conf("9.181.7.69:9001","mapreduce.map.java.opts")} -Xss512k </value>
+            </property>
+        </configuration>
+    </map-reduce>
+    <ok to="end"/>
+    <error to="fail"/>
+</action>
+</verbatim>
+
 ---++++ 4.2.6 Hadoop Jobs EL Function
 
 The function _wf:actionData()_ can be used to access Hadoop ID's for actions such as Pig, by specifying the key as _hadoopJobs_.

http://git-wip-us.apache.org/repos/asf/oozie/blob/47d62743/release-log.txt
----------------------------------------------------------------------
diff --git a/release-log.txt b/release-log.txt
index 81559eb..1f7c322 100644
--- a/release-log.txt
+++ b/release-log.txt
@@ -1,5 +1,6 @@
 -- Oozie 4.1.0 release (trunk - unreleased)
 
+OOZIE-1740 Add a new function hadoop:conf() that can be invoked from the workflow.xml and will return a hadoop configuration option (sam liu via rkanter)
 OOZIE-1911 SLA calculation in HA mode does wrong bit comparison for 'start' and 'duration' (mna)
 OOZIE-1926 make gz blob compression as default (ryota)
 OOZIE-1916 Use Curator leader latch instead of checking the order of Oozie servers (rkanter)