You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@oozie.apache.org by mo...@apache.org on 2013/02/07 22:38:21 UTC

svn commit: r1443738 - in /oozie/branches/hcat-intre: core/src/main/java/org/apache/oozie/coord/ core/src/main/java/org/apache/oozie/util/ core/src/main/resources/ core/src/test/java/org/apache/oozie/coord/ core/src/test/java/org/apache/oozie/util/ doc...

Author: mona
Date: Thu Feb  7 21:38:21 2013
New Revision: 1443738

URL: http://svn.apache.org/r1443738
Log:
OOZIE-1196 HCat EL functions for database and table should be modified (mona)

Modified:
    oozie/branches/hcat-intre/core/src/main/java/org/apache/oozie/coord/HCatELFunctions.java
    oozie/branches/hcat-intre/core/src/main/java/org/apache/oozie/util/HCatURI.java
    oozie/branches/hcat-intre/core/src/main/resources/oozie-default.xml
    oozie/branches/hcat-intre/core/src/test/java/org/apache/oozie/coord/TestHCatELFunctions.java
    oozie/branches/hcat-intre/core/src/test/java/org/apache/oozie/util/TestHCatURI.java
    oozie/branches/hcat-intre/docs/src/site/twiki/CoordinatorFunctionalSpec.twiki
    oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/README
    oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/coordinator.xml

Modified: oozie/branches/hcat-intre/core/src/main/java/org/apache/oozie/coord/HCatELFunctions.java
URL: http://svn.apache.org/viewvc/oozie/branches/hcat-intre/core/src/main/java/org/apache/oozie/coord/HCatELFunctions.java?rev=1443738&r1=1443737&r2=1443738&view=diff
==============================================================================
--- oozie/branches/hcat-intre/core/src/main/java/org/apache/oozie/coord/HCatELFunctions.java (original)
+++ oozie/branches/hcat-intre/core/src/main/java/org/apache/oozie/coord/HCatELFunctions.java Thu Feb  7 21:38:21 2013
@@ -38,7 +38,7 @@ public class HCatELFunctions {
     private static XLog LOG = XLog.getLog(HCatELFunctions.class);
     private static final Configuration EMPTY_CONF = new Configuration(true);
 
-    enum EVENT_TYPE {
+    enum EventType {
         input, output
     }
 
@@ -68,22 +68,34 @@ public class HCatELFunctions {
      * @param dataInName
      * @return the same EL function
      */
-    public static String ph1_coord_database_echo(String dataName, String type) {
-        // Checking if the dataIn/dataOut is correct?
+    public static String ph1_coord_databaseIn_echo(String dataName) {
+        // Checking if the dataIn is correct?
         isValidDataEvent(dataName);
-        return echoUnResolved("database", "'" + dataName + "', '" + type + "'");
+        return echoUnResolved("databaseIn", "'" + dataName + "'");
     }
 
-    public static String ph1_coord_table_echo(String dataName, String type) {
-        // Checking if the dataIn/dataOut is correct?
+    public static String ph1_coord_databaseOut_echo(String dataName) {
+        // Checking if the dataOut is correct?
+        isValidDataEvent(dataName);
+        return echoUnResolved("databaseOut", "'" + dataName + "'");
+    }
+
+    public static String ph1_coord_tableIn_echo(String dataName) {
+        // Checking if the dataIn is correct?
         isValidDataEvent(dataName);
-        return echoUnResolved("table", "'" + dataName + "', '" + type + "'");
+        return echoUnResolved("tableIn", "'" + dataName + "'");
     }
 
-    public static String ph1_coord_dataInPartitionPigFilter_echo(String dataInName) {
+    public static String ph1_coord_tableOut_echo(String dataName) {
+        // Checking if the dataOut is correct?
+        isValidDataEvent(dataName);
+        return echoUnResolved("tableOut", "'" + dataName + "'");
+    }
+
+    public static String ph1_coord_dataInPartitionFilter_echo(String dataInName, String type) {
         // Checking if the dataIn/dataOut is correct?
         isValidDataEvent(dataInName);
-        return echoUnResolved("dataInPartitionPigFilter", "'" + dataInName + "'");
+        return echoUnResolved("dataInPartitionFilter", "'" + dataInName + "', '" + type + "'");
     }
 
     public static String ph1_coord_dataInPartitionMin_echo(String dataInName, String partition) {
@@ -119,8 +131,27 @@ public class HCatELFunctions {
      * @param dataInName
      * @return DB name
      */
-    public static String ph3_coord_database(String dataName, String type) {
-        HCatURI hcatURI = getURIFromResolved(dataName, type);
+    public static String ph3_coord_databaseIn(String dataName) {
+        HCatURI hcatURI = getURIFromResolved(dataName, EventType.input);
+        if (hcatURI != null) {
+            return hcatURI.getDb();
+        }
+        else {
+            return "";
+        }
+    }
+
+    /**
+     * Extract the hcat DB name from the URI-template associated with
+     * 'dataOutName'. Caller needs to specify the EL-evaluator level variable
+     * 'oozie.coord.el.dataset.bean' with synchronous dataset object
+     * (SyncCoordDataset)
+     *
+     * @param dataOutName
+     * @return DB name
+     */
+    public static String ph3_coord_databaseOut(String dataName) {
+        HCatURI hcatURI = getURIFromResolved(dataName, EventType.output);
         if (hcatURI != null) {
             return hcatURI.getDb();
         }
@@ -138,8 +169,27 @@ public class HCatELFunctions {
      * @param dataInName
      * @return Table name
      */
-    public static String ph3_coord_table(String dataName, String type) {
-        HCatURI hcatURI = getURIFromResolved(dataName, type);
+    public static String ph3_coord_tableIn(String dataName) {
+        HCatURI hcatURI = getURIFromResolved(dataName, EventType.input);
+        if (hcatURI != null) {
+            return hcatURI.getTable();
+        }
+        else {
+            return "";
+        }
+    }
+
+    /**
+     * Extract the hcat Table name from the URI-template associated with
+     * 'dataOutName'. Caller needs to specify the EL-evaluator level variable
+     * 'oozie.coord.el.dataset.bean' with synchronous dataset object
+     * (SyncCoordDataset)
+     *
+     * @param dataOutName
+     * @return Table name
+     */
+    public static String ph3_coord_tableOut(String dataName) {
+        HCatURI hcatURI = getURIFromResolved(dataName, EventType.output);
         if (hcatURI != null) {
             return hcatURI.getTable();
         }
@@ -155,15 +205,16 @@ public class HCatELFunctions {
      * unresolved, this function will echo back the original function <p/> otherwise it sends the partition filter.
      *
      * @param dataInName : Datain name
+     * @param type : for action type - pig, MR or hive
      */
-    public static String ph3_coord_dataInPartitionPigFilter(String dataInName) {
+    public static String ph3_coord_dataInPartitionFilter(String dataInName, String type) {
         ELEvaluator eval = ELEvaluator.getCurrent();
         String uris = (String) eval.getVariable(".datain." + dataInName);
         Boolean unresolved = (Boolean) eval.getVariable(".datain." + dataInName + ".unresolved");
         if (unresolved != null && unresolved.booleanValue() == true) {
-            return "${coord:dataInPartitionPigFilter('" + dataInName + "')}";
+            return "${coord:dataInPartitionFilter('" + dataInName + "', '" + type + "')}";
         }
-        return createPartitionFilter(uris);
+        return createPartitionFilter(uris, type);
     }
 
     /**
@@ -205,10 +256,10 @@ public class HCatELFunctions {
         String uri = (String) eval.getVariable(".dataout." + dataOutName);
         Boolean unresolved = (Boolean) eval.getVariable(".dataout." + dataOutName + ".unresolved");
         if (unresolved != null && unresolved.booleanValue() == true) {
-            return "${coord:dataOutPartition('" + dataOutName + "')}";
+            return "${coord:dataOutPartitions('" + dataOutName + "')}";
         }
         try {
-            return new HCatURI(uri).toPartitionStringHCatStorer();
+            return new HCatURI(uri).toPartitionString();
         }
         catch (URISyntaxException e) {
             throw new RuntimeException("Parsing exception for HCatURI " + uri + ". details: " + e);
@@ -229,7 +280,7 @@ public class HCatELFunctions {
         String uris = (String) eval.getVariable(".datain." + dataInName);
         Boolean unresolved = (Boolean) eval.getVariable(".datain." + dataInName + ".unresolved");
         if (unresolved != null && unresolved.booleanValue() == true) {
-            return "${coord:dataInPartitionMin('" + dataInName + "')}";
+            return "${coord:dataInPartitionMin('" + dataInName + "', '" + partitionName + "')}";
         }
         String minPartition = null;
         if (uris != null) {
@@ -274,7 +325,7 @@ public class HCatELFunctions {
         String uris = (String) eval.getVariable(".datain." + dataInName);
         Boolean unresolved = (Boolean) eval.getVariable(".datain." + dataInName + ".unresolved");
         if (unresolved != null && unresolved.booleanValue() == true) {
-            return "${coord:dataInPartitionMin('" + dataInName + "')}";
+            return "${coord:dataInPartitionMin('" + dataInName + "', '" + partitionName + "')}";
         }
         String maxPartition = null;
         if (uris != null) {
@@ -304,7 +355,7 @@ public class HCatELFunctions {
         return maxPartition;
     }
 
-    private static String createPartitionFilter(String uris) {
+    private static String createPartitionFilter(String uris, String type) {
         String[] uriList = uris.split(CoordELFunctions.DIR_SEPARATOR);
         StringBuilder filter = new StringBuilder("");
         if (uriList.length > 0) {
@@ -313,7 +364,7 @@ public class HCatELFunctions {
                     filter.append(" OR ");
                 }
                 try {
-                    filter.append(new HCatURI(uri).toPigPartitionFilter());
+                    filter.append(new HCatURI(uri).toPartitionFilter(type));
                 }
                 catch (URISyntaxException e) {
                     throw new RuntimeException("Parsing exception for HCatURI " + uri + ". details: " + e);
@@ -323,11 +374,11 @@ public class HCatELFunctions {
         return filter.toString();
     }
 
-    private static HCatURI getURIFromResolved(String dataInName, String type) {
+    private static HCatURI getURIFromResolved(String dataInName, EventType type) {
         StringBuilder uriTemplate = new StringBuilder();
         ELEvaluator eval = ELEvaluator.getCurrent();
         String uris;
-        if(type.equals(EVENT_TYPE.input.toString())) {
+        if(type == EventType.input) {
             uris = (String) eval.getVariable(".datain." + dataInName);
         }
         else { //type=output

Modified: oozie/branches/hcat-intre/core/src/main/java/org/apache/oozie/util/HCatURI.java
URL: http://svn.apache.org/viewvc/oozie/branches/hcat-intre/core/src/main/java/org/apache/oozie/util/HCatURI.java?rev=1443738&r1=1443737&r2=1443738&view=diff
==============================================================================
--- oozie/branches/hcat-intre/core/src/main/java/org/apache/oozie/util/HCatURI.java (original)
+++ oozie/branches/hcat-intre/core/src/main/java/org/apache/oozie/util/HCatURI.java Thu Feb  7 21:38:21 2013
@@ -225,17 +225,20 @@ public class HCatURI {
      * Convert the partition map to filter string. Each key value pair is
      * separated by AND
      *
+     * @param type pig/java/mr/hive
      * @return filter string
      */
-    public String toPigPartitionFilter() {
+    public String toPartitionFilter(String type) {
         StringBuilder filter = new StringBuilder();
+        String comparator = null;
         filter.append("(");
+        comparator = type.equalsIgnoreCase("pig") ? "==" : "=";
         for (Map.Entry<String, String> entry : partitions.entrySet()) {
             if (filter.length() > 1) {
                 filter.append(" AND ");
             }
             filter.append(entry.getKey());
-            filter.append("==");
+            filter.append(comparator);
             filter.append(PARTITION_VALUE_QUOTE);
             filter.append(entry.getValue());
             filter.append(PARTITION_VALUE_QUOTE);
@@ -249,7 +252,7 @@ public class HCatURI {
      *
      * @return filter string
      */
-    public String toPartitionStringHCatStorer() {
+    public String toPartitionString() {
         StringBuilder filter = new StringBuilder();
         filter.append("'");
         for (Map.Entry<String, String> entry : partitions.entrySet()) {
@@ -264,27 +267,6 @@ public class HCatURI {
         return filter.toString();
     }
 
-    /**
-     * Convert the partition map to filter string. Each key value pair is
-     * separated by AND
-     *
-     * @return filter string
-     */
-    public String toFilter() {
-        StringBuilder filter = new StringBuilder();
-        for (Map.Entry<String, String> entry : partitions.entrySet()) {
-            if (filter.length() > 0) {
-                filter.append(" AND ");
-            }
-            filter.append(entry.getKey());
-            filter.append("=");
-            filter.append(PARTITION_VALUE_QUOTE);
-            filter.append(entry.getValue());
-            filter.append(PARTITION_VALUE_QUOTE);
-        }
-        return filter.toString();
-    }
-
     @Override
     public String toString() {
         StringBuilder sb = new StringBuilder();

Modified: oozie/branches/hcat-intre/core/src/main/resources/oozie-default.xml
URL: http://svn.apache.org/viewvc/oozie/branches/hcat-intre/core/src/main/resources/oozie-default.xml?rev=1443738&r1=1443737&r2=1443738&view=diff
==============================================================================
--- oozie/branches/hcat-intre/core/src/main/resources/oozie-default.xml (original)
+++ oozie/branches/hcat-intre/core/src/main/resources/oozie-default.xml Thu Feb  7 21:38:21 2013
@@ -789,9 +789,11 @@
             coord:name=org.apache.oozie.coord.CoordELFunctions#ph1_coord_name_echo,
             coord:conf=org.apache.oozie.coord.CoordELFunctions#coord_conf,
             coord:user=org.apache.oozie.coord.CoordELFunctions#coord_user,
-            coord:database=org.apache.oozie.coord.HCatELFunctions#ph1_coord_database_echo,
-            coord:table=org.apache.oozie.coord.HCatELFunctions#ph1_coord_table_echo,
-            coord:dataInPartitionPigFilter=org.apache.oozie.coord.HCatELFunctions#ph1_coord_dataInPartitionPigFilter_echo,
+            coord:databaseIn=org.apache.oozie.coord.HCatELFunctions#ph1_coord_databaseIn_echo,
+            coord:databaseOut=org.apache.oozie.coord.HCatELFunctions#ph1_coord_databaseOut_echo,
+            coord:tableIn=org.apache.oozie.coord.HCatELFunctions#ph1_coord_tableIn_echo,
+            coord:tableOut=org.apache.oozie.coord.HCatELFunctions#ph1_coord_tableOut_echo,
+            coord:dataInPartitionFilter=org.apache.oozie.coord.HCatELFunctions#ph1_coord_dataInPartitionFilter_echo,
             coord:dataInPartitionMin=org.apache.oozie.coord.HCatELFunctions#ph1_coord_dataInPartitionMin_echo,
             coord:dataInPartitionMax=org.apache.oozie.coord.HCatELFunctions#ph1_coord_dataInPartitionMax_echo,
             coord:dataOutPartitions=org.apache.oozie.coord.HCatELFunctions#ph1_coord_dataOutPartitions_echo,
@@ -1049,9 +1051,11 @@
             coord:name=org.apache.oozie.coord.CoordELFunctions#ph3_coord_name,
             coord:conf=org.apache.oozie.coord.CoordELFunctions#coord_conf,
             coord:user=org.apache.oozie.coord.CoordELFunctions#coord_user,
-            coord:database=org.apache.oozie.coord.HCatELFunctions#ph3_coord_database,
-            coord:table=org.apache.oozie.coord.HCatELFunctions#ph3_coord_table,
-            coord:dataInPartitionPigFilter=org.apache.oozie.coord.HCatELFunctions#ph3_coord_dataInPartitionPigFilter,
+            coord:databaseIn=org.apache.oozie.coord.HCatELFunctions#ph3_coord_databaseIn,
+            coord:databaseOut=org.apache.oozie.coord.HCatELFunctions#ph3_coord_databaseOut,
+            coord:tableIn=org.apache.oozie.coord.HCatELFunctions#ph3_coord_tableIn,
+            coord:tableOut=org.apache.oozie.coord.HCatELFunctions#ph3_coord_tableOut,
+            coord:dataInPartitionFilter=org.apache.oozie.coord.HCatELFunctions#ph3_coord_dataInPartitionFilter,
             coord:dataInPartitionMin=org.apache.oozie.coord.HCatELFunctions#ph3_coord_dataInPartitionMin,
             coord:dataInPartitionMax=org.apache.oozie.coord.HCatELFunctions#ph3_coord_dataInPartitionMax,
             coord:dataOutPartitions=org.apache.oozie.coord.HCatELFunctions#ph3_coord_dataOutPartitions,

Modified: oozie/branches/hcat-intre/core/src/test/java/org/apache/oozie/coord/TestHCatELFunctions.java
URL: http://svn.apache.org/viewvc/oozie/branches/hcat-intre/core/src/test/java/org/apache/oozie/coord/TestHCatELFunctions.java?rev=1443738&r1=1443737&r2=1443738&view=diff
==============================================================================
--- oozie/branches/hcat-intre/core/src/test/java/org/apache/oozie/coord/TestHCatELFunctions.java (original)
+++ oozie/branches/hcat-intre/core/src/test/java/org/apache/oozie/coord/TestHCatELFunctions.java Thu Feb  7 21:38:21 2013
@@ -67,7 +67,7 @@ public class TestHCatELFunctions extends
         dropDatabase("db1", true);
         createDatabase("db1");
         createTable("db1", "table1", "year,month,dt,country");
-        addPartition("db1", "table1", "year=2012;month=12;dt=02;country=us");;
+        addPartition("db1", "table1", "year=2012;month=12;dt=02;country=us");
 
         Configuration protoConf = new Configuration();
         protoConf.set(OozieClient.USER_NAME, getTestUser());
@@ -80,10 +80,9 @@ public class TestHCatELFunctions extends
         conf.set("partition1", getHCatURI("db1", "table1", "dt=02").toString());
         conf.set("partition2", getHCatURI("db1", "table1", "dt=05").toString());
 
-        LiteWorkflowApp def =
-                new LiteWorkflowApp("name", "<workflow-app/>",
-                                    new StartNodeDef(LiteWorkflowStoreService.LiteControlNodeHandler.class, "end")).
-                    addNode(new EndNodeDef("end", LiteWorkflowStoreService.LiteControlNodeHandler.class));
+        LiteWorkflowApp def = new LiteWorkflowApp("name", "<workflow-app/>", new StartNodeDef(
+                LiteWorkflowStoreService.LiteControlNodeHandler.class, "end")).addNode(new EndNodeDef("end",
+                LiteWorkflowStoreService.LiteControlNodeHandler.class));
         LiteWorkflowInstance job = new LiteWorkflowInstance(def, conf, "wfId");
 
         WorkflowJobBean wf = new WorkflowJobBean();
@@ -111,84 +110,114 @@ public class TestHCatELFunctions extends
     }
 
     /**
-     * Test HCat database EL function (phase 1) which echo back the EL function
-     * itself
+     * Test HCat databaseIn and databaseOut EL functions (phase 1) which echo
+     * back the EL function itself
      *
      * @throws Exception
      */
     @Test
     public void testDatabasePh1() throws Exception {
         init("coord-job-submit-data");
-        String expr = "${coord:database('ABC', 'input')}";
+        /*
+         * databaseIn
+         */
+        String expr = "${coord:databaseIn('ABC')}";
         // +ve test
         eval.setVariable("oozie.dataname.ABC", "data-in");
-        assertEquals("${coord:database('ABC', 'input')}", CoordELFunctions.evalAndWrap(eval, expr));
+        assertEquals("${coord:databaseIn('ABC')}", CoordELFunctions.evalAndWrap(eval, expr));
         // -ve test
-        expr = "${coord:database('ABCD', 'input')}";
+        expr = "${coord:databaseIn('ABCD')}";
         try {
-            assertEquals("${coord:database('ABCD', 'input')}", CoordELFunctions.evalAndWrap(eval, expr));
-            fail("should throw exception beacuse Data in is not defiend");
+            assertEquals("${coord:databaseIn('ABCD')}", CoordELFunctions.evalAndWrap(eval, expr));
+            fail("should throw exception because Data-in ABCD is not defiend");
         }
         catch (Exception ex) {
         }
-        expr = "${coord:database('ABC', 'output')}";
+        /*
+         * databaseOut
+         */
+        expr = "${coord:databaseOut('ABC')}";
+        // +ve test
         eval.setVariable("oozie.dataname.ABC", "data-out");
-        assertEquals("${coord:database('ABC', 'output')}", CoordELFunctions.evalAndWrap(eval, expr));
+        assertEquals("${coord:databaseOut('ABC')}", CoordELFunctions.evalAndWrap(eval, expr));
+        // -ve test
+        expr = "${coord:databaseOut('ABCD')}";
+        try {
+            assertEquals("${coord:databaseOut('ABCD')}", CoordELFunctions.evalAndWrap(eval, expr));
+            fail("should throw exception because Data-out ABCD is not defiend");
+        }
+        catch (Exception ex) {
+        }
     }
 
     /**
-     * Test HCat table EL function (phase 1) which echo back the EL function
-     * itself
+     * Test HCat tableIn and tableOut EL functions (phase 1) which echo back the
+     * EL function itself
      *
      * @throws Exception
      */
     @Test
     public void testTablePh1() throws Exception {
         init("coord-job-submit-data");
-        String expr = "${coord:table('ABC', 'input')}";
+        /*
+         * tableIn
+         */
+        String expr = "${coord:tableIn('ABC')}";
         // +ve test
         eval.setVariable("oozie.dataname.ABC", "data-in");
-        assertEquals("${coord:table('ABC', 'input')}", CoordELFunctions.evalAndWrap(eval, expr));
+        assertEquals("${coord:tableIn('ABC')}", CoordELFunctions.evalAndWrap(eval, expr));
         // -ve test
-        expr = "${coord:table('ABCD', 'input')}";
+        expr = "${coord:tableIn('ABCD')}";
         try {
-            assertEquals("${coord:table('ABCD', 'input')}", CoordELFunctions.evalAndWrap(eval, expr));
-            fail("should throw exception beacuse Data in is not defiend");
+            assertEquals("${coord:tableIn('ABCD')}", CoordELFunctions.evalAndWrap(eval, expr));
+            fail("should throw exception because Data-in ABCD is not defiend");
         }
         catch (Exception ex) {
         }
-        expr = "${coord:table('ABC', 'output')}";
+        /*
+         * tableOut
+         */
+        expr = "${coord:tableOut('ABC')}";
         // +ve test
         eval.setVariable("oozie.dataname.ABC", "data-out");
-        assertEquals("${coord:table('ABC', 'output')}", CoordELFunctions.evalAndWrap(eval, expr));
+        assertEquals("${coord:tableOut('ABC')}", CoordELFunctions.evalAndWrap(eval, expr));
+        // -ve test
+        expr = "${coord:tableOut('ABCD')}";
+        try {
+            assertEquals("${coord:tableOut('ABCD')}", CoordELFunctions.evalAndWrap(eval, expr));
+            fail("should throw exception because Data-out ABCD is not defiend");
+        }
+        catch (Exception ex) {
+        }
     }
 
     /**
-     * Test HCat dataInPartitionPigFilter EL function (phase 1) which echo back the
-     * EL function itself
+     * Test HCat dataInPartitionPigFilter EL function (phase 1) which echo back
+     * the EL function itself
      *
      * @throws Exception
      */
     @Test
-    public void testdataInPartitionPigFilterPh1() throws Exception {
+    public void testdataInPartitionFilterPh1() throws Exception {
         init("coord-job-submit-data");
-        String expr = "${coord:dataInPartitionPigFilter('ABC')}";
+        String expr = "${coord:dataInPartitionFilter('ABC', 'pig')}";
         // +ve test
         eval.setVariable("oozie.dataname.ABC", "data-in");
-        assertEquals("${coord:dataInPartitionPigFilter('ABC')}", CoordELFunctions.evalAndWrap(eval, expr));
+        assertEquals("${coord:dataInPartitionFilter('ABC', 'pig')}", CoordELFunctions.evalAndWrap(eval, expr));
         // -ve test
-        expr = "${coord:dataInPartitionPigFilter('ABCD')}";
+        expr = "${coord:dataInPartitionFilter('ABCD')}";
+        eval.setVariable("oozie.dataname.ABCD", "data-in");
         try {
-            assertEquals("${coord:dataInPartitionPigFilter('ABCD')}", CoordELFunctions.evalAndWrap(eval, expr));
-            fail("should throw exception beacuse Data in is not defiend");
+            assertEquals("${coord:dataInPartitionFilter('ABCD')}", CoordELFunctions.evalAndWrap(eval, expr));
+            fail("should throw exception because dataInPartitionFilter() requires 2 parameters");
         }
         catch (Exception ex) {
         }
     }
 
     /**
-     * Test HCat dataInPartitionMin EL function (phase 1) which echo back the
-     * EL function itself
+     * Test HCat dataInPartitionMin EL function (phase 1) which echo back the EL
+     * function itself
      *
      * @throws Exception
      */
@@ -211,8 +240,8 @@ public class TestHCatELFunctions extends
     }
 
     /**
-     * Test HCat dataInPartitionMax EL function (phase 1) which echo back the
-     * EL function itself
+     * Test HCat dataInPartitionMax EL function (phase 1) which echo back the EL
+     * function itself
      *
      * @throws Exception
      */
@@ -235,8 +264,8 @@ public class TestHCatELFunctions extends
     }
 
     /**
-     * Test HCat dataOutPartition EL function (phase 1) which echo back the
-     * EL function itself
+     * Test HCat dataOutPartition EL function (phase 1) which echo back the EL
+     * function itself
      *
      * @throws Exception
      */
@@ -251,7 +280,7 @@ public class TestHCatELFunctions extends
         expr = "${coord:dataOutPartitions('ABCD')}";
         try {
             assertEquals("${coord:dataOutPartitions('ABCD')}", CoordELFunctions.evalAndWrap(eval, expr));
-            fail("should throw exception beacuse Data in is not defiend");
+            fail("should throw exception because Data-in is not defiend");
         }
         catch (Exception ex) {
         }
@@ -282,7 +311,8 @@ public class TestHCatELFunctions extends
     }
 
     /**
-     * Test database EL function (phase 3) which returns the DB name from URI
+     * Test databaseIn and databaseOut EL functions (phase 3) which returns the
+     * DB name from URI
      *
      * @throws Exception
      */
@@ -291,17 +321,17 @@ public class TestHCatELFunctions extends
         init("coord-action-start", "hcat://hcat.server.com:5080/mydb/clicks/datastamp=12;region=us");
         eval.setVariable(".datain.ABC", "hcat://hcat.server.com:5080/mydb/clicks/datastamp=12;region=us");
         eval.setVariable(".datain.ABC.unresolved", Boolean.FALSE);
-        String expr = "${coord:database('ABC', 'input')}";
+        String expr = "${coord:databaseIn('ABC')}";
         assertEquals("mydb", CoordELFunctions.evalAndWrap(eval, expr));
 
         eval.setVariable(".dataout.ABC", "hcat://hcat.server.com:5080/mydb/clicks/datastamp=12;region=us");
         eval.setVariable(".dataout.ABC.unresolved", Boolean.FALSE);
-        expr = "${coord:database('ABC', 'output')}";
+        expr = "${coord:databaseOut('ABC')}";
         assertEquals("mydb", CoordELFunctions.evalAndWrap(eval, expr));
     }
 
     /**
-     * Test HCat table EL function (phase 3) which returns the HCat table from
+     * Test HCat tableIn and tableOut EL functions (phase 3) which returns the HCat table from
      * URI
      *
      * @throws Exception
@@ -311,12 +341,12 @@ public class TestHCatELFunctions extends
         init("coord-action-start", "hcat://hcat.server.com:5080/mydb/clicks/datastamp=12;region=us");
         eval.setVariable(".datain.ABC", "hcat://hcat.server.com:5080/mydb/clicks/datastamp=12;region=us");
         eval.setVariable(".datain.ABC.unresolved", Boolean.FALSE);
-        String expr = "${coord:table('ABC', 'input')}";
+        String expr = "${coord:tableIn('ABC')}";
         assertEquals("clicks", CoordELFunctions.evalAndWrap(eval, expr));
 
         eval.setVariable(".dataout.ABC", "hcat://hcat.server.com:5080/mydb/clicks/datastamp=12;region=us");
         eval.setVariable(".dataout.ABC.unresolved", Boolean.FALSE);
-        expr = "${coord:table('ABC', 'output')}";
+        expr = "${coord:tableOut('ABC')}";
         assertEquals("clicks", CoordELFunctions.evalAndWrap(eval, expr));
     }
 
@@ -327,27 +357,39 @@ public class TestHCatELFunctions extends
      * @throws Exception
      */
     @Test
-    public void testdataInPartitionPigFilter() throws Exception {
+    public void testdataInPartitionFilter() throws Exception {
         init("coord-action-start");
         eval.setVariable(".datain.ABC", "hcat://hcat.server.com:5080/mydb/clicks/datastamp=12;region=us");
         eval.setVariable(".datain.ABC.unresolved", Boolean.FALSE);
-        String expr = "${coord:dataInPartitionPigFilter('ABC')}";
+        /*
+         * type=pig
+         */
+        String expr = "${coord:dataInPartitionFilter('ABC', 'pig')}";
         String res = CoordELFunctions.evalAndWrap(eval, expr);
         assertTrue(res.equals("(datastamp=='12' AND region=='us')") || res.equals("(region=='us' AND datastamp=='12')"));
 
         eval.setVariable(".datain.ABC", "hcat://hcat.server.com:5080/mydb/clicks/datastamp=12;region=us,"
                 + "hcat://hcat.server.com:5080/mydb/clicks/datastamp=13;region=us");
         eval.setVariable(".datain.ABC.unresolved", Boolean.FALSE);
-        expr = "${coord:dataInPartitionPigFilter('ABC')}";
+        expr = "${coord:dataInPartitionFilter('ABC', 'pig')}";
         res = CoordELFunctions.evalAndWrap(eval, expr);
         assertTrue(res.equals("(datastamp=='12' AND region=='us') OR (datastamp=='13' AND region=='us')")
                 || res.equals("(datastamp=='12' AND region=='us') OR (region=='us' AND datastamp=='13')")
                 || res.equals("(region=='us' AND datastamp=='12') OR (datastamp=='13' AND region=='us')")
                 || res.equals("(region=='us' AND datastamp=='12') OR (region=='us' AND datastamp=='13')"));
+
+        /*
+         * type=java
+         */
+        eval.setVariable(".datain.ABC", "hcat://hcat.server.com:5080/mydb/clicks/datastamp=12;region=us");
+        eval.setVariable(".datain.ABC.unresolved", Boolean.FALSE);
+        expr = "${coord:dataInPartitionFilter('ABC', 'java')}";
+        res = CoordELFunctions.evalAndWrap(eval, expr);
+        assertTrue(res.equals("(datastamp='12' AND region='us')") || res.equals("(region='us' AND datastamp='12')"));
     }
 
     /**
-     * Test dataOutPartition EL function (phase 3) which returns the partition
+     * Test dataOutPartitionsPig EL function (phase 3) which returns the partition
      * to be used as output to store data into
      *
      * @throws Exception
@@ -359,8 +401,7 @@ public class TestHCatELFunctions extends
         eval.setVariable(".dataout.ABC", "hcat://hcat.server.com:5080/mydb/clicks/datastamp=20120230;region=us");
         eval.setVariable(".dataout.ABC.unresolved", Boolean.FALSE);
         String res = CoordELFunctions.evalAndWrap(eval, expr);
-        assertTrue(res.equals("'datastamp=20120230,region=us'")
-                || res.equals("'region=us,datastamp=20120230'"));
+        assertTrue(res.equals("'datastamp=20120230,region=us'") || res.equals("'region=us,datastamp=20120230'"));
     }
 
     /**
@@ -428,8 +469,8 @@ public class TestHCatELFunctions extends
 
     private void init(String tag, String uriTemplate) throws Exception {
         eval = Services.get().get(ELService.class).createEvaluator(tag);
-        eval.setVariable(OozieClient.USER_NAME, "test_user");
-        eval.setVariable(OozieClient.GROUP_NAME, "test_group");
+        eval.setVariable(OozieClient.USER_NAME, getTestUser());
+        eval.setVariable(OozieClient.GROUP_NAME, getTestGroup());
         appInst = new SyncCoordAction();
         ds = new SyncCoordDataset();
         ds.setFrequency(1);

Modified: oozie/branches/hcat-intre/core/src/test/java/org/apache/oozie/util/TestHCatURI.java
URL: http://svn.apache.org/viewvc/oozie/branches/hcat-intre/core/src/test/java/org/apache/oozie/util/TestHCatURI.java?rev=1443738&r1=1443737&r2=1443738&view=diff
==============================================================================
--- oozie/branches/hcat-intre/core/src/test/java/org/apache/oozie/util/TestHCatURI.java (original)
+++ oozie/branches/hcat-intre/core/src/test/java/org/apache/oozie/util/TestHCatURI.java Thu Feb  7 21:38:21 2013
@@ -145,12 +145,12 @@ public class TestHCatURI {
         String hcatURI = "hcat://hcat.server.com:5080/mydb/clicks/datastamp=20120230;region=us";
         String filter = "";
         try {
-            filter = new HCatURI(hcatURI).toFilter();
+            filter = new HCatURI(hcatURI).toPartitionFilter("java");
         }
         catch (URISyntaxException e) {
             fail(e.getMessage());
         }
-        assertTrue(filter.equals("datastamp='20120230' AND region='us'")
-                || filter.equals("region='us' AND datastamp='20120230'"));
+        assertTrue(filter.equals("(datastamp='20120230' AND region='us')")
+                || filter.equals("(region='us' AND datastamp='20120230')"));
     }
 }

Modified: oozie/branches/hcat-intre/docs/src/site/twiki/CoordinatorFunctionalSpec.twiki
URL: http://svn.apache.org/viewvc/oozie/branches/hcat-intre/docs/src/site/twiki/CoordinatorFunctionalSpec.twiki?rev=1443738&r1=1443737&r2=1443738&view=diff
==============================================================================
--- oozie/branches/hcat-intre/docs/src/site/twiki/CoordinatorFunctionalSpec.twiki (original)
+++ oozie/branches/hcat-intre/docs/src/site/twiki/CoordinatorFunctionalSpec.twiki Thu Feb  7 21:38:21 2013
@@ -100,19 +100,13 @@ This document defines the functional spe
 *Nominal time:* The nominal time specifies the time when something should happen. In theory the nominal time and the actual time should match, however, in practice due to delays the actual time may occur later than the nominal time.
 
 *Dataset:* Collection of data referred to by a logical name. A dataset normally has several instances of data and each
-one of them can be referred individually. Each dataset instance is represented by a unique set of URIs. Each URI could
-be a hdfs path URI denoting the hdfs directory: hdfs://foo:8020/usr/logs/20090415 or a HCatalog partition URI
-identifying a set of table partitions: hcat://bar:8020/logsDB/logsTable/dt=20090415;region=US. HCatalog enables table
-and storage management for PIG and Hive. Note that the HCatalog metastore server would be the same as Hive metastore
-server for users just using Hive together with Hive Metastore Server and no PIG.
-The format to specify a hcatalog table partition URI is
-hcat://[metastore server]:[port]/[database name]/[table name]/[partkey1]=[value];[partkey2]=[value].
+one of them can be referred individually. Each dataset instance is represented by a unique set of URIs.
 
 *Synchronous Dataset:* Synchronous datasets instances are generated at fixed time intervals and there is a dataset
 instance associated with each time interval. Synchronous dataset instances are identified by their nominal time.
-For example, in the case of a file system based dataset, the nominal time would be somewhere in the file path of the
-dataset instance: hdfs://foo:8020/usr/logs/2009/04/15/23/30. In the case of hcatalog table partitions, the nominal time
-would be part of some partition value: hcat://bar:8020/mydb/mytable/year=2009;month=04;dt=15;region=us.
+For example, in the case of a HDFS based dataset, the nominal time would be somewhere in the file path of the
+dataset instance: hdfs://foo:8020/usr/logs/2009/04/15/23/30. In the case of HCatalog table partitions, the nominal time
+would be part of some partition values: hcat://bar:8020/mydb/mytable/year=2009;month=04;dt=15;region=us.
 
 *Coordinator Action:* A coordinator action is a workflow job that is started when a set of conditions are met (input dataset instances are available).
 
@@ -587,11 +581,29 @@ The dataset resolves to the following UR
   ...
 </verbatim>
 
+---+++ 5.2. Dataset URI-Template types
 
----+++ 5.2. Asynchronous Datasets
+Each dataset URI could be a HDFS path URI denoting a HDFS directory: hdfs://foo:8020/usr/logs/20090415 or a
+HCatalog partition URI identifying a set of table partitions: hcat://bar:8020/logsDB/logsTable/dt=20090415;region=US.
+
+HCatalog enables table and storage management for Pig, Hive and MapReduce. The format to specify a HCatalog table partition URI is
+hcat://[metastore server]:[port]/[database name]/[table name]/[partkey1]=[value];[partkey2]=[value];...
+
+For example,
+<verbatim>
+  <dataset name="logs" frequency="${coord:days(1)}"
+           initial-instance="2009-02-15T08:15Z" timezone="America/Los_Angeles">
+    <uri-template>
+      hcat://myhcatmetastore:9080/database1/table1/myfirstpartitionkey=myfirstvalue;mysecondpartitionkey=mysecondvalue
+    </uri-template>
+    <done-flag></done-flag>
+  </dataset>
+</verbatim>
+
+---+++ 5.3. Asynchronous Datasets
    * TBD
 
----+++ 5.3. Dataset Definitions
+---+++ 5.4. Dataset Definitions
 
 Dataset definitions are grouped in XML files.
 *IMPORTANT:* Please note that if an XML namespace version is specified for the coordinator-app element in the coordinator.xml file, no namespace needs to be defined separately for the datasets element (even if the dataset is defined in a separate file). Specifying it at multiple places might result in xml errors while submitting the coordinator job.
@@ -2003,7 +2015,7 @@ The =${coord:dataIn(String name)}= EL fu
 
 The =${coord:dataIn(String name)}= is commonly used to pass the URIs of dataset instances that will be consumed by a workflow job triggered by a coordinator action.
 
-*%GREEN% Example: %ENDCOLOR%*
+*%GREEN% Example: %ENDCOLOR%*:
 
 Coordinator application definition:
 
@@ -2061,18 +2073,18 @@ The =${coord:dataOut(String name)}= EL f
 
 The =${coord:dataOut(String name)}= is commonly used to pass the URIs of a dataset instance that will be produced by a workflow job triggered by a coordinator action.
 
-*%GREEN% Example: %ENDCOLOR%*
+*%GREEN% Example: %ENDCOLOR%*:
 
 Datasets Definition file 'datasets.xml'
 
 <verbatim>
 <datasets>
-
+.
   <dataset name="hourlyLogs" frequency="${coord:hours(1)}"
            initial-instance="2009-01-01T01:00Z" timezone="UTC">
     <uri-template>hdfs://bar:8020/app/logs/${YEAR}/${MONTH}/${DAY}/${HOUR}</uri-template>
   </dataset>
-
+.
   <dataset name="dailyLogs" frequency="${coord:days(1)}"
            initial-instance="2009-01-01T24:00Z" timezone="UTC">
     <uri-template>hdfs://bar:8020/app/daily-logs/${YEAR}/${MONTH}/${DAY}</uri-template>
@@ -2138,7 +2150,7 @@ The nominal times is always the coordina
 
 This is, when the coordinator action was created based on driver event. For synchronous coordinator applications this would be every tick of the frequency.
 
-*%GREEN% Example: %ENDCOLOR%*
+*%GREEN% Example: %ENDCOLOR%*:
 
 Coordinator application definition:
 
@@ -2194,7 +2206,7 @@ When the coordinator action is created b
 actual time is less than the nominal time if coordinator job is in running in current mode. If job is running
 as catch-up mode (job's start time is in the past), the actual time is greater than the nominal time.
 
-*%GREEN% Example: %ENDCOLOR%*
+*%GREEN% Example: %ENDCOLOR%*:
 
 Coordinator application definition:
 
@@ -2241,45 +2253,61 @@ If coordinator job was started at 2011-0
 
 The =coord:user()= function returns the user that started the coordinator job.
 
----+++ 6.8 Parameterization of HCatalog data instances in Coordinator Actions (since Oozie 4.x)
+---+++ 6.8 Using HCatalog data instances in Coordinator Applications (since Oozie 4.x)
 
-This section describes the different EL functions that work with HCatalog data dependencies, in order to parameterize them for the coordinator actions running workflows.
+This section describes the different EL functions that work with HCatalog data dependencies, in order to write
+Coordinator applications that use HCatalog data dependencies.
 
----++++ 6.8.1 coord:database(String name, String type) EL function
+---++++ 6.8.1 coord:databaseIn(String name), coord:databaseOut(String name) EL function
 
-The =${coord:database(String name, String type)}= is used to pass the database name of HCat dataset instances that will be consumed by a workflow job triggered by a coordinator action.
+The functions =${coord:databaseIn(String name)}= and =${coord:databaseOut(String name)}= are used to pass the database
+name of HCat dataset instances, input and output respectively, that will be consumed by a workflow job triggered
+by a coordinator action.
 
-The =${coord:database(String name, String type)}= EL function takes two arguments - name of the dataset, and type of event ('input' or 'output').
-It gives as string the 'database' name from the dataset passed as argument. If the dataset is from input-events, the second argument should be
-'input' and similarly if from output-events, it should be 'output'.
+These functions take as argument - name of the dataset and give as string the 'database' name of that dataset.
+If dataset belongs to 'input-events', use =${coord:databaseIn(String name)}=, else if it belongs to 'output-events',
+use =${coord:databaseOut(String name)}=.
 
 Refer to the [[CoordinatorFunctionalSpec#HCatPigExampleOne][Example]] below for usage.
 
----++++ 6.8.2 coord:table(String name, String type) EL function
-
-The =${coord:table(String name, String type)}= is used to pass the table name of HCat dataset instances that will be consumed by a workflow job triggered by a coordinator action.
+---++++ 6.8.2 coord:tableIn(String name), coord:tableOut(String name) EL function
 
-The =${coord:table(String name, String type)}= EL function takes two arguments - name of the dataset, and type of event ('input' or 'output').
-It gives as string the 'table' name from the dataset passed as argument. If the dataset belongs to input-events, the second argument should be
-'input' and similarly if belonging to output-events, it should be 'output'.
+The functions =${coord:tableIn(String name)}= and =${coord:tableOut(String name)}= are used to pass the table
+name of HCat dataset instances, input and output respectively, that will be consumed by a workflow job triggered
+by a coordinator action.
+
+These functions take as argument - name of the dataset and give as string the 'table' name of that dataset.
+If dataset belongs to 'input-events', use =${coord:tableIn(String name)}=, else if it belongs to 'output-events',
+use =${coord:tableOut(String name)}=.
 
 Refer to the [[CoordinatorFunctionalSpec#HCatPigExampleOne][Example]] below for usage.
 
----++++ 6.8.3 coord:dataInPartitionPigFilter(String name) EL function
+---++++ 6.8.3 coord:dataInPartitionFilter(String name, String type) EL function
 
-The =${coord:dataInPartitionPigFilter(String name)}= EL function resolves to a filter clause in pig scripts to filter all the partitions corresponding to the dataset instances specified in an input event dataset section.
-The filter clause from the EL function is to be passed as a parameter to the Pig action in a workflow which would be triggered by the coordinator action.
-
-In the filter clause string given by ${coord:dataInPartitionPigFilter()}, there is double "==" between key and value - specific to how Pig job accepts partition filter to be given to HCatLoader.
-Therefore this EL function is named specific to the Pig case.
+The =${coord:dataInPartitionFilter(String name, String type)}= EL function resolves to a filter clause to filter
+all the partitions corresponding to the dataset instances specified in an input event dataset section. This EL function
+takes two arguments - the name of the input dataset, and the type of the workflow action which will be consuming this filter.
+There are 3 types - 'pig', 'hive' and 'java'. This filter clause from the EL function is to be passed as a parameter in the
+respective action in the workflow.
+
+The evaluated value of the filter clause will vary based on the action type passed to the EL function. In case of pig,
+the filter will have "==" as the equality operator in the condition. In case of hive and java, the filter will have "="
+as the equality operator in the condition. The type java is for java actions, which use HCatInputFormat directly and
+launch jobs. The filter clause in that case can be used to construct the InputJobInfo in
+=HCatInputFormat.setInput(Job job, InputJobInfo inputJobInfo)=.
 
 Refer to the [[CoordinatorFunctionalSpec#HCatPigExampleOne][Example]] below for usage.
 
 ---++++ 6.8.4 coord:dataOutPartitions(String name) EL function
 
-The =${coord:dataOutPartitions(String name)}= EL function resolves to a comma-separated list of partition key-value pairs for the output-event dataset. This can be passed as an argument to HCatStorer in pig scripts.
-
-The example below illustrates a pig job triggered by a coordinator, using the EL functions for HCat database, table, input partitions pig filter and output partitions. The example takes as input previous day's hourly data to produce aggregated daily output.
+The =${coord:dataOutPartitions(String name)}= EL function resolves to a comma-separated list of partition key-value
+pairs for the output-event dataset. This can be passed as an argument to HCatStorer in Pig scripts or in case of
+java actions that directly use HCatOutputFormat and launch jobs, the partitions list can be parsed to construct
+partition values map for OutputJobInfo in =HcatOutputFormat.setOutput(Job job, OutputJobInfo outputJobInfo)=.
+
+The example below illustrates a pig job triggered by a coordinator, using the EL functions for HCat database, table,
+input partitions filter and output partitions. The example takes as input previous day's hourly data to produce
+aggregated daily output.
 
 
 *%GREEN% Example: %ENDCOLOR%*
@@ -2322,23 +2350,23 @@ The example below illustrates a pig job 
           <configuration>
             <property>
               <name>IN_DB</name>
-              <value>${coord:database('raw-logs', 'input')}</value>
+              <value>${coord:databaseIn('raw-logs')}</value>
             </property>
             <property>
               <name>IN_TABLE</name>
-              <value>${coord:table('raw-logs', 'input')}</value>
+              <value>${coord:tableIn('raw-logs')}</value>
             </property>
             <property>
               <name>FILTER</name>
-              <value>${coord:dataInPartitionPigFilter('raw-logs')}</value>
+              <value>${coord:dataInPartitionFilter('raw-logs', 'pig')}</value>
             </property>
             <property>
               <name>OUT_DB</name>
-              <value>${coord:database('processed-logs', 'output')}</value>
+              <value>${coord:databaseOut('processed-logs')}</value>
             </property>
             <property>
               <name>OUT_TABLE</name>
-              <value>${coord:table('processed-logs', 'output')}</value>
+              <value>${coord:tableOut('processed-logs')}</value>
             </property>
             <property>
               <name>OUT_PARTITIONS</name>
@@ -2351,25 +2379,41 @@ The example below illustrates a pig job 
 </blockquote>
 
 
-Parameterizing the input/output databases and tables using the corresponding EL function as shown will make them available in the pig action of the workflow 'logsprocessor-wf'.
+Parameterizing the input/output databases and tables using the corresponding EL function as shown will make them
+available in the pig action of the workflow 'logsprocessor-wf'.
 
-Each coordinator action will use as input events the last 24 hourly instances of the 'Click-data' dataset.The =${coord:dataInPartitionPigFilter(String name)}= function enables the coordinator application
-to pass the Partition Filter corresponding to all the dataset instances for the last 24 hours to the workflow job triggered by the coordinator action.
-The =${coord:dataOutPartitions(String name)}= function enables the coordinator application to pass the partition key-value string needed by the *HCatStorer* in Pig job when the workflow is triggered by the coordinator action.
+Each coordinator action will use as input events the last 24 hourly instances of the 'Click-data' dataset.
+The =${coord:dataInPartitionFilter(String name, String type)}= function enables the coordinator application to pass the
+Partition Filter corresponding to all the dataset instances for the last 24 hours to the workflow job triggered
+by the coordinator action. The =${coord:dataOutPartitions(String name)}= function enables the coordinator application
+to pass the partition key-value string needed by the *HCatStorer* in Pig job when the workflow is triggered by the
+coordinator action.
 
 #HCatWorkflow
 ---++++ Workflow definition:
 
 <blockquote>
 <workflow-app xmlns="uri:oozie:workflow:0.3" name="logsprocessor-wf">
+    <credentials>
+      <credential name='hcatauth' type='hcat'>
+        <property>
+          <name>hcat.metastore.uri</name>
+          <value>${HCAT_URI}</value>
+        </property>
+        <property>
+          <name>hcat.metastore.principal</name>
+          <value>${HCAT_PRINCIPAL}</value>
+        </property>
+      </credential>
+    </credentials>
     <start to="pig-node"/>
-    <action name="pig-node">
+    <action name="pig-node" cred="hcatauth">
         <pig>
             <job-tracker>${jobTracker}</job-tracker>
             <name-node>${nameNode}</name-node>
             <prepare>
-                <delete path="hcat://foo:11002/${OUT_DB}/${OUT_TABLE}/date=${OUT_PARTITION_VAL_DATE}"/>  
-            </prepare> 
+                <delete path="hcat://foo:11002/${OUT_DB}/${OUT_TABLE}/date=${OUT_PARTITION_VAL_DATE}"/>
+            </prepare>
             ...
             <script>id.pig</script>
 		    <param>HCAT_IN_DB=${IN_DB}</param>
@@ -2390,10 +2434,10 @@ The =${coord:dataOutPartitions(String na
 </workflow-app>
 </blockquote>
 
-   Ensure that the lib directory of the workflow contains the following jars with versions corresponding to
-hcatalog installation - hcatalog.jar, webhcat-java-client.jar, hive-common.jar, hive-exec.jar, hive-metastore.jar,
-hive-serde.jar, libfb303.jar, pig.jar. You can also specify the jars using =archive= tag. The jars are required
-to work with hcatalog and pig. The hive-site.xml needs to be provided using =file= tag.
+Ensure that the following jars are in classpath, with versions corresponding to hcatalog installation:
+hcatalog-core.jar, webhcat-java-client.jar, hive-common.jar, hive-exec.jar, hive-metastore.jar, hive-serde.jar,
+libfb303.jar and pig.jar. You can also specify the jars using =archive= tag. The hive-site.xml needs to be
+provided using =file= tag.
 
 *Example usage in Pig:*
 
@@ -2404,7 +2448,8 @@ C = foreach B generate foo, bar;
 store C into '$HCAT_OUT_DB.$HCAT_OUT_TABLE' using org.apache.hcatalog.pig.HCatStorer('$OUTPUT_PARTITIONS');
 </blockquote>
 
-For the =2009-01-02T00:00Z= run with the given dataset instances, the above Pig script with resolved values would look like:
+For the =2009-01-02T00:00Z= run with the given dataset instances, the above Pig script with resolved values would look
+like:
 
 <blockquote>
 A = load 'myInputDatabase.myInputTable' using org.apache.hcatalog.pig.HCatLoader();
@@ -2419,25 +2464,32 @@ store C into 'myOutputDatabase.myOutputT
 
 ---++++ 6.8.5 coord:dataInPartitionMin(String name, String partition) EL function
 
-The =${coord:dataInPartitionMin(String name, String partition)}= EL function resolves to the *minimum* value of the specified partition for all the dataset instances specified in an input event dataset section.
-It can be used to do range based filtering of partitions in pig scripts together with [[CoordinatorFunctionalSpec#DataInPartitionMax][dataInPartitionMax]] EL function.
+The =${coord:dataInPartitionMin(String name, String partition)}= EL function resolves to the *minimum* value of the
+specified partition for all the dataset instances specified in an input event dataset section. It can be used to do
+range based filtering of partitions in pig scripts together
+with [[CoordinatorFunctionalSpec#DataInPartitionMax][dataInPartitionMax]] EL function.
 
 Refer to the [[CoordinatorFunctionalSpec#HCatPigExampleTwo][Example]] below for usage.
 
 #DataInPartitionMax
 ---++++ 6.8.6 coord:dataInPartitionMax(String name, String partition) EL function
 
-The =${coord:dataInPartitionMax(String name, String partition)}= EL function resolves to the *maximum* value of the specified partition for all the dataset instances specified in an input event dataset section.
-It is a better practice to use =dataInPartitionMin= and =dataInPartitionMax= to form a range filter wherever possible instead of =datainPartitionPigFilter= as it will be more efficient for filtering.
+The =${coord:dataInPartitionMax(String name, String partition)}= EL function resolves to the *maximum* value of the
+specified partition for all the dataset instances specified in an input event dataset section. It is a better practice
+to use =dataInPartitionMin= and =dataInPartitionMax= to form a range filter wherever possible instead
+of =dataInPartitionFilter= as it will be more efficient for filtering.
 
 Refer to the [[CoordinatorFunctionalSpec#HCatPigExampleTwo][Example]] below for usage.
 
 ---++++ 6.8.7 coord:dataOutPartitionValue(String name, String partition) EL function
 
-The =${coord:dataOutPartitionValue(String name, String partition)}= EL function resolves to value of the specified partition for the output-event dataset; that will be consumed by a workflow job, e.g Pig job triggered by a coordinator action.
-This is another convenience function to use a single partition-key's value if required, in addition to dataoutPartitions and either one can be used.
+The =${coord:dataOutPartitionValue(String name, String partition)}= EL function resolves to value of the specified
+partition for the output-event dataset; that will be consumed by a workflow job, e.g Pig job triggered by a
+coordinator action. This is another convenience function to use a single partition-key's value if required, in
+addition to =dataOutPartitions()= and either one can be used.
 
-The example below illustrates a pig job triggered by a coordinator, using the aforementioned EL functions for input partition max/min values, output partition value, and database and table.
+The example below illustrates a pig job triggered by a coordinator, using the aforementioned EL functions for input
+partition max/min values, output partition value, and database and table.
 
 *%GREEN% Example: %ENDCOLOR%*
 
@@ -2479,11 +2531,11 @@ The example below illustrates a pig job 
           <configuration>
             <property>
               <name>IN_DB</name>
-              <value>${coord:database('raw-logs', 'input')}</value>
+              <value>${coord:databaseIn('raw-logs')}</value>
             </property>
             <property>
               <name>IN_TABLE</name>
-              <value>${coord:table('raw-logs', 'input')}</value>
+              <value>${coord:tableIn('raw-logs')}</value>
             </property>
             <property>
               <name>DATE_MIN</name>
@@ -2495,11 +2547,11 @@ The example below illustrates a pig job 
             </property>
             <property>
               <name>OUT_DB</name>
-              <value>${coord:database('processed-logs', 'output')}</value>
+              <value>${coord:databaseOut('processed-logs')}</value>
             </property>
             <property>
               <name>OUT_TABLE</name>
-              <value>${coord:table('processed-logs', 'output')}</value>
+              <value>${coord:tableOut('processed-logs')}</value>
             </property>
             <property>
               <name>OUT_PARTITION_VAL_REGION</name>
@@ -2517,16 +2569,21 @@ The example below illustrates a pig job 
 
 In this example, each coordinator action will use as input events the last 24 hourly instances of the 'logs' dataset.
 
-For the =2009-01-02T00:00Z= run, the =${coord:dataInPartitionMin('raw-logs','datestamp')}= function will resolve to the minimum of the 5 dataset instances for partition 'datestamp'
-i.e. among 2009010101, 2009010102, ...., 2009010123, 2009010200, the minimum would be "2009010101".
+For the =2009-01-02T00:00Z= run, the =${coord:dataInPartitionMin('raw-logs','datestamp')}= function will resolve to
+the minimum of the 5 dataset instances for partition 'datestamp'
+i.e. among 2009010101, 2009010102, ...., 2009010123, 2009010200, the minimum would be "2009010101".
 
-Similarly, the =${coord:dataInPartitionMax('raw-logs','datestamp')}= function will resolve to the maximum of the 5 dataset instances for partition 'datestamp'
+Similarly, the =${coord:dataInPartitionMax('raw-logs','datestamp')}= function will resolve to the maximum of the 5
+dataset instances for partition 'datestamp'
 i.e. among 2009010120, 2009010121, ...., 2009010123, 2009010200, the maximum would be "2009010200".
 
-Finally, the =${coord:dataOutPartitionValue(String name, String partition)}= function enables the coordinator application to pass a specified partition's value string needed by the HCatStorer in Pig job.
-The =${coord:dataOutPartitionValue('processed-logs','region')}= function will resolve to: "${region}" and =${coord:dataOutPartitionValue('processed-logs','datestamp')}= function will resolve to: "20090102".
+Finally, the =${coord:dataOutPartitionValue(String name, String partition)}= function enables the coordinator
+application to pass a specified partition's value string needed by the HCatStorer in Pig job.
+The =${coord:dataOutPartitionValue('processed-logs','region')}= function will resolve to: "${region}"
+and =${coord:dataOutPartitionValue('processed-logs','datestamp')}= function will resolve to: "20090102".
 
-For the workflow definition with <pig> action, refer to [[CoordinatorFunctionalSpec#HCatWorkflow][previous example]], with the following change in pig params in addition to database and table.
+For the workflow definition with <pig> action, refer to [[CoordinatorFunctionalSpec#HCatWorkflow][previous example]],
+with the following change in pig params in addition to database and table.
 
 <blockquote>
 ...
@@ -2539,7 +2596,8 @@ For the workflow definition with <pig> a
 </blockquote>
 
 *Example usage in Pig:*
-This illustrates another pig script which filters partitions based on range, with range limits parameterized with the EL funtions
+This illustrates another pig script which filters partitions based on range, with range limits parameterized with the
+EL functions
 
 <blockquote>
 A = load '$HCAT_IN_DB.$HCAT_IN_TABLE' using org.apache.hcatalog.pig.HCatLoader();
@@ -2569,7 +2627,7 @@ The =${coord:dateOffset(String baseDate,
 
 For example, if baseDate is '2009-01-01T00:00Z', instance is '2' and timeUnit is 'MONTH', the return date will be '2009-03-01T00:00Z'. If baseDate is '2009-01-01T00:00Z', instance is '1' and timeUnit is 'YEAR', the return date will be '2010-01-01T00:00Z'.
 
-*%GREEN% Example: %ENDCOLOR%*
+*%GREEN% Example: %ENDCOLOR%*:
 
 
 <verbatim>
@@ -2615,7 +2673,7 @@ For timezones that don't observe day lig
 
 For these timezones, dataset and application definitions, it suffices to express datetimes taking into account the timezone offset.
 
-*%GREEN% Example: %ENDCOLOR%*
+*%GREEN% Example: %ENDCOLOR%*:
 
 Coordinator application definition: A daily coordinator job for India timezone (+05:30) that consumes 24 hourly dataset instances from the previous day starting at the beginning of 2009 for a full year.
 
@@ -2864,7 +2922,7 @@ The coordinator application definition H
 All the coordinator job properties, the HDFS path for the coordinator application, the 'user.name' and 'oozie.job.acl'
 must be submitted to the Oozie coordinator engine using an XML configuration file (Hadoop XML configuration file).
 
-*%GREEN% Example: %ENDCOLOR%*
+*%GREEN% Example: %ENDCOLOR%*:
 
 <verbatim>
 <?xml version="1.0" encoding="UTF-8"?>

Modified: oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/README
URL: http://svn.apache.org/viewvc/oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/README?rev=1443738&r1=1443737&r2=1443738&view=diff
==============================================================================
--- oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/README (original)
+++ oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/README Thu Feb  7 21:38:21 2013
@@ -5,14 +5,16 @@ Running the coordinator example with HCa
 3. Create 2 tables 'invites' (input) and 'oozie' (output) with this structure: "create table invites (foo INT, bar INT) partitioned by (ds STRING, region STRING)"
 4. Oozie distro should be built this way $> bin/mkdistro.sh -Dhcatalog.version=0.4.1 -DskipTests
 5. The 'libext' dir used by oozie-setup should contain the following list JARS
+   hcatalog-core.jar
+   webhcat-java-client.jar
    jackson-mapper-asl-1.8.8.jar
    jackson-core-asl-1.8.8.jar
-   hive-common-0.9.1.2.jar
-   hive-metastore-0.9.1.2.jar
-   hive-exec-0.9.1.2.jar
+   hive-common.jar
+   hive-metastore.jar
+   hive-exec.jar
+   hive-serde.jar
+   hive-shims.jar
    libfb303.jar
-   jdo2-api-2.3-ec.jar
-   datanucleus-core-2.0.3.jar
    (Note) hcatalog JARs will be automatically injected
 6. Upload this application directory to HDFS
 7. Run Oozie job using the job.properties. Coordinator actions will be in WAITING

Modified: oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/coordinator.xml
URL: http://svn.apache.org/viewvc/oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/coordinator.xml?rev=1443738&r1=1443737&r2=1443738&view=diff
==============================================================================
--- oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/coordinator.xml (original)
+++ oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/coordinator.xml Thu Feb  7 21:38:21 2013
@@ -45,27 +45,27 @@
             <configuration>
                 <property>
                     <name>DB</name>
-                    <value>${coord:database('input', 'input')}</value>
+                    <value>${coord:databaseIn('input')}</value>
                 </property>
                 <property>
                     <name>TABLE</name>
-                    <value>${coord:table('input', 'input')}</value>
+                    <value>${coord:tableIn('input')}</value>
                 </property>
                 <property>
                     <name>FILTER</name>
-                    <value>${coord:dataInPartitionFilter('input')}</value>
+                    <value>${coord:dataInPartitionFilter('input', 'pig')}</value>
                 </property>
                 <property>
                     <name>OUTPUT_PARTITION</name>
-                    <value>${coord:dataOutPartition('output')}</value>
+                    <value>${coord:dataOutPartitions('output')}</value>
                 </property>
                 <property>
                     <name>OUTPUT_DB</name>
-                    <value>${coord:database('output', 'output')}</value>
+                    <value>${coord:databaseOut('output')}</value>
                 </property>
                 <property>
                     <name>OUTPUT_TABLE</name>
-                    <value>${coord:table('output', 'output')}</value>
+                    <value>${coord:tableOut('output')}</value>
                 </property>
             </configuration>
         </workflow>