You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@oozie.apache.org by vi...@apache.org on 2012/06/29 04:13:32 UTC

svn commit: r1355205 - in /incubator/oozie/trunk: ./ core/src/main/java/org/apache/oozie/command/coord/ core/src/test/java/org/apache/oozie/command/coord/ core/src/test/resources/

Author: virag
Date: Fri Jun 29 02:13:29 2012
New Revision: 1355205

URL: http://svn.apache.org/viewvc?rev=1355205&view=rev
Log:
OOZIE-860 start-instance and end-instance should limit 1 (britt via virag)

Added:
    incubator/oozie/trunk/core/src/test/resources/coord-multiple-input-end-instance1.xml
    incubator/oozie/trunk/core/src/test/resources/coord-multiple-input-end-instance2.xml
    incubator/oozie/trunk/core/src/test/resources/coord-multiple-input-start-instance1.xml
    incubator/oozie/trunk/core/src/test/resources/coord-multiple-input-start-instance2.xml
Modified:
    incubator/oozie/trunk/core/src/main/java/org/apache/oozie/command/coord/CoordSubmitXCommand.java
    incubator/oozie/trunk/core/src/test/java/org/apache/oozie/command/coord/TestCoordSubmitXCommand.java
    incubator/oozie/trunk/release-log.txt

Modified: incubator/oozie/trunk/core/src/main/java/org/apache/oozie/command/coord/CoordSubmitXCommand.java
URL: http://svn.apache.org/viewvc/incubator/oozie/trunk/core/src/main/java/org/apache/oozie/command/coord/CoordSubmitXCommand.java?rev=1355205&r1=1355204&r2=1355205&view=diff
==============================================================================
--- incubator/oozie/trunk/core/src/main/java/org/apache/oozie/command/coord/CoordSubmitXCommand.java (original)
+++ incubator/oozie/trunk/core/src/main/java/org/apache/oozie/command/coord/CoordSubmitXCommand.java Fri Jun 29 02:13:29 2012
@@ -308,7 +308,7 @@ public class CoordSubmitXCommand extends
     }
 
   /*
-  * Check against multiple data instance values inside a single <instance> tag
+  * Check against multiple data instance values inside a single <instance>, <start-instance> or <end-instance> tag
   * If found, the job is not submitted and user is informed to correct the error, instead of defaulting to the first instance value in the list
   */
     private void checkMultipleTimeInstances(Element eCoordJob, String eventType, String dataType) throws CoordinatorJobException {
@@ -339,6 +339,47 @@ public class CoordSubmitXCommand extends
                         handleExpresionWithMultipleInstances(eventType, dataType, instanceValue);
                     }
                 }
+
+                // In case of input-events, there can be multiple child <start-instance> datasets. Iterating to ensure none of them have errors
+                instanceSpecList = dataSpec.getChildren("start-instance", ns);
+                instanceIter = instanceSpecList.iterator();
+                while(instanceIter.hasNext()) {
+                    instance = ((Element) instanceIter.next());
+                    if(instance.getContentSize() == 0) { //empty string or whitespace
+                        throw new CoordinatorJobException(ErrorCode.E1021, "<start-instance> tag within " + eventType + " is empty!");
+                    }
+                    instanceValue = instance.getContent(0).toString();
+                    boolean isInvalid = false;
+                    try {
+                        isInvalid = evalAction.checkForExistence(instanceValue, ",");
+                    } catch (Exception e) {
+                        handleELParseException(eventType, dataType, instanceValue);
+                    }
+                    if (isInvalid) { // reaching this block implies start instance is not empty i.e. length > 0
+                        handleExpresionWithStartMultipleInstances(eventType, dataType, instanceValue);
+                    }
+                }
+
+                // In case of input-events, there can be multiple child <end-instance> datasets. Iterating to ensure none of them have errors
+                instanceSpecList = dataSpec.getChildren("end-instance", ns);
+                instanceIter = instanceSpecList.iterator();
+                while(instanceIter.hasNext()) {
+                    instance = ((Element) instanceIter.next());
+                    if(instance.getContentSize() == 0) { //empty string or whitespace
+                        throw new CoordinatorJobException(ErrorCode.E1021, "<end-instance> tag within " + eventType + " is empty!");
+                    }
+                    instanceValue = instance.getContent(0).toString();
+                    boolean isInvalid = false;
+                    try {
+                        isInvalid = evalAction.checkForExistence(instanceValue, ",");
+                    } catch (Exception e) {
+                        handleELParseException(eventType, dataType, instanceValue);
+                    }
+                    if (isInvalid) { // reaching this block implies instance is not empty i.e. length > 0
+                        handleExpresionWithMultipleEndInstances(eventType, dataType, instanceValue);
+                    }
+                }
+
             }
         }
     }
@@ -367,6 +408,30 @@ public class CoordSubmitXCommand extends
                 + "' contains more than one date instance. Coordinator job NOT SUBMITTED. " + correctAction);
     }
 
+    /** Rejects a {@code <start-instance>} value that lists several instances: always throws E1021 with a corrective hint. */
+    private void handleExpresionWithStartMultipleInstances(String eventType, String dataType, String instanceValue) throws CoordinatorJobException {
+        String correctAction = null; // stays null (rendered as "null") when dataType matches neither constant below
+        if(dataType.equals(COORD_INPUT_EVENTS_DATA_IN)) {
+            correctAction = "Coordinator app definition should have separate <start-instance> tag per data-in start-instance";
+        } else if(dataType.equals(COORD_OUTPUT_EVENTS_DATA_OUT)) {
+            correctAction = "Coordinator app definition can have only one <start-instance> tag per data-out start-instance";
+        }
+        throw new CoordinatorJobException(ErrorCode.E1021, eventType + " start-instance '" + instanceValue
+                + "' contains more than one date start-instance. Coordinator job NOT SUBMITTED. " + correctAction);
+    }
+
+    /** Rejects an {@code <end-instance>} value that lists several instances: always throws E1021 with a corrective hint. */
+    private void handleExpresionWithMultipleEndInstances(String eventType, String dataType, String instanceValue) throws CoordinatorJobException {
+        String correctAction = null; // stays null (rendered as "null") when dataType matches neither constant below
+        if(dataType.equals(COORD_INPUT_EVENTS_DATA_IN)) {
+            correctAction = "Coordinator app definition should have separate <end-instance> tag per data-in end-instance";
+        } else if(dataType.equals(COORD_OUTPUT_EVENTS_DATA_OUT)) {
+            correctAction = "Coordinator app definition can have only one <end-instance> tag per data-out end-instance";
+        }
+        throw new CoordinatorJobException(ErrorCode.E1021, eventType + " end-instance '" + instanceValue
+                + "' contains more than one date end-instance. Coordinator job NOT SUBMITTED. " + correctAction);
+    }
+
     /**
      * Read the application XML and validate against coordinator Schema
      *

Modified: incubator/oozie/trunk/core/src/test/java/org/apache/oozie/command/coord/TestCoordSubmitXCommand.java
URL: http://svn.apache.org/viewvc/incubator/oozie/trunk/core/src/test/java/org/apache/oozie/command/coord/TestCoordSubmitXCommand.java?rev=1355205&r1=1355204&r2=1355205&view=diff
==============================================================================
--- incubator/oozie/trunk/core/src/test/java/org/apache/oozie/command/coord/TestCoordSubmitXCommand.java (original)
+++ incubator/oozie/trunk/core/src/test/java/org/apache/oozie/command/coord/TestCoordSubmitXCommand.java Fri Jun 29 02:13:29 2012
@@ -203,6 +203,90 @@ public class TestCoordSubmitXCommand ext
     }
 
     /**
+     * Testing for when user tries to submit a coordinator application having data-in events
+     * that erroneously specify multiple input data instances inside a single <start-instance> tag.
+     * Job gives submission error and indicates appropriate correction
+     * Testing both negative(error) as well as positive(success) cases
+     */
+    public void testBasicSubmitWithMultipleStartInstancesInputEvent() throws Exception {
+        Configuration conf = new XConfiguration();
+        String appPath = "file://" + getTestCaseDir() + File.separator + "coordinator.xml";
+
+        // CASE 1: Failure case i.e. multiple data-in start-instances
+        Reader reader = IOUtils.getResourceAsReader("coord-multiple-input-start-instance1.xml", -1);
+        Writer writer = new FileWriter(new URI(appPath).getPath());
+        IOUtils.copyCharStream(reader, writer);
+        conf.set(OozieClient.COORDINATOR_APP_PATH, appPath);
+        conf.set(OozieClient.USER_NAME, getTestUser());
+        CoordSubmitXCommand sc = new CoordSubmitXCommand(conf, "UNIT_TESTING");
+
+        try {
+            sc.call();
+            fail("Expected to catch errors due to incorrectly specified input data set start-instances");
+        }
+        catch (CommandException e) {
+            assertEquals(Job.Status.FAILED, sc.getJob().getStatus()); // expected value first, so failure output reads correctly
+            assertEquals(ErrorCode.E1021, e.getErrorCode());
+            assertTrue(e.getMessage().contains(sc.COORD_INPUT_EVENTS) && e.getMessage().contains("per data-in start-instance"));
+        }
+
+        // CASE 2: Success case i.e. single start-instance for input and single instance for output, each with a "," inside its EL function call
+        reader = IOUtils.getResourceAsReader("coord-multiple-input-start-instance2.xml", -1);
+        writer = new FileWriter(new URI(appPath).getPath());
+        IOUtils.copyCharStream(reader, writer);
+        sc = new CoordSubmitXCommand(conf, "UNIT_TESTING");
+
+        try {
+            sc.call();
+        }
+        catch (CommandException e) {
+            fail("Unexpected failure: " + e);
+        }
+    }
+
+    /**
+     * Testing for when user tries to submit a coordinator application having data-in events
+     * that erroneously specify multiple input data instances inside a single <end-instance> tag.
+     * Job gives submission error and indicates appropriate correction
+     * Testing both negative(error) as well as positive(success) cases
+     */
+    public void testBasicSubmitWithMultipleEndInstancesInputEvent() throws Exception {
+        Configuration conf = new XConfiguration();
+        String appPath = "file://" + getTestCaseDir() + File.separator + "coordinator.xml";
+
+        // CASE 1: Failure case i.e. multiple data-in end-instances
+        Reader reader = IOUtils.getResourceAsReader("coord-multiple-input-end-instance1.xml", -1);
+        Writer writer = new FileWriter(new URI(appPath).getPath());
+        IOUtils.copyCharStream(reader, writer);
+        conf.set(OozieClient.COORDINATOR_APP_PATH, appPath);
+        conf.set(OozieClient.USER_NAME, getTestUser());
+        CoordSubmitXCommand sc = new CoordSubmitXCommand(conf, "UNIT_TESTING");
+
+        try {
+            sc.call();
+            fail("Expected to catch errors due to incorrectly specified input data set end-instances");
+        }
+        catch (CommandException e) {
+            assertEquals(Job.Status.FAILED, sc.getJob().getStatus()); // expected value first, so failure output reads correctly
+            assertEquals(ErrorCode.E1021, e.getErrorCode());
+            assertTrue(e.getMessage().contains(sc.COORD_INPUT_EVENTS) && e.getMessage().contains("per data-in end-instance"));
+        }
+
+        // CASE 2: Success case i.e. single end-instance for input whose EL function call itself contains a ","
+        reader = IOUtils.getResourceAsReader("coord-multiple-input-end-instance2.xml", -1);
+        writer = new FileWriter(new URI(appPath).getPath());
+        IOUtils.copyCharStream(reader, writer);
+        sc = new CoordSubmitXCommand(conf, "UNIT_TESTING");
+
+        try {
+            sc.call();
+        }
+        catch (CommandException e) {
+            fail("Unexpected failure: " + e);
+        }
+    }
+
+    /**
      * Testing for when user tries to submit a coordinator application having data-out events
      * that erroneously specify multiple output data instances inside a single <instance> tag.
      * Job gives submission error and indicates appropriate correction

Added: incubator/oozie/trunk/core/src/test/resources/coord-multiple-input-end-instance1.xml
URL: http://svn.apache.org/viewvc/incubator/oozie/trunk/core/src/test/resources/coord-multiple-input-end-instance1.xml?rev=1355205&view=auto
==============================================================================
--- incubator/oozie/trunk/core/src/test/resources/coord-multiple-input-end-instance1.xml (added)
+++ incubator/oozie/trunk/core/src/test/resources/coord-multiple-input-end-instance1.xml Fri Jun 29 02:13:29 2012
@@ -0,0 +1,57 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<coordinator-app xmlns="uri:oozie:coordinator:0.2" name="NAME" frequency="${coord:days(1)}" start="2009-02-01T01:00Z" end="2009-02-03T23:59Z" timezone="UTC">
+  <controls>
+    <concurrency>2</concurrency>
+    <execution>LIFO</execution>
+  </controls>
+  <datasets>
+    <dataset name="a" frequency="${coord:days(7)}" initial-instance="2009-02-01T01:00Z" timezone="UTC">
+        <uri-template>file:///tmp/coord/workflows/${YEAR}/${DAY}</uri-template>
+    </dataset>
+    <dataset name="local_a" frequency="${coord:days(7)}" initial-instance="2009-02-01T01:00Z" timezone="UTC">
+        <uri-template>file:///tmp/coord/workflows/${YEAR}/${DAY}</uri-template>
+    </dataset>
+  </datasets>
+  <input-events>
+    <data-in name="A" dataset="a">
+    <start-instance>${coord:latest(0)}</start-instance>
+    <end-instance>${coord:latest(0)},${coord:latest(-1)}</end-instance>
+    </data-in>
+  </input-events>
+  <output-events>
+    <data-out name="LOCAL_A" dataset="local_a">
+      <instance>${coord:current(-1)}</instance>
+    </data-out>
+  </output-events>
+  <action>
+    <workflow>
+      <app-path>hdfs:///tmp/workflows/</app-path>
+      <configuration>
+        <property>
+          <name>inputA</name>
+          <value>${coord:dataIn('A')}</value>
+        </property>
+        <property>
+          <name>inputB</name>
+          <value>${coord:dataOut('LOCAL_A')}</value>
+        </property>
+      </configuration>
+    </workflow>
+  </action>
+</coordinator-app>
\ No newline at end of file

Added: incubator/oozie/trunk/core/src/test/resources/coord-multiple-input-end-instance2.xml
URL: http://svn.apache.org/viewvc/incubator/oozie/trunk/core/src/test/resources/coord-multiple-input-end-instance2.xml?rev=1355205&view=auto
==============================================================================
--- incubator/oozie/trunk/core/src/test/resources/coord-multiple-input-end-instance2.xml (added)
+++ incubator/oozie/trunk/core/src/test/resources/coord-multiple-input-end-instance2.xml Fri Jun 29 02:13:29 2012
@@ -0,0 +1,57 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<coordinator-app xmlns="uri:oozie:coordinator:0.2" name="NAME" frequency="${coord:days(1)}" start="2009-02-01T01:00Z" end="2009-02-03T23:59Z" timezone="UTC">
+  <controls>
+    <concurrency>2</concurrency>
+    <execution>LIFO</execution>
+  </controls>
+  <datasets>
+    <dataset name="a" frequency="${coord:days(7)}" initial-instance="2009-02-01T01:00Z" timezone="UTC">
+        <uri-template>file:///tmp/coord/workflows/${YEAR}/${DAY}</uri-template>
+    </dataset>
+    <dataset name="local_a" frequency="${coord:days(7)}" initial-instance="2009-02-01T01:00Z" timezone="UTC">
+        <uri-template>file:///tmp/coord/workflows/${YEAR}/${DAY}</uri-template>
+    </dataset>
+  </datasets>
+  <input-events>
+    <data-in name="A" dataset="a">
+    <start-instance>${coord:future(0, 1)}</start-instance>
+    <end-instance>${coord:future(0, 1)}</end-instance>
+    </data-in>
+  </input-events>
+  <output-events>
+    <data-out name="LOCAL_A" dataset="local_a">
+      <instance>${coord:current(-1)}</instance>
+    </data-out>
+  </output-events>
+  <action>
+    <workflow>
+      <app-path>hdfs:///tmp/workflows/</app-path>
+      <configuration>
+        <property>
+          <name>inputA</name>
+          <value>${coord:dataIn('A')}</value>
+        </property>
+        <property>
+          <name>inputB</name>
+          <value>${coord:dataOut('LOCAL_A')}</value>
+        </property>
+      </configuration>
+    </workflow>
+  </action>
+</coordinator-app>
\ No newline at end of file

Added: incubator/oozie/trunk/core/src/test/resources/coord-multiple-input-start-instance1.xml
URL: http://svn.apache.org/viewvc/incubator/oozie/trunk/core/src/test/resources/coord-multiple-input-start-instance1.xml?rev=1355205&view=auto
==============================================================================
--- incubator/oozie/trunk/core/src/test/resources/coord-multiple-input-start-instance1.xml (added)
+++ incubator/oozie/trunk/core/src/test/resources/coord-multiple-input-start-instance1.xml Fri Jun 29 02:13:29 2012
@@ -0,0 +1,57 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<coordinator-app xmlns="uri:oozie:coordinator:0.2" name="NAME" frequency="${coord:days(1)}" start="2009-02-01T01:00Z" end="2009-02-03T23:59Z" timezone="UTC">
+  <controls>
+    <concurrency>2</concurrency>
+    <execution>LIFO</execution>
+  </controls>
+  <datasets>
+    <dataset name="a" frequency="${coord:days(7)}" initial-instance="2009-02-01T01:00Z" timezone="UTC">
+        <uri-template>file:///tmp/coord/workflows/${YEAR}/${DAY}</uri-template>
+    </dataset>
+    <dataset name="local_a" frequency="${coord:days(7)}" initial-instance="2009-02-01T01:00Z" timezone="UTC">
+        <uri-template>file:///tmp/coord/workflows/${YEAR}/${DAY}</uri-template>
+    </dataset>
+  </datasets>
+  <input-events>
+    <data-in name="A" dataset="a">
+    <start-instance>${coord:latest(-1)},${coord:latest(-2)}</start-instance>
+    <end-instance>${coord:latest(0)}</end-instance>
+    </data-in>
+  </input-events>
+  <output-events>
+    <data-out name="LOCAL_A" dataset="local_a">
+      <instance>${coord:current(-1)}</instance>
+    </data-out>
+  </output-events>
+  <action>
+    <workflow>
+      <app-path>hdfs:///tmp/workflows/</app-path>
+      <configuration>
+        <property>
+          <name>inputA</name>
+          <value>${coord:dataIn('A')}</value>
+        </property>
+        <property>
+          <name>inputB</name>
+          <value>${coord:dataOut('LOCAL_A')}</value>
+        </property>
+      </configuration>
+    </workflow>
+  </action>
+</coordinator-app>
\ No newline at end of file

Added: incubator/oozie/trunk/core/src/test/resources/coord-multiple-input-start-instance2.xml
URL: http://svn.apache.org/viewvc/incubator/oozie/trunk/core/src/test/resources/coord-multiple-input-start-instance2.xml?rev=1355205&view=auto
==============================================================================
--- incubator/oozie/trunk/core/src/test/resources/coord-multiple-input-start-instance2.xml (added)
+++ incubator/oozie/trunk/core/src/test/resources/coord-multiple-input-start-instance2.xml Fri Jun 29 02:13:29 2012
@@ -0,0 +1,57 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<coordinator-app xmlns="uri:oozie:coordinator:0.2" name="NAME" frequency="${coord:days(1)}" start="2009-02-01T01:00Z" end="2009-02-03T23:59Z" timezone="UTC">
+  <controls>
+    <concurrency>2</concurrency>
+    <execution>LIFO</execution>
+  </controls>
+  <datasets>
+    <dataset name="a" frequency="${coord:days(7)}" initial-instance="2009-02-01T01:00Z" timezone="UTC">
+        <uri-template>file:///tmp/coord/workflows/${YEAR}/${DAY}</uri-template>
+    </dataset>
+    <dataset name="local_a" frequency="${coord:days(7)}" initial-instance="2009-02-01T01:00Z" timezone="UTC">
+        <uri-template>file:///tmp/coord/workflows/${YEAR}/${DAY}</uri-template>
+    </dataset>
+  </datasets>
+  <input-events>
+    <data-in name="A" dataset="a">
+    <start-instance>${coord:future(0, 1)}</start-instance>
+    <end-instance>${coord:future(0, 1)}</end-instance>
+    </data-in>
+  </input-events>
+  <output-events>
+    <data-out name="LOCAL_A" dataset="local_a">
+      <instance>${coord:future(0,2)}</instance>
+    </data-out>
+  </output-events>
+  <action>
+    <workflow>
+      <app-path>hdfs:///tmp/workflows/</app-path>
+      <configuration>
+        <property>
+          <name>inputA</name>
+          <value>${coord:dataIn('A')}</value>
+        </property>
+        <property>
+          <name>inputB</name>
+          <value>${coord:dataOut('LOCAL_A')}</value>
+        </property>
+      </configuration>
+    </workflow>
+  </action>
+</coordinator-app>
\ No newline at end of file

Modified: incubator/oozie/trunk/release-log.txt
URL: http://svn.apache.org/viewvc/incubator/oozie/trunk/release-log.txt?rev=1355205&r1=1355204&r2=1355205&view=diff
==============================================================================
--- incubator/oozie/trunk/release-log.txt (original)
+++ incubator/oozie/trunk/release-log.txt Fri Jun 29 02:13:29 2012
@@ -1,5 +1,6 @@
 -- Oozie 3.3.0 release (trunk - unreleased)
 
+OOZIE-860 start-instance and end-instance should limit 1 (britt via virag)
 OOZIE-882 CoordELEvaluator.createDataEvaluator doesn't set timezone for coord action (shwethags via tucu)
 OOZIE-892 increase default JVM max memory to 1GB to avoid OOM (rkanter via tucu)
 OOZIE-861 allow for use of multiple <java-opts> (britt via virag)