You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@oozie.apache.org by vi...@apache.org on 2012/06/29 04:13:32 UTC
svn commit: r1355205 - in /incubator/oozie/trunk: ./
core/src/main/java/org/apache/oozie/command/coord/
core/src/test/java/org/apache/oozie/command/coord/ core/src/test/resources/
Author: virag
Date: Fri Jun 29 02:13:29 2012
New Revision: 1355205
URL: http://svn.apache.org/viewvc?rev=1355205&view=rev
Log:
OOZIE-860 start-instance and end-instance should limit 1 (britt via virag)
Added:
incubator/oozie/trunk/core/src/test/resources/coord-multiple-input-end-instance1.xml
incubator/oozie/trunk/core/src/test/resources/coord-multiple-input-end-instance2.xml
incubator/oozie/trunk/core/src/test/resources/coord-multiple-input-start-instance1.xml
incubator/oozie/trunk/core/src/test/resources/coord-multiple-input-start-instance2.xml
Modified:
incubator/oozie/trunk/core/src/main/java/org/apache/oozie/command/coord/CoordSubmitXCommand.java
incubator/oozie/trunk/core/src/test/java/org/apache/oozie/command/coord/TestCoordSubmitXCommand.java
incubator/oozie/trunk/release-log.txt
Modified: incubator/oozie/trunk/core/src/main/java/org/apache/oozie/command/coord/CoordSubmitXCommand.java
URL: http://svn.apache.org/viewvc/incubator/oozie/trunk/core/src/main/java/org/apache/oozie/command/coord/CoordSubmitXCommand.java?rev=1355205&r1=1355204&r2=1355205&view=diff
==============================================================================
--- incubator/oozie/trunk/core/src/main/java/org/apache/oozie/command/coord/CoordSubmitXCommand.java (original)
+++ incubator/oozie/trunk/core/src/main/java/org/apache/oozie/command/coord/CoordSubmitXCommand.java Fri Jun 29 02:13:29 2012
@@ -308,7 +308,7 @@ public class CoordSubmitXCommand extends
}
/*
- * Check against multiple data instance values inside a single <instance> tag
+ * Check against multiple data instance values inside a single <instance>, <start-instance> or <end-instance> tag.
* If found, the job is not submitted and user is informed to correct the error, instead of defaulting to the first instance value in the list
*/
private void checkMultipleTimeInstances(Element eCoordJob, String eventType, String dataType) throws CoordinatorJobException {
@@ -339,6 +339,47 @@ public class CoordSubmitXCommand extends
handleExpresionWithMultipleInstances(eventType, dataType, instanceValue);
}
}
+
+ // In case of input-events, there can be multiple child <start-instance> datasets. Iterating to ensure none of them have errors
+ instanceSpecList = dataSpec.getChildren("start-instance", ns);
+ instanceIter = instanceSpecList.iterator();
+ while(instanceIter.hasNext()) {
+ instance = ((Element) instanceIter.next());
+ if(instance.getContentSize() == 0) { //empty string or whitespace
+ throw new CoordinatorJobException(ErrorCode.E1021, "<start-instance> tag within " + eventType + " is empty!");
+ }
+ instanceValue = instance.getContent(0).toString();
+ boolean isInvalid = false;
+ try {
+ isInvalid = evalAction.checkForExistence(instanceValue, ",");
+ } catch (Exception e) {
+ handleELParseException(eventType, dataType, instanceValue);
+ }
+ if (isInvalid) { // reaching this block implies start instance is not empty i.e. length > 0
+ handleExpresionWithStartMultipleInstances(eventType, dataType, instanceValue);
+ }
+ }
+
+ // In case of input-events, there can be multiple child <end-instance> datasets. Iterating to ensure none of them have errors
+ instanceSpecList = dataSpec.getChildren("end-instance", ns);
+ instanceIter = instanceSpecList.iterator();
+ while(instanceIter.hasNext()) {
+ instance = ((Element) instanceIter.next());
+ if(instance.getContentSize() == 0) { //empty string or whitespace
+ throw new CoordinatorJobException(ErrorCode.E1021, "<end-instance> tag within " + eventType + " is empty!");
+ }
+ instanceValue = instance.getContent(0).toString();
+ boolean isInvalid = false;
+ try {
+ isInvalid = evalAction.checkForExistence(instanceValue, ",");
+ } catch (Exception e) {
+ handleELParseException(eventType, dataType, instanceValue);
+ }
+ if (isInvalid) { // reaching this block implies instance is not empty i.e. length > 0
+ handleExpresionWithMultipleEndInstances(eventType, dataType, instanceValue);
+ }
+ }
+
}
}
}
@@ -367,6 +408,30 @@ public class CoordSubmitXCommand extends
+ "' contains more than one date instance. Coordinator job NOT SUBMITTED. " + correctAction);
}
+    /**
+     * Rejects a {@code <start-instance>} value that lists more than one data instance.
+     * This method never returns normally: it always throws, naming the offending value
+     * and appending a hint on how to correct the coordinator definition.
+     *
+     * @param eventType event section being validated; used as the prefix of the error message
+     * @param dataType compared against COORD_INPUT_EVENTS_DATA_IN and COORD_OUTPUT_EVENTS_DATA_OUT
+     *        to select the corrective hint
+     * @param instanceValue raw content of the offending {@code <start-instance>} tag
+     * @throws CoordinatorJobException always, with error code E1021
+     */
+    private void handleExpresionWithStartMultipleInstances(String eventType, String dataType, String instanceValue)
+            throws CoordinatorJobException {
+        String correctAction = null;
+        // The hint must name the tag that is actually being validated (<start-instance>).
+        if (dataType.equals(COORD_INPUT_EVENTS_DATA_IN)) {
+            correctAction = "Coordinator app definition should have separate <start-instance> tag per data-in start-instance";
+        }
+        else if (dataType.equals(COORD_OUTPUT_EVENTS_DATA_OUT)) {
+            correctAction = "Coordinator app definition can have only one <start-instance> tag per data-out start-instance";
+        }
+        // Leading space keeps the event type and the tag name from running together in the message.
+        throw new CoordinatorJobException(ErrorCode.E1021, eventType + " start-instance '" + instanceValue
+                + "' contains more than one date start-instance. Coordinator job NOT SUBMITTED. " + correctAction);
+    }
+
+    /**
+     * Rejects an {@code <end-instance>} value that lists more than one data instance.
+     * This method never returns normally: it always throws, naming the offending value
+     * and appending a hint on how to correct the coordinator definition.
+     *
+     * @param eventType event section being validated; used as the prefix of the error message
+     * @param dataType compared against COORD_INPUT_EVENTS_DATA_IN and COORD_OUTPUT_EVENTS_DATA_OUT
+     *        to select the corrective hint
+     * @param instanceValue raw content of the offending {@code <end-instance>} tag
+     * @throws CoordinatorJobException always, with error code E1021
+     */
+    private void handleExpresionWithMultipleEndInstances(String eventType, String dataType, String instanceValue)
+            throws CoordinatorJobException {
+        String correctAction = null;
+        // The hint must name the tag that is actually being validated (<end-instance>).
+        if (dataType.equals(COORD_INPUT_EVENTS_DATA_IN)) {
+            correctAction = "Coordinator app definition should have separate <end-instance> tag per data-in end-instance";
+        }
+        else if (dataType.equals(COORD_OUTPUT_EVENTS_DATA_OUT)) {
+            correctAction = "Coordinator app definition can have only one <end-instance> tag per data-out end-instance";
+        }
+        // Leading space keeps the event type and the tag name from running together in the message.
+        throw new CoordinatorJobException(ErrorCode.E1021, eventType + " end-instance '" + instanceValue
+                + "' contains more than one date end-instance. Coordinator job NOT SUBMITTED. " + correctAction);
+    }
+
/**
* Read the application XML and validate against coordinator Schema
*
Modified: incubator/oozie/trunk/core/src/test/java/org/apache/oozie/command/coord/TestCoordSubmitXCommand.java
URL: http://svn.apache.org/viewvc/incubator/oozie/trunk/core/src/test/java/org/apache/oozie/command/coord/TestCoordSubmitXCommand.java?rev=1355205&r1=1355204&r2=1355205&view=diff
==============================================================================
--- incubator/oozie/trunk/core/src/test/java/org/apache/oozie/command/coord/TestCoordSubmitXCommand.java (original)
+++ incubator/oozie/trunk/core/src/test/java/org/apache/oozie/command/coord/TestCoordSubmitXCommand.java Fri Jun 29 02:13:29 2012
@@ -203,6 +203,90 @@ public class TestCoordSubmitXCommand ext
}
/**
+     * Testing for when user tries to submit a coordinator application having data-in events
+     * that erroneously specify multiple input data instances inside a single <start-instance> tag.
+     * The job must fail at submission with error E1021 and a message indicating the correction.
+     * Covers both the negative (error) and the positive (success) case.
+     */
+    public void testBasicSubmitWithMultipleStartInstancesInputEvent() throws Exception {
+        Configuration conf = new XConfiguration();
+        String appPath = "file://" + getTestCaseDir() + File.separator + "coordinator.xml";
+
+        // CASE 1: Failure case i.e. multiple data-in start-instances
+        Reader reader = IOUtils.getResourceAsReader("coord-multiple-input-start-instance1.xml", -1);
+        Writer writer = new FileWriter(new URI(appPath).getPath());
+        IOUtils.copyCharStream(reader, writer);
+        conf.set(OozieClient.COORDINATOR_APP_PATH, appPath);
+        conf.set(OozieClient.USER_NAME, getTestUser());
+        CoordSubmitXCommand sc = new CoordSubmitXCommand(conf, "UNIT_TESTING");
+
+        try {
+            sc.call();
+            fail("Expected to catch errors due to incorrectly specified input data set start-instances");
+        }
+        catch (CommandException e) {
+            // Expected value goes first so that a failure reports expected/actual the right way round.
+            assertEquals(Job.Status.FAILED, sc.getJob().getStatus());
+            assertEquals(ErrorCode.E1021, e.getErrorCode());
+            // Checked separately so a failure shows which part of the message is missing.
+            assertTrue(e.getMessage().contains(sc.COORD_INPUT_EVENTS));
+            assertTrue(e.getMessage().contains("per data-in start-instance"));
+        }
+
+        // CASE 2: Success case i.e. Single start instances for input and single start instance for output, but both with ","
+        reader = IOUtils.getResourceAsReader("coord-multiple-input-start-instance2.xml", -1);
+        writer = new FileWriter(new URI(appPath).getPath());
+        IOUtils.copyCharStream(reader, writer);
+        sc = new CoordSubmitXCommand(conf, "UNIT_TESTING");
+
+        try {
+            sc.call();
+        }
+        catch (CommandException e) {
+            fail("Unexpected failure: " + e);
+        }
+    }
+
+    /**
+     * Testing for when user tries to submit a coordinator application having data-in events
+     * that erroneously specify multiple input data instances inside a single <end-instance> tag.
+     * The job must fail at submission with error E1021 and a message indicating the correction.
+     * Covers both the negative (error) and the positive (success) case.
+     */
+    public void testBasicSubmitWithMultipleEndInstancesInputEvent() throws Exception {
+        Configuration conf = new XConfiguration();
+        String appPath = "file://" + getTestCaseDir() + File.separator + "coordinator.xml";
+
+        // CASE 1: Failure case i.e. multiple data-in end-instances
+        Reader reader = IOUtils.getResourceAsReader("coord-multiple-input-end-instance1.xml", -1);
+        Writer writer = new FileWriter(new URI(appPath).getPath());
+        IOUtils.copyCharStream(reader, writer);
+        conf.set(OozieClient.COORDINATOR_APP_PATH, appPath);
+        conf.set(OozieClient.USER_NAME, getTestUser());
+        CoordSubmitXCommand sc = new CoordSubmitXCommand(conf, "UNIT_TESTING");
+
+        try {
+            sc.call();
+            fail("Expected to catch errors due to incorrectly specified input data set end-instances");
+        }
+        catch (CommandException e) {
+            // Expected value goes first so that a failure reports expected/actual the right way round.
+            assertEquals(Job.Status.FAILED, sc.getJob().getStatus());
+            assertEquals(ErrorCode.E1021, e.getErrorCode());
+            // Checked separately so a failure shows which part of the message is missing.
+            assertTrue(e.getMessage().contains(sc.COORD_INPUT_EVENTS));
+            assertTrue(e.getMessage().contains("per data-in end-instance"));
+        }
+
+        // CASE 2: Success case i.e. single start and end instance for input, each containing "," only inside its EL function
+        reader = IOUtils.getResourceAsReader("coord-multiple-input-end-instance2.xml", -1);
+        writer = new FileWriter(new URI(appPath).getPath());
+        IOUtils.copyCharStream(reader, writer);
+        sc = new CoordSubmitXCommand(conf, "UNIT_TESTING");
+
+        try {
+            sc.call();
+        }
+        catch (CommandException e) {
+            fail("Unexpected failure: " + e);
+        }
+    }
+
+ /**
* Testing for when user tries to submit a coordinator application having data-out events
* that erroneously specify multiple output data instances inside a single <instance> tag.
* Job gives submission error and indicates appropriate correction
Added: incubator/oozie/trunk/core/src/test/resources/coord-multiple-input-end-instance1.xml
URL: http://svn.apache.org/viewvc/incubator/oozie/trunk/core/src/test/resources/coord-multiple-input-end-instance1.xml?rev=1355205&view=auto
==============================================================================
--- incubator/oozie/trunk/core/src/test/resources/coord-multiple-input-end-instance1.xml (added)
+++ incubator/oozie/trunk/core/src/test/resources/coord-multiple-input-end-instance1.xml Fri Jun 29 02:13:29 2012
@@ -0,0 +1,57 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<coordinator-app xmlns="uri:oozie:coordinator:0.2" name="NAME" frequency="${coord:days(1)}" start="2009-02-01T01:00Z" end="2009-02-03T23:59Z" timezone="UTC">
+ <controls>
+ <concurrency>2</concurrency>
+ <execution>LIFO</execution>
+ </controls>
+ <datasets>
+ <dataset name="a" frequency="${coord:days(7)}" initial-instance="2009-02-01T01:00Z" timezone="UTC">
+ <uri-template>file:///tmp/coord/workflows/${YEAR}/${DAY}</uri-template>
+ </dataset>
+ <dataset name="local_a" frequency="${coord:days(7)}" initial-instance="2009-02-01T01:00Z" timezone="UTC">
+ <uri-template>file:///tmp/coord/workflows/${YEAR}/${DAY}</uri-template>
+ </dataset>
+ </datasets>
+ <input-events>
+ <data-in name="A" dataset="a">
+ <start-instance>${coord:latest(0)}</start-instance>
+ <end-instance>${coord:latest(0)},${coord:latest(-1)}</end-instance>
+ </data-in>
+ </input-events>
+ <output-events>
+ <data-out name="LOCAL_A" dataset="local_a">
+ <instance>${coord:current(-1)}</instance>
+ </data-out>
+ </output-events>
+ <action>
+ <workflow>
+ <app-path>hdfs:///tmp/workflows/</app-path>
+ <configuration>
+ <property>
+ <name>inputA</name>
+ <value>${coord:dataIn('A')}</value>
+ </property>
+ <property>
+ <name>inputB</name>
+ <value>${coord:dataOut('LOCAL_A')}</value>
+ </property>
+ </configuration>
+ </workflow>
+ </action>
+</coordinator-app>
\ No newline at end of file
Added: incubator/oozie/trunk/core/src/test/resources/coord-multiple-input-end-instance2.xml
URL: http://svn.apache.org/viewvc/incubator/oozie/trunk/core/src/test/resources/coord-multiple-input-end-instance2.xml?rev=1355205&view=auto
==============================================================================
--- incubator/oozie/trunk/core/src/test/resources/coord-multiple-input-end-instance2.xml (added)
+++ incubator/oozie/trunk/core/src/test/resources/coord-multiple-input-end-instance2.xml Fri Jun 29 02:13:29 2012
@@ -0,0 +1,57 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<coordinator-app xmlns="uri:oozie:coordinator:0.2" name="NAME" frequency="${coord:days(1)}" start="2009-02-01T01:00Z" end="2009-02-03T23:59Z" timezone="UTC">
+ <controls>
+ <concurrency>2</concurrency>
+ <execution>LIFO</execution>
+ </controls>
+ <datasets>
+ <dataset name="a" frequency="${coord:days(7)}" initial-instance="2009-02-01T01:00Z" timezone="UTC">
+ <uri-template>file:///tmp/coord/workflows/${YEAR}/${DAY}</uri-template>
+ </dataset>
+ <dataset name="local_a" frequency="${coord:days(7)}" initial-instance="2009-02-01T01:00Z" timezone="UTC">
+ <uri-template>file:///tmp/coord/workflows/${YEAR}/${DAY}</uri-template>
+ </dataset>
+ </datasets>
+ <input-events>
+ <data-in name="A" dataset="a">
+ <start-instance>${coord:future(0, 1)}</start-instance>
+ <end-instance>${coord:future(0, 1)}</end-instance>
+ </data-in>
+ </input-events>
+ <output-events>
+ <data-out name="LOCAL_A" dataset="local_a">
+ <instance>${coord:current(-1)}</instance>
+ </data-out>
+ </output-events>
+ <action>
+ <workflow>
+ <app-path>hdfs:///tmp/workflows/</app-path>
+ <configuration>
+ <property>
+ <name>inputA</name>
+ <value>${coord:dataIn('A')}</value>
+ </property>
+ <property>
+ <name>inputB</name>
+ <value>${coord:dataOut('LOCAL_A')}</value>
+ </property>
+ </configuration>
+ </workflow>
+ </action>
+</coordinator-app>
\ No newline at end of file
Added: incubator/oozie/trunk/core/src/test/resources/coord-multiple-input-start-instance1.xml
URL: http://svn.apache.org/viewvc/incubator/oozie/trunk/core/src/test/resources/coord-multiple-input-start-instance1.xml?rev=1355205&view=auto
==============================================================================
--- incubator/oozie/trunk/core/src/test/resources/coord-multiple-input-start-instance1.xml (added)
+++ incubator/oozie/trunk/core/src/test/resources/coord-multiple-input-start-instance1.xml Fri Jun 29 02:13:29 2012
@@ -0,0 +1,57 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<coordinator-app xmlns="uri:oozie:coordinator:0.2" name="NAME" frequency="${coord:days(1)}" start="2009-02-01T01:00Z" end="2009-02-03T23:59Z" timezone="UTC">
+ <controls>
+ <concurrency>2</concurrency>
+ <execution>LIFO</execution>
+ </controls>
+ <datasets>
+ <dataset name="a" frequency="${coord:days(7)}" initial-instance="2009-02-01T01:00Z" timezone="UTC">
+ <uri-template>file:///tmp/coord/workflows/${YEAR}/${DAY}</uri-template>
+ </dataset>
+ <dataset name="local_a" frequency="${coord:days(7)}" initial-instance="2009-02-01T01:00Z" timezone="UTC">
+ <uri-template>file:///tmp/coord/workflows/${YEAR}/${DAY}</uri-template>
+ </dataset>
+ </datasets>
+ <input-events>
+ <data-in name="A" dataset="a">
+ <start-instance>${coord:latest(-1)},${coord:latest(-2)}</start-instance>
+ <end-instance>${coord:latest(0)}</end-instance>
+ </data-in>
+ </input-events>
+ <output-events>
+ <data-out name="LOCAL_A" dataset="local_a">
+ <instance>${coord:current(-1)}</instance>
+ </data-out>
+ </output-events>
+ <action>
+ <workflow>
+ <app-path>hdfs:///tmp/workflows/</app-path>
+ <configuration>
+ <property>
+ <name>inputA</name>
+ <value>${coord:dataIn('A')}</value>
+ </property>
+ <property>
+ <name>inputB</name>
+ <value>${coord:dataOut('LOCAL_A')}</value>
+ </property>
+ </configuration>
+ </workflow>
+ </action>
+</coordinator-app>
\ No newline at end of file
Added: incubator/oozie/trunk/core/src/test/resources/coord-multiple-input-start-instance2.xml
URL: http://svn.apache.org/viewvc/incubator/oozie/trunk/core/src/test/resources/coord-multiple-input-start-instance2.xml?rev=1355205&view=auto
==============================================================================
--- incubator/oozie/trunk/core/src/test/resources/coord-multiple-input-start-instance2.xml (added)
+++ incubator/oozie/trunk/core/src/test/resources/coord-multiple-input-start-instance2.xml Fri Jun 29 02:13:29 2012
@@ -0,0 +1,57 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<coordinator-app xmlns="uri:oozie:coordinator:0.2" name="NAME" frequency="${coord:days(1)}" start="2009-02-01T01:00Z" end="2009-02-03T23:59Z" timezone="UTC">
+ <controls>
+ <concurrency>2</concurrency>
+ <execution>LIFO</execution>
+ </controls>
+ <datasets>
+ <dataset name="a" frequency="${coord:days(7)}" initial-instance="2009-02-01T01:00Z" timezone="UTC">
+ <uri-template>file:///tmp/coord/workflows/${YEAR}/${DAY}</uri-template>
+ </dataset>
+ <dataset name="local_a" frequency="${coord:days(7)}" initial-instance="2009-02-01T01:00Z" timezone="UTC">
+ <uri-template>file:///tmp/coord/workflows/${YEAR}/${DAY}</uri-template>
+ </dataset>
+ </datasets>
+ <input-events>
+ <data-in name="A" dataset="a">
+ <start-instance>${coord:future(0, 1)}</start-instance>
+ <end-instance>${coord:future(0, 1)}</end-instance>
+ </data-in>
+ </input-events>
+ <output-events>
+ <data-out name="LOCAL_A" dataset="local_a">
+ <instance>${coord:future(0,2)}</instance>
+ </data-out>
+ </output-events>
+ <action>
+ <workflow>
+ <app-path>hdfs:///tmp/workflows/</app-path>
+ <configuration>
+ <property>
+ <name>inputA</name>
+ <value>${coord:dataIn('A')}</value>
+ </property>
+ <property>
+ <name>inputB</name>
+ <value>${coord:dataOut('LOCAL_A')}</value>
+ </property>
+ </configuration>
+ </workflow>
+ </action>
+</coordinator-app>
\ No newline at end of file
Modified: incubator/oozie/trunk/release-log.txt
URL: http://svn.apache.org/viewvc/incubator/oozie/trunk/release-log.txt?rev=1355205&r1=1355204&r2=1355205&view=diff
==============================================================================
--- incubator/oozie/trunk/release-log.txt (original)
+++ incubator/oozie/trunk/release-log.txt Fri Jun 29 02:13:29 2012
@@ -1,5 +1,6 @@
-- Oozie 3.3.0 release (trunk - unreleased)
+OOZIE-860 start-instance and end-instance should limit 1 (britt via virag)
OOZIE-882 CoordELEvaluator.createDataEvaluator doesn't set timezone for coord action (shwethags via tucu)
OOZIE-892 increase default JVM max memory to 1GB to avoid OOM (rkanter via tucu)
OOZIE-861 allow for use of multiple <java-opts> (britt via virag)