You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@oozie.apache.org by "Virag Kothari (JIRA)" <ji...@apache.org> on 2012/05/13 06:43:12 UTC

[jira] [Updated] (OOZIE-636) Check fork and join in the workflow in the submission time

     [ https://issues.apache.org/jira/browse/OOZIE-636?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Virag Kothari updated OOZIE-636:
--------------------------------


For the patch committed in trunk with this JIRA ID (also available under the same JIRA ID in Apache ReviewBoard system), I grant license to ASF for inclusion in ASF works (as per the Apache License §5) 
                
> Check fork and join in the workflow in the submission time 
> -----------------------------------------------------------
>
>                 Key: OOZIE-636
>                 URL: https://issues.apache.org/jira/browse/OOZIE-636
>             Project: Oozie
>          Issue Type: Bug
>            Reporter: Virag Kothari
>            Assignee: Virag Kothari
>             Fix For: 3.2.0
>
>
> Enhancement: Oozie should check that fork and join nodes are correctly paired when the user submits the job. This should be a static check at submission time, not a check performed while the workflow is running.
> Current logic bug:
> A workflow with a different number of forks and joins was run. The wf job should have been killed but it succeeded. Also, strangely, one of its actions (java12 in the tests below) was killed. 
> Following are the different types of tests run and their results with varying delays.
> test1: wf job SUCCEEDED, action java12 KILLED.
> delay11=11
> delay12=12
> delay121=1
> delay122=2
> delay21=1
> delay22=1
> test2: wf job SUCCEEDED, action java12 KILLED. 
> delay11=1
> delay12=12
> delay121=1
> delay122=2
> delay21=1
> delay22=1
> test3: wf job SUCCEEDED, all actions OK. Question: why does the wf job always pass in this scenario, even when the forks and
> joins are not paired?
> delay11=10
> delay12=10
> delay121=15
> delay122=15
> delay21=20
> delay22=20
> workflow.xml
> ============
> <workflow-app xmlns='uri:oozie:workflow:0.1' name='fork-join-4735180-wf'>
>     <start to='fork1' />
>     <fork name="fork1">
>         <path start="java11" />
>         <path start="fork12" />
>     </fork>
>     <action name='java11'>
>         <java>
>             <job-tracker>${jobTracker}</job-tracker>
>             <name-node>${nameNode}</name-node>
>             <configuration>
>                 <property>
>                     <name>mapred.job.queue.name</name>
>                     <value>${queueName}</value>
>                 </property>
>             </configuration>
>             <main-class>qa.test.tests.testsleep</main-class>
>             <arg>${delay11}</arg>
>         </java>
>         <ok to="java12" />
>         <error to="fail" />
>     </action>
>     <action name='java12'>
>         <java>
>             <job-tracker>${jobTracker}</job-tracker>
>             <name-node>${nameNode}</name-node>
>             <configuration>
>                 <property>
>                     <name>mapred.job.queue.name</name>
>                     <value>${queueName}</value>
>                 </property>
>             </configuration>
>             <main-class>qa.test.tests.testsleep</main-class>
>             <arg>${delay12}</arg>
>         </java>
>         <ok to="join1" />
>         <error to="fail" />
>     </action>
>     <fork name="fork12">
>         <path start="java121" />
>         <path start="java122" />
>     </fork>
>     <action name='java121'>
>         <java>
>             <job-tracker>${jobTracker}</job-tracker>
>             <name-node>${nameNode}</name-node>
>             <configuration>
>                 <property>
>                     <name>mapred.job.queue.name</name>
>                     <value>${queueName}</value>
>                 </property>
>             </configuration>
>             <main-class>qa.test.tests.testsleep</main-class>
>             <arg>${delay121}</arg>
>         </java>
>         <ok to="join12" />
>         <error to="fail" />
>     </action>
>     <action name='java122'>
>         <java>
>             <job-tracker>${jobTracker}</job-tracker>
>             <name-node>${nameNode}</name-node>
>             <configuration>
>                 <property>
>                     <name>mapred.job.queue.name</name>
>                     <value>${queueName}</value>
>                 </property>
>             </configuration>
>             <main-class>qa.test.tests.testsleep</main-class>
>             <arg>${delay122}</arg>
>         </java>
>         <ok to="join12" />
>         <error to="fail" />
>     </action>
>     <join name="join12" to="fork2" />
>     <fork name="fork2">
>         <path start="java21" />
>         <path start="java22" />
>     </fork>
>     <action name='java21'>
>         <java>
>             <job-tracker>${jobTracker}</job-tracker>
>             <name-node>${nameNode}</name-node>
>             <configuration>
>                 <property>
>                     <name>mapred.job.queue.name</name>
>                     <value>${queueName}</value>
>                 </property>
>             </configuration>
>             <main-class>qa.test.tests.testsleep</main-class>
>             <arg>${delay21}</arg>
>         </java>
>         <ok to="join1" />
>         <error to="fail" />
>     </action>
>     <action name='java22'>
>         <java>
>             <job-tracker>${jobTracker}</job-tracker>
>             <name-node>${nameNode}</name-node>
>             <configuration>
>                 <property>
>                     <name>mapred.job.queue.name</name>
>                     <value>${queueName}</value>
>                 </property>
>             </configuration>
>             <main-class>qa.test.tests.testsleep</main-class>
>             <arg>${delay22}</arg>
>         </java>
>         <ok to="join1" />
>         <error to="fail" />
>     </action>
>     <join name="join1" to="end" />
>     <kill name="fail">
>         <message>Streaming Map/Reduce failed, error
> message[${wf:errorMessage(wf:lastErrorNode())}]</message>
>     </kill>
>     <end name='end' />
> </workflow-app>

--
This message is automatically generated by JIRA.
If you think it was sent incorrectly, please contact your JIRA administrators: https://issues.apache.org/jira/secure/ContactAdministrators!default.jspa
For more information on JIRA, see: http://www.atlassian.com/software/jira