You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@oozie.apache.org by Robert Kanter <rk...@cloudera.com> on 2013/11/07 20:10:28 UTC

Re: hi,how can I complete this DAG in a workflow using oozie?

When doing forks, I always find it helpful to write it out like this or
even to draw a picture:

start —> fork

fork —> shell1, shell2

shell1 —> fork1

fork1 —> shell3, join0

shell2 —> fork2

shell3 —> join

fork2 —> shell5, join0

join0 —> shell4

shell4 —> join

shell5 —> join

join —> end


The problem is that your inner forks (i.e. fork1 and fork2) are
“intersecting” with the outer fork (i.e. “fork”).  Each fork must end with
its own join (they come in pairs) and they shouldn’t be overlapping.  So,
the main issue is that you have fork1 and fork2 both matching with join0.


I’m actually not sure there is a valid equivalent of exactly what you have;
I think you’ll have to make some minor changes to your overall structure to
make it valid, but I could be wrong.



- Robert


On Thu, Nov 7, 2013 at 6:51 AM, 任桂禾 <re...@ebupt.com> wrote:

>
>
> ------------------ Original ------------------
> *From: * "任桂禾"<re...@ebupt.com>;
> *Date: * Thu, Nov 7, 2013 10:49 PM
> *To: * "user"<us...@oozie.apache.org>;
> *Subject: * Re:hi,how can I complete this DAG in a workflow using oozie?
>
> sorry to forget my pic
>
>
> ------------------ Original ------------------
> *From: * "任桂禾"<re...@ebupt.com>;
> *Date: * Thu, Nov 7, 2013 10:44 PM
> *To: * "user"<us...@oozie.apache.org>;
> *Subject: * hi,how can I complete this DAG in a workflow using oozie?
>
> hi,how can I complete this DAG in a workflow using oozie?
>  With oozie.wf.validate.ForkJoin=true,I can not do it.
> So I set oozie.wf.validate.ForkJoin=false ,and write a config like
> this,but it can't run normally yet.
> Hope for help!
> 3x
>
> Here is my config
>
>  <!--
>   Licensed to the Apache Software Foundation (ASF) under one
>   or more contributor license agreements.  See the NOTICE file
>   distributed with this work for additional information
>   regarding copyright ownership.  The ASF licenses this file
>   to you under the Apache License, Version 2.0 (the
>   "License"); you may not use this file except in compliance
>   with the License.  You may obtain a copy of the License at
>
>        http://www.apache.org/licenses/LICENSE-2.0
>
>   Unless required by applicable law or agreed to in writing, software
>   distributed under the License is distributed on an "AS IS" BASIS,
>   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
>   See the License for the specific language governing permissions and
>   limitations under the License.
> -->
> <workflow-app xmlns="uri:oozie:workflow:0.4" name="shell-wf">
>     <start to="fork"/>
>     <fork name="fork">
>         <path start="shell1"/>
>         <path start="shell2"/>
>     </fork>
>     <action name="shell1">
>         <shell xmlns="uri:oozie:shell-action:0.2">
>             <job-tracker>${jobTracker}</job-tracker>
>             <name-node>${nameNode}</name-node>
>             <configuration>
>                 <property>
>                     <name>mapred.job.queue.name</name>
>                     <value>${queueName}</value>
>                 </property>
>             </configuration>
>             <exec>sleep</exec>
>             <argument>1</argument>
>             <capture-output/>
>         </shell>
>         <ok to="fork1"/>
>         <error to="fail"/>
>     </action>
>     <fork name="fork1">
>         <path start="shell3"/>
>         <path start="join0"/>
>     </fork>
>     <action name="shell2">
>         <shell xmlns="uri:oozie:shell-action:0.2">
>             <job-tracker>${jobTracker}</job-tracker>
>             <name-node>${nameNode}</name-node>
>             <configuration>
>                 <property>
>                     <name>mapred.job.queue.name</name>
>                     <value>${queueName}</value>
>                 </property>
>             </configuration>
>             <exec>sleep</exec>
>             <argument>2</argument>
>             <capture-output/>
>         </shell>
>         <ok to="fork2"/>
>         <error to="fail"/>
>     </action>
>    <fork name="fork2">
>         <path start="shell5"/>
>         <path start="join0"/>
>     </fork>
>    <action name="shell3">
>         <shell xmlns="uri:oozie:shell-action:0.2">
>             <job-tracker>${jobTracker}</job-tracker>
>             <name-node>${nameNode}</name-node>
>             <configuration>
>                 <property>
>                     <name>mapred.job.queue.name</name>
>                     <value>${queueName}</value>
>                 </property>
>             </configuration>
>             <exec>sleep</exec>
>             <argument>1</argument>
>             <capture-output/>
>         </shell>
>         <ok to="join"/>
>         <error to="fail"/>
>     </action>
>       <join name="join0" to="shell4"/>
>    <action name="shell4">
>         <shell xmlns="uri:oozie:shell-action:0.2">
>             <job-tracker>${jobTracker}</job-tracker>
>             <name-node>${nameNode}</name-node>
>             <configuration>
>                 <property>
>                     <name>mapred.job.queue.name</name>
>                     <value>${queueName}</value>
>                 </property>
>             </configuration>
>             <exec>sleep</exec>
>             <argument>10</argument>
>             <capture-output/>
>         </shell>
>         <ok to="join"/>
>         <error to="fail"/>
>     </action>
>   <action name="shell5">
>         <shell xmlns="uri:oozie:shell-action:0.2">
>             <job-tracker>${jobTracker}</job-tracker>
>             <name-node>${nameNode}</name-node>
>             <configuration>
>                 <property>
>                     <name>mapred.job.queue.name</name>
>                     <value>${queueName}</value>
>                 </property>
>             </configuration>
>             <exec>sleep</exec>
>             <argument>15</argument>
>             <capture-output/>
>         </shell>
>         <ok to="join"/>
>         <error to="fail"/>
>     </action>
>       <join name="join" to="end"/>
>     <kill name="fail">
>         <message>Shell action failed, error
> message[${wf:errorMessage(wf:lastErrorNode())}]</message>
>     </kill>
>     <end name="end"/>
> </workflow-app>
>
>
> ------------------------------
> renguihe
>

Re: Re: hi,how can I complete this DAG in a workflow using oozie?

Posted by Alejandro Abdelnur <tu...@gmail.com>.
Henry,

the limitation of coupled fork/join pairs is there to simplify tracking of
parallel execution paths: a join expects all incoming execution paths to have
started from the same fork, so it knows the count. Without this constraint, it
would be much more difficult to track things. When we implemented this we
followed closely what jBPM does, and if I recall correctly it has the same
limitation. That said, IMO the limitation forces a cleaner WF design, easier
to follow and troubleshoot.

Going back to your particular problem, with the suggested approach (within
a single WF) you just lose a bit of extra parallelism. Nothing else. For
simplicity, I would go down this path.

Thanks.



On Fri, Nov 8, 2013 at 7:55 PM, renguihe <re...@ebupt.com> wrote:

> thx for replying!
> But I can't take your method because it changes the real dependency logic.
> In our real project ,there are 1000+ shell scripts forming a DAG and there
> are so many "M" type dependency in it,like this:
>
> 1->3
> 1->4
> 2->4
> 2->5
>
> if it can not be configured in one workflow ,I have to divide it to 3
> workflow ,each packaged in a coordinator(because runs everyday):
> 1->tag1->3
> 2->tag2->5
> detect tag1&2 ->4
>
> the "tag" action  is to create a dir in hdfs(using fs action) ,and "detect
> tag" action is to ensure the two dirs exist(using java action).
>
> This way brings several disgusting  problem:
> 1.I have to divide the 1000+ shell scripts manully(providing forming 300+
> worflow,each packaged in a coordinator )
> 2.I have to start all the 300+ coordinators at the beginning of the
> project beause of the dependency among coordinators (using tags and detect
> tags)
> 3.the 300+ "detect tag" actions at the begging of each coordinator will
> occupied 300+ map slots.
> 4.Can Oozie server hold 300+ concurrency?
>
> I want to know better ways to configure the 1000+ scripts in this project,
> and the reason why I can't put the "M" type dependency in a workflow
> beacause the "M" type is a DAG(fork-join grammer limitation or Oozie design
> does not aim at this)?
>
> wating for your help sincerely!
> thx!
>
>
>
> HENRY
>
> Sender: Alejandro Abdelnur
> Send Time: 2013-11-09 01:05
> Receiver: user@oozie.apache.org
> cc: user
> Subject: Re: hi,how can I complete this DAG in a workflow using oozie?
> the best parallelism you can get for this in a wf is:
>
> fork 1, 2 join fork 3, 4, 5 join
>
> thx
>
> Alejandro
> (phone typing)
>
> On Nov 8, 2013, at 5:45, renguihe <re...@ebupt.com> wrote:
>
> > hi,
> > Add the error info I have got here when I try to  implement this DAG in
> a workflow:
> > 1->3
> > 1->4
> > 2->4
> > 2->5
> >
> > I set oozie.validate.ForkJoin=false in my job.properties.
> >
> > 2013-11-08 13:32:23,256  WARN CallableQueueService$CompositeCallable:542
> - USER[-] GROUP[-] TOKEN[-] APP[-] JOB[-] ACTION[
> > -] exception callable [signal], E0709: Loop detected at runtime, node
> [join0]
> > org.apache.oozie.command.CommandException: E0709: Loop detected at
> runtime, node [join0]
> >        at
> org.apache.oozie.command.wf.SignalXCommand.execute(SignalXCommand.java:173)
> >        at
> org.apache.oozie.command.wf.SignalXCommand.execute(SignalXCommand.java:64)
> >        at org.apache.oozie.command.XCommand.call(XCommand.java:277)
> >        at
> org.apache.oozie.service.CallableQueueService$CompositeCallable.call(CallableQueueService.java:326)
> >        at
> org.apache.oozie.service.CallableQueueService$CompositeCallable.call(CallableQueueService.java:255)
> >        at
> org.apache.oozie.service.CallableQueueService$CallableWrapper.run(CallableQueueService.java:175)
> >        at
> java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886)
> >        at
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908)
> >        at java.lang.Thread.run(Thread.java:662)
> > Caused by: org.apache.oozie.workflow.WorkflowException: E0709: Loop
> detected at runtime, node [join0]
> >        at
> org.apache.oozie.workflow.lite.ControlNodeHandler.loopDetection(ControlNodeHandler.java:126)
> >        at
> org.apache.oozie.workflow.lite.LiteWorkflowInstance.signal(LiteWorkflowInstance.java:206)
> >        at
> org.apache.oozie.workflow.lite.LiteWorkflowInstance.signal(LiteWorkflowInstance.java:290)
> >        at
> org.apache.oozie.command.wf.SignalXCommand.execute(SignalXCommand.java:170)
> >        ... 8 more
> >
> > Hope for your help!
> >
> >
> >
> >
> > HENRY
> >
> > Sender: [Email Address Not Verified]-renguihe@ebupt.com
> > Send Time: 2013-11-08 10:14
> > Receiver: user
> > Subject: Re: Re: hi,how can I complete this DAG in a workflow using
> oozie?
> > 3x for replying!
> > What I want to implement is this DAG(not contaning control flow nodes):
> > 1->3
> > 1->4
> > 2->4
> > 2->5
> > I want to put these 5 actions(shell actions or sth) in a workflow.
> > how can I do?
> > you mentioned that "Each fork must end with
> > its own join (they come in pairs) and they shouldn’t be overlapping."
> > I know that is the rule .
> > But when I try to implement this DAG,I have to break the rule according
> to this paragraph in Oozie document.
> >
> > "By default, Oozie performs some validation that any forking in a
> workflow is valid and won't lead to any incorrect behavior or instability.
> However, if Oozie is preventing a workflow from being submitted and you are
> very certain that it should work, you can disable forkjoin validation so
> that Oozie will accept the workflow. To disable this validation just for a
> specific workflow, simply set oozie.wf.validate.ForkJoin to false in the
> job.properties file. To disable this validation for all workflows, simply
> > set oozie.validate.ForkJoin to false in the oozie-site.xml file.
> Disabling this validation is determined by the AND of both of these
> properties, so it will be disabled if either or both are set to false and
> only enabled if both are set to true (or not specified)."
> >
> > Hope for your help!
> >
> >
> >
> >
> > HENRY
> >
> > Sender: Robert Kanter
> > Send Time: 2013-11-08 03:10
> > Receiver: user@oozie.apache.org
> > Subject: Re: hi,how can I complete this DAG in a workflow using oozie?
> > When doing forks, I always find it helpful to write it out like this or
> > even to draw a picture:
> >
> > start —> fork
> >
> > fork —> shell1, shell2
> >
> > shell1 —> fork1
> >
> > fork1 —> shell3, join0
> >
> > shell2 —> fork2
> >
> > shell3 —> join
> >
> > fork2 —> shell5, join0
> >
> > join0 —> shell4
> >
> > shell4 —> join
> >
> > shell5 —> join
> >
> > join —> end
> >
> >
> > The problem is that your inner forks (i.e. fork1 and fork2) are
> > “intersecting” with the outer fork (i.e. “fork”).  Each fork must end
> with
> > its own join (they come in pairs) and they shouldn’t be overlapping.  So,
> > the main issue is that you have fork1 and fork2 both matching with join0.
> >
> >
> > I’m actually not sure there is a valid equivalent of exactly what you
> have;
> > I think you’ll have to make some minor changes to your overall structure
> to
> > make it valid, but I could be wrong.
> >
> >
> >
> > - Robert
> >
> >
> > On Thu, Nov 7, 2013 at 6:51 AM, 任桂禾 <re...@ebupt.com> wrote:
> >
> >>
> >>
> >> ------------------ Original ------------------
> >> *From: * "任桂禾"<re...@ebupt.com>;
> >> *Date: * Thu, Nov 7, 2013 10:49 PM
> >> *To: * "user"<us...@oozie.apache.org>;
> >> *Subject: * Re:hi,how can I complete this DAG in a workflow using oozie?
> >>
> >> sorry to forget my pic
> >>
> >>
> >> ------------------ Original ------------------
> >> *From: * "任桂禾"<re...@ebupt.com>;
> >> *Date: * Thu, Nov 7, 2013 10:44 PM
> >> *To: * "user"<us...@oozie.apache.org>;
> >> *Subject: * hi,how can I complete this DAG in a workflow using oozie?
> >>
> >> hi,how can I complete this DAG in a workflow using oozie?
> >> With oozie.wf.validate.ForkJoin=true,I can not do it.
> >> So I set oozie.wf.validate.ForkJoin=false ,and write a config like
> >> this,but it can't run normally yet.
> >> Hope for help!
> >> 3x
> >>
> >> Here is my config
> >>
> >> <!--
> >>  Licensed to the Apache Software Foundation (ASF) under one
> >>  or more contributor license agreements.  See the NOTICE file
> >>  distributed with this work for additional information
> >>  regarding copyright ownership.  The ASF licenses this file
> >>  to you under the Apache License, Version 2.0 (the
> >>  "License"); you may not use this file except in compliance
> >>  with the License.  You may obtain a copy of the License at
> >>
> >>       http://www.apache.org/licenses/LICENSE-2.0
> >>
> >>  Unless required by applicable law or agreed to in writing, software
> >>  distributed under the License is distributed on an "AS IS" BASIS,
> >>  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
> implied.
> >>  See the License for the specific language governing permissions and
> >>  limitations under the License.
> >> -->
> >> <workflow-app xmlns="uri:oozie:workflow:0.4" name="shell-wf">
> >>    <start to="fork"/>
> >>    <fork name="fork">
> >>        <path start="shell1"/>
> >>        <path start="shell2"/>
> >>    </fork>
> >>    <action name="shell1">
> >>        <shell xmlns="uri:oozie:shell-action:0.2">
> >>            <job-tracker>${jobTracker}</job-tracker>
> >>            <name-node>${nameNode}</name-node>
> >>            <configuration>
> >>                <property>
> >>                    <name>mapred.job.queue.name</name>
> >>                    <value>${queueName}</value>
> >>                </property>
> >>            </configuration>
> >>            <exec>sleep</exec>
> >>            <argument>1</argument>
> >>            <capture-output/>
> >>        </shell>
> >>        <ok to="fork1"/>
> >>        <error to="fail"/>
> >>    </action>
> >>    <fork name="fork1">
> >>        <path start="shell3"/>
> >>        <path start="join0"/>
> >>    </fork>
> >>    <action name="shell2">
> >>        <shell xmlns="uri:oozie:shell-action:0.2">
> >>            <job-tracker>${jobTracker}</job-tracker>
> >>            <name-node>${nameNode}</name-node>
> >>            <configuration>
> >>                <property>
> >>                    <name>mapred.job.queue.name</name>
> >>                    <value>${queueName}</value>
> >>                </property>
> >>            </configuration>
> >>            <exec>sleep</exec>
> >>            <argument>2</argument>
> >>            <capture-output/>
> >>        </shell>
> >>        <ok to="fork2"/>
> >>        <error to="fail"/>
> >>    </action>
> >>   <fork name="fork2">
> >>        <path start="shell5"/>
> >>        <path start="join0"/>
> >>    </fork>
> >>   <action name="shell3">
> >>        <shell xmlns="uri:oozie:shell-action:0.2">
> >>            <job-tracker>${jobTracker}</job-tracker>
> >>            <name-node>${nameNode}</name-node>
> >>            <configuration>
> >>                <property>
> >>                    <name>mapred.job.queue.name</name>
> >>                    <value>${queueName}</value>
> >>                </property>
> >>            </configuration>
> >>            <exec>sleep</exec>
> >>            <argument>1</argument>
> >>            <capture-output/>
> >>        </shell>
> >>        <ok to="join"/>
> >>        <error to="fail"/>
> >>    </action>
> >>      <join name="join0" to="shell4"/>
> >>   <action name="shell4">
> >>        <shell xmlns="uri:oozie:shell-action:0.2">
> >>            <job-tracker>${jobTracker}</job-tracker>
> >>            <name-node>${nameNode}</name-node>
> >>            <configuration>
> >>                <property>
> >>                    <name>mapred.job.queue.name</name>
> >>                    <value>${queueName}</value>
> >>                </property>
> >>            </configuration>
> >>            <exec>sleep</exec>
> >>            <argument>10</argument>
> >>            <capture-output/>
> >>        </shell>
> >>        <ok to="join"/>
> >>        <error to="fail"/>
> >>    </action>
> >>  <action name="shell5">
> >>        <shell xmlns="uri:oozie:shell-action:0.2">
> >>            <job-tracker>${jobTracker}</job-tracker>
> >>            <name-node>${nameNode}</name-node>
> >>            <configuration>
> >>                <property>
> >>                    <name>mapred.job.queue.name</name>
> >>                    <value>${queueName}</value>
> >>                </property>
> >>            </configuration>
> >>            <exec>sleep</exec>
> >>            <argument>15</argument>
> >>            <capture-output/>
> >>        </shell>
> >>        <ok to="join"/>
> >>        <error to="fail"/>
> >>    </action>
> >>      <join name="join" to="end"/>
> >>    <kill name="fail">
> >>        <message>Shell action failed, error
> >> message[${wf:errorMessage(wf:lastErrorNode())}]</message>
> >>    </kill>
> >>    <end name="end"/>
> >> </workflow-app>
> >>
> >>
> >> ------------------------------
> >> renguihe
>

Re: Re: hi,how can I complete this DAG in a workflow using oozie?

Posted by renguihe <re...@ebupt.com>.
Thanks for replying!
But I can't use your method because it changes the real dependency logic.
In our real project, there are 1000+ shell scripts forming a DAG, and there are many "M"-type dependencies in it, like this:
 
1->3
1->4
2->4
2->5

If it cannot be configured in one workflow, I have to divide it into 3 workflows, each packaged in a coordinator (because it runs every day):
1->tag1->3
2->tag2->5
detect tag1&2 ->4

The "tag" action creates a directory in HDFS (using an fs action), and the "detect tag" action ensures that the two directories exist (using a java action).

This approach brings several nasty problems:
1. I have to divide the 1000+ shell scripts manually (say, forming 300+ workflows, each packaged in a coordinator).
2. I have to start all 300+ coordinators at the beginning of the project because of the dependencies among coordinators (using tags and detect-tags).
3. The 300+ "detect tag" actions at the beginning of each coordinator will occupy 300+ map slots.
4. Can the Oozie server handle 300+ concurrent coordinators?

I want to know better ways to configure the 1000+ scripts in this project,
and the reason why I can't put the "M"-type dependency in a workflow, given that the "M" type is a DAG (is it a fork-join grammar limitation, or is Oozie's design simply not aimed at this)?

Waiting sincerely for your help!
thx!



HENRY

Sender: Alejandro Abdelnur
Send Time: 2013-11-09 01:05
Receiver: user@oozie.apache.org
cc: user
Subject: Re: hi,how can I complete this DAG in a workflow using oozie?
the best parallelism you can get for this in a wf is:

fork 1, 2 join fork 3, 4, 5 join

thx

Alejandro
(phone typing)

On Nov 8, 2013, at 5:45, renguihe <re...@ebupt.com> wrote:

> hi,
> Add the error info I have got here when I try to  implement this DAG in a workflow:
> 1->3
> 1->4
> 2->4
> 2->5
> 
> I set oozie.validate.ForkJoin=false in my job.properties.
> 
> 2013-11-08 13:32:23,256  WARN CallableQueueService$CompositeCallable:542 - USER[-] GROUP[-] TOKEN[-] APP[-] JOB[-] ACTION[
> -] exception callable [signal], E0709: Loop detected at runtime, node [join0]
> org.apache.oozie.command.CommandException: E0709: Loop detected at runtime, node [join0]
>        at org.apache.oozie.command.wf.SignalXCommand.execute(SignalXCommand.java:173)
>        at org.apache.oozie.command.wf.SignalXCommand.execute(SignalXCommand.java:64)
>        at org.apache.oozie.command.XCommand.call(XCommand.java:277)
>        at org.apache.oozie.service.CallableQueueService$CompositeCallable.call(CallableQueueService.java:326)
>        at org.apache.oozie.service.CallableQueueService$CompositeCallable.call(CallableQueueService.java:255)
>        at org.apache.oozie.service.CallableQueueService$CallableWrapper.run(CallableQueueService.java:175)
>        at java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886)
>        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908)
>        at java.lang.Thread.run(Thread.java:662)
> Caused by: org.apache.oozie.workflow.WorkflowException: E0709: Loop detected at runtime, node [join0]
>        at org.apache.oozie.workflow.lite.ControlNodeHandler.loopDetection(ControlNodeHandler.java:126)
>        at org.apache.oozie.workflow.lite.LiteWorkflowInstance.signal(LiteWorkflowInstance.java:206)
>        at org.apache.oozie.workflow.lite.LiteWorkflowInstance.signal(LiteWorkflowInstance.java:290)
>        at org.apache.oozie.command.wf.SignalXCommand.execute(SignalXCommand.java:170)
>        ... 8 more
> 
> Hope for your help!
> 
> 
> 
> 
> HENRY
> 
> Sender: [Email Address Not Verified]-renguihe@ebupt.com
> Send Time: 2013-11-08 10:14
> Receiver: user
> Subject: Re: Re: hi,how can I complete this DAG in a workflow using oozie?
> 3x for replying!
> What I want to implement is this DAG(not contaning control flow nodes):
> 1->3
> 1->4
> 2->4
> 2->5
> I want to put these 5 actions(shell actions or sth) in a workflow.
> how can I do?
> you mentioned that "Each fork must end with
> its own join (they come in pairs) and they shouldn’t be overlapping."
> I know that is the rule .
> But when I try to implement this DAG,I have to break the rule according to this paragraph in Oozie document.
> 
> "By default, Oozie performs some validation that any forking in a workflow is valid and won't lead to any incorrect behavior or instability. However, if Oozie is preventing a workflow from being submitted and you are very certain that it should work, you can disable forkjoin validation so that Oozie will accept the workflow. To disable this validation just for a specific workflow, simply set oozie.wf.validate.ForkJoin to false in the job.properties file. To disable this validation for all workflows, simply set =oozie.validate.ForkJoin= to false in the oozie-site.xml file. Disabling this validation is determined by the AND of both of these properties, so it will be disabled if either or both are set to false and only enabled if both are set to true (or not specified)."
> 
> Hope for your help!
> 
> 
> 
> 
> HENRY
> 
> Sender: Robert Kanter
> Send Time: 2013-11-08 03:10
> Receiver: user@oozie.apache.org
> Subject: Re: hi,how can I complete this DAG in a workflow using oozie?
> When doing forks, I always find it helpful to write it out like this or
> even to draw a picture:
> 
> start —> fork
> 
> fork —> shell1, shell2
> 
> shell1 —> fork1
> 
> fork1 —> shell3, join0
> 
> shell2 —> fork2
> 
> shell3 —> join
> 
> fork2 —> shell5, join0
> 
> join0 —> shell4
> 
> shell4 —> join
> 
> shell5 —> join
> 
> join —> end
> 
> 
> The problem is that your inner forks (i.e. fork1 and fork2) are
> “intersecting” with the outer fork (i.e. “fork”).  Each fork must end with
> its own join (they come in pairs) and they shouldn’t be overlapping.  So,
> the main issue is that you have fork1 and fork2 both matching with join0.
> 
> 
> I’m actually not sure there is a valid equivalent of exactly what you have;
> I think you’ll have to make some minor changes to your overall structure to
> make it valid, but I could be wrong.
> 
> 
> 
> - Robert
> 
> 
> On Thu, Nov 7, 2013 at 6:51 AM, 任桂禾 <re...@ebupt.com> wrote:
> 
>> 
>> 
>> ------------------ Original ------------------
>> *From: * "任桂禾"<re...@ebupt.com>;
>> *Date: * Thu, Nov 7, 2013 10:49 PM
>> *To: * "user"<us...@oozie.apache.org>;
>> *Subject: * Re:hi,how can I complete this DAG in a workflow using oozie?
>> 
>> sorry to forget my pic
>> 
>> 
>> ------------------ Original ------------------
>> *From: * "任桂禾"<re...@ebupt.com>;
>> *Date: * Thu, Nov 7, 2013 10:44 PM
>> *To: * "user"<us...@oozie.apache.org>;
>> *Subject: * hi,how can I complete this DAG in a workflow using oozie?
>> 
>> hi,how can I complete this DAG in a workflow using oozie?
>> With oozie.wf.validate.ForkJoin=true,I can not do it.
>> So I set oozie.wf.validate.ForkJoin=false ,and write a config like
>> this,but it can't run normally yet.
>> Hope for help!
>> 3x
>> 
>> Here is my config
>> 
>> <!--
>>  Licensed to the Apache Software Foundation (ASF) under one
>>  or more contributor license agreements.  See the NOTICE file
>>  distributed with this work for additional information
>>  regarding copyright ownership.  The ASF licenses this file
>>  to you under the Apache License, Version 2.0 (the
>>  "License"); you may not use this file except in compliance
>>  with the License.  You may obtain a copy of the License at
>> 
>>       http://www.apache.org/licenses/LICENSE-2.0
>> 
>>  Unless required by applicable law or agreed to in writing, software
>>  distributed under the License is distributed on an "AS IS" BASIS,
>>  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
>>  See the License for the specific language governing permissions and
>>  limitations under the License.
>> -->
>> <workflow-app xmlns="uri:oozie:workflow:0.4" name="shell-wf">
>>    <start to="fork"/>
>>    <fork name="fork">
>>        <path start="shell1"/>
>>        <path start="shell2"/>
>>    </fork>
>>    <action name="shell1">
>>        <shell xmlns="uri:oozie:shell-action:0.2">
>>            <job-tracker>${jobTracker}</job-tracker>
>>            <name-node>${nameNode}</name-node>
>>            <configuration>
>>                <property>
>>                    <name>mapred.job.queue.name</name>
>>                    <value>${queueName}</value>
>>                </property>
>>            </configuration>
>>            <exec>sleep</exec>
>>            <argument>1</argument>
>>            <capture-output/>
>>        </shell>
>>        <ok to="fork1"/>
>>        <error to="fail"/>
>>    </action>
>>    <fork name="fork1">
>>        <path start="shell3"/>
>>        <path start="join0"/>
>>    </fork>
>>    <action name="shell2">
>>        <shell xmlns="uri:oozie:shell-action:0.2">
>>            <job-tracker>${jobTracker}</job-tracker>
>>            <name-node>${nameNode}</name-node>
>>            <configuration>
>>                <property>
>>                    <name>mapred.job.queue.name</name>
>>                    <value>${queueName}</value>
>>                </property>
>>            </configuration>
>>            <exec>sleep</exec>
>>            <argument>2</argument>
>>            <capture-output/>
>>        </shell>
>>        <ok to="fork2"/>
>>        <error to="fail"/>
>>    </action>
>>   <fork name="fork2">
>>        <path start="shell5"/>
>>        <path start="join0"/>
>>    </fork>
>>   <action name="shell3">
>>        <shell xmlns="uri:oozie:shell-action:0.2">
>>            <job-tracker>${jobTracker}</job-tracker>
>>            <name-node>${nameNode}</name-node>
>>            <configuration>
>>                <property>
>>                    <name>mapred.job.queue.name</name>
>>                    <value>${queueName}</value>
>>                </property>
>>            </configuration>
>>            <exec>sleep</exec>
>>            <argument>1</argument>
>>            <capture-output/>
>>        </shell>
>>        <ok to="join"/>
>>        <error to="fail"/>
>>    </action>
>>      <join name="join0" to="shell4"/>
>>   <action name="shell4">
>>        <shell xmlns="uri:oozie:shell-action:0.2">
>>            <job-tracker>${jobTracker}</job-tracker>
>>            <name-node>${nameNode}</name-node>
>>            <configuration>
>>                <property>
>>                    <name>mapred.job.queue.name</name>
>>                    <value>${queueName}</value>
>>                </property>
>>            </configuration>
>>            <exec>sleep</exec>
>>            <argument>10</argument>
>>            <capture-output/>
>>        </shell>
>>        <ok to="join"/>
>>        <error to="fail"/>
>>    </action>
>>  <action name="shell5">
>>        <shell xmlns="uri:oozie:shell-action:0.2">
>>            <job-tracker>${jobTracker}</job-tracker>
>>            <name-node>${nameNode}</name-node>
>>            <configuration>
>>                <property>
>>                    <name>mapred.job.queue.name</name>
>>                    <value>${queueName}</value>
>>                </property>
>>            </configuration>
>>            <exec>sleep</exec>
>>            <argument>15</argument>
>>            <capture-output/>
>>        </shell>
>>        <ok to="join"/>
>>        <error to="fail"/>
>>    </action>
>>      <join name="join" to="end"/>
>>    <kill name="fail">
>>        <message>Shell action failed, error
>> message[${wf:errorMessage(wf:lastErrorNode())}]</message>
>>    </kill>
>>    <end name="end"/>
>> </workflow-app>
>> 
>> 
>> ------------------------------
>> renguihe

Re: hi,how can I complete this DAG in a workflow using oozie?

Posted by Alejandro Abdelnur <tu...@gmail.com>.
the best parallelism you can get for this in a wf is:

fork 1, 2 join fork 3, 4, 5 join

thx

Alejandro
(phone typing)

On Nov 8, 2013, at 5:45, renguihe <re...@ebupt.com> wrote:

> hi,
> Add the error info I have got here when I try to  implement this DAG in a workflow:
> 1->3
> 1->4
> 2->4
> 2->5
> 
> I set oozie.validate.ForkJoin=false in my job.properties.
> 
> 2013-11-08 13:32:23,256  WARN CallableQueueService$CompositeCallable:542 - USER[-] GROUP[-] TOKEN[-] APP[-] JOB[-] ACTION[
> -] exception callable [signal], E0709: Loop detected at runtime, node [join0]
> org.apache.oozie.command.CommandException: E0709: Loop detected at runtime, node [join0]
>        at org.apache.oozie.command.wf.SignalXCommand.execute(SignalXCommand.java:173)
>        at org.apache.oozie.command.wf.SignalXCommand.execute(SignalXCommand.java:64)
>        at org.apache.oozie.command.XCommand.call(XCommand.java:277)
>        at org.apache.oozie.service.CallableQueueService$CompositeCallable.call(CallableQueueService.java:326)
>        at org.apache.oozie.service.CallableQueueService$CompositeCallable.call(CallableQueueService.java:255)
>        at org.apache.oozie.service.CallableQueueService$CallableWrapper.run(CallableQueueService.java:175)
>        at java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886)
>        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908)
>        at java.lang.Thread.run(Thread.java:662)
> Caused by: org.apache.oozie.workflow.WorkflowException: E0709: Loop detected at runtime, node [join0]
>        at org.apache.oozie.workflow.lite.ControlNodeHandler.loopDetection(ControlNodeHandler.java:126)
>        at org.apache.oozie.workflow.lite.LiteWorkflowInstance.signal(LiteWorkflowInstance.java:206)
>        at org.apache.oozie.workflow.lite.LiteWorkflowInstance.signal(LiteWorkflowInstance.java:290)
>        at org.apache.oozie.command.wf.SignalXCommand.execute(SignalXCommand.java:170)
>        ... 8 more
> 
> Hope for your help!
> 
> 
> 
> 
> HENRY
> 
> Sender: [Email Address Not Verified]-renguihe@ebupt.com
> Send Time: 2013-11-08 10:14
> Receiver: user
> Subject: Re: Re: hi,how can I complete this DAG in a workflow using oozie?
> 3x for replying!
> What I want to implement is this DAG (not containing control flow nodes):
> 1->3
> 1->4
> 2->4
> 2->5
> I want to put these 5 actions(shell actions or sth) in a workflow.
> how can I do?
> you mentioned that "Each fork must end with
> its own join (they come in pairs) and they shouldn’t be overlapping."
> I know that is the rule.
> But when I try to implement this DAG, I have to break the rule, according to this paragraph in the Oozie documentation.
> 
> "By default, Oozie performs some validation that any forking in a workflow is valid and won't lead to any incorrect behavior or instability. However, if Oozie is preventing a workflow from being submitted and you are very certain that it should work, you can disable forkjoin validation so that Oozie will accept the workflow. To disable this validation just for a specific workflow, simply set oozie.wf.validate.ForkJoin to false in the job.properties file. To disable this validation for all workflows, simply set oozie.validate.ForkJoin to false in the oozie-site.xml file. Disabling this validation is determined by the AND of both of these properties, so it will be disabled if either or both are set to false and only enabled if both are set to true (or not specified)."
> 
> Hope for your help!
> 
> 
> 
> 
> HENRY
> 
> Sender: Robert Kanter
> Send Time: 2013-11-08 03:10
> Receiver: user@oozie.apache.org
> Subject: Re: hi,how can I complete this DAG in a workflow using oozie?
> When doing forks, I always find it helpful to write it out like this or
> even to draw a picture:
> 
> start —> fork
> 
> fork —> shell1, shell2
> 
> shell1 —> fork1
> 
> fork1 —> shell3, join0
> 
> shell2 —> fork2
> 
> shell3 —> join
> 
> fork2 —> shell5, join0
> 
> join0 —> shell4
> 
> shell4 —> join
> 
> shell5 —> join
> 
> join —> end
> 
> 
> The problem is that your inner forks (i.e. fork1 and fork2) are
> “intersecting” with the outer fork (i.e. “fork”).  Each fork must end with
> its own join (they come in pairs) and they shouldn’t be overlapping.  So,
> the main issue is that you have fork1 and fork2 both matching with join0.
> 
> 
> I’m actually not sure there is a valid equivalent of exactly what you have;
> I think you’ll have to make some minor changes to your overall structure to
> make it valid, but I could be wrong.
> 
> 
> 
> - Robert
> 
> 
> On Thu, Nov 7, 2013 at 6:51 AM, 任桂禾 <re...@ebupt.com> wrote:
> 
>> 
>> 
>> ------------------ Original ------------------
>> *From: * "任桂禾"<re...@ebupt.com>;
>> *Date: * Thu, Nov 7, 2013 10:49 PM
>> *To: * "user"<us...@oozie.apache.org>;
>> *Subject: * Re:hi,how can I complete this DAG in a workflow using oozie?
>> 
>> sorry to forget my pic
>> 
>> 
>> ------------------ Original ------------------
>> *From: * "任桂禾"<re...@ebupt.com>;
>> *Date: * Thu, Nov 7, 2013 10:44 PM
>> *To: * "user"<us...@oozie.apache.org>;
>> *Subject: * hi,how can I complete this DAG in a workflow using oozie?
>> 
>> hi,how can I complete this DAG in a workflow using oozie?
>> With oozie.wf.validate.ForkJoin=true,I can not do it.
>> So I set oozie.wf.validate.ForkJoin=false ,and write a config like
>> this,but it can't run normally yet.
>> Hope for help!
>> 3x
>> 
>> Here is my config
>> 
>> <!--
>>  Licensed to the Apache Software Foundation (ASF) under one
>>  or more contributor license agreements.  See the NOTICE file
>>  distributed with this work for additional information
>>  regarding copyright ownership.  The ASF licenses this file
>>  to you under the Apache License, Version 2.0 (the
>>  "License"); you may not use this file except in compliance
>>  with the License.  You may obtain a copy of the License at
>> 
>>       http://www.apache.org/licenses/LICENSE-2.0
>> 
>>  Unless required by applicable law or agreed to in writing, software
>>  distributed under the License is distributed on an "AS IS" BASIS,
>>  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
>>  See the License for the specific language governing permissions and
>>  limitations under the License.
>> -->
>> <workflow-app xmlns="uri:oozie:workflow:0.4" name="shell-wf">
>>    <start to="fork"/>
>>    <fork name="fork">
>>        <path start="shell1"/>
>>        <path start="shell2"/>
>>    </fork>
>>    <action name="shell1">
>>        <shell xmlns="uri:oozie:shell-action:0.2">
>>            <job-tracker>${jobTracker}</job-tracker>
>>            <name-node>${nameNode}</name-node>
>>            <configuration>
>>                <property>
>>                    <name>mapred.job.queue.name</name>
>>                    <value>${queueName}</value>
>>                </property>
>>            </configuration>
>>            <exec>sleep</exec>
>>            <argument>1</argument>
>>            <capture-output/>
>>        </shell>
>>        <ok to="fork1"/>
>>        <error to="fail"/>
>>    </action>
>>    <fork name="fork1">
>>        <path start="shell3"/>
>>        <path start="join0"/>
>>    </fork>
>>    <action name="shell2">
>>        <shell xmlns="uri:oozie:shell-action:0.2">
>>            <job-tracker>${jobTracker}</job-tracker>
>>            <name-node>${nameNode}</name-node>
>>            <configuration>
>>                <property>
>>                    <name>mapred.job.queue.name</name>
>>                    <value>${queueName}</value>
>>                </property>
>>            </configuration>
>>            <exec>sleep</exec>
>>            <argument>2</argument>
>>            <capture-output/>
>>        </shell>
>>        <ok to="fork2"/>
>>        <error to="fail"/>
>>    </action>
>>   <fork name="fork2">
>>        <path start="shell5"/>
>>        <path start="join0"/>
>>    </fork>
>>   <action name="shell3">
>>        <shell xmlns="uri:oozie:shell-action:0.2">
>>            <job-tracker>${jobTracker}</job-tracker>
>>            <name-node>${nameNode}</name-node>
>>            <configuration>
>>                <property>
>>                    <name>mapred.job.queue.name</name>
>>                    <value>${queueName}</value>
>>                </property>
>>            </configuration>
>>            <exec>sleep</exec>
>>            <argument>1</argument>
>>            <capture-output/>
>>        </shell>
>>        <ok to="join"/>
>>        <error to="fail"/>
>>    </action>
>>      <join name="join0" to="shell4"/>
>>   <action name="shell4">
>>        <shell xmlns="uri:oozie:shell-action:0.2">
>>            <job-tracker>${jobTracker}</job-tracker>
>>            <name-node>${nameNode}</name-node>
>>            <configuration>
>>                <property>
>>                    <name>mapred.job.queue.name</name>
>>                    <value>${queueName}</value>
>>                </property>
>>            </configuration>
>>            <exec>sleep</exec>
>>            <argument>10</argument>
>>            <capture-output/>
>>        </shell>
>>        <ok to="join"/>
>>        <error to="fail"/>
>>    </action>
>>  <action name="shell5">
>>        <shell xmlns="uri:oozie:shell-action:0.2">
>>            <job-tracker>${jobTracker}</job-tracker>
>>            <name-node>${nameNode}</name-node>
>>            <configuration>
>>                <property>
>>                    <name>mapred.job.queue.name</name>
>>                    <value>${queueName}</value>
>>                </property>
>>            </configuration>
>>            <exec>sleep</exec>
>>            <argument>15</argument>
>>            <capture-output/>
>>        </shell>
>>        <ok to="join"/>
>>        <error to="fail"/>
>>    </action>
>>      <join name="join" to="end"/>
>>    <kill name="fail">
>>        <message>Shell action failed, error
>> message[${wf:errorMessage(wf:lastErrorNode())}]</message>
>>    </kill>
>>    <end name="end"/>
>> </workflow-app>
>> 
>> 
>> ------------------------------
>> renguihe

Re: Re: hi,how can I complete this DAG in a workflow using oozie?

Posted by renguihe <re...@ebupt.com>.
hi,
Here is the error info I got when I tried to implement this DAG in a workflow:
1->3
1->4
2->4
2->5

I set oozie.validate.ForkJoin=false in my job.properties.

2013-11-08 13:32:23,256  WARN CallableQueueService$CompositeCallable:542 - USER[-] GROUP[-] TOKEN[-] APP[-] JOB[-] ACTION[
-] exception callable [signal], E0709: Loop detected at runtime, node [join0]
org.apache.oozie.command.CommandException: E0709: Loop detected at runtime, node [join0]
        at org.apache.oozie.command.wf.SignalXCommand.execute(SignalXCommand.java:173)
        at org.apache.oozie.command.wf.SignalXCommand.execute(SignalXCommand.java:64)
        at org.apache.oozie.command.XCommand.call(XCommand.java:277)
        at org.apache.oozie.service.CallableQueueService$CompositeCallable.call(CallableQueueService.java:326)
        at org.apache.oozie.service.CallableQueueService$CompositeCallable.call(CallableQueueService.java:255)
        at org.apache.oozie.service.CallableQueueService$CallableWrapper.run(CallableQueueService.java:175)
        at java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908)
        at java.lang.Thread.run(Thread.java:662)
Caused by: org.apache.oozie.workflow.WorkflowException: E0709: Loop detected at runtime, node [join0]
        at org.apache.oozie.workflow.lite.ControlNodeHandler.loopDetection(ControlNodeHandler.java:126)
        at org.apache.oozie.workflow.lite.LiteWorkflowInstance.signal(LiteWorkflowInstance.java:206)
        at org.apache.oozie.workflow.lite.LiteWorkflowInstance.signal(LiteWorkflowInstance.java:290)
        at org.apache.oozie.command.wf.SignalXCommand.execute(SignalXCommand.java:170)
        ... 8 more

Hope for your help!




HENRY

Sender: [Email Address Not Verified]-renguihe@ebupt.com
Send Time: 2013-11-08 10:14
Receiver: user
Subject: Re: Re: hi,how can I complete this DAG in a workflow using oozie?
3x for replying!
What I want to implement is this DAG (not containing control flow nodes):
1->3
1->4
2->4
2->5
I want to put these 5 actions(shell actions or sth) in a workflow.
how can I do?
you mentioned that "Each fork must end with
its own join (they come in pairs) and they shouldn’t be overlapping."
I know that is the rule.
But when I try to implement this DAG, I have to break the rule, according to this paragraph in the Oozie documentation.

"By default, Oozie performs some validation that any forking in a workflow is valid and won't lead to any incorrect behavior or instability. However, if Oozie is preventing a workflow from being submitted and you are very certain that it should work, you can disable forkjoin validation so that Oozie will accept the workflow. To disable this validation just for a specific workflow, simply set oozie.wf.validate.ForkJoin to false in the job.properties file. To disable this validation for all workflows, simply set oozie.validate.ForkJoin to false in the oozie-site.xml file. Disabling this validation is determined by the AND of both of these properties, so it will be disabled if either or both are set to false and only enabled if both are set to true (or not specified)."

Hope for your help!




HENRY

Sender: Robert Kanter
Send Time: 2013-11-08 03:10
Receiver: user@oozie.apache.org
Subject: Re: hi,how can I complete this DAG in a workflow using oozie?
When doing forks, I always find it helpful to write it out like this or
even to draw a picture:

start —> fork

fork —> shell1, shell2

shell1 —> fork1

fork1 —> shell3, join0

shell2 —> fork2

shell3 —> join

fork2 —> shell5, join0

join0 —> shell4

shell4 —> join

shell5 —> join

join —> end


The problem is that your inner forks (i.e. fork1 and fork2) are
“intersecting” with the outer fork (i.e. “fork”).  Each fork must end with
its own join (they come in pairs) and they shouldn’t be overlapping.  So,
the main issue is that you have fork1 and fork2 both matching with join0.


I’m actually not sure there is a valid equivalent of exactly what you have;
I think you’ll have to make some minor changes to your overall structure to
make it valid, but I could be wrong.



- Robert


On Thu, Nov 7, 2013 at 6:51 AM, 任桂禾 <re...@ebupt.com> wrote:

>
>
> ------------------ Original ------------------
> *From: * "任桂禾"<re...@ebupt.com>;
> *Date: * Thu, Nov 7, 2013 10:49 PM
> *To: * "user"<us...@oozie.apache.org>;
> *Subject: * Re:hi,how can I complete this DAG in a workflow using oozie?
>
> sorry to forget my pic
>
>
> ------------------ Original ------------------
> *From: * "任桂禾"<re...@ebupt.com>;
> *Date: * Thu, Nov 7, 2013 10:44 PM
> *To: * "user"<us...@oozie.apache.org>;
> *Subject: * hi,how can I complete this DAG in a workflow using oozie?
>
> hi,how can I complete this DAG in a workflow using oozie?
>  With oozie.wf.validate.ForkJoin=true,I can not do it.
> So I set oozie.wf.validate.ForkJoin=false ,and write a config like
> this,but it can't run normally yet.
> Hope for help!
> 3x
>
> Here is my config
>
>  <!--
>   Licensed to the Apache Software Foundation (ASF) under one
>   or more contributor license agreements.  See the NOTICE file
>   distributed with this work for additional information
>   regarding copyright ownership.  The ASF licenses this file
>   to you under the Apache License, Version 2.0 (the
>   "License"); you may not use this file except in compliance
>   with the License.  You may obtain a copy of the License at
>
>        http://www.apache.org/licenses/LICENSE-2.0
>
>   Unless required by applicable law or agreed to in writing, software
>   distributed under the License is distributed on an "AS IS" BASIS,
>   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
>   See the License for the specific language governing permissions and
>   limitations under the License.
> -->
> <workflow-app xmlns="uri:oozie:workflow:0.4" name="shell-wf">
>     <start to="fork"/>
>     <fork name="fork">
>         <path start="shell1"/>
>         <path start="shell2"/>
>     </fork>
>     <action name="shell1">
>         <shell xmlns="uri:oozie:shell-action:0.2">
>             <job-tracker>${jobTracker}</job-tracker>
>             <name-node>${nameNode}</name-node>
>             <configuration>
>                 <property>
>                     <name>mapred.job.queue.name</name>
>                     <value>${queueName}</value>
>                 </property>
>             </configuration>
>             <exec>sleep</exec>
>             <argument>1</argument>
>             <capture-output/>
>         </shell>
>         <ok to="fork1"/>
>         <error to="fail"/>
>     </action>
>     <fork name="fork1">
>         <path start="shell3"/>
>         <path start="join0"/>
>     </fork>
>     <action name="shell2">
>         <shell xmlns="uri:oozie:shell-action:0.2">
>             <job-tracker>${jobTracker}</job-tracker>
>             <name-node>${nameNode}</name-node>
>             <configuration>
>                 <property>
>                     <name>mapred.job.queue.name</name>
>                     <value>${queueName}</value>
>                 </property>
>             </configuration>
>             <exec>sleep</exec>
>             <argument>2</argument>
>             <capture-output/>
>         </shell>
>         <ok to="fork2"/>
>         <error to="fail"/>
>     </action>
>    <fork name="fork2">
>         <path start="shell5"/>
>         <path start="join0"/>
>     </fork>
>    <action name="shell3">
>         <shell xmlns="uri:oozie:shell-action:0.2">
>             <job-tracker>${jobTracker}</job-tracker>
>             <name-node>${nameNode}</name-node>
>             <configuration>
>                 <property>
>                     <name>mapred.job.queue.name</name>
>                     <value>${queueName}</value>
>                 </property>
>             </configuration>
>             <exec>sleep</exec>
>             <argument>1</argument>
>             <capture-output/>
>         </shell>
>         <ok to="join"/>
>         <error to="fail"/>
>     </action>
>       <join name="join0" to="shell4"/>
>    <action name="shell4">
>         <shell xmlns="uri:oozie:shell-action:0.2">
>             <job-tracker>${jobTracker}</job-tracker>
>             <name-node>${nameNode}</name-node>
>             <configuration>
>                 <property>
>                     <name>mapred.job.queue.name</name>
>                     <value>${queueName}</value>
>                 </property>
>             </configuration>
>             <exec>sleep</exec>
>             <argument>10</argument>
>             <capture-output/>
>         </shell>
>         <ok to="join"/>
>         <error to="fail"/>
>     </action>
>   <action name="shell5">
>         <shell xmlns="uri:oozie:shell-action:0.2">
>             <job-tracker>${jobTracker}</job-tracker>
>             <name-node>${nameNode}</name-node>
>             <configuration>
>                 <property>
>                     <name>mapred.job.queue.name</name>
>                     <value>${queueName}</value>
>                 </property>
>             </configuration>
>             <exec>sleep</exec>
>             <argument>15</argument>
>             <capture-output/>
>         </shell>
>         <ok to="join"/>
>         <error to="fail"/>
>     </action>
>       <join name="join" to="end"/>
>     <kill name="fail">
>         <message>Shell action failed, error
> message[${wf:errorMessage(wf:lastErrorNode())}]</message>
>     </kill>
>     <end name="end"/>
> </workflow-app>
>
>
> ------------------------------
> renguihe
>

Re: Re: hi,how can I complete this DAG in a workflow using oozie?

Posted by renguihe <re...@ebupt.com>.
3x for replying!
What I want to implement is this DAG (not containing control flow nodes):
1->3
1->4
2->4
2->5
I want to put these 5 actions(shell actions or sth) in a workflow.
how can I do?
you mentioned that "Each fork must end with
its own join (they come in pairs) and they shouldn’t be overlapping."
I know that is the rule.
But when I try to implement this DAG, I have to break the rule, according to this paragraph in the Oozie documentation.

"By default, Oozie performs some validation that any forking in a workflow is valid and won't lead to any incorrect behavior or instability. However, if Oozie is preventing a workflow from being submitted and you are very certain that it should work, you can disable forkjoin validation so that Oozie will accept the workflow. To disable this validation just for a specific workflow, simply set oozie.wf.validate.ForkJoin to false in the job.properties file. To disable this validation for all workflows, simply set oozie.validate.ForkJoin to false in the oozie-site.xml file. Disabling this validation is determined by the AND of both of these properties, so it will be disabled if either or both are set to false and only enabled if both are set to true (or not specified)."

Hope for your help!




HENRY

Sender: Robert Kanter
Send Time: 2013-11-08 03:10
Receiver: user@oozie.apache.org
Subject: Re: hi,how can I complete this DAG in a workflow using oozie?
When doing forks, I always find it helpful to write it out like this or
even to draw a picture:

start —> fork

fork —> shell1, shell2

shell1 —> fork1

fork1 —> shell3, join0

shell2 —> fork2

shell3 —> join

fork2 —> shell5, join0

join0 —> shell4

shell4 —> join

shell5 —> join

join —> end


The problem is that your inner forks (i.e. fork1 and fork2) are
“intersecting” with the outer fork (i.e. “fork”).  Each fork must end with
its own join (they come in pairs) and they shouldn’t be overlapping.  So,
the main issue is that you have fork1 and fork2 both matching with join0.


I’m actually not sure there is a valid equivalent of exactly what you have;
I think you’ll have to make some minor changes to your overall structure to
make it valid, but I could be wrong.



- Robert


On Thu, Nov 7, 2013 at 6:51 AM, 任桂禾 <re...@ebupt.com> wrote:

>
>
> ------------------ Original ------------------
> *From: * "任桂禾"<re...@ebupt.com>;
> *Date: * Thu, Nov 7, 2013 10:49 PM
> *To: * "user"<us...@oozie.apache.org>;
> *Subject: * Re:hi,how can I complete this DAG in a workflow using oozie?
>
> sorry to forget my pic
>
>
> ------------------ Original ------------------
> *From: * "任桂禾"<re...@ebupt.com>;
> *Date: * Thu, Nov 7, 2013 10:44 PM
> *To: * "user"<us...@oozie.apache.org>;
> *Subject: * hi,how can I complete this DAG in a workflow using oozie?
>
> hi,how can I complete this DAG in a workflow using oozie?
>  With oozie.wf.validate.ForkJoin=true,I can not do it.
> So I set oozie.wf.validate.ForkJoin=false ,and write a config like
> this,but it can't run normally yet.
> Hope for help!
> 3x
>
> Here is my config
>
>  <!--
>   Licensed to the Apache Software Foundation (ASF) under one
>   or more contributor license agreements.  See the NOTICE file
>   distributed with this work for additional information
>   regarding copyright ownership.  The ASF licenses this file
>   to you under the Apache License, Version 2.0 (the
>   "License"); you may not use this file except in compliance
>   with the License.  You may obtain a copy of the License at
>
>        http://www.apache.org/licenses/LICENSE-2.0
>
>   Unless required by applicable law or agreed to in writing, software
>   distributed under the License is distributed on an "AS IS" BASIS,
>   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
>   See the License for the specific language governing permissions and
>   limitations under the License.
> -->
> <workflow-app xmlns="uri:oozie:workflow:0.4" name="shell-wf">
>     <start to="fork"/>
>     <fork name="fork">
>         <path start="shell1"/>
>         <path start="shell2"/>
>     </fork>
>     <action name="shell1">
>         <shell xmlns="uri:oozie:shell-action:0.2">
>             <job-tracker>${jobTracker}</job-tracker>
>             <name-node>${nameNode}</name-node>
>             <configuration>
>                 <property>
>                     <name>mapred.job.queue.name</name>
>                     <value>${queueName}</value>
>                 </property>
>             </configuration>
>             <exec>sleep</exec>
>             <argument>1</argument>
>             <capture-output/>
>         </shell>
>         <ok to="fork1"/>
>         <error to="fail"/>
>     </action>
>     <fork name="fork1">
>         <path start="shell3"/>
>         <path start="join0"/>
>     </fork>
>     <action name="shell2">
>         <shell xmlns="uri:oozie:shell-action:0.2">
>             <job-tracker>${jobTracker}</job-tracker>
>             <name-node>${nameNode}</name-node>
>             <configuration>
>                 <property>
>                     <name>mapred.job.queue.name</name>
>                     <value>${queueName}</value>
>                 </property>
>             </configuration>
>             <exec>sleep</exec>
>             <argument>2</argument>
>             <capture-output/>
>         </shell>
>         <ok to="fork2"/>
>         <error to="fail"/>
>     </action>
>    <fork name="fork2">
>         <path start="shell5"/>
>         <path start="join0"/>
>     </fork>
>    <action name="shell3">
>         <shell xmlns="uri:oozie:shell-action:0.2">
>             <job-tracker>${jobTracker}</job-tracker>
>             <name-node>${nameNode}</name-node>
>             <configuration>
>                 <property>
>                     <name>mapred.job.queue.name</name>
>                     <value>${queueName}</value>
>                 </property>
>             </configuration>
>             <exec>sleep</exec>
>             <argument>1</argument>
>             <capture-output/>
>         </shell>
>         <ok to="join"/>
>         <error to="fail"/>
>     </action>
>       <join name="join0" to="shell4"/>
>    <action name="shell4">
>         <shell xmlns="uri:oozie:shell-action:0.2">
>             <job-tracker>${jobTracker}</job-tracker>
>             <name-node>${nameNode}</name-node>
>             <configuration>
>                 <property>
>                     <name>mapred.job.queue.name</name>
>                     <value>${queueName}</value>
>                 </property>
>             </configuration>
>             <exec>sleep</exec>
>             <argument>10</argument>
>             <capture-output/>
>         </shell>
>         <ok to="join"/>
>         <error to="fail"/>
>     </action>
>   <action name="shell5">
>         <shell xmlns="uri:oozie:shell-action:0.2">
>             <job-tracker>${jobTracker}</job-tracker>
>             <name-node>${nameNode}</name-node>
>             <configuration>
>                 <property>
>                     <name>mapred.job.queue.name</name>
>                     <value>${queueName}</value>
>                 </property>
>             </configuration>
>             <exec>sleep</exec>
>             <argument>15</argument>
>             <capture-output/>
>         </shell>
>         <ok to="join"/>
>         <error to="fail"/>
>     </action>
>       <join name="join" to="end"/>
>     <kill name="fail">
>         <message>Shell action failed, error
> message[${wf:errorMessage(wf:lastErrorNode())}]</message>
>     </kill>
>     <end name="end"/>
> </workflow-app>
>
>
> ------------------------------
> renguihe
>