You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@falcon.apache.org by "Pragya Mittal (JIRA)" <ji...@apache.org> on 2015/06/08 09:04:00 UTC

[jira] [Updated] (FALCON-1260) Instance dependency API produces misleading results

     [ https://issues.apache.org/jira/browse/FALCON-1260?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Pragya Mittal updated FALCON-1260:
----------------------------------
    Description: 
I have the following Falcon entity definitions:
Process :
{code}
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<process name="InstanceDependencyTest--agregator-coord16-4adb02c6" xmlns="uri:falcon:process:0.1">
<clusters>
<cluster name="InstanceDependencyTest--corp-a5b30313">
<validity start="2015-06-06T09:37Z" end="2015-06-06T10:37Z"/>
</cluster>
</clusters>
<parallel>5</parallel>
<order>FIFO</order>
<frequency>minutes(10)</frequency>
<timezone>UTC</timezone>
<inputs>
<input name="inputData" feed="InstanceDependencyTest--raaw-logs16-38bc46f1" start="now(0,-20)" end="now(0,0)"/>
</inputs>
<outputs>
<output name="outputData" feed="InstanceDependencyTest--agregated-logs16-e6cfefc5" instance="now(0,0)"/>
</outputs>
<properties>
<property name="queueName" value="default"/>
<property name="fileTime" value="${formatTime(dateOffset(instanceTime(), 1, 'DAY'), 'yyyy-MMM-dd')}"/>
<property name="user" value="${user()}"/>
</properties>
<workflow path="/tmp/falcon-regression/InstanceDependencyTest/aggregator"/>
<retry policy="periodic" delay="minutes(3)" attempts="3"/>
<ACL owner="pragya" group="dataqa" permission="*"/>
</process>
{code}

Input feed :
{code}
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<feed name="InstanceDependencyTest--raaw-logs16-38bc46f1" description="clicks log" xmlns="uri:falcon:feed:0.1">
<partitions>
<partition name="country"/>
<partition name="colo"/>
</partitions>
<frequency>minutes(5)</frequency>
<timezone>UTC</timezone>
<late-arrival cut-off="hours(6)"/>
<clusters>
<cluster name="InstanceDependencyTest--corp-a5b30313" type="source">
<validity start="2009-02-01T00:00Z" end="2099-05-01T00:00Z"/>
<retention limit="months(9000)" action="delete"/>
</cluster>
</clusters>
<locations>
<location type="data" path="/tmp/falcon-regression/InstanceDependencyTest/input/${YEAR}/${MONTH}/${DAY}/${HOUR}/${MINUTE}"/>
<location type="stats" path="/projects/falcon/clicksStats"/>
<location type="meta" path="/projects/falcon/clicksMetaData"/>
</locations>
<ACL owner="pragya" group="dataqa" permission="*"/>
<schema location="/schema/clicks" provider="protobuf"/>
<properties>
<property name="field1" value="value1"/>
<property name="field2" value="value2"/>
</properties>
</feed>

{code}

Output feed:
{code}
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<feed name="InstanceDependencyTest--agregated-logs16-e6cfefc5" description="clicks log" xmlns="uri:falcon:feed:0.1">
<frequency>minutes(5)</frequency>
<timezone>UTC</timezone>
<late-arrival cut-off="hours(6)"/>
<clusters>
<cluster name="InstanceDependencyTest--corp-a5b30313" type="source">
<validity start="2009-02-01T01:00Z" end="2099-05-01T00:00Z"/>
<retention limit="hours(6)" action="delete"/>
</cluster>
</clusters>
<locations>
<location type="data" path="/tmp/falcon-regression/InstanceDependencyTest/output-data/${YEAR}/${MONTH}/${DAY}/${HOUR}/${MINUTE}"/>
<location type="stats" path="/projects/falcon/clicksStats"/>
<location type="meta" path="/projects/falcon/clicksMetaData"/>
</locations>
<ACL owner="pragya" group="dataqa" permission="*"/>
<schema location="/schema/clicks" provider="protobuf"/>
<properties>
<property name="field1" value="value1"/>
<property name="field2" value="value2"/>
</properties>
</feed>

{code}

Cluster :
{code}
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<cluster name="InstanceDependencyTest--corp-a5b30313" description="" colo="ua1" xmlns="uri:falcon:cluster:0.1">
<interfaces>
<interface type="readonly" endpoint="hdfs://192.168.138.200:8020" version="0.20.2"/>
<interface type="write" endpoint="hdfs://192.168.138.200:8020" version="0.20.2"/>
<interface type="execute" endpoint="192.168.138.200:8032" version="0.20.2"/>
<interface type="workflow" endpoint="http://192.168.138.200:11000/oozie/" version="3.1.0"/>
<interface type="messaging" endpoint="tcp://192.168.138.200:61616?daemon=true" version="5.1.6"/>
</interfaces>
<locations>
<location name="staging" path="/tmp/falcon-regression-staging"/>
<location name="working" path="/tmp/falcon-regression-working"/>
<location name="temp" path="/tmp"/>
</locations>
<ACL owner="pragya" group="dataqa" permission="*"/>
<properties>
<property name="field1" value="value1"/>
<property name="field2" value="value2"/>
</properties>
</cluster>
{code}

Usage :




> Instance dependency API produces misleading results
> ---------------------------------------------------
>
>                 Key: FALCON-1260
>                 URL: https://issues.apache.org/jira/browse/FALCON-1260
>             Project: Falcon
>          Issue Type: Bug
>          Components: feed, process
>         Environment: QA
>            Reporter: Pragya Mittal
>
> I have the following Falcon entity definitions:
> Process :
> {code}
> <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
> <process name="InstanceDependencyTest--agregator-coord16-4adb02c6" xmlns="uri:falcon:process:0.1">
> <clusters>
> <cluster name="InstanceDependencyTest--corp-a5b30313">
> <validity start="2015-06-06T09:37Z" end="2015-06-06T10:37Z"/>
> </cluster>
> </clusters>
> <parallel>5</parallel>
> <order>FIFO</order>
> <frequency>minutes(10)</frequency>
> <timezone>UTC</timezone>
> <inputs>
> <input name="inputData" feed="InstanceDependencyTest--raaw-logs16-38bc46f1" start="now(0,-20)" end="now(0,0)"/>
> </inputs>
> <outputs>
> <output name="outputData" feed="InstanceDependencyTest--agregated-logs16-e6cfefc5" instance="now(0,0)"/>
> </outputs>
> <properties>
> <property name="queueName" value="default"/>
> <property name="fileTime" value="${formatTime(dateOffset(instanceTime(), 1, 'DAY'), 'yyyy-MMM-dd')}"/>
> <property name="user" value="${user()}"/>
> </properties>
> <workflow path="/tmp/falcon-regression/InstanceDependencyTest/aggregator"/>
> <retry policy="periodic" delay="minutes(3)" attempts="3"/>
> <ACL owner="pragya" group="dataqa" permission="*"/>
> </process>
> {code}
> Input feed :
> {code}
> <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
> <feed name="InstanceDependencyTest--raaw-logs16-38bc46f1" description="clicks log" xmlns="uri:falcon:feed:0.1">
> <partitions>
> <partition name="country"/>
> <partition name="colo"/>
> </partitions>
> <frequency>minutes(5)</frequency>
> <timezone>UTC</timezone>
> <late-arrival cut-off="hours(6)"/>
> <clusters>
> <cluster name="InstanceDependencyTest--corp-a5b30313" type="source">
> <validity start="2009-02-01T00:00Z" end="2099-05-01T00:00Z"/>
> <retention limit="months(9000)" action="delete"/>
> </cluster>
> </clusters>
> <locations>
> <location type="data" path="/tmp/falcon-regression/InstanceDependencyTest/input/${YEAR}/${MONTH}/${DAY}/${HOUR}/${MINUTE}"/>
> <location type="stats" path="/projects/falcon/clicksStats"/>
> <location type="meta" path="/projects/falcon/clicksMetaData"/>
> </locations>
> <ACL owner="pragya" group="dataqa" permission="*"/>
> <schema location="/schema/clicks" provider="protobuf"/>
> <properties>
> <property name="field1" value="value1"/>
> <property name="field2" value="value2"/>
> </properties>
> </feed>
> {code}
> Output feed:
> {code}
> <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
> <feed name="InstanceDependencyTest--agregated-logs16-e6cfefc5" description="clicks log" xmlns="uri:falcon:feed:0.1">
> <frequency>minutes(5)</frequency>
> <timezone>UTC</timezone>
> <late-arrival cut-off="hours(6)"/>
> <clusters>
> <cluster name="InstanceDependencyTest--corp-a5b30313" type="source">
> <validity start="2009-02-01T01:00Z" end="2099-05-01T00:00Z"/>
> <retention limit="hours(6)" action="delete"/>
> </cluster>
> </clusters>
> <locations>
> <location type="data" path="/tmp/falcon-regression/InstanceDependencyTest/output-data/${YEAR}/${MONTH}/${DAY}/${HOUR}/${MINUTE}"/>
> <location type="stats" path="/projects/falcon/clicksStats"/>
> <location type="meta" path="/projects/falcon/clicksMetaData"/>
> </locations>
> <ACL owner="pragya" group="dataqa" permission="*"/>
> <schema location="/schema/clicks" provider="protobuf"/>
> <properties>
> <property name="field1" value="value1"/>
> <property name="field2" value="value2"/>
> </properties>
> </feed>
> {code}
> Cluster :
> {code}
> <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
> <cluster name="InstanceDependencyTest--corp-a5b30313" description="" colo="ua1" xmlns="uri:falcon:cluster:0.1">
> <interfaces>
> <interface type="readonly" endpoint="hdfs://192.168.138.200:8020" version="0.20.2"/>
> <interface type="write" endpoint="hdfs://192.168.138.200:8020" version="0.20.2"/>
> <interface type="execute" endpoint="192.168.138.200:8032" version="0.20.2"/>
> <interface type="workflow" endpoint="http://192.168.138.200:11000/oozie/" version="3.1.0"/>
> <interface type="messaging" endpoint="tcp://192.168.138.200:61616?daemon=true" version="5.1.6"/>
> </interfaces>
> <locations>
> <location name="staging" path="/tmp/falcon-regression-staging"/>
> <location name="working" path="/tmp/falcon-regression-working"/>
> <location name="temp" path="/tmp"/>
> </locations>
> <ACL owner="pragya" group="dataqa" permission="*"/>
> <properties>
> <property name="field1" value="value1"/>
> <property name="field2" value="value2"/>
> </properties>
> </cluster>
> {code}
> Usage :



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)